Update Hadrian
[ghc.git] / compiler / coreSyn / CoreSyn.hs
1 {-
2 (c) The University of Glasgow 2006
3 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 -}
5
6 {-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-}
7 {-# LANGUAGE NamedFieldPuns #-}
8
9 -- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection
10 module CoreSyn (
11 -- * Main data types
12 Expr(..), Alt, Bind(..), AltCon(..), Arg,
13 Tickish(..), TickishScoping(..), TickishPlacement(..),
14 CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr,
15 TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), deTagExpr,
16
17 -- * In/Out type synonyms
18 InId, InBind, InExpr, InAlt, InArg, InType, InKind,
19 InBndr, InVar, InCoercion, InTyVar, InCoVar,
20 OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind,
21 OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar,
22
23 -- ** 'Expr' construction
24 mkLet, mkLets, mkLetNonRec, mkLetRec, mkLams,
25 mkApps, mkTyApps, mkCoApps, mkVarApps, mkTyArg,
26
27 mkIntLit, mkIntLitInt,
28 mkWordLit, mkWordLitWord,
29 mkWord64LitWord64, mkInt64LitInt64,
30 mkCharLit, mkStringLit,
31 mkFloatLit, mkFloatLitFloat,
32 mkDoubleLit, mkDoubleLitDouble,
33
34 mkConApp, mkConApp2, mkTyBind, mkCoBind,
35 varToCoreExpr, varsToCoreExprs,
36
37 isId, cmpAltCon, cmpAlt, ltAlt,
38
39 -- ** Simple 'Expr' access functions and predicates
40 bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts,
41 collectBinders, collectTyBinders, collectTyAndValBinders,
42 collectNBinders,
43 collectArgs, collectArgsTicks, flattenBinds,
44
45 exprToType, exprToCoercion_maybe,
46 applyTypeToArg,
47
48 isValArg, isTypeArg, isTyCoArg, valArgCount, valBndrCount,
49 isRuntimeArg, isRuntimeVar,
50
51 -- * Tick-related functions
52 tickishCounts, tickishScoped, tickishScopesLike, tickishFloatable,
53 tickishCanSplit, mkNoCount, mkNoScope,
54 tickishIsCode, tickishPlace,
55 tickishContains,
56
57 -- * Unfolding data types
58 Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..),
59
60 -- ** Constructing 'Unfolding's
61 noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon,
62 unSaturatedOk, needSaturated, boringCxtOk, boringCxtNotOk,
63
64 -- ** Predicates and deconstruction on 'Unfolding'
65 unfoldingTemplate, expandUnfolding_maybe,
66 maybeUnfoldingTemplate, otherCons,
67 isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
68 isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
69 isStableUnfolding, isFragileUnfolding, hasSomeUnfolding,
70 isBootUnfolding,
71 canUnfold, neverUnfoldGuidance, isStableSource,
72
73 -- * Annotated expression data types
74 AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
75
76 -- ** Operations on annotated expressions
77 collectAnnArgs, collectAnnArgsTicks,
78
79 -- ** Operations on annotations
80 deAnnotate, deAnnotate', deAnnAlt, deAnnBind,
81 collectAnnBndrs, collectNAnnBndrs,
82
83 -- * Orphanhood
84 IsOrphan(..), isOrphan, notOrphan, chooseOrphanAnchor,
85
86 -- * Core rule data types
87 CoreRule(..), RuleBase,
88 RuleName, RuleFun, IdUnfoldingFun, InScopeEnv,
89 RuleEnv(..), mkRuleEnv, emptyRuleEnv,
90
91 -- ** Operations on 'CoreRule's
92 ruleArity, ruleName, ruleIdName, ruleActivation,
93 setRuleIdName, ruleModule,
94 isBuiltinRule, isLocalRule, isAutoRule,
95
96 -- * Core vectorisation declarations data type
97 CoreVect(..)
98 ) where
99
100 #include "HsVersions.h"
101
102 import GhcPrelude
103
104 import CostCentre
105 import VarEnv( InScopeSet )
106 import Var
107 import Type
108 import Coercion
109 import Name
110 import NameSet
111 import NameEnv( NameEnv, emptyNameEnv )
112 import Literal
113 import DataCon
114 import Module
115 import TyCon
116 import BasicTypes
117 import DynFlags
118 import Outputable
119 import Util
120 import UniqSet
121 import SrcLoc ( RealSrcSpan, containsSpan )
122 import Binary
123
124 import Data.Data hiding (TyCon)
125 import Data.Int
126 import Data.Word
127
128 infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps`
129 -- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys)
130
131 {-
132 ************************************************************************
133 * *
134 \subsection{The main data types}
135 * *
136 ************************************************************************
137
138 These data types are the heart of the compiler
139 -}
140
141 -- | This is the data type that represents GHCs core intermediate language. Currently
142 -- GHC uses System FC <https://www.microsoft.com/en-us/research/publication/system-f-with-type-equality-coercions/> for this purpose,
143 -- which is closely related to the simpler and better known System F <http://en.wikipedia.org/wiki/System_F>.
144 --
145 -- We get from Haskell source to this Core language in a number of stages:
146 --
147 -- 1. The source code is parsed into an abstract syntax tree, which is represented
148 -- by the data type 'HsExpr.HsExpr' with the names being 'RdrName.RdrNames'
149 --
150 -- 2. This syntax tree is /renamed/, which attaches a 'Unique.Unique' to every 'RdrName.RdrName'
151 -- (yielding a 'Name.Name') to disambiguate identifiers which are lexically identical.
152 -- For example, this program:
153 --
154 -- @
155 -- f x = let f x = x + 1
156 -- in f (x - 2)
157 -- @
158 --
159 -- Would be renamed by having 'Unique's attached so it looked something like this:
160 --
161 -- @
162 -- f_1 x_2 = let f_3 x_4 = x_4 + 1
163 -- in f_3 (x_2 - 2)
164 -- @
165 -- But see Note [Shadowing] below.
166 --
167 -- 3. The resulting syntax tree undergoes type checking (which also deals with instantiating
168 -- type class arguments) to yield a 'HsExpr.HsExpr' type that has 'Id.Id' as it's names.
169 --
170 -- 4. Finally the syntax tree is /desugared/ from the expressive 'HsExpr.HsExpr' type into
171 -- this 'Expr' type, which has far fewer constructors and hence is easier to perform
172 -- optimization, analysis and code generation on.
173 --
174 -- The type parameter @b@ is for the type of binders in the expression tree.
175 --
176 -- The language consists of the following elements:
177 --
178 -- * Variables
179 --
180 -- * Primitive literals
181 --
182 -- * Applications: note that the argument may be a 'Type'.
183 -- See Note [CoreSyn let/app invariant]
184 -- See Note [Levity polymorphism invariants]
185 --
186 -- * Lambda abstraction
187 -- See Note [Levity polymorphism invariants]
188 --
189 -- * Recursive and non recursive @let@s. Operationally
190 -- this corresponds to allocating a thunk for the things
191 -- bound and then executing the sub-expression.
192 --
193 -- #top_level_invariant#
194 -- #letrec_invariant#
195 --
196 -- The right hand sides of all top-level and recursive @let@s
197 -- /must/ be of lifted type (see "Type#type_classification" for
198 -- the meaning of /lifted/ vs. /unlifted/). There is one exception
199 -- to this rule, top-level @let@s are allowed to bind primitive
200 -- string literals, see Note [CoreSyn top-level string literals].
201 --
202 -- See Note [CoreSyn let/app invariant]
203 -- See Note [Levity polymorphism invariants]
204 --
205 -- #type_let#
206 -- We allow a /non-recursive/ let to bind a type variable, thus:
207 --
208 -- > Let (NonRec tv (Type ty)) body
209 --
210 -- This can be very convenient for postponing type substitutions until
211 -- the next run of the simplifier.
212 --
213 -- At the moment, the rest of the compiler only deals with type-let
214 -- in a Let expression, rather than at top level. We may want to revist
215 -- this choice.
216 --
217 -- * Case expression. Operationally this corresponds to evaluating
218 -- the scrutinee (expression examined) to weak head normal form
219 -- and then examining at most one level of resulting constructor (i.e. you
220 -- cannot do nested pattern matching directly with this).
221 --
222 -- The binder gets bound to the value of the scrutinee,
223 -- and the 'Type' must be that of all the case alternatives
224 --
225 -- #case_invariants#
226 -- This is one of the more complicated elements of the Core language,
227 -- and comes with a number of restrictions:
228 --
229 -- 1. The list of alternatives may be empty;
230 -- See Note [Empty case alternatives]
231 --
232 -- 2. The 'DEFAULT' case alternative must be first in the list,
233 -- if it occurs at all.
234 --
235 -- 3. The remaining cases are in order of increasing
236 -- tag (for 'DataAlts') or
237 -- lit (for 'LitAlts').
238 -- This makes finding the relevant constructor easy,
239 -- and makes comparison easier too.
240 --
241 -- 4. The list of alternatives must be exhaustive. An /exhaustive/ case
242 -- does not necessarily mention all constructors:
243 --
244 -- @
245 -- data Foo = Red | Green | Blue
246 -- ... case x of
247 -- Red -> True
248 -- other -> f (case x of
249 -- Green -> ...
250 -- Blue -> ... ) ...
251 -- @
252 --
253 -- The inner case does not need a @Red@ alternative, because @x@
254 -- can't be @Red@ at that program point.
255 --
256 -- 5. Floating-point values must not be scrutinised against literals.
257 -- See Trac #9238 and Note [Rules for floating-point comparisons]
258 -- in PrelRules for rationale.
259 --
260 -- * Cast an expression to a particular type.
261 -- This is used to implement @newtype@s (a @newtype@ constructor or
262 -- destructor just becomes a 'Cast' in Core) and GADTs.
263 --
264 -- * Notes. These allow general information to be added to expressions
265 -- in the syntax tree
266 --
267 -- * A type: this should only show up at the top level of an Arg
268 --
269 -- * A coercion
270
271 -- If you edit this type, you may need to update the GHC formalism
272 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
273 data Expr b
274 = Var Id
275 | Lit Literal
276 | App (Expr b) (Arg b)
277 | Lam b (Expr b)
278 | Let (Bind b) (Expr b)
279 | Case (Expr b) b Type [Alt b] -- See #case_invariants#
280 | Cast (Expr b) Coercion
281 | Tick (Tickish Id) (Expr b)
282 | Type Type
283 | Coercion Coercion
284 deriving Data
285
286 -- | Type synonym for expressions that occur in function argument positions.
287 -- Only 'Arg' should contain a 'Type' at top level, general 'Expr' should not
288 type Arg b = Expr b
289
290 -- | A case split alternative. Consists of the constructor leading to the alternative,
291 -- the variables bound from the constructor, and the expression to be executed given that binding.
292 -- The default alternative is @(DEFAULT, [], rhs)@
293
294 -- If you edit this type, you may need to update the GHC formalism
295 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
296 type Alt b = (AltCon, [b], Expr b)
297
298 -- | A case alternative constructor (i.e. pattern match)
299
300 -- If you edit this type, you may need to update the GHC formalism
301 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
302 data AltCon
303 = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
304 -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
305
306 | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
307 -- Invariant: always an *unlifted* literal
308 -- See Note [Literal alternatives]
309
310 | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
311 deriving (Eq, Data)
312
313 -- This instance is a bit shady. It can only be used to compare AltCons for
314 -- a single type constructor. Fortunately, it seems quite unlikely that we'll
315 -- ever need to compare AltCons for different type constructors.
316 -- The instance adheres to the order described in [CoreSyn case invariants]
317 instance Ord AltCon where
318 compare (DataAlt con1) (DataAlt con2) =
319 ASSERT( dataConTyCon con1 == dataConTyCon con2 )
320 compare (dataConTag con1) (dataConTag con2)
321 compare (DataAlt _) _ = GT
322 compare _ (DataAlt _) = LT
323 compare (LitAlt l1) (LitAlt l2) = compare l1 l2
324 compare (LitAlt _) DEFAULT = GT
325 compare DEFAULT DEFAULT = EQ
326 compare DEFAULT _ = LT
327
328 -- | Binding, used for top level bindings in a module and local bindings in a @let@.
329
330 -- If you edit this type, you may need to update the GHC formalism
331 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
332 data Bind b = NonRec b (Expr b)
333 | Rec [(b, (Expr b))]
334 deriving Data
335
336 {-
337 Note [Shadowing]
338 ~~~~~~~~~~~~~~~~
339 While various passes attempt to rename on-the-fly in a manner that
340 avoids "shadowing" (thereby simplifying downstream optimizations),
341 neither the simplifier nor any other pass GUARANTEES that shadowing is
342 avoided. Thus, all passes SHOULD work fine even in the presence of
343 arbitrary shadowing in their inputs.
344
345 In particular, scrutinee variables `x` in expressions of the form
346 `Case e x t` are often renamed to variables with a prefix
347 "wild_". These "wild" variables may appear in the body of the
348 case-expression, and further, may be shadowed within the body.
349
350 So the Unique in a Var is not really unique at all. Still, it's very
351 useful to give a constant-time equality/ordering for Vars, and to give
352 a key that can be used to make sets of Vars (VarSet), or mappings from
353 Vars to other things (VarEnv). Moreover, if you do want to eliminate
354 shadowing, you can give a new Unique to an Id without changing its
355 printable name, which makes debugging easier.
356
357 Note [Literal alternatives]
358 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
359 Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
360 We have one literal, a literal Integer, that is lifted, and we don't
361 allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
362 (see Trac #5603) if you say
363 case 3 of
364 S# x -> ...
365 J# _ _ -> ...
366 (where S#, J# are the constructors for Integer) we don't want the
367 simplifier calling findAlt with argument (LitAlt 3). No no. Integer
368 literals are an opaque encoding of an algebraic data type, not of
369 an unlifted literal, like all the others.
370
371 Also, we do not permit case analysis with literal patterns on floating-point
372 types. See Trac #9238 and Note [Rules for floating-point comparisons] in
373 PrelRules for the rationale for this restriction.
374
375 -------------------------- CoreSyn INVARIANTS ---------------------------
376
377 Note [CoreSyn top-level invariant]
378 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
379 See #toplevel_invariant#
380
381 Note [CoreSyn letrec invariant]
382 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
383 See #letrec_invariant#
384
385 Note [CoreSyn top-level string literals]
386 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
387 As an exception to the usual rule that top-level binders must be lifted,
388 we allow binding primitive string literals (of type Addr#) of type Addr# at the
389 top level. This allows us to share string literals earlier in the pipeline and
390 crucially allows other optimizations in the Core2Core pipeline to fire.
391 Consider,
392
393 f n = let a::Addr# = "foo"#
394 in \x -> blah
395
396 In order to be able to inline `f`, we would like to float `a` to the top.
397 Another option would be to inline `a`, but that would lead to duplicating string
398 literals, which we want to avoid. See Trac #8472.
399
400 The solution is simply to allow top-level unlifted binders. We can't allow
401 arbitrary unlifted expression at the top-level though, unlifted binders cannot
402 be thunks, so we just allow string literals.
403
404 It is important to note that top-level primitive string literals cannot be
405 wrapped in Ticks, as is otherwise done with lifted bindings. CoreToStg expects
406 to see just a plain (Lit (MachStr ...)) expression on the RHS of primitive
407 string bindings; anything else and things break. CoreLint checks this invariant.
408
409 Also see Note [Compilation plan for top-level string literals].
410
411 Note [Compilation plan for top-level string literals]
412 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
413 Here is a summary on how top-level string literals are handled by various
414 parts of the compilation pipeline.
415
416 * In the source language, there is no way to bind a primitive string literal
417 at the top leve.
418
419 * In Core, we have a special rule that permits top-level Addr# bindings. See
420 Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
421 new top-level string literals.
422
423 * In STG, top-level string literals are explicitly represented in the syntax
424 tree.
425
426 * A top-level string literal may end up exported from a module. In this case,
427 in the object file, the content of the exported literal is given a label with
428 the _bytes suffix.
429
430 Note [CoreSyn let/app invariant]
431 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
432 The let/app invariant
433 the right hand side of a non-recursive 'Let', and
434 the argument of an 'App',
435 /may/ be of unlifted type, but only if
436 the expression is ok-for-speculation
437 or the 'Let' is for a join point.
438
439 This means that the let can be floated around
440 without difficulty. For example, this is OK:
441
442 y::Int# = x +# 1#
443
444 But this is not, as it may affect termination if the
445 expression is floated out:
446
447 y::Int# = fac 4#
448
449 In this situation you should use @case@ rather than a @let@. The function
450 'CoreUtils.needsCaseBinding' can help you determine which to generate, or
451 alternatively use 'MkCore.mkCoreLet' rather than this constructor directly,
452 which will generate a @case@ if necessary
453
454 The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in
455 coreSyn/MkCore.
456
457 Note [CoreSyn case invariants]
458 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
459 See #case_invariants#
460
461 Note [Levity polymorphism invariants]
462 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
463 The levity-polymorphism invariants are these (as per "Levity Polymorphism",
464 PLDI '17):
465
466 * The type of a term-binder must not be levity-polymorphic,
467 unless it is a let(rec)-bound join point
468 (see Note [Invariants on join points])
469
470 * The type of the argument of an App must not be levity-polymorphic.
471
472 A type (t::TYPE r) is "levity polymorphic" if 'r' has any free variables.
473
474 For example
475 \(r::RuntimeRep). \(a::TYPE r). \(x::a). e
476 is illegal because x's type has kind (TYPE r), which has 'r' free.
477
478 See Note [Levity polymorphism checking] in DsMonad to see where these
479 invariants are established for user-written code.
480
481 Note [CoreSyn let goal]
482 ~~~~~~~~~~~~~~~~~~~~~~~
483 * The simplifier tries to ensure that if the RHS of a let is a constructor
484 application, its arguments are trivial, so that the constructor can be
485 inlined vigorously.
486
487 Note [Type let]
488 ~~~~~~~~~~~~~~~
489 See #type_let#
490
491 Note [Empty case alternatives]
492 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
493 The alternatives of a case expression should be exhaustive. But
494 this exhaustive list can be empty!
495
496 * A case expression can have empty alternatives if (and only if) the
497 scrutinee is bound to raise an exception or diverge. When do we know
498 this? See Note [Bottoming expressions] in CoreUtils.
499
500 * The possiblity of empty alternatives is one reason we need a type on
501 the case expression: if the alternatives are empty we can't get the
502 type from the alternatives!
503
504 * In the case of empty types (see Note [Bottoming expressions]), say
505 data T
506 we do NOT want to replace
507 case (x::T) of Bool {} --> error Bool "Inaccessible case"
508 because x might raise an exception, and *that*'s what we want to see!
509 (Trac #6067 is an example.) To preserve semantics we'd have to say
510 x `seq` error Bool "Inaccessible case"
511 but the 'seq' is just a case, so we are back to square 1. Or I suppose
512 we could say
513 x |> UnsafeCoerce T Bool
514 but that loses all trace of the fact that this originated with an empty
515 set of alternatives.
516
517 * We can use the empty-alternative construct to coerce error values from
518 one type to another. For example
519
520 f :: Int -> Int
521 f n = error "urk"
522
523 g :: Int -> (# Char, Bool #)
524 g x = case f x of { 0 -> ..., n -> ... }
525
526 Then if we inline f in g's RHS we get
527 case (error Int "urk") of (# Char, Bool #) { ... }
528 and we can discard the alternatives since the scrutinee is bottom to give
529 case (error Int "urk") of (# Char, Bool #) {}
530
531 This is nicer than using an unsafe coerce between Int ~ (# Char,Bool #),
532 if for no other reason that we don't need to instantiate the (~) at an
533 unboxed type.
534
535 * We treat a case expression with empty alternatives as trivial iff
536 its scrutinee is (see CoreUtils.exprIsTrivial). This is actually
537 important; see Note [Empty case is trivial] in CoreUtils
538
539 * An empty case is replaced by its scrutinee during the CoreToStg
540 conversion; remember STG is un-typed, so there is no need for
541 the empty case to do the type conversion.
542
543 Note [Join points]
544 ~~~~~~~~~~~~~~~~~~
545 In Core, a *join point* is a specially tagged function whose only occurrences
546 are saturated tail calls. A tail call can appear in these places:
547
548 1. In the branches (not the scrutinee) of a case
549 2. Underneath a let (value or join point)
550 3. Inside another join point
551
552 We write a join-point declaration as
553 join j @a @b x y = e1 in e2,
554 like a let binding but with "join" instead (or "join rec" for "let rec"). Note
555 that we put the parameters before the = rather than using lambdas; this is
556 because it's relevant how many parameters the join point takes *as a join
557 point.* This number is called the *join arity,* distinct from arity because it
558 counts types as well as values. Note that a join point may return a lambda! So
559 join j x = x + 1
560 is different from
561 join j = \x -> x + 1
562 The former has join arity 1, while the latter has join arity 0.
563
564 The identifier for a join point is called a join id or a *label.* An invocation
565 is called a *jump.* We write a jump using the jump keyword:
566
567 jump j 3
568
569 The words *label* and *jump* are evocative of assembly code (or Cmm) for a
570 reason: join points are indeed compiled as labeled blocks, and jumps become
571 actual jumps (plus argument passing and stack adjustment). There is no closure
572 allocated and only a fraction of the function-call overhead. Hence we would
573 like as many functions as possible to become join points (see OccurAnal) and
574 the type rules for join points ensure we preserve the properties that make them
575 efficient.
576
577 In the actual AST, a join point is indicated by the IdDetails of the binder: a
578 local value binding gets 'VanillaId' but a join point gets a 'JoinId' with its
579 join arity.
580
581 For more details, see the paper:
582
583 Luke Maurer, Paul Downen, Zena Ariola, and Simon Peyton Jones. "Compiling
584 without continuations." Submitted to PLDI'17.
585
586 https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/
587
588 Note [Invariants on join points]
589 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
590 Join points must follow these invariants:
591
592 1. All occurrences must be tail calls. Each of these tail calls must pass the
593 same number of arguments, counting both types and values; we call this the
594 "join arity" (to distinguish from regular arity, which only counts values).
595
596 2. For join arity n, the right-hand side must begin with at least n lambdas.
597 No ticks, no casts, just lambdas! C.f. CoreUtils.joinRhsArity.
598
599 2a. Moreover, this same constraint applies to any unfolding of the binder.
600 Reason: if we want to push a continuation into the RHS we must push it
601 into the unfolding as well.
602
603 3. If the binding is recursive, then all other bindings in the recursive group
604 must also be join points.
605
606 4. The binding's type must not be polymorphic in its return type (as defined
607 in Note [The polymorphism rule of join points]).
608
609 However, join points have simpler invariants in other ways
610
611 5. A join point can have an unboxed type without the RHS being
612 ok-for-speculation (i.e. drop the let/app invariant)
613 e.g. let j :: Int# = factorial x in ...
614
615 6. A join point can have a levity-polymorphic RHS
616 e.g. let j :: r :: TYPE l = fail void# in ...
617 This happened in an intermediate program Trac #13394
618
619 Examples:
620
621 join j1 x = 1 + x in jump j (jump j x) -- Fails 1: non-tail call
622 join j1' x = 1 + x in if even a
623 then jump j1 a
624 else jump j1 a b -- Fails 1: inconsistent calls
625 join j2 x = flip (+) x in j2 1 2 -- Fails 2: not enough lambdas
626 join j2' x = \y -> x + y in j3 1 -- Passes: extra lams ok
627 join j @a (x :: a) = x -- Fails 4: polymorphic in ret type
628
629 Invariant 1 applies to left-hand sides of rewrite rules, so a rule for a join
630 point must have an exact call as its LHS.
631
632 Strictly speaking, invariant 3 is redundant, since a call from inside a lazy
633 binding isn't a tail call. Since a let-bound value can't invoke a free join
634 point, then, they can't be mutually recursive. (A Core binding group *can*
635 include spurious extra bindings if the occurrence analyser hasn't run, so
636 invariant 3 does still need to be checked.) For the rigorous definition of
637 "tail call", see Section 3 of the paper (Note [Join points]).
638
639 Invariant 4 is subtle; see Note [The polymorphism rule of join points].
640
641 Core Lint will check these invariants, anticipating that any binder whose
642 OccInfo is marked AlwaysTailCalled will become a join point as soon as the
643 simplifier (or simpleOptPgm) runs.
644
645 Note [The type of a join point]
646 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
647 A join point has the same type it would have as a function. That is, if it takes
648 an Int and a Bool and its body produces a String, its type is `Int -> Bool ->
649 String`. Natural as this may seem, it can be awkward. A join point shouldn't be
650 thought to "return" in the same sense a function does---a jump is one-way. This
651 is crucial for understanding how case-of-case interacts with join points:
652
653 case (join
654 j :: Int -> Bool -> String
655 j x y = ...
656 in
657 jump j z w) of
658 "" -> True
659 _ -> False
660
661 The simplifier will pull the case into the join point (see Note [Case-of-case
662 and join points] in Simplify):
663
664 join
665 j :: Int -> Bool -> Bool -- changed!
666 j x y = case ... of "" -> True
667 _ -> False
668 in
669 jump j z w
670
671 The body of the join point now returns a Bool, so the label `j` has to have its
672 type updated accordingly. Inconvenient though this may be, it has the advantage
673 that 'CoreUtils.exprType' can still return a type for any expression, including
674 a jump.
675
676 This differs from the paper (see Note [Invariants on join points]). In the
677 paper, we instead give j the type `Int -> Bool -> forall a. a`. Then each jump
678 carries the "return type" as a parameter, exactly the way other non-returning
679 functions like `error` work:
680
681 case (join
682 j :: Int -> Bool -> forall a. a
683 j x y = ...
684 in
685 jump j z w @String) of
686 "" -> True
687 _ -> False
688
689 Now we can move the case inward and we only have to change the jump:
690
691 join
692 j :: Int -> Bool -> forall a. a
693 j x y = case ... of "" -> True
694 _ -> False
695 in
696 jump j z w @Bool
697
698 (Core Lint would still check that the body of the join point has the right type;
699 that type would simply not be reflected in the join id.)
700
701 Note [The polymorphism rule of join points]
702 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
703 Invariant 4 of Note [Invariants on join points] forbids a join point to be
704 polymorphic in its return type. That is, if its type is
705
706 forall a1 ... ak. t1 -> ... -> tn -> r
707
708 where its join arity is k+n, none of the type parameters ai may occur free in r.
709 The most direct explanation is that given
710
711 join j @a1 ... @ak x1 ... xn = e1 in e2
712
713 our typing rules require `e1` and `e2` to have the same type. Therefore the type
714 of `e1`---the return type of the join point---must be the same as the type of
715 e2. Since the type variables aren't bound in `e2`, its type can't include them,
716 and thus neither can the type of `e1`.
717
718 There's a deeper explanation in terms of the sequent calculus in Section 5.3 of
719 a previous paper:
720
721 Paul Downen, Luke Maurer, Zena Ariola, and Simon Peyton Jones. "Sequent
722 calculus as a compiler intermediate language." ICFP'16.
723
724 https://www.microsoft.com/en-us/research/wp-content/uploads/2016/04/sequent-calculus-icfp16.pdf
725
726 The quick version: Consider the CPS term (the paper uses the sequent calculus,
727 but we can translate readily):
728
729 \k -> join j @a1 ... @ak x1 ... xn = e1 k in e2 k
730
731 Since `j` is a join point, it doesn't bind a continuation variable but reuses
732 the variable `k` from the context. But the parameters `ai` are not in `k`'s
733 scope, and `k`'s type determines the return type of `j`; thus the `ai`s don't
734 appear in the return type of `j`. (Also, since `e1` and `e2` are passed the same
735 continuation, they must have the same type; hence the direct explanation above.)
736
737 ************************************************************************
738 * *
739 In/Out type synonyms
740 * *
741 ********************************************************************* -}
742
743 {- Many passes apply a substitution, and it's very handy to have type
744 synonyms to remind us whether or not the substitution has been applied -}
745
746 -- Pre-cloning or substitution
747 type InBndr = CoreBndr
748 type InType = Type
749 type InKind = Kind
750 type InBind = CoreBind
751 type InExpr = CoreExpr
752 type InAlt = CoreAlt
753 type InArg = CoreArg
754 type InCoercion = Coercion
755
756 -- Post-cloning or substitution
757 type OutBndr = CoreBndr
758 type OutType = Type
759 type OutKind = Kind
760 type OutCoercion = Coercion
761 type OutBind = CoreBind
762 type OutExpr = CoreExpr
763 type OutAlt = CoreAlt
764 type OutArg = CoreArg
765
766
767 {- *********************************************************************
768 * *
769 Ticks
770 * *
771 ************************************************************************
772 -}
773
774 -- | Allows attaching extra information to points in expressions
775
776 -- If you edit this type, you may need to update the GHC formalism
777 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
778 data Tickish id =
779 -- | An @{-# SCC #-}@ profiling annotation, either automatically
780 -- added by the desugarer as a result of -auto-all, or added by
781 -- the user.
782 ProfNote {
783 profNoteCC :: CostCentre, -- ^ the cost centre
784 profNoteCount :: !Bool, -- ^ bump the entry count?
785 profNoteScope :: !Bool -- ^ scopes over the enclosed expression
786 -- (i.e. not just a tick)
787 }
788
789 -- | A "tick" used by HPC to track the execution of each
790 -- subexpression in the original source code.
791 | HpcTick {
792 tickModule :: Module,
793 tickId :: !Int
794 }
795
796 -- | A breakpoint for the GHCi debugger. This behaves like an HPC
797 -- tick, but has a list of free variables which will be available
798 -- for inspection in GHCi when the program stops at the breakpoint.
799 --
800 -- NB. we must take account of these Ids when (a) counting free variables,
801 -- and (b) substituting (don't substitute for them)
802 | Breakpoint
803 { breakpointId :: !Int
804 , breakpointFVs :: [id] -- ^ the order of this list is important:
805 -- it matches the order of the lists in the
806 -- appropriate entry in HscTypes.ModBreaks.
807 --
808 -- Careful about substitution! See
809 -- Note [substTickish] in CoreSubst.
810 }
811
812 -- | A source note.
813 --
814 -- Source notes are pure annotations: Their presence should neither
815 -- influence compilation nor execution. The semantics are given by
816 -- causality: The presence of a source note means that a local
817 -- change in the referenced source code span will possibly provoke
818 -- the generated code to change. On the flip-side, the functionality
819 -- of annotated code *must* be invariant against changes to all
820 -- source code *except* the spans referenced in the source notes
821 -- (see "Causality of optimized Haskell" paper for details).
822 --
823 -- Therefore extending the scope of any given source note is always
824 -- valid. Note that it is still undesirable though, as this reduces
825 -- their usefulness for debugging and profiling. Therefore we will
826 -- generally try only to make use of this property where it is
827 -- necessary to enable optimizations.
828 | SourceNote
829 { sourceSpan :: RealSrcSpan -- ^ Source covered
830 , sourceName :: String -- ^ Name for source location
831 -- (uses same names as CCs)
832 }
833
834 deriving (Eq, Ord, Data)
835
836 -- | A "counting tick" (where tickishCounts is True) is one that
837 -- counts evaluations in some way. We cannot discard a counting tick,
838 -- and the compiler should preserve the number of counting ticks as
839 -- far as possible.
840 --
841 -- However, we still allow the simplifier to increase or decrease
842 -- sharing, so in practice the actual number of ticks may vary, except
843 -- that we never change the value from zero to non-zero or vice versa.
844 tickishCounts :: Tickish id -> Bool
845 tickishCounts n@ProfNote{} = profNoteCount n
846 tickishCounts HpcTick{} = True
847 tickishCounts Breakpoint{} = True
848 tickishCounts _ = False
849
850
851 -- | Specifies the scoping behaviour of ticks. This governs the
852 -- behaviour of ticks that care about the covered code and the cost
853 -- associated with it. Important for ticks relating to profiling.
854 data TickishScoping =
855 -- | No scoping: The tick does not care about what code it
856 -- covers. Transformations can freely move code inside as well as
857 -- outside without any additional annotation obligations
858 NoScope
859
860 -- | Soft scoping: We want all code that is covered to stay
861 -- covered. Note that this scope type does not forbid
862 -- transformations from happening, as as long as all results of
863 -- the transformations are still covered by this tick or a copy of
864 -- it. For example
865 --
866 -- let x = tick<...> (let y = foo in bar) in baz
867 -- ===>
868 -- let x = tick<...> bar; y = tick<...> foo in baz
869 --
870 -- Is a valid transformation as far as "bar" and "foo" is
871 -- concerned, because both still are scoped over by the tick.
872 --
873 -- Note though that one might object to the "let" not being
874 -- covered by the tick any more. However, we are generally lax
875 -- with this - constant costs don't matter too much, and given
876 -- that the "let" was effectively merged we can view it as having
877 -- lost its identity anyway.
878 --
879 -- Also note that this scoping behaviour allows floating a tick
880 -- "upwards" in pretty much any situation. For example:
881 --
882 -- case foo of x -> tick<...> bar
883 -- ==>
884 -- tick<...> case foo of x -> bar
885 --
886 -- While this is always leagl, we want to make a best effort to
887 -- only make us of this where it exposes transformation
888 -- opportunities.
889 | SoftScope
890
891 -- | Cost centre scoping: We don't want any costs to move to other
892 -- cost-centre stacks. This means we not only want no code or cost
893 -- to get moved out of their cost centres, but we also object to
894 -- code getting associated with new cost-centre ticks - or
895 -- changing the order in which they get applied.
896 --
897 -- A rule of thumb is that we don't want any code to gain new
898 -- annotations. However, there are notable exceptions, for
899 -- example:
900 --
901 -- let f = \y -> foo in tick<...> ... (f x) ...
902 -- ==>
903 -- tick<...> ... foo[x/y] ...
904 --
905 -- In-lining lambdas like this is always legal, because inlining a
906 -- function does not change the cost-centre stack when the
907 -- function is called.
908 | CostCentreScope
909
910 deriving (Eq)
911
912 -- | Returns the intended scoping rule for a Tickish
913 tickishScoped :: Tickish id -> TickishScoping
914 tickishScoped n@ProfNote{}
915 | profNoteScope n = CostCentreScope
916 | otherwise = NoScope
917 tickishScoped HpcTick{} = NoScope
918 tickishScoped Breakpoint{} = CostCentreScope
919 -- Breakpoints are scoped: eventually we're going to do call
920 -- stacks, but also this helps prevent the simplifier from moving
921 -- breakpoints around and changing their result type (see #1531).
922 tickishScoped SourceNote{} = SoftScope
923
924 -- | Returns whether the tick scoping rule is at least as permissive
925 -- as the given scoping rule.
926 tickishScopesLike :: Tickish id -> TickishScoping -> Bool
927 tickishScopesLike t scope = tickishScoped t `like` scope
928 where NoScope `like` _ = True
929 _ `like` NoScope = False
930 SoftScope `like` _ = True
931 _ `like` SoftScope = False
932 CostCentreScope `like` _ = True
933
934 -- | Returns @True@ for ticks that can be floated upwards easily even
935 -- where it might change execution counts, such as:
936 --
937 -- Just (tick<...> foo)
938 -- ==>
939 -- tick<...> (Just foo)
940 --
941 -- This is a combination of @tickishSoftScope@ and
942 -- @tickishCounts@. Note that in principle splittable ticks can become
943 -- floatable using @mkNoTick@ -- even though there's currently no
944 -- tickish for which that is the case.
945 tickishFloatable :: Tickish id -> Bool
946 tickishFloatable t = t `tickishScopesLike` SoftScope && not (tickishCounts t)
947
948 -- | Returns @True@ for a tick that is both counting /and/ scoping and
949 -- can be split into its (tick, scope) parts using 'mkNoScope' and
950 -- 'mkNoTick' respectively.
951 tickishCanSplit :: Tickish id -> Bool
952 tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True}
953 = True
954 tickishCanSplit _ = False
955
956 mkNoCount :: Tickish id -> Tickish id
957 mkNoCount n | not (tickishCounts n) = n
958 | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!"
959 mkNoCount n@ProfNote{} = n {profNoteCount = False}
960 mkNoCount _ = panic "mkNoCount: Undefined split!"
961
962 mkNoScope :: Tickish id -> Tickish id
963 mkNoScope n | tickishScoped n == NoScope = n
964 | not (tickishCanSplit n) = panic "mkNoScope: Cannot split!"
965 mkNoScope n@ProfNote{} = n {profNoteScope = False}
966 mkNoScope _ = panic "mkNoScope: Undefined split!"
967
968 -- | Return @True@ if this source annotation compiles to some backend
969 -- code. Without this flag, the tickish is seen as a simple annotation
970 -- that does not have any associated evaluation code.
971 --
972 -- What this means that we are allowed to disregard the tick if doing
973 -- so means that we can skip generating any code in the first place. A
974 -- typical example is top-level bindings:
975 --
976 -- foo = tick<...> \y -> ...
977 -- ==>
978 -- foo = \y -> tick<...> ...
979 --
980 -- Here there is just no operational difference between the first and
981 -- the second version. Therefore code generation should simply
982 -- translate the code as if it found the latter.
983 tickishIsCode :: Tickish id -> Bool
984 tickishIsCode SourceNote{} = False
985 tickishIsCode _tickish = True -- all the rest for now
986
987
988 -- | Governs the kind of expression that the tick gets placed on when
989 -- annotating for example using @mkTick@. If we find that we want to
990 -- put a tickish on an expression ruled out here, we try to float it
991 -- inwards until we find a suitable expression.
992 data TickishPlacement =
993
994 -- | Place ticks exactly on run-time expressions. We can still
995 -- move the tick through pure compile-time constructs such as
996 -- other ticks, casts or type lambdas. This is the most
997 -- restrictive placement rule for ticks, as all tickishs have in
998 -- common that they want to track runtime processes. The only
999 -- legal placement rule for counting ticks.
1000 PlaceRuntime
1001
1002 -- | As @PlaceRuntime@, but we float the tick through all
1003 -- lambdas. This makes sense where there is little difference
1004 -- between annotating the lambda and annotating the lambda's code.
1005 | PlaceNonLam
1006
1007 -- | In addition to floating through lambdas, cost-centre style
1008 -- tickishs can also be moved from constructors, non-function
1009 -- variables and literals. For example:
1010 --
1011 -- let x = scc<...> C (scc<...> y) (scc<...> 3) in ...
1012 --
1013 -- Neither the constructor application, the variable or the
1014 -- literal are likely to have any cost worth mentioning. And even
1015 -- if y names a thunk, the call would not care about the
1016 -- evaluation context. Therefore removing all annotations in the
1017 -- above example is safe.
1018 | PlaceCostCentre
1019
1020 deriving (Eq)
1021
1022 -- | Placement behaviour we want for the ticks
1023 tickishPlace :: Tickish id -> TickishPlacement
1024 tickishPlace n@ProfNote{}
1025 | profNoteCount n = PlaceRuntime
1026 | otherwise = PlaceCostCentre
1027 tickishPlace HpcTick{} = PlaceRuntime
1028 tickishPlace Breakpoint{} = PlaceRuntime
1029 tickishPlace SourceNote{} = PlaceNonLam
1030
1031 -- | Returns whether one tick "contains" the other one, therefore
1032 -- making the second tick redundant.
1033 tickishContains :: Eq b => Tickish b -> Tickish b -> Bool
1034 tickishContains (SourceNote sp1 n1) (SourceNote sp2 n2)
1035 = containsSpan sp1 sp2 && n1 == n2
1036 -- compare the String last
1037 tickishContains t1 t2
1038 = t1 == t2
1039
1040 {-
1041 ************************************************************************
1042 * *
1043 Orphans
1044 * *
1045 ************************************************************************
1046 -}
1047
1048 -- | Is this instance an orphan? If it is not an orphan, contains an 'OccName'
1049 -- witnessing the instance's non-orphanhood.
1050 -- See Note [Orphans]
1051 data IsOrphan
1052 = IsOrphan
1053 | NotOrphan OccName -- The OccName 'n' witnesses the instance's non-orphanhood
1054 -- In that case, the instance is fingerprinted as part
1055 -- of the definition of 'n's definition
1056 deriving Data
1057
1058 -- | Returns true if 'IsOrphan' is orphan.
1059 isOrphan :: IsOrphan -> Bool
1060 isOrphan IsOrphan = True
1061 isOrphan _ = False
1062
1063 -- | Returns true if 'IsOrphan' is not an orphan.
1064 notOrphan :: IsOrphan -> Bool
1065 notOrphan NotOrphan{} = True
1066 notOrphan _ = False
1067
1068 chooseOrphanAnchor :: NameSet -> IsOrphan
1069 -- Something (rule, instance) is relate to all the Names in this
1070 -- list. Choose one of them to be an "anchor" for the orphan. We make
1071 -- the choice deterministic to avoid gratuitious changes in the ABI
1072 -- hash (Trac #4012). Specifically, use lexicographic comparison of
1073 -- OccName rather than comparing Uniques
1074 --
1075 -- NB: 'minimum' use Ord, and (Ord OccName) works lexicographically
1076 --
1077 chooseOrphanAnchor local_names
1078 | isEmptyNameSet local_names = IsOrphan
1079 | otherwise = NotOrphan (minimum occs)
1080 where
1081 occs = map nameOccName $ nonDetEltsUniqSet local_names
1082 -- It's OK to use nonDetEltsUFM here, see comments above
1083
1084 instance Binary IsOrphan where
1085 put_ bh IsOrphan = putByte bh 0
1086 put_ bh (NotOrphan n) = do
1087 putByte bh 1
1088 put_ bh n
1089 get bh = do
1090 h <- getByte bh
1091 case h of
1092 0 -> return IsOrphan
1093 _ -> do
1094 n <- get bh
1095 return $ NotOrphan n
1096
1097 {-
1098 Note [Orphans]
1099 ~~~~~~~~~~~~~~
1100 Class instances, rules, and family instances are divided into orphans
1101 and non-orphans. Roughly speaking, an instance/rule is an orphan if
1102 its left hand side mentions nothing defined in this module. Orphan-hood
1103 has two major consequences
1104
1105 * A module that contains orphans is called an "orphan module". If
1106 the module being compiled depends (transitively) on an oprhan
1107 module M, then M.hi is read in regardless of whether M is oherwise
1108 needed. This is to ensure that we don't miss any instance decls in
1109 M. But it's painful, because it means we need to keep track of all
1110 the orphan modules below us.
1111
1112 * A non-orphan is not finger-printed separately. Instead, for
1113 fingerprinting purposes it is treated as part of the entity it
1114 mentions on the LHS. For example
1115 data T = T1 | T2
1116 instance Eq T where ....
1117 The instance (Eq T) is incorprated as part of T's fingerprint.
1118
1119 In contrast, orphans are all fingerprinted together in the
1120 mi_orph_hash field of the ModIface.
1121
1122 See MkIface.addFingerprints.
1123
1124 Orphan-hood is computed
1125 * For class instances:
1126 when we make a ClsInst
1127 (because it is needed during instance lookup)
1128
1129 * For rules and family instances:
1130 when we generate an IfaceRule (MkIface.coreRuleToIfaceRule)
1131 or IfaceFamInst (MkIface.instanceToIfaceInst)
1132 -}
1133
1134 {-
1135 ************************************************************************
1136 * *
1137 \subsection{Transformation rules}
1138 * *
1139 ************************************************************************
1140
1141 The CoreRule type and its friends are dealt with mainly in CoreRules,
1142 but CoreFVs, Subst, PprCore, CoreTidy also inspect the representation.
1143 -}
1144
1145 -- | Gathers a collection of 'CoreRule's. Maps (the name of) an 'Id' to its rules
1146 type RuleBase = NameEnv [CoreRule]
1147 -- The rules are unordered;
1148 -- we sort out any overlaps on lookup
1149
1150 -- | A full rule environment which we can apply rules from. Like a 'RuleBase',
1151 -- but it also includes the set of visible orphans we use to filter out orphan
1152 -- rules which are not visible (even though we can see them...)
1153 data RuleEnv
1154 = RuleEnv { re_base :: RuleBase
1155 , re_visible_orphs :: ModuleSet
1156 }
1157
1158 mkRuleEnv :: RuleBase -> [Module] -> RuleEnv
1159 mkRuleEnv rules vis_orphs = RuleEnv rules (mkModuleSet vis_orphs)
1160
1161 emptyRuleEnv :: RuleEnv
1162 emptyRuleEnv = RuleEnv emptyNameEnv emptyModuleSet
1163
1164 -- | A 'CoreRule' is:
1165 --
1166 -- * \"Local\" if the function it is a rule for is defined in the
1167 -- same module as the rule itself.
1168 --
1169 -- * \"Orphan\" if nothing on the LHS is defined in the same module
1170 -- as the rule itself
1171 data CoreRule
1172 = Rule {
1173 ru_name :: RuleName, -- ^ Name of the rule, for communication with the user
1174 ru_act :: Activation, -- ^ When the rule is active
1175
1176 -- Rough-matching stuff
1177 -- see comments with InstEnv.ClsInst( is_cls, is_rough )
1178 ru_fn :: Name, -- ^ Name of the 'Id.Id' at the head of this rule
1179 ru_rough :: [Maybe Name], -- ^ Name at the head of each argument to the left hand side
1180
1181 -- Proper-matching stuff
1182 -- see comments with InstEnv.ClsInst( is_tvs, is_tys )
1183 ru_bndrs :: [CoreBndr], -- ^ Variables quantified over
1184 ru_args :: [CoreExpr], -- ^ Left hand side arguments
1185
1186 -- And the right-hand side
1187 ru_rhs :: CoreExpr, -- ^ Right hand side of the rule
1188 -- Occurrence info is guaranteed correct
1189 -- See Note [OccInfo in unfoldings and rules]
1190
1191 -- Locality
1192 ru_auto :: Bool, -- ^ @True@ <=> this rule is auto-generated
1193 -- (notably by Specialise or SpecConstr)
1194 -- @False@ <=> generated at the user's behest
1195 -- See Note [Trimming auto-rules] in TidyPgm
1196 -- for the sole purpose of this field.
1197
1198 ru_origin :: !Module, -- ^ 'Module' the rule was defined in, used
1199 -- to test if we should see an orphan rule.
1200
1201 ru_orphan :: !IsOrphan, -- ^ Whether or not the rule is an orphan.
1202
1203 ru_local :: Bool -- ^ @True@ iff the fn at the head of the rule is
1204 -- defined in the same module as the rule
1205 -- and is not an implicit 'Id' (like a record selector,
1206 -- class operation, or data constructor). This
1207 -- is different from 'ru_orphan', where a rule
1208 -- can avoid being an orphan if *any* Name in
1209 -- LHS of the rule was defined in the same
1210 -- module as the rule.
1211 }
1212
1213 -- | Built-in rules are used for constant folding
1214 -- and suchlike. They have no free variables.
1215 -- A built-in rule is always visible (there is no such thing as
1216 -- an orphan built-in rule.)
1217 | BuiltinRule {
1218 ru_name :: RuleName, -- ^ As above
1219 ru_fn :: Name, -- ^ As above
1220 ru_nargs :: Int, -- ^ Number of arguments that 'ru_try' consumes,
1221 -- if it fires, including type arguments
1222 ru_try :: RuleFun
1223 -- ^ This function does the rewrite. It given too many
1224 -- arguments, it simply discards them; the returned 'CoreExpr'
1225 -- is just the rewrite of 'ru_fn' applied to the first 'ru_nargs' args
1226 }
1227 -- See Note [Extra args in rule matching] in Rules.hs
1228
1229 type RuleFun = DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe CoreExpr
1230 type InScopeEnv = (InScopeSet, IdUnfoldingFun)
1231
1232 type IdUnfoldingFun = Id -> Unfolding
1233 -- A function that embodies how to unfold an Id if you need
1234 -- to do that in the Rule. The reason we need to pass this info in
1235 -- is that whether an Id is unfoldable depends on the simplifier phase
1236
1237 isBuiltinRule :: CoreRule -> Bool
1238 isBuiltinRule (BuiltinRule {}) = True
1239 isBuiltinRule _ = False
1240
1241 isAutoRule :: CoreRule -> Bool
1242 isAutoRule (BuiltinRule {}) = False
1243 isAutoRule (Rule { ru_auto = is_auto }) = is_auto
1244
1245 -- | The number of arguments the 'ru_fn' must be applied
1246 -- to before the rule can match on it
1247 ruleArity :: CoreRule -> Int
1248 ruleArity (BuiltinRule {ru_nargs = n}) = n
1249 ruleArity (Rule {ru_args = args}) = length args
1250
1251 ruleName :: CoreRule -> RuleName
1252 ruleName = ru_name
1253
1254 ruleModule :: CoreRule -> Maybe Module
1255 ruleModule Rule { ru_origin } = Just ru_origin
1256 ruleModule BuiltinRule {} = Nothing
1257
1258 ruleActivation :: CoreRule -> Activation
1259 ruleActivation (BuiltinRule { }) = AlwaysActive
1260 ruleActivation (Rule { ru_act = act }) = act
1261
1262 -- | The 'Name' of the 'Id.Id' at the head of the rule left hand side
1263 ruleIdName :: CoreRule -> Name
1264 ruleIdName = ru_fn
1265
1266 isLocalRule :: CoreRule -> Bool
1267 isLocalRule = ru_local
1268
1269 -- | Set the 'Name' of the 'Id.Id' at the head of the rule left hand side
1270 setRuleIdName :: Name -> CoreRule -> CoreRule
1271 setRuleIdName nm ru = ru { ru_fn = nm }
1272
1273 {-
1274 ************************************************************************
1275 * *
1276 \subsection{Vectorisation declarations}
1277 * *
1278 ************************************************************************
1279
1280 Representation of desugared vectorisation declarations that are fed to the vectoriser (via
1281 'ModGuts').
1282 -}
1283
1284 data CoreVect = Vect Id CoreExpr
1285 | NoVect Id
1286 | VectType Bool TyCon (Maybe TyCon)
1287 | VectClass TyCon -- class tycon
1288 | VectInst Id -- instance dfun (always SCALAR) !!!FIXME: should be superfluous now
1289
1290 {-
1291 ************************************************************************
1292 * *
1293 Unfoldings
1294 * *
1295 ************************************************************************
1296
1297 The @Unfolding@ type is declared here to avoid numerous loops
1298 -}
1299
1300 -- | Records the /unfolding/ of an identifier, which is approximately the form the
1301 -- identifier would have if we substituted its definition in for the identifier.
1302 -- This type should be treated as abstract everywhere except in "CoreUnfold"
1303 data Unfolding
1304 = NoUnfolding -- ^ We have no information about the unfolding.
1305
1306 | BootUnfolding -- ^ We have no information about the unfolding, because
1307 -- this 'Id' came from an @hi-boot@ file.
1308 -- See Note [Inlining and hs-boot files] in ToIface
1309 -- for what this is used for.
1310
1311 | OtherCon [AltCon] -- ^ It ain't one of these constructors.
1312 -- @OtherCon xs@ also indicates that something has been evaluated
1313 -- and hence there's no point in re-evaluating it.
1314 -- @OtherCon []@ is used even for non-data-type values
1315 -- to indicated evaluated-ness. Notably:
1316 --
1317 -- > data C = C !(Int -> Int)
1318 -- > case x of { C f -> ... }
1319 --
1320 -- Here, @f@ gets an @OtherCon []@ unfolding.
1321
1322 | DFunUnfolding { -- The Unfolding of a DFunId
1323 -- See Note [DFun unfoldings]
1324 -- df = /\a1..am. \d1..dn. MkD t1 .. tk
1325 -- (op1 a1..am d1..dn)
1326 -- (op2 a1..am d1..dn)
1327 df_bndrs :: [Var], -- The bound variables [a1..m],[d1..dn]
1328 df_con :: DataCon, -- The dictionary data constructor (never a newtype datacon)
1329 df_args :: [CoreExpr] -- Args of the data con: types, superclasses and methods,
1330 } -- in positional order
1331
1332 | CoreUnfolding { -- An unfolding for an Id with no pragma,
1333 -- or perhaps a NOINLINE pragma
1334 -- (For NOINLINE, the phase, if any, is in the
1335 -- InlinePragInfo for this Id.)
1336 uf_tmpl :: CoreExpr, -- Template; occurrence info is correct
1337 uf_src :: UnfoldingSource, -- Where the unfolding came from
1338 uf_is_top :: Bool, -- True <=> top level binding
1339 uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard
1340 -- a `seq` on this variable
1341 uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function
1342 -- Cached version of exprIsConLike
1343 uf_is_work_free :: Bool, -- True <=> doesn't waste (much) work to expand
1344 -- inside an inlining
1345 -- Cached version of exprIsCheap
1346 uf_expandable :: Bool, -- True <=> can expand in RULE matching
1347 -- Cached version of exprIsExpandable
1348 uf_guidance :: UnfoldingGuidance -- Tells about the *size* of the template.
1349 }
1350 -- ^ An unfolding with redundant cached information. Parameters:
1351 --
1352 -- uf_tmpl: Template used to perform unfolding;
1353 -- NB: Occurrence info is guaranteed correct:
1354 -- see Note [OccInfo in unfoldings and rules]
1355 --
1356 -- uf_is_top: Is this a top level binding?
1357 --
1358 -- uf_is_value: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on
1359 -- this variable
1360 --
1361 -- uf_is_work_free: Does this waste only a little work if we expand it inside an inlining?
1362 -- Basically this is a cached version of 'exprIsWorkFree'
1363 --
1364 -- uf_guidance: Tells us about the /size/ of the unfolding template
1365
1366
1367 ------------------------------------------------
1368 data UnfoldingSource
1369 = -- See also Note [Historical note: unfoldings for wrappers]
1370
1371 InlineRhs -- The current rhs of the function
1372 -- Replace uf_tmpl each time around
1373
1374 | InlineStable -- From an INLINE or INLINABLE pragma
1375 -- INLINE if guidance is UnfWhen
1376 -- INLINABLE if guidance is UnfIfGoodArgs/UnfoldNever
1377 -- (well, technically an INLINABLE might be made
1378 -- UnfWhen if it was small enough, and then
1379 -- it will behave like INLINE outside the current
1380 -- module, but that is the way automatic unfoldings
1381 -- work so it is consistent with the intended
1382 -- meaning of INLINABLE).
1383 --
1384 -- uf_tmpl may change, but only as a result of
1385 -- gentle simplification, it doesn't get updated
1386 -- to the current RHS during compilation as with
1387 -- InlineRhs.
1388 --
1389 -- See Note [InlineStable]
1390
1391 | InlineCompulsory -- Something that *has* no binding, so you *must* inline it
1392 -- Only a few primop-like things have this property
1393 -- (see MkId.hs, calls to mkCompulsoryUnfolding).
1394 -- Inline absolutely always, however boring the context.
1395
1396
1397
1398 -- | 'UnfoldingGuidance' says when unfolding should take place
1399 data UnfoldingGuidance
1400 = UnfWhen { -- Inline without thinking about the *size* of the uf_tmpl
1401 -- Used (a) for small *and* cheap unfoldings
1402 -- (b) for INLINE functions
1403 -- See Note [INLINE for small functions] in CoreUnfold
1404 ug_arity :: Arity, -- Number of value arguments expected
1405
1406 ug_unsat_ok :: Bool, -- True <=> ok to inline even if unsaturated
1407 ug_boring_ok :: Bool -- True <=> ok to inline even if the context is boring
1408 -- So True,True means "always"
1409 }
1410
1411 | UnfIfGoodArgs { -- Arose from a normal Id; the info here is the
1412 -- result of a simple analysis of the RHS
1413
1414 ug_args :: [Int], -- Discount if the argument is evaluated.
1415 -- (i.e., a simplification will definitely
1416 -- be possible). One elt of the list per *value* arg.
1417
1418 ug_size :: Int, -- The "size" of the unfolding.
1419
1420 ug_res :: Int -- Scrutinee discount: the discount to substract if the thing is in
1421 } -- a context (case (thing args) of ...),
1422 -- (where there are the right number of arguments.)
1423
1424 | UnfNever -- The RHS is big, so don't inline it
1425 deriving (Eq)
1426
1427 {-
1428 Note [Historical note: unfoldings for wrappers]
1429 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1430 We used to have a nice clever scheme in interface files for
1431 wrappers. A wrapper's unfolding can be reconstructed from its worker's
1432 id and its strictness. This decreased .hi file size (sometimes
1433 significantly, for modules like GHC.Classes with many high-arity w/w
1434 splits) and had a slight corresponding effect on compile times.
1435
1436 However, when we added the second demand analysis, this scheme lead to
1437 some Core lint errors. The second analysis could change the strictness
1438 signatures, which sometimes resulted in a wrapper's regenerated
1439 unfolding applying the wrapper to too many arguments.
1440
1441 Instead of repairing the clever .hi scheme, we abandoned it in favor
1442 of simplicity. The .hi sizes are usually insignificant (excluding the
1443 +1M for base libraries), and compile time barely increases (~+1% for
1444 nofib). The nicer upshot is that the UnfoldingSource no longer mentions
1445 an Id, so, eg, substitutions need not traverse them.
1446
1447
1448 Note [DFun unfoldings]
1449 ~~~~~~~~~~~~~~~~~~~~~~
1450 The Arity in a DFunUnfolding is total number of args (type and value)
1451 that the DFun needs to produce a dictionary. That's not necessarily
1452 related to the ordinary arity of the dfun Id, esp if the class has
1453 one method, so the dictionary is represented by a newtype. Example
1454
1455 class C a where { op :: a -> Int }
1456 instance C a -> C [a] where op xs = op (head xs)
1457
1458 The instance translates to
1459
1460 $dfCList :: forall a. C a => C [a] -- Arity 2!
1461 $dfCList = /\a.\d. $copList {a} d |> co
1462
1463 $copList :: forall a. C a => [a] -> Int -- Arity 2!
1464 $copList = /\a.\d.\xs. op {a} d (head xs)
1465
1466 Now we might encounter (op (dfCList {ty} d) a1 a2)
1467 and we want the (op (dfList {ty} d)) rule to fire, because $dfCList
1468 has all its arguments, even though its (value) arity is 2. That's
1469 why we record the number of expected arguments in the DFunUnfolding.
1470
1471 Note that although it's an Arity, it's most convenient for it to give
1472 the *total* number of arguments, both type and value. See the use
1473 site in exprIsConApp_maybe.
1474 -}
1475
1476 -- Constants for the UnfWhen constructor
1477 needSaturated, unSaturatedOk :: Bool
1478 needSaturated = False
1479 unSaturatedOk = True
1480
1481 boringCxtNotOk, boringCxtOk :: Bool
1482 boringCxtOk = True
1483 boringCxtNotOk = False
1484
1485 ------------------------------------------------
1486 noUnfolding :: Unfolding
1487 -- ^ There is no known 'Unfolding'
1488 evaldUnfolding :: Unfolding
1489 -- ^ This unfolding marks the associated thing as being evaluated
1490
1491 noUnfolding = NoUnfolding
1492 evaldUnfolding = OtherCon []
1493
1494 -- | There is no known 'Unfolding', because this came from an
1495 -- hi-boot file.
1496 bootUnfolding :: Unfolding
1497 bootUnfolding = BootUnfolding
1498
1499 mkOtherCon :: [AltCon] -> Unfolding
1500 mkOtherCon = OtherCon
1501
1502 isStableSource :: UnfoldingSource -> Bool
1503 -- Keep the unfolding template
1504 isStableSource InlineCompulsory = True
1505 isStableSource InlineStable = True
1506 isStableSource InlineRhs = False
1507
1508 -- | Retrieves the template of an unfolding: panics if none is known
1509 unfoldingTemplate :: Unfolding -> CoreExpr
1510 unfoldingTemplate = uf_tmpl
1511
1512 -- | Retrieves the template of an unfolding if possible
1513 -- maybeUnfoldingTemplate is used mainly wnen specialising, and we do
1514 -- want to specialise DFuns, so it's important to return a template
1515 -- for DFunUnfoldings
1516 maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
1517 maybeUnfoldingTemplate (CoreUnfolding { uf_tmpl = expr })
1518 = Just expr
1519 maybeUnfoldingTemplate (DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args })
1520 = Just (mkLams bndrs (mkApps (Var (dataConWorkId con)) args))
1521 maybeUnfoldingTemplate _
1522 = Nothing
1523
1524 -- | The constructors that the unfolding could never be:
1525 -- returns @[]@ if no information is available
1526 otherCons :: Unfolding -> [AltCon]
1527 otherCons (OtherCon cons) = cons
1528 otherCons _ = []
1529
1530 -- | Determines if it is certainly the case that the unfolding will
1531 -- yield a value (something in HNF): returns @False@ if unsure
1532 isValueUnfolding :: Unfolding -> Bool
1533 -- Returns False for OtherCon
1534 isValueUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1535 isValueUnfolding _ = False
1536
1537 -- | Determines if it possibly the case that the unfolding will
1538 -- yield a value. Unlike 'isValueUnfolding' it returns @True@
1539 -- for 'OtherCon'
1540 isEvaldUnfolding :: Unfolding -> Bool
1541 -- Returns True for OtherCon
1542 isEvaldUnfolding (OtherCon _) = True
1543 isEvaldUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1544 isEvaldUnfolding _ = False
1545
1546 -- | @True@ if the unfolding is a constructor application, the application
1547 -- of a CONLIKE function or 'OtherCon'
1548 isConLikeUnfolding :: Unfolding -> Bool
1549 isConLikeUnfolding (OtherCon _) = True
1550 isConLikeUnfolding (CoreUnfolding { uf_is_conlike = con }) = con
1551 isConLikeUnfolding _ = False
1552
1553 -- | Is the thing we will unfold into certainly cheap?
1554 isCheapUnfolding :: Unfolding -> Bool
1555 isCheapUnfolding (CoreUnfolding { uf_is_work_free = is_wf }) = is_wf
1556 isCheapUnfolding _ = False
1557
1558 isExpandableUnfolding :: Unfolding -> Bool
1559 isExpandableUnfolding (CoreUnfolding { uf_expandable = is_expable }) = is_expable
1560 isExpandableUnfolding _ = False
1561
1562 expandUnfolding_maybe :: Unfolding -> Maybe CoreExpr
1563 -- Expand an expandable unfolding; this is used in rule matching
1564 -- See Note [Expanding variables] in Rules.hs
1565 -- The key point here is that CONLIKE things can be expanded
1566 expandUnfolding_maybe (CoreUnfolding { uf_expandable = True, uf_tmpl = rhs }) = Just rhs
1567 expandUnfolding_maybe _ = Nothing
1568
1569 isCompulsoryUnfolding :: Unfolding -> Bool
1570 isCompulsoryUnfolding (CoreUnfolding { uf_src = InlineCompulsory }) = True
1571 isCompulsoryUnfolding _ = False
1572
1573 isStableUnfolding :: Unfolding -> Bool
1574 -- True of unfoldings that should not be overwritten
1575 -- by a CoreUnfolding for the RHS of a let-binding
1576 isStableUnfolding (CoreUnfolding { uf_src = src }) = isStableSource src
1577 isStableUnfolding (DFunUnfolding {}) = True
1578 isStableUnfolding _ = False
1579
1580 -- | Only returns False if there is no unfolding information available at all
1581 hasSomeUnfolding :: Unfolding -> Bool
1582 hasSomeUnfolding NoUnfolding = False
1583 hasSomeUnfolding BootUnfolding = False
1584 hasSomeUnfolding _ = True
1585
1586 isBootUnfolding :: Unfolding -> Bool
1587 isBootUnfolding BootUnfolding = True
1588 isBootUnfolding _ = False
1589
1590 neverUnfoldGuidance :: UnfoldingGuidance -> Bool
1591 neverUnfoldGuidance UnfNever = True
1592 neverUnfoldGuidance _ = False
1593
1594 isFragileUnfolding :: Unfolding -> Bool
1595 -- An unfolding is fragile if it mentions free variables or
1596 -- is otherwise subject to change. A robust one can be kept.
1597 -- See Note [Fragile unfoldings]
1598 isFragileUnfolding (CoreUnfolding {}) = True
1599 isFragileUnfolding (DFunUnfolding {}) = True
1600 isFragileUnfolding _ = False
1601 -- NoUnfolding, BootUnfolding, OtherCon are all non-fragile
1602
1603 canUnfold :: Unfolding -> Bool
1604 canUnfold (CoreUnfolding { uf_guidance = g }) = not (neverUnfoldGuidance g)
1605 canUnfold _ = False
1606
1607 {- Note [Fragile unfoldings]
1608 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1609 An unfolding is "fragile" if it mentions free variables (and hence would
1610 need substitution) or might be affected by optimisation. The non-fragile
1611 ones are
1612
1613 NoUnfolding, BootUnfolding
1614
1615 OtherCon {} If we know this binder (say a lambda binder) will be
1616 bound to an evaluated thing, we want to retain that
1617 info in simpleOptExpr; see Trac #13077.
1618
1619 We consider even a StableUnfolding as fragile, because it needs substitution.
1620
1621 Note [InlineStable]
1622 ~~~~~~~~~~~~~~~~~
1623 When you say
1624 {-# INLINE f #-}
1625 f x = <rhs>
1626 you intend that calls (f e) are replaced by <rhs>[e/x] So we
1627 should capture (\x.<rhs>) in the Unfolding of 'f', and never meddle
1628 with it. Meanwhile, we can optimise <rhs> to our heart's content,
1629 leaving the original unfolding intact in Unfolding of 'f'. For example
1630 all xs = foldr (&&) True xs
1631 any p = all . map p {-# INLINE any #-}
1632 We optimise any's RHS fully, but leave the InlineRule saying "all . map p",
1633 which deforests well at the call site.
1634
1635 So INLINE pragma gives rise to an InlineRule, which captures the original RHS.
1636
1637 Moreover, it's only used when 'f' is applied to the
1638 specified number of arguments; that is, the number of argument on
1639 the LHS of the '=' sign in the original source definition.
1640 For example, (.) is now defined in the libraries like this
1641 {-# INLINE (.) #-}
1642 (.) f g = \x -> f (g x)
1643 so that it'll inline when applied to two arguments. If 'x' appeared
1644 on the left, thus
1645 (.) f g x = f (g x)
1646 it'd only inline when applied to three arguments. This slightly-experimental
1647 change was requested by Roman, but it seems to make sense.
1648
1649 See also Note [Inlining an InlineRule] in CoreUnfold.
1650
1651
1652 Note [OccInfo in unfoldings and rules]
1653 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1654 In unfoldings and rules, we guarantee that the template is occ-analysed,
1655 so that the occurrence info on the binders is correct. This is important,
1656 because the Simplifier does not re-analyse the template when using it. If
1657 the occurrence info is wrong
1658 - We may get more simplifier iterations than necessary, because
1659 once-occ info isn't there
1660 - More seriously, we may get an infinite loop if there's a Rec
1661 without a loop breaker marked
1662
1663
1664 ************************************************************************
1665 * *
1666 AltCon
1667 * *
1668 ************************************************************************
1669 -}
1670
1671 -- The Ord is needed for the FiniteMap used in the lookForConstructor
1672 -- in SimplEnv. If you declared that lookForConstructor *ignores*
1673 -- constructor-applications with LitArg args, then you could get
1674 -- rid of this Ord.
1675
1676 instance Outputable AltCon where
1677 ppr (DataAlt dc) = ppr dc
1678 ppr (LitAlt lit) = ppr lit
1679 ppr DEFAULT = text "__DEFAULT"
1680
1681 cmpAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Ordering
1682 cmpAlt (con1, _, _) (con2, _, _) = con1 `cmpAltCon` con2
1683
1684 ltAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Bool
1685 ltAlt a1 a2 = (a1 `cmpAlt` a2) == LT
1686
1687 cmpAltCon :: AltCon -> AltCon -> Ordering
1688 -- ^ Compares 'AltCon's within a single list of alternatives
1689 -- DEFAULT comes out smallest, so that sorting by AltCon
1690 -- puts alternatives in the order required by #case_invariants#
1691 cmpAltCon DEFAULT DEFAULT = EQ
1692 cmpAltCon DEFAULT _ = LT
1693
1694 cmpAltCon (DataAlt d1) (DataAlt d2) = dataConTag d1 `compare` dataConTag d2
1695 cmpAltCon (DataAlt _) DEFAULT = GT
1696 cmpAltCon (LitAlt l1) (LitAlt l2) = l1 `compare` l2
1697 cmpAltCon (LitAlt _) DEFAULT = GT
1698
1699 cmpAltCon con1 con2 = WARN( True, text "Comparing incomparable AltCons" <+>
1700 ppr con1 <+> ppr con2 )
1701 LT
1702
1703 {-
1704 ************************************************************************
1705 * *
1706 \subsection{Useful synonyms}
1707 * *
1708 ************************************************************************
1709
1710 Note [CoreProgram]
1711 ~~~~~~~~~~~~~~~~~~
1712 The top level bindings of a program, a CoreProgram, are represented as
1713 a list of CoreBind
1714
1715 * Later bindings in the list can refer to earlier ones, but not vice
1716 versa. So this is OK
1717 NonRec { x = 4 }
1718 Rec { p = ...q...x...
1719 ; q = ...p...x }
1720 Rec { f = ...p..x..f.. }
1721 NonRec { g = ..f..q...x.. }
1722 But it would NOT be ok for 'f' to refer to 'g'.
1723
1724 * The occurrence analyser does strongly-connected component analysis
1725 on each Rec binding, and splits it into a sequence of smaller
1726 bindings where possible. So the program typically starts life as a
1727 single giant Rec, which is then dependency-analysed into smaller
1728 chunks.
1729 -}
1730
1731 -- If you edit this type, you may need to update the GHC formalism
1732 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1733 type CoreProgram = [CoreBind] -- See Note [CoreProgram]
1734
1735 -- | The common case for the type of binders and variables when
1736 -- we are manipulating the Core language within GHC
1737 type CoreBndr = Var
1738 -- | Expressions where binders are 'CoreBndr's
1739 type CoreExpr = Expr CoreBndr
1740 -- | Argument expressions where binders are 'CoreBndr's
1741 type CoreArg = Arg CoreBndr
1742 -- | Binding groups where binders are 'CoreBndr's
1743 type CoreBind = Bind CoreBndr
1744 -- | Case alternatives where binders are 'CoreBndr's
1745 type CoreAlt = Alt CoreBndr
1746
1747 {-
1748 ************************************************************************
1749 * *
1750 \subsection{Tagging}
1751 * *
1752 ************************************************************************
1753 -}
1754
1755 -- | Binders are /tagged/ with a t
1756 data TaggedBndr t = TB CoreBndr t -- TB for "tagged binder"
1757
1758 type TaggedBind t = Bind (TaggedBndr t)
1759 type TaggedExpr t = Expr (TaggedBndr t)
1760 type TaggedArg t = Arg (TaggedBndr t)
1761 type TaggedAlt t = Alt (TaggedBndr t)
1762
1763 instance Outputable b => Outputable (TaggedBndr b) where
1764 ppr (TB b l) = char '<' <> ppr b <> comma <> ppr l <> char '>'
1765
1766 deTagExpr :: TaggedExpr t -> CoreExpr
1767 deTagExpr (Var v) = Var v
1768 deTagExpr (Lit l) = Lit l
1769 deTagExpr (Type ty) = Type ty
1770 deTagExpr (Coercion co) = Coercion co
1771 deTagExpr (App e1 e2) = App (deTagExpr e1) (deTagExpr e2)
1772 deTagExpr (Lam (TB b _) e) = Lam b (deTagExpr e)
1773 deTagExpr (Let bind body) = Let (deTagBind bind) (deTagExpr body)
1774 deTagExpr (Case e (TB b _) ty alts) = Case (deTagExpr e) b ty (map deTagAlt alts)
1775 deTagExpr (Tick t e) = Tick t (deTagExpr e)
1776 deTagExpr (Cast e co) = Cast (deTagExpr e) co
1777
1778 deTagBind :: TaggedBind t -> CoreBind
1779 deTagBind (NonRec (TB b _) rhs) = NonRec b (deTagExpr rhs)
1780 deTagBind (Rec prs) = Rec [(b, deTagExpr rhs) | (TB b _, rhs) <- prs]
1781
1782 deTagAlt :: TaggedAlt t -> CoreAlt
1783 deTagAlt (con, bndrs, rhs) = (con, [b | TB b _ <- bndrs], deTagExpr rhs)
1784
1785 {-
1786 ************************************************************************
1787 * *
1788 \subsection{Core-constructing functions with checking}
1789 * *
1790 ************************************************************************
1791 -}
1792
1793 -- | Apply a list of argument expressions to a function expression in a nested fashion. Prefer to
1794 -- use 'MkCore.mkCoreApps' if possible
1795 mkApps :: Expr b -> [Arg b] -> Expr b
1796 -- | Apply a list of type argument expressions to a function expression in a nested fashion
1797 mkTyApps :: Expr b -> [Type] -> Expr b
1798 -- | Apply a list of coercion argument expressions to a function expression in a nested fashion
1799 mkCoApps :: Expr b -> [Coercion] -> Expr b
1800 -- | Apply a list of type or value variables to a function expression in a nested fashion
1801 mkVarApps :: Expr b -> [Var] -> Expr b
1802 -- | Apply a list of argument expressions to a data constructor in a nested fashion. Prefer to
1803 -- use 'MkCore.mkCoreConApps' if possible
1804 mkConApp :: DataCon -> [Arg b] -> Expr b
1805
1806 mkApps f args = foldl App f args
1807 mkCoApps f args = foldl (\ e a -> App e (Coercion a)) f args
1808 mkVarApps f vars = foldl (\ e a -> App e (varToCoreExpr a)) f vars
1809 mkConApp con args = mkApps (Var (dataConWorkId con)) args
1810
1811 mkTyApps f args = foldl (\ e a -> App e (mkTyArg a)) f args
1812
1813 mkConApp2 :: DataCon -> [Type] -> [Var] -> Expr b
1814 mkConApp2 con tys arg_ids = Var (dataConWorkId con)
1815 `mkApps` map Type tys
1816 `mkApps` map varToCoreExpr arg_ids
1817
1818 mkTyArg :: Type -> Expr b
1819 mkTyArg ty
1820 | Just co <- isCoercionTy_maybe ty = Coercion co
1821 | otherwise = Type ty
1822
1823 -- | Create a machine integer literal expression of type @Int#@ from an @Integer@.
1824 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1825 mkIntLit :: DynFlags -> Integer -> Expr b
1826 -- | Create a machine integer literal expression of type @Int#@ from an @Int@.
1827 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1828 mkIntLitInt :: DynFlags -> Int -> Expr b
1829
1830 mkIntLit dflags n = Lit (mkMachInt dflags n)
1831 mkIntLitInt dflags n = Lit (mkMachInt dflags (toInteger n))
1832
1833 -- | Create a machine word literal expression of type @Word#@ from an @Integer@.
1834 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1835 mkWordLit :: DynFlags -> Integer -> Expr b
1836 -- | Create a machine word literal expression of type @Word#@ from a @Word@.
1837 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1838 mkWordLitWord :: DynFlags -> Word -> Expr b
1839
1840 mkWordLit dflags w = Lit (mkMachWord dflags w)
1841 mkWordLitWord dflags w = Lit (mkMachWord dflags (toInteger w))
1842
1843 mkWord64LitWord64 :: Word64 -> Expr b
1844 mkWord64LitWord64 w = Lit (mkMachWord64 (toInteger w))
1845
1846 mkInt64LitInt64 :: Int64 -> Expr b
1847 mkInt64LitInt64 w = Lit (mkMachInt64 (toInteger w))
1848
1849 -- | Create a machine character literal expression of type @Char#@.
1850 -- If you want an expression of type @Char@ use 'MkCore.mkCharExpr'
1851 mkCharLit :: Char -> Expr b
1852 -- | Create a machine string literal expression of type @Addr#@.
1853 -- If you want an expression of type @String@ use 'MkCore.mkStringExpr'
1854 mkStringLit :: String -> Expr b
1855
1856 mkCharLit c = Lit (mkMachChar c)
1857 mkStringLit s = Lit (mkMachString s)
1858
1859 -- | Create a machine single precision literal expression of type @Float#@ from a @Rational@.
1860 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1861 mkFloatLit :: Rational -> Expr b
1862 -- | Create a machine single precision literal expression of type @Float#@ from a @Float@.
1863 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1864 mkFloatLitFloat :: Float -> Expr b
1865
1866 mkFloatLit f = Lit (mkMachFloat f)
1867 mkFloatLitFloat f = Lit (mkMachFloat (toRational f))
1868
1869 -- | Create a machine double precision literal expression of type @Double#@ from a @Rational@.
1870 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1871 mkDoubleLit :: Rational -> Expr b
1872 -- | Create a machine double precision literal expression of type @Double#@ from a @Double@.
1873 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1874 mkDoubleLitDouble :: Double -> Expr b
1875
1876 mkDoubleLit d = Lit (mkMachDouble d)
1877 mkDoubleLitDouble d = Lit (mkMachDouble (toRational d))
1878
1879 -- | Bind all supplied binding groups over an expression in a nested let expression. Assumes
1880 -- that the rhs satisfies the let/app invariant. Prefer to use 'MkCore.mkCoreLets' if
1881 -- possible, which does guarantee the invariant
1882 mkLets :: [Bind b] -> Expr b -> Expr b
1883 -- | Bind all supplied binders over an expression in a nested lambda expression. Prefer to
1884 -- use 'MkCore.mkCoreLams' if possible
1885 mkLams :: [b] -> Expr b -> Expr b
1886
1887 mkLams binders body = foldr Lam body binders
1888 mkLets binds body = foldr mkLet body binds
1889
1890 mkLet :: Bind b -> Expr b -> Expr b
1891 -- The desugarer sometimes generates an empty Rec group
1892 -- which Lint rejects, so we kill it off right away
1893 mkLet (Rec []) body = body
1894 mkLet bind body = Let bind body
1895
1896 -- | @mkLetNonRec bndr rhs body@ wraps @body@ in a @let@ binding @bndr@.
1897 mkLetNonRec :: b -> Expr b -> Expr b -> Expr b
1898 mkLetNonRec b rhs body = Let (NonRec b rhs) body
1899
1900 -- | @mkLetRec binds body@ wraps @body@ in a @let rec@ with the given set of
1901 -- @binds@ if binds is non-empty.
1902 mkLetRec :: [(b, Expr b)] -> Expr b -> Expr b
1903 mkLetRec [] body = body
1904 mkLetRec bs body = Let (Rec bs) body
1905
1906 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1907 -- this can only be used to bind something in a non-recursive @let@ expression
1908 mkTyBind :: TyVar -> Type -> CoreBind
1909 mkTyBind tv ty = NonRec tv (Type ty)
1910
1911 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1912 -- this can only be used to bind something in a non-recursive @let@ expression
1913 mkCoBind :: CoVar -> Coercion -> CoreBind
1914 mkCoBind cv co = NonRec cv (Coercion co)
1915
1916 -- | Convert a binder into either a 'Var' or 'Type' 'Expr' appropriately
1917 varToCoreExpr :: CoreBndr -> Expr b
1918 varToCoreExpr v | isTyVar v = Type (mkTyVarTy v)
1919 | isCoVar v = Coercion (mkCoVarCo v)
1920 | otherwise = ASSERT( isId v ) Var v
1921
1922 varsToCoreExprs :: [CoreBndr] -> [Expr b]
1923 varsToCoreExprs vs = map varToCoreExpr vs
1924
1925 {-
1926 ************************************************************************
1927 * *
1928 Getting a result type
1929 * *
1930 ************************************************************************
1931
1932 These are defined here to avoid a module loop between CoreUtils and CoreFVs
1933
1934 -}
1935
1936 applyTypeToArg :: Type -> CoreExpr -> Type
1937 -- ^ Determines the type resulting from applying an expression with given type
1938 -- to a given argument expression
1939 applyTypeToArg fun_ty arg = piResultTy fun_ty (exprToType arg)
1940
1941 -- | If the expression is a 'Type', converts. Otherwise,
1942 -- panics. NB: This does /not/ convert 'Coercion' to 'CoercionTy'.
1943 exprToType :: CoreExpr -> Type
1944 exprToType (Type ty) = ty
1945 exprToType _bad = pprPanic "exprToType" empty
1946
1947 -- | If the expression is a 'Coercion', converts.
1948 exprToCoercion_maybe :: CoreExpr -> Maybe Coercion
1949 exprToCoercion_maybe (Coercion co) = Just co
1950 exprToCoercion_maybe _ = Nothing
1951
1952 {-
1953 ************************************************************************
1954 * *
1955 \subsection{Simple access functions}
1956 * *
1957 ************************************************************************
1958 -}
1959
1960 -- | Extract every variable by this group
1961 bindersOf :: Bind b -> [b]
1962 -- If you edit this function, you may need to update the GHC formalism
1963 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1964 bindersOf (NonRec binder _) = [binder]
1965 bindersOf (Rec pairs) = [binder | (binder, _) <- pairs]
1966
1967 -- | 'bindersOf' applied to a list of binding groups
1968 bindersOfBinds :: [Bind b] -> [b]
1969 bindersOfBinds binds = foldr ((++) . bindersOf) [] binds
1970
1971 rhssOfBind :: Bind b -> [Expr b]
1972 rhssOfBind (NonRec _ rhs) = [rhs]
1973 rhssOfBind (Rec pairs) = [rhs | (_,rhs) <- pairs]
1974
1975 rhssOfAlts :: [Alt b] -> [Expr b]
1976 rhssOfAlts alts = [e | (_,_,e) <- alts]
1977
1978 -- | Collapse all the bindings in the supplied groups into a single
1979 -- list of lhs\/rhs pairs suitable for binding in a 'Rec' binding group
1980 flattenBinds :: [Bind b] -> [(b, Expr b)]
1981 flattenBinds (NonRec b r : binds) = (b,r) : flattenBinds binds
1982 flattenBinds (Rec prs1 : binds) = prs1 ++ flattenBinds binds
1983 flattenBinds [] = []
1984
1985 -- | We often want to strip off leading lambdas before getting down to
1986 -- business. Variants are 'collectTyBinders', 'collectValBinders',
1987 -- and 'collectTyAndValBinders'
1988 collectBinders :: Expr b -> ([b], Expr b)
1989 collectTyBinders :: CoreExpr -> ([TyVar], CoreExpr)
1990 collectValBinders :: CoreExpr -> ([Id], CoreExpr)
1991 collectTyAndValBinders :: CoreExpr -> ([TyVar], [Id], CoreExpr)
1992 -- | Strip off exactly N leading lambdas (type or value). Good for use with
1993 -- join points.
1994 collectNBinders :: Int -> Expr b -> ([b], Expr b)
1995
1996 collectBinders expr
1997 = go [] expr
1998 where
1999 go bs (Lam b e) = go (b:bs) e
2000 go bs e = (reverse bs, e)
2001
2002 collectTyBinders expr
2003 = go [] expr
2004 where
2005 go tvs (Lam b e) | isTyVar b = go (b:tvs) e
2006 go tvs e = (reverse tvs, e)
2007
2008 collectValBinders expr
2009 = go [] expr
2010 where
2011 go ids (Lam b e) | isId b = go (b:ids) e
2012 go ids body = (reverse ids, body)
2013
2014 collectTyAndValBinders expr
2015 = (tvs, ids, body)
2016 where
2017 (tvs, body1) = collectTyBinders expr
2018 (ids, body) = collectValBinders body1
2019
2020 collectNBinders orig_n orig_expr
2021 = go orig_n [] orig_expr
2022 where
2023 go 0 bs expr = (reverse bs, expr)
2024 go n bs (Lam b e) = go (n-1) (b:bs) e
2025 go _ _ _ = pprPanic "collectNBinders" $ int orig_n
2026
2027 -- | Takes a nested application expression and returns the the function
2028 -- being applied and the arguments to which it is applied
2029 collectArgs :: Expr b -> (Expr b, [Arg b])
2030 collectArgs expr
2031 = go expr []
2032 where
2033 go (App f a) as = go f (a:as)
2034 go e as = (e, as)
2035
2036 -- | Like @collectArgs@, but also collects looks through floatable
2037 -- ticks if it means that we can find more arguments.
2038 collectArgsTicks :: (Tickish Id -> Bool) -> Expr b
2039 -> (Expr b, [Arg b], [Tickish Id])
2040 collectArgsTicks skipTick expr
2041 = go expr [] []
2042 where
2043 go (App f a) as ts = go f (a:as) ts
2044 go (Tick t e) as ts
2045 | skipTick t = go e as (t:ts)
2046 go e as ts = (e, as, reverse ts)
2047
2048
2049 {-
2050 ************************************************************************
2051 * *
2052 \subsection{Predicates}
2053 * *
2054 ************************************************************************
2055
2056 At one time we optionally carried type arguments through to runtime.
2057 @isRuntimeVar v@ returns if (Lam v _) really becomes a lambda at runtime,
2058 i.e. if type applications are actual lambdas because types are kept around
2059 at runtime. Similarly isRuntimeArg.
2060 -}
2061
2062 -- | Will this variable exist at runtime?
2063 isRuntimeVar :: Var -> Bool
2064 isRuntimeVar = isId
2065
2066 -- | Will this argument expression exist at runtime?
2067 isRuntimeArg :: CoreExpr -> Bool
2068 isRuntimeArg = isValArg
2069
2070 -- | Returns @True@ for value arguments, false for type args
2071 -- NB: coercions are value arguments (zero width, to be sure,
2072 -- like State#, but still value args).
2073 isValArg :: Expr b -> Bool
2074 isValArg e = not (isTypeArg e)
2075
2076 -- | Returns @True@ iff the expression is a 'Type' or 'Coercion'
2077 -- expression at its top level
2078 isTyCoArg :: Expr b -> Bool
2079 isTyCoArg (Type {}) = True
2080 isTyCoArg (Coercion {}) = True
2081 isTyCoArg _ = False
2082
2083 -- | Returns @True@ iff the expression is a 'Type' expression at its
2084 -- top level. Note this does NOT include 'Coercion's.
2085 isTypeArg :: Expr b -> Bool
2086 isTypeArg (Type {}) = True
2087 isTypeArg _ = False
2088
2089 -- | The number of binders that bind values rather than types
2090 valBndrCount :: [CoreBndr] -> Int
2091 valBndrCount = count isId
2092
2093 -- | The number of argument expressions that are values rather than types at their top level
2094 valArgCount :: [Arg b] -> Int
2095 valArgCount = count isValArg
2096
2097 {-
2098 ************************************************************************
2099 * *
2100 \subsection{Annotated core}
2101 * *
2102 ************************************************************************
2103 -}
2104
2105 -- | Annotated core: allows annotation at every node in the tree
2106 type AnnExpr bndr annot = (annot, AnnExpr' bndr annot)
2107
2108 -- | A clone of the 'Expr' type but allowing annotation at every tree node
2109 data AnnExpr' bndr annot
2110 = AnnVar Id
2111 | AnnLit Literal
2112 | AnnLam bndr (AnnExpr bndr annot)
2113 | AnnApp (AnnExpr bndr annot) (AnnExpr bndr annot)
2114 | AnnCase (AnnExpr bndr annot) bndr Type [AnnAlt bndr annot]
2115 | AnnLet (AnnBind bndr annot) (AnnExpr bndr annot)
2116 | AnnCast (AnnExpr bndr annot) (annot, Coercion)
2117 -- Put an annotation on the (root of) the coercion
2118 | AnnTick (Tickish Id) (AnnExpr bndr annot)
2119 | AnnType Type
2120 | AnnCoercion Coercion
2121
2122 -- | A clone of the 'Alt' type but allowing annotation at every tree node
2123 type AnnAlt bndr annot = (AltCon, [bndr], AnnExpr bndr annot)
2124
2125 -- | A clone of the 'Bind' type but allowing annotation at every tree node
2126 data AnnBind bndr annot
2127 = AnnNonRec bndr (AnnExpr bndr annot)
2128 | AnnRec [(bndr, AnnExpr bndr annot)]
2129
2130 -- | Takes a nested application expression and returns the the function
2131 -- being applied and the arguments to which it is applied
2132 collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
2133 collectAnnArgs expr
2134 = go expr []
2135 where
2136 go (_, AnnApp f a) as = go f (a:as)
2137 go e as = (e, as)
2138
2139 collectAnnArgsTicks :: (Tickish Var -> Bool) -> AnnExpr b a
2140 -> (AnnExpr b a, [AnnExpr b a], [Tickish Var])
2141 collectAnnArgsTicks tickishOk expr
2142 = go expr [] []
2143 where
2144 go (_, AnnApp f a) as ts = go f (a:as) ts
2145 go (_, AnnTick t e) as ts | tickishOk t
2146 = go e as (t:ts)
2147 go e as ts = (e, as, reverse ts)
2148
2149 deAnnotate :: AnnExpr bndr annot -> Expr bndr
2150 deAnnotate (_, e) = deAnnotate' e
2151
2152 deAnnotate' :: AnnExpr' bndr annot -> Expr bndr
2153 deAnnotate' (AnnType t) = Type t
2154 deAnnotate' (AnnCoercion co) = Coercion co
2155 deAnnotate' (AnnVar v) = Var v
2156 deAnnotate' (AnnLit lit) = Lit lit
2157 deAnnotate' (AnnLam binder body) = Lam binder (deAnnotate body)
2158 deAnnotate' (AnnApp fun arg) = App (deAnnotate fun) (deAnnotate arg)
2159 deAnnotate' (AnnCast e (_,co)) = Cast (deAnnotate e) co
2160 deAnnotate' (AnnTick tick body) = Tick tick (deAnnotate body)
2161
2162 deAnnotate' (AnnLet bind body)
2163 = Let (deAnnBind bind) (deAnnotate body)
2164 deAnnotate' (AnnCase scrut v t alts)
2165 = Case (deAnnotate scrut) v t (map deAnnAlt alts)
2166
2167 deAnnAlt :: AnnAlt bndr annot -> Alt bndr
2168 deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs)
2169
2170 deAnnBind :: AnnBind b annot -> Bind b
2171 deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
2172 deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
2173
2174 -- | As 'collectBinders' but for 'AnnExpr' rather than 'Expr'
2175 collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2176 collectAnnBndrs e
2177 = collect [] e
2178 where
2179 collect bs (_, AnnLam b body) = collect (b:bs) body
2180 collect bs body = (reverse bs, body)
2181
2182 -- | As 'collectNBinders' but for 'AnnExpr' rather than 'Expr'
2183 collectNAnnBndrs :: Int -> AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2184 collectNAnnBndrs orig_n e
2185 = collect orig_n [] e
2186 where
2187 collect 0 bs body = (reverse bs, body)
2188 collect n bs (_, AnnLam b body) = collect (n-1) (b:bs) body
2189 collect _ _ _ = pprPanic "collectNBinders" $ int orig_n