Fix #13233 by checking for lev-poly primops
[ghc.git] / compiler / coreSyn / CoreSyn.hs
1 {-
2 (c) The University of Glasgow 2006
3 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 -}
5
6 {-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-}
7 {-# LANGUAGE NamedFieldPuns #-}
8
9 -- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection
10 module CoreSyn (
11 -- * Main data types
12 Expr(..), Alt, Bind(..), AltCon(..), Arg,
13 Tickish(..), TickishScoping(..), TickishPlacement(..),
14 CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr,
15 TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), deTagExpr,
16
17 -- * In/Out type synonyms
18 InId, InBind, InExpr, InAlt, InArg, InType, InKind,
19 InBndr, InVar, InCoercion, InTyVar, InCoVar,
20 OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind,
21 OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar,
22
23 -- ** 'Expr' construction
24 mkLet, mkLets, mkLetNonRec, mkLetRec, mkLams,
25 mkApps, mkTyApps, mkCoApps, mkVarApps, mkTyArg,
26
27 mkIntLit, mkIntLitInt,
28 mkWordLit, mkWordLitWord,
29 mkWord64LitWord64, mkInt64LitInt64,
30 mkCharLit, mkStringLit,
31 mkFloatLit, mkFloatLitFloat,
32 mkDoubleLit, mkDoubleLitDouble,
33
34 mkConApp, mkConApp2, mkTyBind, mkCoBind,
35 varToCoreExpr, varsToCoreExprs,
36
37 isId, cmpAltCon, cmpAlt, ltAlt,
38
39 -- ** Simple 'Expr' access functions and predicates
40 bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts,
41 collectBinders, collectTyBinders, collectTyAndValBinders,
42 collectNBinders,
43 collectArgs, collectArgsTicks, flattenBinds,
44
45 exprToType, exprToCoercion_maybe,
46 applyTypeToArg,
47
48 isValArg, isTypeArg, isTyCoArg, valArgCount, valBndrCount,
49 isRuntimeArg, isRuntimeVar,
50
51 -- * Tick-related functions
52 tickishCounts, tickishScoped, tickishScopesLike, tickishFloatable,
53 tickishCanSplit, mkNoCount, mkNoScope,
54 tickishIsCode, tickishPlace,
55 tickishContains,
56
57 -- * Unfolding data types
58 Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..),
59
60 -- ** Constructing 'Unfolding's
61 noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon,
62 unSaturatedOk, needSaturated, boringCxtOk, boringCxtNotOk,
63
64 -- ** Predicates and deconstruction on 'Unfolding'
65 unfoldingTemplate, expandUnfolding_maybe,
66 maybeUnfoldingTemplate, otherCons,
67 isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
68 isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
69 isStableUnfolding, isFragileUnfolding, hasSomeUnfolding,
70 isBootUnfolding,
71 canUnfold, neverUnfoldGuidance, isStableSource,
72
73 -- * Annotated expression data types
74 AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
75
76 -- ** Operations on annotated expressions
77 collectAnnArgs, collectAnnArgsTicks,
78
79 -- ** Operations on annotations
80 deAnnotate, deAnnotate', deAnnAlt,
81 collectAnnBndrs, collectNAnnBndrs,
82
83 -- * Orphanhood
84 IsOrphan(..), isOrphan, notOrphan, chooseOrphanAnchor,
85
86 -- * Core rule data types
87 CoreRule(..), RuleBase,
88 RuleName, RuleFun, IdUnfoldingFun, InScopeEnv,
89 RuleEnv(..), mkRuleEnv, emptyRuleEnv,
90
91 -- ** Operations on 'CoreRule's
92 ruleArity, ruleName, ruleIdName, ruleActivation,
93 setRuleIdName, ruleModule,
94 isBuiltinRule, isLocalRule, isAutoRule,
95
96 -- * Core vectorisation declarations data type
97 CoreVect(..)
98 ) where
99
100 #include "HsVersions.h"
101
102 import CostCentre
103 import VarEnv( InScopeSet )
104 import Var
105 import Type
106 import Coercion
107 import Name
108 import NameSet
109 import NameEnv( NameEnv, emptyNameEnv )
110 import Literal
111 import DataCon
112 import Module
113 import TyCon
114 import BasicTypes
115 import DynFlags
116 import Outputable
117 import Util
118 import UniqSet
119 import SrcLoc ( RealSrcSpan, containsSpan )
120 import Binary
121
122 import Data.Data hiding (TyCon)
123 import Data.Int
124 import Data.Word
125
126 infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps`
127 -- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys)
128
129 {-
130 ************************************************************************
131 * *
132 \subsection{The main data types}
133 * *
134 ************************************************************************
135
136 These data types are the heart of the compiler
137 -}
138
139 -- | This is the data type that represents GHCs core intermediate language. Currently
140 -- GHC uses System FC <http://research.microsoft.com/~simonpj/papers/ext-f/> for this purpose,
141 -- which is closely related to the simpler and better known System F <http://en.wikipedia.org/wiki/System_F>.
142 --
143 -- We get from Haskell source to this Core language in a number of stages:
144 --
145 -- 1. The source code is parsed into an abstract syntax tree, which is represented
146 -- by the data type 'HsExpr.HsExpr' with the names being 'RdrName.RdrNames'
147 --
148 -- 2. This syntax tree is /renamed/, which attaches a 'Unique.Unique' to every 'RdrName.RdrName'
149 -- (yielding a 'Name.Name') to disambiguate identifiers which are lexically identical.
150 -- For example, this program:
151 --
152 -- @
153 -- f x = let f x = x + 1
154 -- in f (x - 2)
155 -- @
156 --
157 -- Would be renamed by having 'Unique's attached so it looked something like this:
158 --
159 -- @
160 -- f_1 x_2 = let f_3 x_4 = x_4 + 1
161 -- in f_3 (x_2 - 2)
162 -- @
163 -- But see Note [Shadowing] below.
164 --
165 -- 3. The resulting syntax tree undergoes type checking (which also deals with instantiating
166 -- type class arguments) to yield a 'HsExpr.HsExpr' type that has 'Id.Id' as it's names.
167 --
168 -- 4. Finally the syntax tree is /desugared/ from the expressive 'HsExpr.HsExpr' type into
169 -- this 'Expr' type, which has far fewer constructors and hence is easier to perform
170 -- optimization, analysis and code generation on.
171 --
172 -- The type parameter @b@ is for the type of binders in the expression tree.
173 --
174 -- The language consists of the following elements:
175 --
176 -- * Variables
177 --
178 -- * Primitive literals
179 --
180 -- * Applications: note that the argument may be a 'Type'.
181 -- See Note [CoreSyn let/app invariant]
182 -- See Note [Levity polymorphism invariants]
183 --
184 -- * Lambda abstraction
185 -- See Note [Levity polymorphism invariants]
186 --
187 -- * Recursive and non recursive @let@s. Operationally
188 -- this corresponds to allocating a thunk for the things
189 -- bound and then executing the sub-expression.
190 --
191 -- #top_level_invariant#
192 -- #letrec_invariant#
193 --
194 -- The right hand sides of all top-level and recursive @let@s
195 -- /must/ be of lifted type (see "Type#type_classification" for
196 -- the meaning of /lifted/ vs. /unlifted/). There is one exception
197 -- to this rule, top-level @let@s are allowed to bind primitive
198 -- string literals, see Note [CoreSyn top-level string literals].
199 --
200 -- See Note [CoreSyn let/app invariant]
201 -- See Note [Levity polymorphism invariants]
202 --
203 -- #type_let#
204 -- We allow a /non-recursive/ let to bind a type variable, thus:
205 --
206 -- > Let (NonRec tv (Type ty)) body
207 --
208 -- This can be very convenient for postponing type substitutions until
209 -- the next run of the simplifier.
210 --
211 -- At the moment, the rest of the compiler only deals with type-let
212 -- in a Let expression, rather than at top level. We may want to revist
213 -- this choice.
214 --
215 -- * Case expression. Operationally this corresponds to evaluating
216 -- the scrutinee (expression examined) to weak head normal form
217 -- and then examining at most one level of resulting constructor (i.e. you
218 -- cannot do nested pattern matching directly with this).
219 --
220 -- The binder gets bound to the value of the scrutinee,
221 -- and the 'Type' must be that of all the case alternatives
222 --
223 -- #case_invariants#
224 -- This is one of the more complicated elements of the Core language,
225 -- and comes with a number of restrictions:
226 --
227 -- 1. The list of alternatives may be empty;
228 -- See Note [Empty case alternatives]
229 --
230 -- 2. The 'DEFAULT' case alternative must be first in the list,
231 -- if it occurs at all.
232 --
233 -- 3. The remaining cases are in order of increasing
234 -- tag (for 'DataAlts') or
235 -- lit (for 'LitAlts').
236 -- This makes finding the relevant constructor easy,
237 -- and makes comparison easier too.
238 --
239 -- 4. The list of alternatives must be exhaustive. An /exhaustive/ case
240 -- does not necessarily mention all constructors:
241 --
242 -- @
243 -- data Foo = Red | Green | Blue
244 -- ... case x of
245 -- Red -> True
246 -- other -> f (case x of
247 -- Green -> ...
248 -- Blue -> ... ) ...
249 -- @
250 --
251 -- The inner case does not need a @Red@ alternative, because @x@
252 -- can't be @Red@ at that program point.
253 --
254 -- 5. Floating-point values must not be scrutinised against literals.
255 -- See Trac #9238 and Note [Rules for floating-point comparisons]
256 -- in PrelRules for rationale.
257 --
258 -- * Cast an expression to a particular type.
259 -- This is used to implement @newtype@s (a @newtype@ constructor or
260 -- destructor just becomes a 'Cast' in Core) and GADTs.
261 --
262 -- * Notes. These allow general information to be added to expressions
263 -- in the syntax tree
264 --
265 -- * A type: this should only show up at the top level of an Arg
266 --
267 -- * A coercion
268
269 -- If you edit this type, you may need to update the GHC formalism
270 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
271 data Expr b
272 = Var Id
273 | Lit Literal
274 | App (Expr b) (Arg b)
275 | Lam b (Expr b)
276 | Let (Bind b) (Expr b)
277 | Case (Expr b) b Type [Alt b] -- See #case_invariants#
278 | Cast (Expr b) Coercion
279 | Tick (Tickish Id) (Expr b)
280 | Type Type
281 | Coercion Coercion
282 deriving Data
283
284 -- | Type synonym for expressions that occur in function argument positions.
285 -- Only 'Arg' should contain a 'Type' at top level, general 'Expr' should not
286 type Arg b = Expr b
287
288 -- | A case split alternative. Consists of the constructor leading to the alternative,
289 -- the variables bound from the constructor, and the expression to be executed given that binding.
290 -- The default alternative is @(DEFAULT, [], rhs)@
291
292 -- If you edit this type, you may need to update the GHC formalism
293 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
294 type Alt b = (AltCon, [b], Expr b)
295
296 -- | A case alternative constructor (i.e. pattern match)
297
298 -- If you edit this type, you may need to update the GHC formalism
299 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
300 data AltCon
301 = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
302 -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
303
304 | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
305 -- Invariant: always an *unlifted* literal
306 -- See Note [Literal alternatives]
307
308 | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
309 deriving (Eq, Data)
310
311 -- This instance is a bit shady. It can only be used to compare AltCons for
312 -- a single type constructor. Fortunately, it seems quite unlikely that we'll
313 -- ever need to compare AltCons for different type constructors.
314 instance Ord AltCon where
315 compare (DataAlt con1) (DataAlt con2) =
316 ASSERT( dataConTyCon con1 == dataConTyCon con2 )
317 compare (dataConTag con1) (dataConTag con2)
318 compare (DataAlt _) _ = LT
319 compare _ (DataAlt _) = GT
320 compare (LitAlt l1) (LitAlt l2) = compare l1 l2
321 compare (LitAlt _) DEFAULT = LT
322 compare DEFAULT DEFAULT = EQ
323 compare DEFAULT _ = GT
324
325 -- | Binding, used for top level bindings in a module and local bindings in a @let@.
326
327 -- If you edit this type, you may need to update the GHC formalism
328 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
329 data Bind b = NonRec b (Expr b)
330 | Rec [(b, (Expr b))]
331 deriving Data
332
333 {-
334 Note [Shadowing]
335 ~~~~~~~~~~~~~~~~
336 While various passes attempt to rename on-the-fly in a manner that
337 avoids "shadowing" (thereby simplifying downstream optimizations),
338 neither the simplifier nor any other pass GUARANTEES that shadowing is
339 avoided. Thus, all passes SHOULD work fine even in the presence of
340 arbitrary shadowing in their inputs.
341
342 In particular, scrutinee variables `x` in expressions of the form
343 `Case e x t` are often renamed to variables with a prefix
344 "wild_". These "wild" variables may appear in the body of the
345 case-expression, and further, may be shadowed within the body.
346
347 So the Unique in an Var is not really unique at all. Still, it's very
348 useful to give a constant-time equality/ordering for Vars, and to give
349 a key that can be used to make sets of Vars (VarSet), or mappings from
350 Vars to other things (VarEnv). Moreover, if you do want to eliminate
351 shadowing, you can give a new Unique to an Id without changing its
352 printable name, which makes debugging easier.
353
354 Note [Literal alternatives]
355 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
356 Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
357 We have one literal, a literal Integer, that is lifted, and we don't
358 allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
359 (see Trac #5603) if you say
360 case 3 of
361 S# x -> ...
362 J# _ _ -> ...
363 (where S#, J# are the constructors for Integer) we don't want the
364 simplifier calling findAlt with argument (LitAlt 3). No no. Integer
365 literals are an opaque encoding of an algebraic data type, not of
366 an unlifted literal, like all the others.
367
368 Also, we do not permit case analysis with literal patterns on floating-point
369 types. See Trac #9238 and Note [Rules for floating-point comparisons] in
370 PrelRules for the rationale for this restriction.
371
372 -------------------------- CoreSyn INVARIANTS ---------------------------
373
374 Note [CoreSyn top-level invariant]
375 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
376 See #toplevel_invariant#
377
378 Note [CoreSyn letrec invariant]
379 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
380 See #letrec_invariant#
381
382 Note [CoreSyn top-level string literals]
383 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
384 As an exception to the usual rule that top-level binders must be lifted,
385 we allow binding primitive string literals (of type Addr#) of type Addr# at the
386 top level. This allows us to share string literals earlier in the pipeline and
387 crucially allows other optimizations in the Core2Core pipeline to fire.
388 Consider,
389
390 f n = let a::Addr# = "foo"#
391 in \x -> blah
392
393 In order to be able to inline `f`, we would like to float `a` to the top.
394 Another option would be to inline `a`, but that would lead to duplicating string
395 literals, which we want to avoid. See Trac #8472.
396
397 The solution is simply to allow top-level unlifted binders. We can't allow
398 arbitrary unlifted expression at the top-level though, unlifted binders cannot
399 be thunks, so we just allow string literals.
400
401 It is important to note that top-level primitive string literals cannot be
402 wrapped in Ticks, as is otherwise done with lifted bindings. CoreToStg expects
403 to see just a plain (Lit (MachStr ...)) expression on the RHS of primitive
404 string bindings; anything else and things break. CoreLint checks this invariant.
405
406 Also see Note [Compilation plan for top-level string literals].
407
408 Note [Compilation plan for top-level string literals]
409 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
410 Here is a summary on how top-level string literals are handled by various
411 parts of the compilation pipeline.
412
413 * In the source language, there is no way to bind a primitive string literal
414 at the top leve.
415
416 * In Core, we have a special rule that permits top-level Addr# bindings. See
417 Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
418 new top-level string literals.
419
420 * In STG, top-level string literals are explicitly represented in the syntax
421 tree.
422
423 * A top-level string literal may end up exported from a module. In this case,
424 in the object file, the content of the exported literal is given a label with
425 the _bytes suffix.
426
427 Note [CoreSyn let/app invariant]
428 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
429 The let/app invariant
430 the right hand side of a non-recursive 'Let', and
431 the argument of an 'App',
432 /may/ be of unlifted type, but only if
433 the expression is ok-for-speculation
434 or the 'Let' is for a join point.
435
436 This means that the let can be floated around
437 without difficulty. For example, this is OK:
438
439 y::Int# = x +# 1#
440
441 But this is not, as it may affect termination if the
442 expression is floated out:
443
444 y::Int# = fac 4#
445
446 In this situation you should use @case@ rather than a @let@. The function
447 'CoreUtils.needsCaseBinding' can help you determine which to generate, or
448 alternatively use 'MkCore.mkCoreLet' rather than this constructor directly,
449 which will generate a @case@ if necessary
450
451 The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in
452 coreSyn/MkCore.
453
454 Note [CoreSyn case invariants]
455 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
456 See #case_invariants#
457
458 Note [Levity polymorphism invariants]
459 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
460 The levity-polymorphism invariants are these (as per "Levity Polymorphism",
461 PLDI '17):
462
463 * The type of a term-binder must not be levity-polymorphic,
464 unless it is a let(rec)-bound join point
465 (see Note [Invariants on join points])
466
467 * The type of the argument of an App must not be levity-polymorphic.
468
469 A type (t::TYPE r) is "levity polymorphic" if 'r' has any free variables.
470
471 For example
472 \(r::RuntimeRep). \(a::TYPE r). \(x::a). e
473 is illegal because x's type has kind (TYPE r), which has 'r' free.
474
475 See Note [Levity polymorphism checking] in DsMonad to see where these
476 invariants are established for user-written code.
477
478 Note [CoreSyn let goal]
479 ~~~~~~~~~~~~~~~~~~~~~~~
480 * The simplifier tries to ensure that if the RHS of a let is a constructor
481 application, its arguments are trivial, so that the constructor can be
482 inlined vigorously.
483
484 Note [Type let]
485 ~~~~~~~~~~~~~~~
486 See #type_let#
487
488 Note [Empty case alternatives]
489 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
490 The alternatives of a case expression should be exhaustive. But
491 this exhaustive list can be empty!
492
493 * A case expression can have empty alternatives if (and only if) the
494 scrutinee is bound to raise an exception or diverge. When do we know
495 this? See Note [Bottoming expressions] in CoreUtils.
496
497 * The possiblity of empty alternatives is one reason we need a type on
498 the case expression: if the alternatives are empty we can't get the
499 type from the alternatives!
500
501 * In the case of empty types (see Note [Bottoming expressions]), say
502 data T
503 we do NOT want to replace
504 case (x::T) of Bool {} --> error Bool "Inaccessible case"
505 because x might raise an exception, and *that*'s what we want to see!
506 (Trac #6067 is an example.) To preserve semantics we'd have to say
507 x `seq` error Bool "Inaccessible case"
508 but the 'seq' is just a case, so we are back to square 1. Or I suppose
509 we could say
510 x |> UnsafeCoerce T Bool
511 but that loses all trace of the fact that this originated with an empty
512 set of alternatives.
513
514 * We can use the empty-alternative construct to coerce error values from
515 one type to another. For example
516
517 f :: Int -> Int
518 f n = error "urk"
519
520 g :: Int -> (# Char, Bool #)
521 g x = case f x of { 0 -> ..., n -> ... }
522
523 Then if we inline f in g's RHS we get
524 case (error Int "urk") of (# Char, Bool #) { ... }
525 and we can discard the alternatives since the scrutinee is bottom to give
526 case (error Int "urk") of (# Char, Bool #) {}
527
528 This is nicer than using an unsafe coerce between Int ~ (# Char,Bool #),
529 if for no other reason that we don't need to instantiate the (~) at an
530 unboxed type.
531
532 * We treat a case expression with empty alternatives as trivial iff
533 its scrutinee is (see CoreUtils.exprIsTrivial). This is actually
534 important; see Note [Empty case is trivial] in CoreUtils
535
536 * An empty case is replaced by its scrutinee during the CoreToStg
537 conversion; remember STG is un-typed, so there is no need for
538 the empty case to do the type conversion.
539
540 Note [Join points]
541 ~~~~~~~~~~~~~~~~~~
542 In Core, a *join point* is a specially tagged function whose only occurrences
543 are saturated tail calls. A tail call can appear in these places:
544
545 1. In the branches (not the scrutinee) of a case
546 2. Underneath a let (value or join point)
547 3. Inside another join point
548
549 We write a join-point declaration as
550 join j @a @b x y = e1 in e2,
551 like a let binding but with "join" instead (or "join rec" for "let rec"). Note
552 that we put the parameters before the = rather than using lambdas; this is
553 because it's relevant how many parameters the join point takes *as a join
554 point.* This number is called the *join arity,* distinct from arity because it
555 counts types as well as values. Note that a join point may return a lambda! So
556 join j x = x + 1
557 is different from
558 join j = \x -> x + 1
559 The former has join arity 1, while the latter has join arity 0.
560
561 The identifier for a join point is called a join id or a *label.* An invocation
562 is called a *jump.* We write a jump using the jump keyword:
563
564 jump j 3
565
566 The words *label* and *jump* are evocative of assembly code (or Cmm) for a
567 reason: join points are indeed compiled as labeled blocks, and jumps become
568 actual jumps (plus argument passing and stack adjustment). There is no closure
569 allocated and only a fraction of the function-call overhead. Hence we would
570 like as many functions as possible to become join points (see OccurAnal) and
571 the type rules for join points ensure we preserve the properties that make them
572 efficient.
573
574 In the actual AST, a join point is indicated by the IdDetails of the binder: a
575 local value binding gets 'VanillaId' but a join point gets a 'JoinId' with its
576 join arity.
577
578 For more details, see the paper:
579
580 Luke Maurer, Paul Downen, Zena Ariola, and Simon Peyton Jones. "Compiling
581 without continuations." Submitted to PLDI'17.
582
583 https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/
584
585 Note [Invariants on join points]
586 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
587 Join points must follow these invariants:
588
589 1. All occurrences must be tail calls. Each of these tail calls must pass the
590 same number of arguments, counting both types and values; we call this the
591 "join arity" (to distinguish from regular arity, which only counts values).
592
593 2. For join arity n, the right-hand side must begin with at least n lambdas.
594 No ticks, no casts, just lambdas! C.f. CoreUtils.joinRhsArity.
595
596 2a. Moreover, this same constraint applies to any unfolding of the binder.
597 Reason: if we want to push a continuation into the RHS we must push it
598 into the unfolding as well.
599
600 3. If the binding is recursive, then all other bindings in the recursive group
601 must also be join points.
602
603 4. The binding's type must not be polymorphic in its return type (as defined
604 in Note [The polymorphism rule of join points]).
605
606 However, join points have simpler invariants in other ways
607
608 5. A join point can have an unboxed type without the RHS being
609 ok-for-speculation (i.e. drop the let/app invariant)
610 e.g. let j :: Int# = factorial x in ...
611
612 6. A join point can have a levity-polymorphic RHS
613 e.g. let j :: r :: TYPE l = fail void# in ...
614 This happened in an intermediate program Trac #13394
615
616 Examples:
617
618 join j1 x = 1 + x in jump j (jump j x) -- Fails 1: non-tail call
619 join j1' x = 1 + x in if even a
620 then jump j1 a
621 else jump j1 a b -- Fails 1: inconsistent calls
622 join j2 x = flip (+) x in j2 1 2 -- Fails 2: not enough lambdas
623 join j2' x = \y -> x + y in j3 1 -- Passes: extra lams ok
624 join j @a (x :: a) = x -- Fails 4: polymorphic in ret type
625
626 Invariant 1 applies to left-hand sides of rewrite rules, so a rule for a join
627 point must have an exact call as its LHS.
628
629 Strictly speaking, invariant 3 is redundant, since a call from inside a lazy
630 binding isn't a tail call. Since a let-bound value can't invoke a free join
631 point, then, they can't be mutually recursive. (A Core binding group *can*
632 include spurious extra bindings if the occurrence analyser hasn't run, so
633 invariant 3 does still need to be checked.) For the rigorous definition of
634 "tail call", see Section 3 of the paper (Note [Join points]).
635
636 Invariant 4 is subtle; see Note [The polymorphism rule of join points].
637
638 Core Lint will check these invariants, anticipating that any binder whose
639 OccInfo is marked AlwaysTailCalled will become a join point as soon as the
640 simplifier (or simpleOptPgm) runs.
641
642 Note [The type of a join point]
643 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
644 A join point has the same type it would have as a function. That is, if it takes
645 an Int and a Bool and its body produces a String, its type is `Int -> Bool ->
646 String`. Natural as this may seem, it can be awkward. A join point shouldn't be
647 thought to "return" in the same sense a function does---a jump is one-way. This
648 is crucial for understanding how case-of-case interacts with join points:
649
650 case (join
651 j :: Int -> Bool -> String
652 j x y = ...
653 in
654 jump j z w) of
655 "" -> True
656 _ -> False
657
658 The simplifier will pull the case into the join point (see Note [Case-of-case
659 and join points] in Simplify):
660
661 join
662 j :: Int -> Bool -> Bool -- changed!
663 j x y = case ... of "" -> True
664 _ -> False
665 in
666 jump j z w
667
668 The body of the join point now returns a Bool, so the label `j` has to have its
669 type updated accordingly. Inconvenient though this may be, it has the advantage
670 that 'CoreUtils.exprType' can still return a type for any expression, including
671 a jump.
672
673 This differs from the paper (see Note [Invariants on join points]). In the
674 paper, we instead give j the type `Int -> Bool -> forall a. a`. Then each jump
675 carries the "return type" as a parameter, exactly the way other non-returning
676 functions like `error` work:
677
678 case (join
679 j :: Int -> Bool -> forall a. a
680 j x y = ...
681 in
682 jump j z w @String) of
683 "" -> True
684 _ -> False
685
686 Now we can move the case inward and we only have to change the jump:
687
688 join
689 j :: Int -> Bool -> forall a. a
690 j x y = case ... of "" -> True
691 _ -> False
692 in
693 jump j z w @Bool
694
695 (Core Lint would still check that the body of the join point has the right type;
696 that type would simply not be reflected in the join id.)
697
698 Note [The polymorphism rule of join points]
699 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
700 Invariant 4 of Note [Invariants on join points] forbids a join point to be
701 polymorphic in its return type. That is, if its type is
702
703 forall a1 ... ak. t1 -> ... -> tn -> r
704
705 where its join arity is k+n, none of the type parameters ai may occur free in r.
706 The most direct explanation is that given
707
708 join j @a1 ... @ak x1 ... xn = e1 in e2
709
710 our typing rules require `e1` and `e2` to have the same type. Therefore the type
711 of `e1`---the return type of the join point---must be the same as the type of
712 e2. Since the type variables aren't bound in `e2`, its type can't include them,
713 and thus neither can the type of `e1`.
714
715 There's a deeper explanation in terms of the sequent calculus in Section 5.3 of
716 a previous paper:
717
718 Paul Downen, Luke Maurer, Zena Ariola, and Simon Peyton Jones. "Sequent
719 calculus as a compiler intermediate language." ICFP'16.
720
721 https://www.microsoft.com/en-us/research/wp-content/uploads/2016/04/sequent-calculus-icfp16.pdf
722
723 The quick version: Consider the CPS term (the paper uses the sequent calculus,
724 but we can translate readily):
725
726 \k -> join j @a1 ... @ak x1 ... xn = e1 k in e2 k
727
728 Since `j` is a join point, it doesn't bind a continuation variable but reuses
729 the variable `k` from the context. But the parameters `ai` are not in `k`'s
730 scope, and `k`'s type determines the return type of `j`; thus the `ai`s don't
731 appear in the return type of `j`. (Also, since `e1` and `e2` are passed the same
732 continuation, they must have the same type; hence the direct explanation above.)
733
734 ************************************************************************
735 * *
736 In/Out type synonyms
737 * *
738 ********************************************************************* -}
739
740 {- Many passes apply a substitution, and it's very handy to have type
741 synonyms to remind us whether or not the substitution has been applied -}
742
743 -- Pre-cloning or substitution
744 type InBndr = CoreBndr
745 type InType = Type
746 type InKind = Kind
747 type InBind = CoreBind
748 type InExpr = CoreExpr
749 type InAlt = CoreAlt
750 type InArg = CoreArg
751 type InCoercion = Coercion
752
753 -- Post-cloning or substitution
754 type OutBndr = CoreBndr
755 type OutType = Type
756 type OutKind = Kind
757 type OutCoercion = Coercion
758 type OutBind = CoreBind
759 type OutExpr = CoreExpr
760 type OutAlt = CoreAlt
761 type OutArg = CoreArg
762
763
764 {- *********************************************************************
765 * *
766 Ticks
767 * *
768 ************************************************************************
769 -}
770
771 -- | Allows attaching extra information to points in expressions
772
773 -- If you edit this type, you may need to update the GHC formalism
774 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
775 data Tickish id =
776 -- | An @{-# SCC #-}@ profiling annotation, either automatically
777 -- added by the desugarer as a result of -auto-all, or added by
778 -- the user.
779 ProfNote {
780 profNoteCC :: CostCentre, -- ^ the cost centre
781 profNoteCount :: !Bool, -- ^ bump the entry count?
782 profNoteScope :: !Bool -- ^ scopes over the enclosed expression
783 -- (i.e. not just a tick)
784 }
785
786 -- | A "tick" used by HPC to track the execution of each
787 -- subexpression in the original source code.
788 | HpcTick {
789 tickModule :: Module,
790 tickId :: !Int
791 }
792
793 -- | A breakpoint for the GHCi debugger. This behaves like an HPC
794 -- tick, but has a list of free variables which will be available
795 -- for inspection in GHCi when the program stops at the breakpoint.
796 --
797 -- NB. we must take account of these Ids when (a) counting free variables,
798 -- and (b) substituting (don't substitute for them)
799 | Breakpoint
800 { breakpointId :: !Int
801 , breakpointFVs :: [id] -- ^ the order of this list is important:
802 -- it matches the order of the lists in the
803 -- appropriate entry in HscTypes.ModBreaks.
804 --
805 -- Careful about substitution! See
806 -- Note [substTickish] in CoreSubst.
807 }
808
809 -- | A source note.
810 --
811 -- Source notes are pure annotations: Their presence should neither
812 -- influence compilation nor execution. The semantics are given by
813 -- causality: The presence of a source note means that a local
814 -- change in the referenced source code span will possibly provoke
815 -- the generated code to change. On the flip-side, the functionality
816 -- of annotated code *must* be invariant against changes to all
817 -- source code *except* the spans referenced in the source notes
818 -- (see "Causality of optimized Haskell" paper for details).
819 --
820 -- Therefore extending the scope of any given source note is always
821 -- valid. Note that it is still undesirable though, as this reduces
822 -- their usefulness for debugging and profiling. Therefore we will
823 -- generally try only to make use of this property where it is
824 -- necessary to enable optimizations.
825 | SourceNote
826 { sourceSpan :: RealSrcSpan -- ^ Source covered
827 , sourceName :: String -- ^ Name for source location
828 -- (uses same names as CCs)
829 }
830
831 deriving (Eq, Ord, Data)
832
833 -- | A "counting tick" (where tickishCounts is True) is one that
834 -- counts evaluations in some way. We cannot discard a counting tick,
835 -- and the compiler should preserve the number of counting ticks as
836 -- far as possible.
837 --
838 -- However, we still allow the simplifier to increase or decrease
839 -- sharing, so in practice the actual number of ticks may vary, except
840 -- that we never change the value from zero to non-zero or vice versa.
841 tickishCounts :: Tickish id -> Bool
842 tickishCounts n@ProfNote{} = profNoteCount n
843 tickishCounts HpcTick{} = True
844 tickishCounts Breakpoint{} = True
845 tickishCounts _ = False
846
847
848 -- | Specifies the scoping behaviour of ticks. This governs the
849 -- behaviour of ticks that care about the covered code and the cost
850 -- associated with it. Important for ticks relating to profiling.
851 data TickishScoping =
852 -- | No scoping: The tick does not care about what code it
853 -- covers. Transformations can freely move code inside as well as
854 -- outside without any additional annotation obligations
855 NoScope
856
857 -- | Soft scoping: We want all code that is covered to stay
858 -- covered. Note that this scope type does not forbid
859 -- transformations from happening, as as long as all results of
860 -- the transformations are still covered by this tick or a copy of
861 -- it. For example
862 --
863 -- let x = tick<...> (let y = foo in bar) in baz
864 -- ===>
865 -- let x = tick<...> bar; y = tick<...> foo in baz
866 --
867 -- Is a valid transformation as far as "bar" and "foo" is
868 -- concerned, because both still are scoped over by the tick.
869 --
870 -- Note though that one might object to the "let" not being
871 -- covered by the tick any more. However, we are generally lax
872 -- with this - constant costs don't matter too much, and given
873 -- that the "let" was effectively merged we can view it as having
874 -- lost its identity anyway.
875 --
876 -- Also note that this scoping behaviour allows floating a tick
877 -- "upwards" in pretty much any situation. For example:
878 --
879 -- case foo of x -> tick<...> bar
880 -- ==>
881 -- tick<...> case foo of x -> bar
882 --
883 -- While this is always leagl, we want to make a best effort to
884 -- only make us of this where it exposes transformation
885 -- opportunities.
886 | SoftScope
887
888 -- | Cost centre scoping: We don't want any costs to move to other
889 -- cost-centre stacks. This means we not only want no code or cost
890 -- to get moved out of their cost centres, but we also object to
891 -- code getting associated with new cost-centre ticks - or
892 -- changing the order in which they get applied.
893 --
894 -- A rule of thumb is that we don't want any code to gain new
895 -- annotations. However, there are notable exceptions, for
896 -- example:
897 --
898 -- let f = \y -> foo in tick<...> ... (f x) ...
899 -- ==>
900 -- tick<...> ... foo[x/y] ...
901 --
902 -- In-lining lambdas like this is always legal, because inlining a
903 -- function does not change the cost-centre stack when the
904 -- function is called.
905 | CostCentreScope
906
907 deriving (Eq)
908
909 -- | Returns the intended scoping rule for a Tickish
910 tickishScoped :: Tickish id -> TickishScoping
911 tickishScoped n@ProfNote{}
912 | profNoteScope n = CostCentreScope
913 | otherwise = NoScope
914 tickishScoped HpcTick{} = NoScope
915 tickishScoped Breakpoint{} = CostCentreScope
916 -- Breakpoints are scoped: eventually we're going to do call
917 -- stacks, but also this helps prevent the simplifier from moving
918 -- breakpoints around and changing their result type (see #1531).
919 tickishScoped SourceNote{} = SoftScope
920
921 -- | Returns whether the tick scoping rule is at least as permissive
922 -- as the given scoping rule.
923 tickishScopesLike :: Tickish id -> TickishScoping -> Bool
924 tickishScopesLike t scope = tickishScoped t `like` scope
925 where NoScope `like` _ = True
926 _ `like` NoScope = False
927 SoftScope `like` _ = True
928 _ `like` SoftScope = False
929 CostCentreScope `like` _ = True
930
931 -- | Returns @True@ for ticks that can be floated upwards easily even
932 -- where it might change execution counts, such as:
933 --
934 -- Just (tick<...> foo)
935 -- ==>
936 -- tick<...> (Just foo)
937 --
938 -- This is a combination of @tickishSoftScope@ and
939 -- @tickishCounts@. Note that in principle splittable ticks can become
940 -- floatable using @mkNoTick@ -- even though there's currently no
941 -- tickish for which that is the case.
942 tickishFloatable :: Tickish id -> Bool
943 tickishFloatable t = t `tickishScopesLike` SoftScope && not (tickishCounts t)
944
945 -- | Returns @True@ for a tick that is both counting /and/ scoping and
946 -- can be split into its (tick, scope) parts using 'mkNoScope' and
947 -- 'mkNoTick' respectively.
948 tickishCanSplit :: Tickish id -> Bool
949 tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True}
950 = True
951 tickishCanSplit _ = False
952
953 mkNoCount :: Tickish id -> Tickish id
954 mkNoCount n | not (tickishCounts n) = n
955 | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!"
956 mkNoCount n@ProfNote{} = n {profNoteCount = False}
957 mkNoCount _ = panic "mkNoCount: Undefined split!"
958
959 mkNoScope :: Tickish id -> Tickish id
960 mkNoScope n | tickishScoped n == NoScope = n
961 | not (tickishCanSplit n) = panic "mkNoScope: Cannot split!"
962 mkNoScope n@ProfNote{} = n {profNoteScope = False}
963 mkNoScope _ = panic "mkNoScope: Undefined split!"
964
965 -- | Return @True@ if this source annotation compiles to some backend
966 -- code. Without this flag, the tickish is seen as a simple annotation
967 -- that does not have any associated evaluation code.
968 --
969 -- What this means that we are allowed to disregard the tick if doing
970 -- so means that we can skip generating any code in the first place. A
971 -- typical example is top-level bindings:
972 --
973 -- foo = tick<...> \y -> ...
974 -- ==>
975 -- foo = \y -> tick<...> ...
976 --
977 -- Here there is just no operational difference between the first and
978 -- the second version. Therefore code generation should simply
979 -- translate the code as if it found the latter.
980 tickishIsCode :: Tickish id -> Bool
981 tickishIsCode SourceNote{} = False
982 tickishIsCode _tickish = True -- all the rest for now
983
984
985 -- | Governs the kind of expression that the tick gets placed on when
986 -- annotating for example using @mkTick@. If we find that we want to
987 -- put a tickish on an expression ruled out here, we try to float it
988 -- inwards until we find a suitable expression.
989 data TickishPlacement =
990
991 -- | Place ticks exactly on run-time expressions. We can still
992 -- move the tick through pure compile-time constructs such as
993 -- other ticks, casts or type lambdas. This is the most
994 -- restrictive placement rule for ticks, as all tickishs have in
995 -- common that they want to track runtime processes. The only
996 -- legal placement rule for counting ticks.
997 PlaceRuntime
998
999 -- | As @PlaceRuntime@, but we float the tick through all
1000 -- lambdas. This makes sense where there is little difference
1001 -- between annotating the lambda and annotating the lambda's code.
1002 | PlaceNonLam
1003
1004 -- | In addition to floating through lambdas, cost-centre style
1005 -- tickishs can also be moved from constructors, non-function
1006 -- variables and literals. For example:
1007 --
1008 -- let x = scc<...> C (scc<...> y) (scc<...> 3) in ...
1009 --
1010 -- Neither the constructor application, the variable or the
1011 -- literal are likely to have any cost worth mentioning. And even
1012 -- if y names a thunk, the call would not care about the
1013 -- evaluation context. Therefore removing all annotations in the
1014 -- above example is safe.
1015 | PlaceCostCentre
1016
1017 deriving (Eq)
1018
1019 -- | Placement behaviour we want for the ticks
1020 tickishPlace :: Tickish id -> TickishPlacement
1021 tickishPlace n@ProfNote{}
1022 | profNoteCount n = PlaceRuntime
1023 | otherwise = PlaceCostCentre
1024 tickishPlace HpcTick{} = PlaceRuntime
1025 tickishPlace Breakpoint{} = PlaceRuntime
1026 tickishPlace SourceNote{} = PlaceNonLam
1027
1028 -- | Returns whether one tick "contains" the other one, therefore
1029 -- making the second tick redundant.
1030 tickishContains :: Eq b => Tickish b -> Tickish b -> Bool
1031 tickishContains (SourceNote sp1 n1) (SourceNote sp2 n2)
1032 = containsSpan sp1 sp2 && n1 == n2
1033 -- compare the String last
1034 tickishContains t1 t2
1035 = t1 == t2
1036
1037 {-
1038 ************************************************************************
1039 * *
1040 Orphans
1041 * *
1042 ************************************************************************
1043 -}
1044
1045 -- | Is this instance an orphan? If it is not an orphan, contains an 'OccName'
1046 -- witnessing the instance's non-orphanhood.
1047 -- See Note [Orphans]
1048 data IsOrphan
1049 = IsOrphan
1050 | NotOrphan OccName -- The OccName 'n' witnesses the instance's non-orphanhood
1051 -- In that case, the instance is fingerprinted as part
1052 -- of the definition of 'n's definition
1053 deriving Data
1054
1055 -- | Returns true if 'IsOrphan' is orphan.
1056 isOrphan :: IsOrphan -> Bool
1057 isOrphan IsOrphan = True
1058 isOrphan _ = False
1059
1060 -- | Returns true if 'IsOrphan' is not an orphan.
1061 notOrphan :: IsOrphan -> Bool
1062 notOrphan NotOrphan{} = True
1063 notOrphan _ = False
1064
1065 chooseOrphanAnchor :: NameSet -> IsOrphan
1066 -- Something (rule, instance) is relate to all the Names in this
1067 -- list. Choose one of them to be an "anchor" for the orphan. We make
1068 -- the choice deterministic to avoid gratuitious changes in the ABI
1069 -- hash (Trac #4012). Specifically, use lexicographic comparison of
1070 -- OccName rather than comparing Uniques
1071 --
1072 -- NB: 'minimum' use Ord, and (Ord OccName) works lexicographically
1073 --
1074 chooseOrphanAnchor local_names
1075 | isEmptyNameSet local_names = IsOrphan
1076 | otherwise = NotOrphan (minimum occs)
1077 where
1078 occs = map nameOccName $ nonDetEltsUniqSet local_names
1079 -- It's OK to use nonDetEltsUFM here, see comments above
1080
1081 instance Binary IsOrphan where
1082 put_ bh IsOrphan = putByte bh 0
1083 put_ bh (NotOrphan n) = do
1084 putByte bh 1
1085 put_ bh n
1086 get bh = do
1087 h <- getByte bh
1088 case h of
1089 0 -> return IsOrphan
1090 _ -> do
1091 n <- get bh
1092 return $ NotOrphan n
1093
1094 {-
1095 Note [Orphans]
1096 ~~~~~~~~~~~~~~
1097 Class instances, rules, and family instances are divided into orphans
1098 and non-orphans. Roughly speaking, an instance/rule is an orphan if
1099 its left hand side mentions nothing defined in this module. Orphan-hood
1100 has two major consequences
1101
1102 * A module that contains orphans is called an "orphan module". If
1103 the module being compiled depends (transitively) on an oprhan
1104 module M, then M.hi is read in regardless of whether M is oherwise
1105 needed. This is to ensure that we don't miss any instance decls in
1106 M. But it's painful, because it means we need to keep track of all
1107 the orphan modules below us.
1108
1109 * A non-orphan is not finger-printed separately. Instead, for
1110 fingerprinting purposes it is treated as part of the entity it
1111 mentions on the LHS. For example
1112 data T = T1 | T2
1113 instance Eq T where ....
1114 The instance (Eq T) is incorprated as part of T's fingerprint.
1115
1116 In contrast, orphans are all fingerprinted together in the
1117 mi_orph_hash field of the ModIface.
1118
1119 See MkIface.addFingerprints.
1120
1121 Orphan-hood is computed
1122 * For class instances:
1123 when we make a ClsInst
1124 (because it is needed during instance lookup)
1125
1126 * For rules and family instances:
1127 when we generate an IfaceRule (MkIface.coreRuleToIfaceRule)
1128 or IfaceFamInst (MkIface.instanceToIfaceInst)
1129 -}
1130
1131 {-
1132 ************************************************************************
1133 * *
1134 \subsection{Transformation rules}
1135 * *
1136 ************************************************************************
1137
1138 The CoreRule type and its friends are dealt with mainly in CoreRules,
1139 but CoreFVs, Subst, PprCore, CoreTidy also inspect the representation.
1140 -}
1141
1142 -- | Gathers a collection of 'CoreRule's. Maps (the name of) an 'Id' to its rules
1143 type RuleBase = NameEnv [CoreRule]
1144 -- The rules are unordered;
1145 -- we sort out any overlaps on lookup
1146
1147 -- | A full rule environment which we can apply rules from. Like a 'RuleBase',
1148 -- but it also includes the set of visible orphans we use to filter out orphan
1149 -- rules which are not visible (even though we can see them...)
1150 data RuleEnv
1151 = RuleEnv { re_base :: RuleBase
1152 , re_visible_orphs :: ModuleSet
1153 }
1154
1155 mkRuleEnv :: RuleBase -> [Module] -> RuleEnv
1156 mkRuleEnv rules vis_orphs = RuleEnv rules (mkModuleSet vis_orphs)
1157
1158 emptyRuleEnv :: RuleEnv
1159 emptyRuleEnv = RuleEnv emptyNameEnv emptyModuleSet
1160
1161 -- | A 'CoreRule' is:
1162 --
1163 -- * \"Local\" if the function it is a rule for is defined in the
1164 -- same module as the rule itself.
1165 --
1166 -- * \"Orphan\" if nothing on the LHS is defined in the same module
1167 -- as the rule itself
1168 data CoreRule
1169 = Rule {
1170 ru_name :: RuleName, -- ^ Name of the rule, for communication with the user
1171 ru_act :: Activation, -- ^ When the rule is active
1172
1173 -- Rough-matching stuff
1174 -- see comments with InstEnv.ClsInst( is_cls, is_rough )
1175 ru_fn :: Name, -- ^ Name of the 'Id.Id' at the head of this rule
1176 ru_rough :: [Maybe Name], -- ^ Name at the head of each argument to the left hand side
1177
1178 -- Proper-matching stuff
1179 -- see comments with InstEnv.ClsInst( is_tvs, is_tys )
1180 ru_bndrs :: [CoreBndr], -- ^ Variables quantified over
1181 ru_args :: [CoreExpr], -- ^ Left hand side arguments
1182
1183 -- And the right-hand side
1184 ru_rhs :: CoreExpr, -- ^ Right hand side of the rule
1185 -- Occurrence info is guaranteed correct
1186 -- See Note [OccInfo in unfoldings and rules]
1187
1188 -- Locality
1189 ru_auto :: Bool, -- ^ @True@ <=> this rule is auto-generated
1190 -- (notably by Specialise or SpecConstr)
1191 -- @False@ <=> generated at the user's behest
1192 -- See Note [Trimming auto-rules] in TidyPgm
1193 -- for the sole purpose of this field.
1194
1195 ru_origin :: !Module, -- ^ 'Module' the rule was defined in, used
1196 -- to test if we should see an orphan rule.
1197
1198 ru_orphan :: !IsOrphan, -- ^ Whether or not the rule is an orphan.
1199
1200 ru_local :: Bool -- ^ @True@ iff the fn at the head of the rule is
1201 -- defined in the same module as the rule
1202 -- and is not an implicit 'Id' (like a record selector,
1203 -- class operation, or data constructor). This
1204 -- is different from 'ru_orphan', where a rule
1205 -- can avoid being an orphan if *any* Name in
1206 -- LHS of the rule was defined in the same
1207 -- module as the rule.
1208 }
1209
1210 -- | Built-in rules are used for constant folding
1211 -- and suchlike. They have no free variables.
1212 -- A built-in rule is always visible (there is no such thing as
1213 -- an orphan built-in rule.)
1214 | BuiltinRule {
1215 ru_name :: RuleName, -- ^ As above
1216 ru_fn :: Name, -- ^ As above
1217 ru_nargs :: Int, -- ^ Number of arguments that 'ru_try' consumes,
1218 -- if it fires, including type arguments
1219 ru_try :: RuleFun
1220 -- ^ This function does the rewrite. It given too many
1221 -- arguments, it simply discards them; the returned 'CoreExpr'
1222 -- is just the rewrite of 'ru_fn' applied to the first 'ru_nargs' args
1223 }
1224 -- See Note [Extra args in rule matching] in Rules.hs
1225
1226 type RuleFun = DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe CoreExpr
1227 type InScopeEnv = (InScopeSet, IdUnfoldingFun)
1228
1229 type IdUnfoldingFun = Id -> Unfolding
1230 -- A function that embodies how to unfold an Id if you need
1231 -- to do that in the Rule. The reason we need to pass this info in
1232 -- is that whether an Id is unfoldable depends on the simplifier phase
1233
1234 isBuiltinRule :: CoreRule -> Bool
1235 isBuiltinRule (BuiltinRule {}) = True
1236 isBuiltinRule _ = False
1237
1238 isAutoRule :: CoreRule -> Bool
1239 isAutoRule (BuiltinRule {}) = False
1240 isAutoRule (Rule { ru_auto = is_auto }) = is_auto
1241
1242 -- | The number of arguments the 'ru_fn' must be applied
1243 -- to before the rule can match on it
1244 ruleArity :: CoreRule -> Int
1245 ruleArity (BuiltinRule {ru_nargs = n}) = n
1246 ruleArity (Rule {ru_args = args}) = length args
1247
1248 ruleName :: CoreRule -> RuleName
1249 ruleName = ru_name
1250
1251 ruleModule :: CoreRule -> Maybe Module
1252 ruleModule Rule { ru_origin } = Just ru_origin
1253 ruleModule BuiltinRule {} = Nothing
1254
1255 ruleActivation :: CoreRule -> Activation
1256 ruleActivation (BuiltinRule { }) = AlwaysActive
1257 ruleActivation (Rule { ru_act = act }) = act
1258
1259 -- | The 'Name' of the 'Id.Id' at the head of the rule left hand side
1260 ruleIdName :: CoreRule -> Name
1261 ruleIdName = ru_fn
1262
1263 isLocalRule :: CoreRule -> Bool
1264 isLocalRule = ru_local
1265
1266 -- | Set the 'Name' of the 'Id.Id' at the head of the rule left hand side
1267 setRuleIdName :: Name -> CoreRule -> CoreRule
1268 setRuleIdName nm ru = ru { ru_fn = nm }
1269
1270 {-
1271 ************************************************************************
1272 * *
1273 \subsection{Vectorisation declarations}
1274 * *
1275 ************************************************************************
1276
1277 Representation of desugared vectorisation declarations that are fed to the vectoriser (via
1278 'ModGuts').
1279 -}
1280
1281 data CoreVect = Vect Id CoreExpr
1282 | NoVect Id
1283 | VectType Bool TyCon (Maybe TyCon)
1284 | VectClass TyCon -- class tycon
1285 | VectInst Id -- instance dfun (always SCALAR) !!!FIXME: should be superfluous now
1286
1287 {-
1288 ************************************************************************
1289 * *
1290 Unfoldings
1291 * *
1292 ************************************************************************
1293
1294 The @Unfolding@ type is declared here to avoid numerous loops
1295 -}
1296
1297 -- | Records the /unfolding/ of an identifier, which is approximately the form the
1298 -- identifier would have if we substituted its definition in for the identifier.
1299 -- This type should be treated as abstract everywhere except in "CoreUnfold"
1300 data Unfolding
1301 = NoUnfolding -- ^ We have no information about the unfolding.
1302
1303 | BootUnfolding -- ^ We have no information about the unfolding, because
1304 -- this 'Id' came from an @hi-boot@ file.
1305 -- See Note [Inlining and hs-boot files] in ToIface
1306 -- for what this is used for.
1307
1308 | OtherCon [AltCon] -- ^ It ain't one of these constructors.
1309 -- @OtherCon xs@ also indicates that something has been evaluated
1310 -- and hence there's no point in re-evaluating it.
1311 -- @OtherCon []@ is used even for non-data-type values
1312 -- to indicated evaluated-ness. Notably:
1313 --
1314 -- > data C = C !(Int -> Int)
1315 -- > case x of { C f -> ... }
1316 --
1317 -- Here, @f@ gets an @OtherCon []@ unfolding.
1318
1319 | DFunUnfolding { -- The Unfolding of a DFunId
1320 -- See Note [DFun unfoldings]
1321 -- df = /\a1..am. \d1..dn. MkD t1 .. tk
1322 -- (op1 a1..am d1..dn)
1323 -- (op2 a1..am d1..dn)
1324 df_bndrs :: [Var], -- The bound variables [a1..m],[d1..dn]
1325 df_con :: DataCon, -- The dictionary data constructor (never a newtype datacon)
1326 df_args :: [CoreExpr] -- Args of the data con: types, superclasses and methods,
1327 } -- in positional order
1328
1329 | CoreUnfolding { -- An unfolding for an Id with no pragma,
1330 -- or perhaps a NOINLINE pragma
1331 -- (For NOINLINE, the phase, if any, is in the
1332 -- InlinePragInfo for this Id.)
1333 uf_tmpl :: CoreExpr, -- Template; occurrence info is correct
1334 uf_src :: UnfoldingSource, -- Where the unfolding came from
1335 uf_is_top :: Bool, -- True <=> top level binding
1336 uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard
1337 -- a `seq` on this variable
1338 uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function
1339 -- Cached version of exprIsConLike
1340 uf_is_work_free :: Bool, -- True <=> doesn't waste (much) work to expand
1341 -- inside an inlining
1342 -- Cached version of exprIsCheap
1343 uf_expandable :: Bool, -- True <=> can expand in RULE matching
1344 -- Cached version of exprIsExpandable
1345 uf_guidance :: UnfoldingGuidance -- Tells about the *size* of the template.
1346 }
1347 -- ^ An unfolding with redundant cached information. Parameters:
1348 --
1349 -- uf_tmpl: Template used to perform unfolding;
1350 -- NB: Occurrence info is guaranteed correct:
1351 -- see Note [OccInfo in unfoldings and rules]
1352 --
1353 -- uf_is_top: Is this a top level binding?
1354 --
1355 -- uf_is_value: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on
1356 -- this variable
1357 --
1358 -- uf_is_work_free: Does this waste only a little work if we expand it inside an inlining?
1359 -- Basically this is a cached version of 'exprIsWorkFree'
1360 --
1361 -- uf_guidance: Tells us about the /size/ of the unfolding template
1362
1363
1364 ------------------------------------------------
1365 data UnfoldingSource
1366 = -- See also Note [Historical note: unfoldings for wrappers]
1367
1368 InlineRhs -- The current rhs of the function
1369 -- Replace uf_tmpl each time around
1370
1371 | InlineStable -- From an INLINE or INLINABLE pragma
1372 -- INLINE if guidance is UnfWhen
1373 -- INLINABLE if guidance is UnfIfGoodArgs/UnfoldNever
1374 -- (well, technically an INLINABLE might be made
1375 -- UnfWhen if it was small enough, and then
1376 -- it will behave like INLINE outside the current
1377 -- module, but that is the way automatic unfoldings
1378 -- work so it is consistent with the intended
1379 -- meaning of INLINABLE).
1380 --
1381 -- uf_tmpl may change, but only as a result of
1382 -- gentle simplification, it doesn't get updated
1383 -- to the current RHS during compilation as with
1384 -- InlineRhs.
1385 --
1386 -- See Note [InlineStable]
1387
1388 | InlineCompulsory -- Something that *has* no binding, so you *must* inline it
1389 -- Only a few primop-like things have this property
1390 -- (see MkId.hs, calls to mkCompulsoryUnfolding).
1391 -- Inline absolutely always, however boring the context.
1392
1393
1394
1395 -- | 'UnfoldingGuidance' says when unfolding should take place
1396 data UnfoldingGuidance
1397 = UnfWhen { -- Inline without thinking about the *size* of the uf_tmpl
1398 -- Used (a) for small *and* cheap unfoldings
1399 -- (b) for INLINE functions
1400 -- See Note [INLINE for small functions] in CoreUnfold
1401 ug_arity :: Arity, -- Number of value arguments expected
1402
1403 ug_unsat_ok :: Bool, -- True <=> ok to inline even if unsaturated
1404 ug_boring_ok :: Bool -- True <=> ok to inline even if the context is boring
1405 -- So True,True means "always"
1406 }
1407
1408 | UnfIfGoodArgs { -- Arose from a normal Id; the info here is the
1409 -- result of a simple analysis of the RHS
1410
1411 ug_args :: [Int], -- Discount if the argument is evaluated.
1412 -- (i.e., a simplification will definitely
1413 -- be possible). One elt of the list per *value* arg.
1414
1415 ug_size :: Int, -- The "size" of the unfolding.
1416
1417 ug_res :: Int -- Scrutinee discount: the discount to substract if the thing is in
1418 } -- a context (case (thing args) of ...),
1419 -- (where there are the right number of arguments.)
1420
1421 | UnfNever -- The RHS is big, so don't inline it
1422 deriving (Eq)
1423
1424 {-
1425 Note [Historical note: unfoldings for wrappers]
1426 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1427 We used to have a nice clever scheme in interface files for
1428 wrappers. A wrapper's unfolding can be reconstructed from its worker's
1429 id and its strictness. This decreased .hi file size (sometimes
1430 significantly, for modules like GHC.Classes with many high-arity w/w
1431 splits) and had a slight corresponding effect on compile times.
1432
1433 However, when we added the second demand analysis, this scheme lead to
1434 some Core lint errors. The second analysis could change the strictness
1435 signatures, which sometimes resulted in a wrapper's regenerated
1436 unfolding applying the wrapper to too many arguments.
1437
1438 Instead of repairing the clever .hi scheme, we abandoned it in favor
1439 of simplicity. The .hi sizes are usually insignificant (excluding the
1440 +1M for base libraries), and compile time barely increases (~+1% for
1441 nofib). The nicer upshot is that the UnfoldingSource no longer mentions
1442 an Id, so, eg, substitutions need not traverse them.
1443
1444
1445 Note [DFun unfoldings]
1446 ~~~~~~~~~~~~~~~~~~~~~~
1447 The Arity in a DFunUnfolding is total number of args (type and value)
1448 that the DFun needs to produce a dictionary. That's not necessarily
1449 related to the ordinary arity of the dfun Id, esp if the class has
1450 one method, so the dictionary is represented by a newtype. Example
1451
1452 class C a where { op :: a -> Int }
1453 instance C a -> C [a] where op xs = op (head xs)
1454
1455 The instance translates to
1456
1457 $dfCList :: forall a. C a => C [a] -- Arity 2!
1458 $dfCList = /\a.\d. $copList {a} d |> co
1459
1460 $copList :: forall a. C a => [a] -> Int -- Arity 2!
1461 $copList = /\a.\d.\xs. op {a} d (head xs)
1462
1463 Now we might encounter (op (dfCList {ty} d) a1 a2)
1464 and we want the (op (dfList {ty} d)) rule to fire, because $dfCList
1465 has all its arguments, even though its (value) arity is 2. That's
1466 why we record the number of expected arguments in the DFunUnfolding.
1467
1468 Note that although it's an Arity, it's most convenient for it to give
1469 the *total* number of arguments, both type and value. See the use
1470 site in exprIsConApp_maybe.
1471 -}
1472
1473 -- Constants for the UnfWhen constructor
1474 needSaturated, unSaturatedOk :: Bool
1475 needSaturated = False
1476 unSaturatedOk = True
1477
1478 boringCxtNotOk, boringCxtOk :: Bool
1479 boringCxtOk = True
1480 boringCxtNotOk = False
1481
1482 ------------------------------------------------
1483 noUnfolding :: Unfolding
1484 -- ^ There is no known 'Unfolding'
1485 evaldUnfolding :: Unfolding
1486 -- ^ This unfolding marks the associated thing as being evaluated
1487
1488 noUnfolding = NoUnfolding
1489 evaldUnfolding = OtherCon []
1490
1491 -- | There is no known 'Unfolding', because this came from an
1492 -- hi-boot file.
1493 bootUnfolding :: Unfolding
1494 bootUnfolding = BootUnfolding
1495
1496 mkOtherCon :: [AltCon] -> Unfolding
1497 mkOtherCon = OtherCon
1498
1499 isStableSource :: UnfoldingSource -> Bool
1500 -- Keep the unfolding template
1501 isStableSource InlineCompulsory = True
1502 isStableSource InlineStable = True
1503 isStableSource InlineRhs = False
1504
1505 -- | Retrieves the template of an unfolding: panics if none is known
1506 unfoldingTemplate :: Unfolding -> CoreExpr
1507 unfoldingTemplate = uf_tmpl
1508
1509 -- | Retrieves the template of an unfolding if possible
1510 -- maybeUnfoldingTemplate is used mainly wnen specialising, and we do
1511 -- want to specialise DFuns, so it's important to return a template
1512 -- for DFunUnfoldings
1513 maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
1514 maybeUnfoldingTemplate (CoreUnfolding { uf_tmpl = expr })
1515 = Just expr
1516 maybeUnfoldingTemplate (DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args })
1517 = Just (mkLams bndrs (mkApps (Var (dataConWorkId con)) args))
1518 maybeUnfoldingTemplate _
1519 = Nothing
1520
1521 -- | The constructors that the unfolding could never be:
1522 -- returns @[]@ if no information is available
1523 otherCons :: Unfolding -> [AltCon]
1524 otherCons (OtherCon cons) = cons
1525 otherCons _ = []
1526
1527 -- | Determines if it is certainly the case that the unfolding will
1528 -- yield a value (something in HNF): returns @False@ if unsure
1529 isValueUnfolding :: Unfolding -> Bool
1530 -- Returns False for OtherCon
1531 isValueUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1532 isValueUnfolding _ = False
1533
1534 -- | Determines if it possibly the case that the unfolding will
1535 -- yield a value. Unlike 'isValueUnfolding' it returns @True@
1536 -- for 'OtherCon'
1537 isEvaldUnfolding :: Unfolding -> Bool
1538 -- Returns True for OtherCon
1539 isEvaldUnfolding (OtherCon _) = True
1540 isEvaldUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1541 isEvaldUnfolding _ = False
1542
1543 -- | @True@ if the unfolding is a constructor application, the application
1544 -- of a CONLIKE function or 'OtherCon'
1545 isConLikeUnfolding :: Unfolding -> Bool
1546 isConLikeUnfolding (OtherCon _) = True
1547 isConLikeUnfolding (CoreUnfolding { uf_is_conlike = con }) = con
1548 isConLikeUnfolding _ = False
1549
1550 -- | Is the thing we will unfold into certainly cheap?
1551 isCheapUnfolding :: Unfolding -> Bool
1552 isCheapUnfolding (CoreUnfolding { uf_is_work_free = is_wf }) = is_wf
1553 isCheapUnfolding _ = False
1554
1555 isExpandableUnfolding :: Unfolding -> Bool
1556 isExpandableUnfolding (CoreUnfolding { uf_expandable = is_expable }) = is_expable
1557 isExpandableUnfolding _ = False
1558
1559 expandUnfolding_maybe :: Unfolding -> Maybe CoreExpr
1560 -- Expand an expandable unfolding; this is used in rule matching
1561 -- See Note [Expanding variables] in Rules.hs
1562 -- The key point here is that CONLIKE things can be expanded
1563 expandUnfolding_maybe (CoreUnfolding { uf_expandable = True, uf_tmpl = rhs }) = Just rhs
1564 expandUnfolding_maybe _ = Nothing
1565
1566 isCompulsoryUnfolding :: Unfolding -> Bool
1567 isCompulsoryUnfolding (CoreUnfolding { uf_src = InlineCompulsory }) = True
1568 isCompulsoryUnfolding _ = False
1569
1570 isStableUnfolding :: Unfolding -> Bool
1571 -- True of unfoldings that should not be overwritten
1572 -- by a CoreUnfolding for the RHS of a let-binding
1573 isStableUnfolding (CoreUnfolding { uf_src = src }) = isStableSource src
1574 isStableUnfolding (DFunUnfolding {}) = True
1575 isStableUnfolding _ = False
1576
1577 -- | Only returns False if there is no unfolding information available at all
1578 hasSomeUnfolding :: Unfolding -> Bool
1579 hasSomeUnfolding NoUnfolding = False
1580 hasSomeUnfolding BootUnfolding = False
1581 hasSomeUnfolding _ = True
1582
1583 isBootUnfolding :: Unfolding -> Bool
1584 isBootUnfolding BootUnfolding = True
1585 isBootUnfolding _ = False
1586
1587 neverUnfoldGuidance :: UnfoldingGuidance -> Bool
1588 neverUnfoldGuidance UnfNever = True
1589 neverUnfoldGuidance _ = False
1590
1591 isFragileUnfolding :: Unfolding -> Bool
1592 -- An unfolding is fragile if it mentions free variables or
1593 -- is otherwise subject to change. A robust one can be kept.
1594 -- See Note [Fragile unfoldings]
1595 isFragileUnfolding (CoreUnfolding {}) = True
1596 isFragileUnfolding (DFunUnfolding {}) = True
1597 isFragileUnfolding _ = False
1598 -- NoUnfolding, BootUnfolding, OtherCon are all non-fragile
1599
1600 canUnfold :: Unfolding -> Bool
1601 canUnfold (CoreUnfolding { uf_guidance = g }) = not (neverUnfoldGuidance g)
1602 canUnfold _ = False
1603
1604 {- Note [Fragile unfoldings]
1605 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1606 An unfolding is "fragile" if it mentions free variables (and hence would
1607 need substitution) or might be affected by optimisation. The non-fragile
1608 ones are
1609
1610 NoUnfolding, BootUnfolding
1611
1612 OtherCon {} If we know this binder (say a lambda binder) will be
1613 bound to an evaluated thing, we want to retain that
1614 info in simpleOptExpr; see Trac #13077.
1615
1616 We consider even a StableUnfolding as fragile, because it needs substitution.
1617
1618 Note [InlineStable]
1619 ~~~~~~~~~~~~~~~~~
1620 When you say
1621 {-# INLINE f #-}
1622 f x = <rhs>
1623 you intend that calls (f e) are replaced by <rhs>[e/x] So we
1624 should capture (\x.<rhs>) in the Unfolding of 'f', and never meddle
1625 with it. Meanwhile, we can optimise <rhs> to our heart's content,
1626 leaving the original unfolding intact in Unfolding of 'f'. For example
1627 all xs = foldr (&&) True xs
1628 any p = all . map p {-# INLINE any #-}
1629 We optimise any's RHS fully, but leave the InlineRule saying "all . map p",
1630 which deforests well at the call site.
1631
1632 So INLINE pragma gives rise to an InlineRule, which captures the original RHS.
1633
1634 Moreover, it's only used when 'f' is applied to the
1635 specified number of arguments; that is, the number of argument on
1636 the LHS of the '=' sign in the original source definition.
1637 For example, (.) is now defined in the libraries like this
1638 {-# INLINE (.) #-}
1639 (.) f g = \x -> f (g x)
1640 so that it'll inline when applied to two arguments. If 'x' appeared
1641 on the left, thus
1642 (.) f g x = f (g x)
1643 it'd only inline when applied to three arguments. This slightly-experimental
1644 change was requested by Roman, but it seems to make sense.
1645
1646 See also Note [Inlining an InlineRule] in CoreUnfold.
1647
1648
1649 Note [OccInfo in unfoldings and rules]
1650 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1651 In unfoldings and rules, we guarantee that the template is occ-analysed,
1652 so that the occurrence info on the binders is correct. This is important,
1653 because the Simplifier does not re-analyse the template when using it. If
1654 the occurrence info is wrong
1655 - We may get more simpifier iterations than necessary, because
1656 once-occ info isn't there
1657 - More seriously, we may get an infinite loop if there's a Rec
1658 without a loop breaker marked
1659
1660
1661 ************************************************************************
1662 * *
1663 AltCon
1664 * *
1665 ************************************************************************
1666 -}
1667
1668 -- The Ord is needed for the FiniteMap used in the lookForConstructor
1669 -- in SimplEnv. If you declared that lookForConstructor *ignores*
1670 -- constructor-applications with LitArg args, then you could get
1671 -- rid of this Ord.
1672
1673 instance Outputable AltCon where
1674 ppr (DataAlt dc) = ppr dc
1675 ppr (LitAlt lit) = ppr lit
1676 ppr DEFAULT = text "__DEFAULT"
1677
1678 cmpAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Ordering
1679 cmpAlt (con1, _, _) (con2, _, _) = con1 `cmpAltCon` con2
1680
1681 ltAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Bool
1682 ltAlt a1 a2 = (a1 `cmpAlt` a2) == LT
1683
1684 cmpAltCon :: AltCon -> AltCon -> Ordering
1685 -- ^ Compares 'AltCon's within a single list of alternatives
1686 -- DEFAULT comes out smallest, so that sorting by AltCon
1687 -- puts alternatives in the order required by #case_invariants#
1688 cmpAltCon DEFAULT DEFAULT = EQ
1689 cmpAltCon DEFAULT _ = LT
1690
1691 cmpAltCon (DataAlt d1) (DataAlt d2) = dataConTag d1 `compare` dataConTag d2
1692 cmpAltCon (DataAlt _) DEFAULT = GT
1693 cmpAltCon (LitAlt l1) (LitAlt l2) = l1 `compare` l2
1694 cmpAltCon (LitAlt _) DEFAULT = GT
1695
1696 cmpAltCon con1 con2 = WARN( True, text "Comparing incomparable AltCons" <+>
1697 ppr con1 <+> ppr con2 )
1698 LT
1699
1700 {-
1701 ************************************************************************
1702 * *
1703 \subsection{Useful synonyms}
1704 * *
1705 ************************************************************************
1706
1707 Note [CoreProgram]
1708 ~~~~~~~~~~~~~~~~~~
1709 The top level bindings of a program, a CoreProgram, are represented as
1710 a list of CoreBind
1711
1712 * Later bindings in the list can refer to earlier ones, but not vice
1713 versa. So this is OK
1714 NonRec { x = 4 }
1715 Rec { p = ...q...x...
1716 ; q = ...p...x }
1717 Rec { f = ...p..x..f.. }
1718 NonRec { g = ..f..q...x.. }
1719 But it would NOT be ok for 'f' to refer to 'g'.
1720
1721 * The occurrence analyser does strongly-connected component analysis
1722 on each Rec binding, and splits it into a sequence of smaller
1723 bindings where possible. So the program typically starts life as a
1724 single giant Rec, which is then dependency-analysed into smaller
1725 chunks.
1726 -}
1727
1728 -- If you edit this type, you may need to update the GHC formalism
1729 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1730 type CoreProgram = [CoreBind] -- See Note [CoreProgram]
1731
1732 -- | The common case for the type of binders and variables when
1733 -- we are manipulating the Core language within GHC
1734 type CoreBndr = Var
1735 -- | Expressions where binders are 'CoreBndr's
1736 type CoreExpr = Expr CoreBndr
1737 -- | Argument expressions where binders are 'CoreBndr's
1738 type CoreArg = Arg CoreBndr
1739 -- | Binding groups where binders are 'CoreBndr's
1740 type CoreBind = Bind CoreBndr
1741 -- | Case alternatives where binders are 'CoreBndr's
1742 type CoreAlt = Alt CoreBndr
1743
1744 {-
1745 ************************************************************************
1746 * *
1747 \subsection{Tagging}
1748 * *
1749 ************************************************************************
1750 -}
1751
1752 -- | Binders are /tagged/ with a t
1753 data TaggedBndr t = TB CoreBndr t -- TB for "tagged binder"
1754
1755 type TaggedBind t = Bind (TaggedBndr t)
1756 type TaggedExpr t = Expr (TaggedBndr t)
1757 type TaggedArg t = Arg (TaggedBndr t)
1758 type TaggedAlt t = Alt (TaggedBndr t)
1759
1760 instance Outputable b => Outputable (TaggedBndr b) where
1761 ppr (TB b l) = char '<' <> ppr b <> comma <> ppr l <> char '>'
1762
1763 deTagExpr :: TaggedExpr t -> CoreExpr
1764 deTagExpr (Var v) = Var v
1765 deTagExpr (Lit l) = Lit l
1766 deTagExpr (Type ty) = Type ty
1767 deTagExpr (Coercion co) = Coercion co
1768 deTagExpr (App e1 e2) = App (deTagExpr e1) (deTagExpr e2)
1769 deTagExpr (Lam (TB b _) e) = Lam b (deTagExpr e)
1770 deTagExpr (Let bind body) = Let (deTagBind bind) (deTagExpr body)
1771 deTagExpr (Case e (TB b _) ty alts) = Case (deTagExpr e) b ty (map deTagAlt alts)
1772 deTagExpr (Tick t e) = Tick t (deTagExpr e)
1773 deTagExpr (Cast e co) = Cast (deTagExpr e) co
1774
1775 deTagBind :: TaggedBind t -> CoreBind
1776 deTagBind (NonRec (TB b _) rhs) = NonRec b (deTagExpr rhs)
1777 deTagBind (Rec prs) = Rec [(b, deTagExpr rhs) | (TB b _, rhs) <- prs]
1778
1779 deTagAlt :: TaggedAlt t -> CoreAlt
1780 deTagAlt (con, bndrs, rhs) = (con, [b | TB b _ <- bndrs], deTagExpr rhs)
1781
1782 {-
1783 ************************************************************************
1784 * *
1785 \subsection{Core-constructing functions with checking}
1786 * *
1787 ************************************************************************
1788 -}
1789
1790 -- | Apply a list of argument expressions to a function expression in a nested fashion. Prefer to
1791 -- use 'MkCore.mkCoreApps' if possible
1792 mkApps :: Expr b -> [Arg b] -> Expr b
1793 -- | Apply a list of type argument expressions to a function expression in a nested fashion
1794 mkTyApps :: Expr b -> [Type] -> Expr b
1795 -- | Apply a list of coercion argument expressions to a function expression in a nested fashion
1796 mkCoApps :: Expr b -> [Coercion] -> Expr b
1797 -- | Apply a list of type or value variables to a function expression in a nested fashion
1798 mkVarApps :: Expr b -> [Var] -> Expr b
1799 -- | Apply a list of argument expressions to a data constructor in a nested fashion. Prefer to
1800 -- use 'MkCore.mkCoreConApps' if possible
1801 mkConApp :: DataCon -> [Arg b] -> Expr b
1802
1803 mkApps f args = foldl App f args
1804 mkCoApps f args = foldl (\ e a -> App e (Coercion a)) f args
1805 mkVarApps f vars = foldl (\ e a -> App e (varToCoreExpr a)) f vars
1806 mkConApp con args = mkApps (Var (dataConWorkId con)) args
1807
1808 mkTyApps f args = foldl (\ e a -> App e (mkTyArg a)) f args
1809
1810 mkConApp2 :: DataCon -> [Type] -> [Var] -> Expr b
1811 mkConApp2 con tys arg_ids = Var (dataConWorkId con)
1812 `mkApps` map Type tys
1813 `mkApps` map varToCoreExpr arg_ids
1814
1815 mkTyArg :: Type -> Expr b
1816 mkTyArg ty
1817 | Just co <- isCoercionTy_maybe ty = Coercion co
1818 | otherwise = Type ty
1819
1820 -- | Create a machine integer literal expression of type @Int#@ from an @Integer@.
1821 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1822 mkIntLit :: DynFlags -> Integer -> Expr b
1823 -- | Create a machine integer literal expression of type @Int#@ from an @Int@.
1824 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1825 mkIntLitInt :: DynFlags -> Int -> Expr b
1826
1827 mkIntLit dflags n = Lit (mkMachInt dflags n)
1828 mkIntLitInt dflags n = Lit (mkMachInt dflags (toInteger n))
1829
1830 -- | Create a machine word literal expression of type @Word#@ from an @Integer@.
1831 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1832 mkWordLit :: DynFlags -> Integer -> Expr b
1833 -- | Create a machine word literal expression of type @Word#@ from a @Word@.
1834 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1835 mkWordLitWord :: DynFlags -> Word -> Expr b
1836
1837 mkWordLit dflags w = Lit (mkMachWord dflags w)
1838 mkWordLitWord dflags w = Lit (mkMachWord dflags (toInteger w))
1839
1840 mkWord64LitWord64 :: Word64 -> Expr b
1841 mkWord64LitWord64 w = Lit (mkMachWord64 (toInteger w))
1842
1843 mkInt64LitInt64 :: Int64 -> Expr b
1844 mkInt64LitInt64 w = Lit (mkMachInt64 (toInteger w))
1845
1846 -- | Create a machine character literal expression of type @Char#@.
1847 -- If you want an expression of type @Char@ use 'MkCore.mkCharExpr'
1848 mkCharLit :: Char -> Expr b
1849 -- | Create a machine string literal expression of type @Addr#@.
1850 -- If you want an expression of type @String@ use 'MkCore.mkStringExpr'
1851 mkStringLit :: String -> Expr b
1852
1853 mkCharLit c = Lit (mkMachChar c)
1854 mkStringLit s = Lit (mkMachString s)
1855
1856 -- | Create a machine single precision literal expression of type @Float#@ from a @Rational@.
1857 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1858 mkFloatLit :: Rational -> Expr b
1859 -- | Create a machine single precision literal expression of type @Float#@ from a @Float@.
1860 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1861 mkFloatLitFloat :: Float -> Expr b
1862
1863 mkFloatLit f = Lit (mkMachFloat f)
1864 mkFloatLitFloat f = Lit (mkMachFloat (toRational f))
1865
1866 -- | Create a machine double precision literal expression of type @Double#@ from a @Rational@.
1867 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1868 mkDoubleLit :: Rational -> Expr b
1869 -- | Create a machine double precision literal expression of type @Double#@ from a @Double@.
1870 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1871 mkDoubleLitDouble :: Double -> Expr b
1872
1873 mkDoubleLit d = Lit (mkMachDouble d)
1874 mkDoubleLitDouble d = Lit (mkMachDouble (toRational d))
1875
1876 -- | Bind all supplied binding groups over an expression in a nested let expression. Assumes
1877 -- that the rhs satisfies the let/app invariant. Prefer to use 'MkCore.mkCoreLets' if
1878 -- possible, which does guarantee the invariant
1879 mkLets :: [Bind b] -> Expr b -> Expr b
1880 -- | Bind all supplied binders over an expression in a nested lambda expression. Prefer to
1881 -- use 'MkCore.mkCoreLams' if possible
1882 mkLams :: [b] -> Expr b -> Expr b
1883
1884 mkLams binders body = foldr Lam body binders
1885 mkLets binds body = foldr mkLet body binds
1886
1887 mkLet :: Bind b -> Expr b -> Expr b
1888 -- The desugarer sometimes generates an empty Rec group
1889 -- which Lint rejects, so we kill it off right away
1890 mkLet (Rec []) body = body
1891 mkLet bind body = Let bind body
1892
1893 -- | @mkLetNonRec bndr rhs body@ wraps @body@ in a @let@ binding @bndr@.
1894 mkLetNonRec :: b -> Expr b -> Expr b -> Expr b
1895 mkLetNonRec b rhs body = Let (NonRec b rhs) body
1896
1897 -- | @mkLetRec binds body@ wraps @body@ in a @let rec@ with the given set of
1898 -- @binds@ if binds is non-empty.
1899 mkLetRec :: [(b, Expr b)] -> Expr b -> Expr b
1900 mkLetRec [] body = body
1901 mkLetRec bs body = Let (Rec bs) body
1902
1903 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1904 -- this can only be used to bind something in a non-recursive @let@ expression
1905 mkTyBind :: TyVar -> Type -> CoreBind
1906 mkTyBind tv ty = NonRec tv (Type ty)
1907
1908 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1909 -- this can only be used to bind something in a non-recursive @let@ expression
1910 mkCoBind :: CoVar -> Coercion -> CoreBind
1911 mkCoBind cv co = NonRec cv (Coercion co)
1912
1913 -- | Convert a binder into either a 'Var' or 'Type' 'Expr' appropriately
1914 varToCoreExpr :: CoreBndr -> Expr b
1915 varToCoreExpr v | isTyVar v = Type (mkTyVarTy v)
1916 | isCoVar v = Coercion (mkCoVarCo v)
1917 | otherwise = ASSERT( isId v ) Var v
1918
1919 varsToCoreExprs :: [CoreBndr] -> [Expr b]
1920 varsToCoreExprs vs = map varToCoreExpr vs
1921
1922 {-
1923 ************************************************************************
1924 * *
1925 Getting a result type
1926 * *
1927 ************************************************************************
1928
1929 These are defined here to avoid a module loop between CoreUtils and CoreFVs
1930
1931 -}
1932
1933 applyTypeToArg :: Type -> CoreExpr -> Type
1934 -- ^ Determines the type resulting from applying an expression with given type
1935 -- to a given argument expression
1936 applyTypeToArg fun_ty arg = piResultTy fun_ty (exprToType arg)
1937
1938 -- | If the expression is a 'Type', converts. Otherwise,
1939 -- panics. NB: This does /not/ convert 'Coercion' to 'CoercionTy'.
1940 exprToType :: CoreExpr -> Type
1941 exprToType (Type ty) = ty
1942 exprToType _bad = pprPanic "exprToType" empty
1943
1944 -- | If the expression is a 'Coercion', converts.
1945 exprToCoercion_maybe :: CoreExpr -> Maybe Coercion
1946 exprToCoercion_maybe (Coercion co) = Just co
1947 exprToCoercion_maybe _ = Nothing
1948
1949 {-
1950 ************************************************************************
1951 * *
1952 \subsection{Simple access functions}
1953 * *
1954 ************************************************************************
1955 -}
1956
1957 -- | Extract every variable by this group
1958 bindersOf :: Bind b -> [b]
1959 -- If you edit this function, you may need to update the GHC formalism
1960 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1961 bindersOf (NonRec binder _) = [binder]
1962 bindersOf (Rec pairs) = [binder | (binder, _) <- pairs]
1963
1964 -- | 'bindersOf' applied to a list of binding groups
1965 bindersOfBinds :: [Bind b] -> [b]
1966 bindersOfBinds binds = foldr ((++) . bindersOf) [] binds
1967
1968 rhssOfBind :: Bind b -> [Expr b]
1969 rhssOfBind (NonRec _ rhs) = [rhs]
1970 rhssOfBind (Rec pairs) = [rhs | (_,rhs) <- pairs]
1971
1972 rhssOfAlts :: [Alt b] -> [Expr b]
1973 rhssOfAlts alts = [e | (_,_,e) <- alts]
1974
1975 -- | Collapse all the bindings in the supplied groups into a single
1976 -- list of lhs\/rhs pairs suitable for binding in a 'Rec' binding group
1977 flattenBinds :: [Bind b] -> [(b, Expr b)]
1978 flattenBinds (NonRec b r : binds) = (b,r) : flattenBinds binds
1979 flattenBinds (Rec prs1 : binds) = prs1 ++ flattenBinds binds
1980 flattenBinds [] = []
1981
1982 -- | We often want to strip off leading lambdas before getting down to
1983 -- business. Variants are 'collectTyBinders', 'collectValBinders',
1984 -- and 'collectTyAndValBinders'
1985 collectBinders :: Expr b -> ([b], Expr b)
1986 collectTyBinders :: CoreExpr -> ([TyVar], CoreExpr)
1987 collectValBinders :: CoreExpr -> ([Id], CoreExpr)
1988 collectTyAndValBinders :: CoreExpr -> ([TyVar], [Id], CoreExpr)
1989 -- | Strip off exactly N leading lambdas (type or value). Good for use with
1990 -- join points.
1991 collectNBinders :: Int -> Expr b -> ([b], Expr b)
1992
1993 collectBinders expr
1994 = go [] expr
1995 where
1996 go bs (Lam b e) = go (b:bs) e
1997 go bs e = (reverse bs, e)
1998
1999 collectTyBinders expr
2000 = go [] expr
2001 where
2002 go tvs (Lam b e) | isTyVar b = go (b:tvs) e
2003 go tvs e = (reverse tvs, e)
2004
2005 collectValBinders expr
2006 = go [] expr
2007 where
2008 go ids (Lam b e) | isId b = go (b:ids) e
2009 go ids body = (reverse ids, body)
2010
2011 collectTyAndValBinders expr
2012 = (tvs, ids, body)
2013 where
2014 (tvs, body1) = collectTyBinders expr
2015 (ids, body) = collectValBinders body1
2016
2017 collectNBinders orig_n orig_expr
2018 = go orig_n [] orig_expr
2019 where
2020 go 0 bs expr = (reverse bs, expr)
2021 go n bs (Lam b e) = go (n-1) (b:bs) e
2022 go _ _ _ = pprPanic "collectNBinders" $ int orig_n
2023
2024 -- | Takes a nested application expression and returns the the function
2025 -- being applied and the arguments to which it is applied
2026 collectArgs :: Expr b -> (Expr b, [Arg b])
2027 collectArgs expr
2028 = go expr []
2029 where
2030 go (App f a) as = go f (a:as)
2031 go e as = (e, as)
2032
2033 -- | Like @collectArgs@, but also collects looks through floatable
2034 -- ticks if it means that we can find more arguments.
2035 collectArgsTicks :: (Tickish Id -> Bool) -> Expr b
2036 -> (Expr b, [Arg b], [Tickish Id])
2037 collectArgsTicks skipTick expr
2038 = go expr [] []
2039 where
2040 go (App f a) as ts = go f (a:as) ts
2041 go (Tick t e) as ts
2042 | skipTick t = go e as (t:ts)
2043 go e as ts = (e, as, reverse ts)
2044
2045
2046 {-
2047 ************************************************************************
2048 * *
2049 \subsection{Predicates}
2050 * *
2051 ************************************************************************
2052
2053 At one time we optionally carried type arguments through to runtime.
2054 @isRuntimeVar v@ returns if (Lam v _) really becomes a lambda at runtime,
2055 i.e. if type applications are actual lambdas because types are kept around
2056 at runtime. Similarly isRuntimeArg.
2057 -}
2058
2059 -- | Will this variable exist at runtime?
2060 isRuntimeVar :: Var -> Bool
2061 isRuntimeVar = isId
2062
2063 -- | Will this argument expression exist at runtime?
2064 isRuntimeArg :: CoreExpr -> Bool
2065 isRuntimeArg = isValArg
2066
2067 -- | Returns @True@ for value arguments, false for type args
2068 -- NB: coercions are value arguments (zero width, to be sure,
2069 -- like State#, but still value args).
2070 isValArg :: Expr b -> Bool
2071 isValArg e = not (isTypeArg e)
2072
2073 -- | Returns @True@ iff the expression is a 'Type' or 'Coercion'
2074 -- expression at its top level
2075 isTyCoArg :: Expr b -> Bool
2076 isTyCoArg (Type {}) = True
2077 isTyCoArg (Coercion {}) = True
2078 isTyCoArg _ = False
2079
2080 -- | Returns @True@ iff the expression is a 'Type' expression at its
2081 -- top level. Note this does NOT include 'Coercion's.
2082 isTypeArg :: Expr b -> Bool
2083 isTypeArg (Type {}) = True
2084 isTypeArg _ = False
2085
2086 -- | The number of binders that bind values rather than types
2087 valBndrCount :: [CoreBndr] -> Int
2088 valBndrCount = count isId
2089
2090 -- | The number of argument expressions that are values rather than types at their top level
2091 valArgCount :: [Arg b] -> Int
2092 valArgCount = count isValArg
2093
2094 {-
2095 ************************************************************************
2096 * *
2097 \subsection{Annotated core}
2098 * *
2099 ************************************************************************
2100 -}
2101
2102 -- | Annotated core: allows annotation at every node in the tree
2103 type AnnExpr bndr annot = (annot, AnnExpr' bndr annot)
2104
2105 -- | A clone of the 'Expr' type but allowing annotation at every tree node
2106 data AnnExpr' bndr annot
2107 = AnnVar Id
2108 | AnnLit Literal
2109 | AnnLam bndr (AnnExpr bndr annot)
2110 | AnnApp (AnnExpr bndr annot) (AnnExpr bndr annot)
2111 | AnnCase (AnnExpr bndr annot) bndr Type [AnnAlt bndr annot]
2112 | AnnLet (AnnBind bndr annot) (AnnExpr bndr annot)
2113 | AnnCast (AnnExpr bndr annot) (annot, Coercion)
2114 -- Put an annotation on the (root of) the coercion
2115 | AnnTick (Tickish Id) (AnnExpr bndr annot)
2116 | AnnType Type
2117 | AnnCoercion Coercion
2118
2119 -- | A clone of the 'Alt' type but allowing annotation at every tree node
2120 type AnnAlt bndr annot = (AltCon, [bndr], AnnExpr bndr annot)
2121
2122 -- | A clone of the 'Bind' type but allowing annotation at every tree node
2123 data AnnBind bndr annot
2124 = AnnNonRec bndr (AnnExpr bndr annot)
2125 | AnnRec [(bndr, AnnExpr bndr annot)]
2126
2127 -- | Takes a nested application expression and returns the the function
2128 -- being applied and the arguments to which it is applied
2129 collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
2130 collectAnnArgs expr
2131 = go expr []
2132 where
2133 go (_, AnnApp f a) as = go f (a:as)
2134 go e as = (e, as)
2135
2136 collectAnnArgsTicks :: (Tickish Var -> Bool) -> AnnExpr b a
2137 -> (AnnExpr b a, [AnnExpr b a], [Tickish Var])
2138 collectAnnArgsTicks tickishOk expr
2139 = go expr [] []
2140 where
2141 go (_, AnnApp f a) as ts = go f (a:as) ts
2142 go (_, AnnTick t e) as ts | tickishOk t
2143 = go e as (t:ts)
2144 go e as ts = (e, as, reverse ts)
2145
2146 deAnnotate :: AnnExpr bndr annot -> Expr bndr
2147 deAnnotate (_, e) = deAnnotate' e
2148
2149 deAnnotate' :: AnnExpr' bndr annot -> Expr bndr
2150 deAnnotate' (AnnType t) = Type t
2151 deAnnotate' (AnnCoercion co) = Coercion co
2152 deAnnotate' (AnnVar v) = Var v
2153 deAnnotate' (AnnLit lit) = Lit lit
2154 deAnnotate' (AnnLam binder body) = Lam binder (deAnnotate body)
2155 deAnnotate' (AnnApp fun arg) = App (deAnnotate fun) (deAnnotate arg)
2156 deAnnotate' (AnnCast e (_,co)) = Cast (deAnnotate e) co
2157 deAnnotate' (AnnTick tick body) = Tick tick (deAnnotate body)
2158
2159 deAnnotate' (AnnLet bind body)
2160 = Let (deAnnBind bind) (deAnnotate body)
2161 where
2162 deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
2163 deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
2164
2165 deAnnotate' (AnnCase scrut v t alts)
2166 = Case (deAnnotate scrut) v t (map deAnnAlt alts)
2167
2168 deAnnAlt :: AnnAlt bndr annot -> Alt bndr
2169 deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs)
2170
2171 -- | As 'collectBinders' but for 'AnnExpr' rather than 'Expr'
2172 collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2173 collectAnnBndrs e
2174 = collect [] e
2175 where
2176 collect bs (_, AnnLam b body) = collect (b:bs) body
2177 collect bs body = (reverse bs, body)
2178
2179 -- | As 'collectNBinders' but for 'AnnExpr' rather than 'Expr'
2180 collectNAnnBndrs :: Int -> AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2181 collectNAnnBndrs orig_n e
2182 = collect orig_n [] e
2183 where
2184 collect 0 bs body = (reverse bs, body)
2185 collect n bs (_, AnnLam b body) = collect (n-1) (b:bs) body
2186 collect _ _ _ = pprPanic "collectNBinders" $ int orig_n