Implement unboxed sum primitive type
[ghc.git] / libraries / ghc-prim / GHC / Types.hs
1 {-# LANGUAGE MagicHash, NoImplicitPrelude, TypeFamilies, UnboxedTuples,
2 MultiParamTypeClasses, RoleAnnotations, CPP, TypeOperators,
3 PolyKinds #-}
4 -----------------------------------------------------------------------------
5 -- |
6 -- Module : GHC.Types
7 -- Copyright : (c) The University of Glasgow 2009
8 -- License : see libraries/ghc-prim/LICENSE
9 --
10 -- Maintainer : cvs-ghc@haskell.org
11 -- Stability : internal
12 -- Portability : non-portable (GHC Extensions)
13 --
14 -- GHC type definitions.
15 -- Use GHC.Exts from the base package instead of importing this
16 -- module directly.
17 --
18 -----------------------------------------------------------------------------
19
20 module GHC.Types (
21 -- Data types that are built-in syntax
22 -- They are defined here, but not explicitly exported
23 --
24 -- Lists: []( [], (:) )
25 -- Type equality: (~)( Eq# )
26
27 Bool(..), Char(..), Int(..), Word(..),
28 Float(..), Double(..),
29 Ordering(..), IO(..),
30 isTrue#,
31 SPEC(..),
32 Nat, Symbol,
33 Any,
34 type (~~), Coercible,
35 TYPE, RuntimeRep(..), Type, type (*), type (★), Constraint,
36 -- The historical type * should ideally be written as
37 -- `type *`, without the parentheses. But that's a true
38 -- pain to parse, and for little gain. (★) is its Unicode spelling.
39 VecCount(..), VecElem(..),
40
41 -- * Runtime type representation
42 Module(..), TrName(..), TyCon(..)
43 ) where
44
45 import GHC.Prim
46
47 infixr 5 : -- fixity (right-associative, precedence 5) of the list constructor (:) declared below
48
49 -- Take note: All types defined here must have associated type representations
50 -- defined in Data.Typeable.Internal.
51 -- See Note [Representation of types defined in GHC.Types] below.
52
53 {- *********************************************************************
54 * *
55 Kinds
56 * *
57 ********************************************************************* -}
58
59 -- | The kind of constraints, like @Show a@ (that is, @Show a :: Constraint@).
60 data Constraint -- declared without constructors
61
62 -- | The kind of types with values. For example @Int :: Type@.
63 type Type = TYPE 'PtrRepLifted -- 'PtrRepLifted: lifted, represented by a pointer (see 'RuntimeRep')
64
65 -- | A backward-compatible (pre-GHC 8.0) synonym for 'Type'
66 type * = TYPE 'PtrRepLifted -- same right-hand side as 'Type'
67
68 -- | A Unicode (U+2605, @★@) backward-compatible (pre-GHC 8.0) synonym for 'Type'
69 type ★ = TYPE 'PtrRepLifted
70
71 {- *********************************************************************
72 * *
73 Nat and Symbol
74 * *
75 ********************************************************************* -}
76
77 -- | (Kind) This is the kind of type-level natural numbers.
78 data Nat -- declared without constructors
79
80 -- | (Kind) This is the kind of type-level symbols.
81 -- It is declared here because the class IP needs it.
82 data Symbol -- declared without constructors
83
84 {- *********************************************************************
85 * *
86 Any
87 * *
88 ********************************************************************* -}
89
90 -- | The type constructor 'Any' is a type to which you can unsafely coerce any
91 -- lifted type, and back. More concretely, for a lifted type @t@ and
92 -- value @x :: t@, @unsafeCoerce (unsafeCoerce x :: Any) :: t@ is equivalent
93 -- to @x@.
94 --
95 type family Any :: k where { }
96 -- See Note [Any types] in TysWiredIn. Also, for a bit of history on Any see
97 -- #10886. Note that this must be a *closed* type family: we need to ensure
98 -- that this can't reduce to a `data` type for the results discussed in
99 -- Note [Any types].
100
101 {- *********************************************************************
102 * *
103 Lists
104
105 NB: lists are built-in syntax, and hence not explicitly exported
106 * *
107 ********************************************************************* -}
108
109 data [] a = [] | a : [a] -- either the empty list, or an element consed onto a list
110
111
112 {- *********************************************************************
113 * *
114 Ordering
115 * *
116 ********************************************************************* -}
117
118 data Ordering = LT | EQ | GT -- presumably "less than", "equal", "greater than"; no instances are defined in this module
119
120
121 {- *********************************************************************
122 * *
123 Int, Char, Word, Float, Double
124 * *
125 ********************************************************************* -}
126
127 {- | The character type 'Char' is an enumeration whose values represent
128 Unicode (or equivalently ISO\/IEC 10646) characters (see
129 <http://www.unicode.org/> for details). This set extends the ISO 8859-1
130 (Latin-1) character set (the first 256 characters), which is itself an extension
131 of the ASCII character set (the first 128 characters). A character literal in
132 Haskell has type 'Char'.
133
134 To convert a 'Char' to or from the corresponding 'Int' value defined
135 by Unicode, use 'Prelude.toEnum' and 'Prelude.fromEnum' from the
136 'Prelude.Enum' class respectively (or equivalently 'ord' and 'chr').
137 -}
138 data {-# CTYPE "HsChar" #-} Char = C# Char# -- boxes an unboxed Char#
139
140 -- | A fixed-precision integer type with at least the range @[-2^29 .. 2^29-1]@.
141 -- The exact range for a given implementation can be determined by using
142 -- 'Prelude.minBound' and 'Prelude.maxBound' from the 'Prelude.Bounded' class.
143 data {-# CTYPE "HsInt" #-} Int = I# Int# -- boxes an unboxed Int#
144
145 -- | A 'Word' is an unsigned integral type, with the same size as 'Int'.
146 data {-# CTYPE "HsWord" #-} Word = W# Word# -- boxes an unboxed Word#
147
148 -- | Single-precision floating point numbers.
149 -- It is desirable that this type be at least equal in range and precision
150 -- to the IEEE single-precision type.
151 data {-# CTYPE "HsFloat" #-} Float = F# Float# -- boxes an unboxed Float#
152
153 -- | Double-precision floating point numbers.
154 -- It is desirable that this type be at least equal in range and precision
155 -- to the IEEE double-precision type.
156 data {-# CTYPE "HsDouble" #-} Double = D# Double# -- boxes an unboxed Double#
157
158
159 {- *********************************************************************
160 * *
161 IO
162 * *
163 ********************************************************************* -}
164
165 {- |
166 A value of type @'IO' a@ is a computation which, when performed,
167 does some I\/O before returning a value of type @a@.
168
169 There is really only one way to \"perform\" an I\/O action: bind it to
170 @Main.main@ in your program. When your program is run, the I\/O will
171 be performed. It isn't possible to perform I\/O from an arbitrary
172 function, unless that function is itself in the 'IO' monad and called
173 at some point, directly or indirectly, from @Main.main@.
174
175 'IO' is a monad, so 'IO' actions can be combined using either the do-notation
176 or the '>>' and '>>=' operations from the 'Monad' class.
177 -}
178 newtype IO a = IO (State# RealWorld -> (# State# RealWorld, a #)) -- wraps a state-passing function that returns an unboxed tuple
179 type role IO representational
180
181 {- The 'type role' annotation for IO is redundant, but it is included
182 because this role is significant in the normalisation of FFI
183 types. Specifically, if this role were to become nominal (which would
184 be very strange, indeed!), changes elsewhere in GHC would be
185 necessary. See Note [FFI type roles] in TcForeign. -}
186
187
188 {- *********************************************************************
189 * *
190 (~) and Coercible
191
192 NB: (~) is built-in syntax, and hence not explicitly exported
193 * *
194 ********************************************************************* -}
195
196 {-
197 Note [Kind-changing of (~) and Coercible]
198 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
199
200 (~) and Coercible are tricky to define. To the user, they must appear as
201 constraints, but we cannot define them as such in Haskell. But we also cannot
202 just define them only in GHC.Prim (like (->)), because we need a real module
203 for them, e.g. to compile the constructor's info table.
204
205 Furthermore the type of MkCoercible cannot be written in Haskell
206 (no syntax for ~#R).
207
208 So we define them as regular data types in GHC.Types, and do magic in TysWiredIn,
209 inside GHC, to change the kind and type.
210 -}
211
212
213 -- | Lifted, heterogeneous equality. By lifted, we mean that it
214 -- can be bogus (deferred type error). By heterogeneous, we mean that
215 -- the two types @a@ and @b@ might have different kinds. Because @~~@ can
216 -- appear unexpectedly in error messages to users who do not care
217 -- about the difference between heterogeneous equality @~~@ and
218 -- homogeneous equality @~@, this is printed as @~@ unless
219 -- @-fprint-equality-relations@ is set.
220 class a ~~ b -- declared without methods; see Note [Kind-changing of (~) and Coercible]
221 -- See also Note [The equality types story] in TysPrim
222
223 -- | @Coercible@ is a two-parameter class that has instances for types @a@ and @b@ if
224 -- the compiler can infer that they have the same representation. This class
225 -- does not have regular instances; instead they are created on-the-fly during
226 -- type-checking. Trying to manually declare an instance of @Coercible@
227 -- is an error.
228 --
229 -- Nevertheless one can pretend that the following three kinds of instances
230 -- exist. First, as a trivial base-case:
231 --
232 -- @instance Coercible a a@
233 --
234 -- Furthermore, for every type constructor there is
235 -- an instance that allows coercion under the type constructor. For
236 -- example, let @D@ be a prototypical type constructor (@data@ or
237 -- @newtype@) with three type arguments, which have roles @nominal@,
238 -- @representational@ resp. @phantom@. Then there is an instance of
239 -- the form
240 --
241 -- @instance Coercible b b\' => Coercible (D a b c) (D a b\' c\')@
242 --
243 -- Note that the @nominal@ type arguments are equal, the
244 -- @representational@ type arguments can differ, but need to have a
245 -- @Coercible@ instance themselves, and the @phantom@ type arguments can be
246 -- changed arbitrarily.
247 --
248 -- The third kind of instance exists for every @newtype NT = MkNT T@ and
249 -- comes in two variants, namely
250 --
251 -- @instance Coercible a T => Coercible a NT@
252 --
253 -- @instance Coercible T b => Coercible NT b@
254 --
255 -- This instance is only usable if the constructor @MkNT@ is in scope.
256 --
257 -- If, as a library author of a type constructor like @Set a@, you
258 -- want to prevent a user of your module from writing
259 -- @coerce :: Set T -> Set NT@,
260 -- you need to set the role of @Set@\'s type parameter to @nominal@,
261 -- by writing
262 --
263 -- @type role Set nominal@
264 --
265 -- For more details about this feature, please refer to
266 -- <http://www.cis.upenn.edu/~eir/papers/2014/coercible/coercible.pdf Safe Coercions>
267 -- by Joachim Breitner, Richard A. Eisenberg, Simon Peyton Jones and Stephanie Weirich.
268 --
269 -- @since 4.7.0.0
270 class Coercible a b
271 -- See also Note [The equality types story] in TysPrim
272
273 {- *********************************************************************
274 * *
275 Bool, and isTrue#
276 * *
277 ********************************************************************* -}
278
279 data {-# CTYPE "HsBool" #-} Bool = False | True -- 'isTrue#' below documents 0# as False and 1# as True
280
281 {-# INLINE isTrue# #-}
282 -- | Alias for 'tagToEnum#'. Returns True if its parameter is 1# and False
283 -- if it is 0#. The argument is passed to 'tagToEnum#' without any check here.
284 isTrue# :: Int# -> Bool -- See Note [Optimizing isTrue#]
285 isTrue# x = tagToEnum# x
286
287 {- Note [Optimizing isTrue#]
288 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
289 The current definition of isTrue# is a temporary workaround. We would like to
290 have functions isTrue# and isFalse# defined like this:
291
292 isTrue# :: Int# -> Bool
293 isTrue# 1# = True
294 isTrue# _ = False
295
296 isFalse# :: Int# -> Bool
297 isFalse# 0# = True
298 isFalse# _ = False
299
300 These functions would allow us to safely check if a tag can represent True
301 or False. Using isTrue# and isFalse# as defined above will not introduce
302 an additional case into the code. When we scrutinize the return value of isTrue#
303 or isFalse#, either explicitly in a case expression or implicitly in a guard,
304 the result will always be a single case expression (given that optimizations
305 are turned on). This results from case-of-case transformation. Consider this
306 code (this is both valid Haskell and Core):
307
308 case isTrue# (a ># b) of
309 True -> e1
310 False -> e2
311
312 Inlining isTrue# gives:
313
314 case (case (a ># b) of { 1# -> True; _ -> False } ) of
315 True -> e1
316 False -> e2
317
318 Case-of-case transforms that to:
319
320 case (a ># b) of
321 1# -> case True of
322 True -> e1
323 False -> e2
324 _ -> case False of
325 True -> e1
326 False -> e2
327
328 Which is then simplified by case-of-known-constructor:
329
330 case (a ># b) of
331 1# -> e1
332 _ -> e2
333
334 While we get good Core here, the code generator will generate very bad Cmm
335 if e1 or e2 do allocation. It will push heap checks into case alternatives
336 which results in about 2.5% increase in code size. Until this is improved we
337 just make isTrue# an alias to tagToEnum#. This is a temporary solution (if
338 you're reading this in 2023 then things went wrong). See #8326.
339 -}
340
341
342 {- *********************************************************************
343 * *
344 SPEC
345 * *
346 ********************************************************************* -}
347
348 -- | 'SPEC' is used by GHC in the @SpecConstr@ pass in order to inform
349 -- the compiler when to be particularly aggressive. In particular, it
350 -- tells GHC to specialize regardless of size or the number of
351 -- specializations. However, not all loops fall into this category.
352 --
353 -- Libraries can specify this by using the 'SPEC' data type to indicate which
354 -- loops should be aggressively specialized.
355 data SPEC = SPEC | SPEC2
356
357
358 {- *********************************************************************
359 * *
360 RuntimeRep
361 * *
362 ********************************************************************* -}
363
364
365 -- | GHC maintains a property that the kind of all inhabited types
366 -- (as distinct from type constructors or type-level data) tells us
367 -- the runtime representation of values of that type. This datatype
368 -- encodes the choice of runtime value.
369 -- Note that 'TYPE' is parameterised by 'RuntimeRep'; this is precisely
370 -- what we mean by the fact that a type's kind encodes the runtime
371 -- representation.
372 --
373 -- For boxed values (that is, values that are represented by a pointer),
374 -- a further distinction is made, between lifted types (that contain ⊥),
375 -- and unlifted ones (that don't).
376 data RuntimeRep = VecRep VecCount VecElem -- ^ a SIMD vector type
377 | PtrRepLifted -- ^ lifted; represented by a pointer
378 | PtrRepUnlifted -- ^ unlifted; represented by a pointer
379 | VoidRep -- ^ erased entirely
380 | IntRep -- ^ signed, word-sized value
381 | WordRep -- ^ unsigned, word-sized value
382 | Int64Rep -- ^ signed, 64-bit value (on 32-bit only)
383 | Word64Rep -- ^ unsigned, 64-bit value (on 32-bit only)
384 | AddrRep -- ^ a pointer, but /not/ to a Haskell value
385 | FloatRep -- ^ a 32-bit floating point number
386 | DoubleRep -- ^ a 64-bit floating point number
387 | UnboxedTupleRep -- ^ an unboxed tuple; this doesn't specify a concrete rep
388 | UnboxedSumRep -- ^ an unboxed sum; this doesn't specify a concrete rep
389
390 -- See also Note [Wiring in RuntimeRep] in TysWiredIn
391
392 -- | Length of a SIMD vector type; the first field of the 'VecRep' constructor.
393 data VecCount = Vec2
394 | Vec4
395 | Vec8
396 | Vec16
397 | Vec32
398 | Vec64
399
400 -- | Element of a SIMD vector type; the second field of the 'VecRep' constructor.
401 data VecElem = Int8ElemRep
402 | Int16ElemRep
403 | Int32ElemRep
404 | Int64ElemRep
405 | Word8ElemRep
406 | Word16ElemRep
407 | Word32ElemRep
408 | Word64ElemRep
409 | FloatElemRep
410 | DoubleElemRep
411
412 {- *********************************************************************
413 * *
414 Runtime representation of TyCon
415 * *
416 ********************************************************************* -}
417
418 {- Note [Runtime representation of modules and tycons]
419 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
420 We generate a binding for M.$modName and M.$tcT for every module M and
421 data type T. Things to think about
422
423 - We want them to be economical on space; ideally pure data with no thunks.
424
425 - We do this for every module (except this module GHC.Types), so we can't
426 depend on anything else (eg string unpacking code)
427
428 That's why we have these terribly low-level representations. The TrName
429 type lets us use the TrNameS constructor when allocating static data;
430 but we also need TrNameD for the case where we are deserialising a TyCon
431 or Module (for example when deserialising a TypeRep), in which case we
432 can't conveniently come up with an Addr#.
433 -}
434
435 #include "MachDeps.h"
436
437 data Module = Module -- see Note [Runtime representation of modules and tycons]
438 TrName -- Package name
439 TrName -- Module name
440
441 data TrName
442 = TrNameS Addr# -- Static: usable when allocating static data
443 | TrNameD [Char] -- Dynamic: for deserialisation, where no Addr# is conveniently available
444
445 #if WORD_SIZE_IN_BITS < 64
446 data TyCon = TyCon -- word size below 64 bits: the two fingerprint fields are Word64#
447 Word64# Word64# -- Fingerprint
448 Module -- Module in which this is defined
449 TrName -- Type constructor name
450 #else
451 data TyCon = TyCon -- otherwise: the two fingerprint fields are native Word#
452 Word# Word# -- Fingerprint
453 Module -- Module in which this is defined
454 TrName -- Type constructor name
455 #endif