Spelling fixes in comments [ci skip]
[ghc.git] / compiler / basicTypes / Literal.hs
1 {-
2 (c) The University of Glasgow 2006
3 (c) The GRASP/AQUA Project, Glasgow University, 1998
4
5 \section[Literal]{@Literal@: Machine literals (unboxed, of course)}
6 -}
7
8 {-# LANGUAGE CPP, DeriveDataTypeable #-}
9
10 module Literal
11 (
12 -- * Main data type
13 Literal(..) -- Exported to ParseIface
14
15 -- ** Creating Literals
16 , mkMachInt, mkMachWord
17 , mkMachInt64, mkMachWord64
18 , mkMachFloat, mkMachDouble
19 , mkMachChar, mkMachString
20 , mkLitInteger
21
22 -- ** Operations on Literals
23 , literalType
24 , absentLiteralOf
25 , pprLiteral
26
27 -- ** Predicates on Literals and their contents
28 , litIsDupable, litIsTrivial, litIsLifted
29 , inIntRange, inWordRange, tARGET_MAX_INT, inCharRange
30 , isZeroLit
31 , litFitsInChar
32 , litValue, isLitValue, isLitValue_maybe, mapLitValue
33
34 -- ** Coercions
35 , word2IntLit, int2WordLit
36 , narrow8IntLit, narrow16IntLit, narrow32IntLit
37 , narrow8WordLit, narrow16WordLit, narrow32WordLit
38 , char2IntLit, int2CharLit
39 , float2IntLit, int2FloatLit, double2IntLit, int2DoubleLit
40 , nullAddrLit, float2DoubleLit, double2FloatLit
41 ) where
42
43 #include "HsVersions.h"
44
45 import TysPrim
46 import PrelNames
47 import Type
48 import TyCon
49 import Outputable
50 import FastString
51 import BasicTypes
52 import Binary
53 import Constants
54 import DynFlags
55 import UniqFM
56 import Util
57
58 import Data.ByteString (ByteString)
59 import Data.Int
60 import Data.Word
61 import Data.Char
62 import Data.Maybe ( isJust )
63 import Data.Data ( Data )
64 import Numeric ( fromRat )
65
66 {-
67 ************************************************************************
68 * *
69 \subsection{Literals}
70 * *
71 ************************************************************************
72 -}
73
74 -- | So-called 'Literal's are one of:
75 --
76 -- * An unboxed (/machine/) literal ('MachInt', 'MachFloat', etc.),
77 -- which is presumed to be surrounded by appropriate constructors
78 -- (@Int#@, etc.), so that the overall thing makes sense.
79 --
80 -- * The literal derived from the label mentioned in a \"foreign label\"
81 -- declaration ('MachLabel')
82 data Literal
83 = ------------------
84 -- First the primitive guys
85 MachChar Char -- ^ @Char#@ - at least 31 bits. Create with 'mkMachChar'
86
87 | MachStr ByteString -- ^ A string-literal: stored and emitted
88 -- UTF-8 encoded, we'll arrange to decode it
89 -- at runtime. Also emitted with a @'\0'@
90 -- terminator. Create with 'mkMachString'
91
92 | MachNullAddr -- ^ The @NULL@ pointer, the only pointer value
93 -- that can be represented as a Literal. Create
94 -- with 'nullAddrLit'
95
96 | MachInt Integer -- ^ @Int#@ - at least @WORD_SIZE_IN_BITS@ bits. Create with 'mkMachInt'
97 | MachInt64 Integer -- ^ @Int64#@ - at least 64 bits. Create with 'mkMachInt64'
98 | MachWord Integer -- ^ @Word#@ - at least @WORD_SIZE_IN_BITS@ bits. Create with 'mkMachWord'
99 | MachWord64 Integer -- ^ @Word64#@ - at least 64 bits. Create with 'mkMachWord64'
100
101 | MachFloat Rational -- ^ @Float#@. Create with 'mkMachFloat'
102 | MachDouble Rational -- ^ @Double#@. Create with 'mkMachDouble'
103
104 | MachLabel FastString
105 (Maybe Int)
106 FunctionOrData
107 -- ^ A label literal. Parameters:
108 --
109 -- 1) The name of the symbol mentioned in the declaration
110 --
111 -- 2) The size (in bytes) of the arguments
112 -- the label expects. Only applicable with
113 -- @stdcall@ labels. @Just x@ => @\<x\>@ will
114 -- be appended to label name when emitting assembly.
115
116 | LitInteger Integer Type -- ^ Integer literals
117 -- See Note [Integer literals]
118 deriving Data
119
120 {-
121 Note [Integer literals]
122 ~~~~~~~~~~~~~~~~~~~~~~~
123 An Integer literal is represented using, well, an Integer, to make it
124 easier to write RULEs for them. They also contain the Integer type, so
125 that e.g. literalType can return the right Type for them.
126
127 They only get converted into real Core,
128 mkInteger [c1, c2, .., cn]
129 during the CorePrep phase, although TidyPgm looks ahead at what the
130 core will be, so that it can see whether it involves CAFs.
131
132 When we initally build an Integer literal, notably when
133 deserialising it from an interface file (see the Binary instance
134 below), we don't have convenient access to the mkInteger Id. So we
135 just use an error thunk, and fill in the real Id when we do tcIfaceLit
136 in TcIface.
137
138
139 Binary instance
140 -}
141
142 instance Binary Literal where
143 put_ bh (MachChar aa) = do putByte bh 0; put_ bh aa
144 put_ bh (MachStr ab) = do putByte bh 1; put_ bh ab
145 put_ bh (MachNullAddr) = do putByte bh 2
146 put_ bh (MachInt ad) = do putByte bh 3; put_ bh ad
147 put_ bh (MachInt64 ae) = do putByte bh 4; put_ bh ae
148 put_ bh (MachWord af) = do putByte bh 5; put_ bh af
149 put_ bh (MachWord64 ag) = do putByte bh 6; put_ bh ag
150 put_ bh (MachFloat ah) = do putByte bh 7; put_ bh ah
151 put_ bh (MachDouble ai) = do putByte bh 8; put_ bh ai
152 put_ bh (MachLabel aj mb fod)
153 = do putByte bh 9
154 put_ bh aj
155 put_ bh mb
156 put_ bh fod
157 put_ bh (LitInteger i _) = do putByte bh 10; put_ bh i
158 get bh = do
159 h <- getByte bh
160 case h of
161 0 -> do
162 aa <- get bh
163 return (MachChar aa)
164 1 -> do
165 ab <- get bh
166 return (MachStr ab)
167 2 -> do
168 return (MachNullAddr)
169 3 -> do
170 ad <- get bh
171 return (MachInt ad)
172 4 -> do
173 ae <- get bh
174 return (MachInt64 ae)
175 5 -> do
176 af <- get bh
177 return (MachWord af)
178 6 -> do
179 ag <- get bh
180 return (MachWord64 ag)
181 7 -> do
182 ah <- get bh
183 return (MachFloat ah)
184 8 -> do
185 ai <- get bh
186 return (MachDouble ai)
187 9 -> do
188 aj <- get bh
189 mb <- get bh
190 fod <- get bh
191 return (MachLabel aj mb fod)
192 _ -> do
193 i <- get bh
194 -- See Note [Integer literals]
195 return $ mkLitInteger i (panic "Evaluated the place holder for mkInteger")
196
197 instance Outputable Literal where
198 ppr lit = pprLiteral (\d -> d) lit
199
200 instance Eq Literal where
201 a == b = case (a `compare` b) of { EQ -> True; _ -> False }
202 a /= b = case (a `compare` b) of { EQ -> False; _ -> True }
203
204 instance Ord Literal where
205 a <= b = case (a `compare` b) of { LT -> True; EQ -> True; GT -> False }
206 a < b = case (a `compare` b) of { LT -> True; EQ -> False; GT -> False }
207 a >= b = case (a `compare` b) of { LT -> False; EQ -> True; GT -> True }
208 a > b = case (a `compare` b) of { LT -> False; EQ -> False; GT -> True }
209 compare a b = cmpLit a b
210
211 {-
212 Construction
213 ~~~~~~~~~~~~
214 -}
215
216 -- | Creates a 'Literal' of type @Int#@
217 mkMachInt :: DynFlags -> Integer -> Literal
218 mkMachInt dflags x = ASSERT2( inIntRange dflags x, integer x )
219 MachInt x
220
221 -- | Creates a 'Literal' of type @Word#@
222 mkMachWord :: DynFlags -> Integer -> Literal
223 mkMachWord dflags x = ASSERT2( inWordRange dflags x, integer x )
224 MachWord x
225
226 -- | Creates a 'Literal' of type @Int64#@
227 mkMachInt64 :: Integer -> Literal
228 mkMachInt64 x = MachInt64 x
229
230 -- | Creates a 'Literal' of type @Word64#@
231 mkMachWord64 :: Integer -> Literal
232 mkMachWord64 x = MachWord64 x
233
234 -- | Creates a 'Literal' of type @Float#@
235 mkMachFloat :: Rational -> Literal
236 mkMachFloat = MachFloat
237
238 -- | Creates a 'Literal' of type @Double#@
239 mkMachDouble :: Rational -> Literal
240 mkMachDouble = MachDouble
241
242 -- | Creates a 'Literal' of type @Char#@
243 mkMachChar :: Char -> Literal
244 mkMachChar = MachChar
245
246 -- | Creates a 'Literal' of type @Addr#@, which is appropriate for passing to
247 -- e.g. some of the \"error\" functions in GHC.Err such as @GHC.Err.runtimeError@
248 mkMachString :: String -> Literal
249 -- stored UTF-8 encoded
250 mkMachString s = MachStr (fastStringToByteString $ mkFastString s)
251
252 mkLitInteger :: Integer -> Type -> Literal
253 mkLitInteger = LitInteger
254
255 inIntRange, inWordRange :: DynFlags -> Integer -> Bool
256 inIntRange dflags x = x >= tARGET_MIN_INT dflags && x <= tARGET_MAX_INT dflags
257 inWordRange dflags x = x >= 0 && x <= tARGET_MAX_WORD dflags
258
259 inCharRange :: Char -> Bool
260 inCharRange c = c >= '\0' && c <= chr tARGET_MAX_CHAR
261
262 -- | Tests whether the literal represents a zero of whatever type it is
263 isZeroLit :: Literal -> Bool
264 isZeroLit (MachInt 0) = True
265 isZeroLit (MachInt64 0) = True
266 isZeroLit (MachWord 0) = True
267 isZeroLit (MachWord64 0) = True
268 isZeroLit (MachFloat 0) = True
269 isZeroLit (MachDouble 0) = True
270 isZeroLit _ = False
271
272 -- | Returns the 'Integer' contained in the 'Literal', for when that makes
273 -- sense, i.e. for 'Char', 'Int', 'Word' and 'LitInteger'.
274 litValue :: Literal -> Integer
275 litValue l = case isLitValue_maybe l of
276 Just x -> x
277 Nothing -> pprPanic "litValue" (ppr l)
278
279 -- | Returns the 'Integer' contained in the 'Literal', for when that makes
280 -- sense, i.e. for 'Char', 'Int', 'Word' and 'LitInteger'.
281 isLitValue_maybe :: Literal -> Maybe Integer
282 isLitValue_maybe (MachChar c) = Just $ toInteger $ ord c
283 isLitValue_maybe (MachInt i) = Just i
284 isLitValue_maybe (MachInt64 i) = Just i
285 isLitValue_maybe (MachWord i) = Just i
286 isLitValue_maybe (MachWord64 i) = Just i
287 isLitValue_maybe (LitInteger i _) = Just i
288 isLitValue_maybe _ = Nothing
289
290 -- | Apply a function to the 'Integer' contained in the 'Literal', for when that
291 -- makes sense, e.g. for 'Char', 'Int', 'Word' and 'LitInteger'.
292 mapLitValue :: (Integer -> Integer) -> Literal -> Literal
293 mapLitValue f (MachChar c) = MachChar (fchar c)
294 where fchar = chr . fromInteger . f . toInteger . ord
295 mapLitValue f (MachInt i) = MachInt (f i)
296 mapLitValue f (MachInt64 i) = MachInt64 (f i)
297 mapLitValue f (MachWord i) = MachWord (f i)
298 mapLitValue f (MachWord64 i) = MachWord64 (f i)
299 mapLitValue f (LitInteger i t) = LitInteger (f i) t
300 mapLitValue _ l = pprPanic "mapLitValue" (ppr l)
301
302 -- | Indicate if the `Literal` contains an 'Integer' value, e.g. 'Char',
303 -- 'Int', 'Word' and 'LitInteger'.
304 isLitValue :: Literal -> Bool
305 isLitValue = isJust . isLitValue_maybe
306
307 {-
308 Coercions
309 ~~~~~~~~~
310 -}
311
312 narrow8IntLit, narrow16IntLit, narrow32IntLit,
313 narrow8WordLit, narrow16WordLit, narrow32WordLit,
314 char2IntLit, int2CharLit,
315 float2IntLit, int2FloatLit, double2IntLit, int2DoubleLit,
316 float2DoubleLit, double2FloatLit
317 :: Literal -> Literal
318
319 word2IntLit, int2WordLit :: DynFlags -> Literal -> Literal
320 word2IntLit dflags (MachWord w)
321 | w > tARGET_MAX_INT dflags = MachInt (w - tARGET_MAX_WORD dflags - 1)
322 | otherwise = MachInt w
323 word2IntLit _ l = pprPanic "word2IntLit" (ppr l)
324
325 int2WordLit dflags (MachInt i)
326 | i < 0 = MachWord (1 + tARGET_MAX_WORD dflags + i) -- (-1) ---> tARGET_MAX_WORD
327 | otherwise = MachWord i
328 int2WordLit _ l = pprPanic "int2WordLit" (ppr l)
329
330 narrow8IntLit (MachInt i) = MachInt (toInteger (fromInteger i :: Int8))
331 narrow8IntLit l = pprPanic "narrow8IntLit" (ppr l)
332 narrow16IntLit (MachInt i) = MachInt (toInteger (fromInteger i :: Int16))
333 narrow16IntLit l = pprPanic "narrow16IntLit" (ppr l)
334 narrow32IntLit (MachInt i) = MachInt (toInteger (fromInteger i :: Int32))
335 narrow32IntLit l = pprPanic "narrow32IntLit" (ppr l)
336 narrow8WordLit (MachWord w) = MachWord (toInteger (fromInteger w :: Word8))
337 narrow8WordLit l = pprPanic "narrow8WordLit" (ppr l)
338 narrow16WordLit (MachWord w) = MachWord (toInteger (fromInteger w :: Word16))
339 narrow16WordLit l = pprPanic "narrow16WordLit" (ppr l)
340 narrow32WordLit (MachWord w) = MachWord (toInteger (fromInteger w :: Word32))
341 narrow32WordLit l = pprPanic "narrow32WordLit" (ppr l)
342
343 char2IntLit (MachChar c) = MachInt (toInteger (ord c))
344 char2IntLit l = pprPanic "char2IntLit" (ppr l)
345 int2CharLit (MachInt i) = MachChar (chr (fromInteger i))
346 int2CharLit l = pprPanic "int2CharLit" (ppr l)
347
348 float2IntLit (MachFloat f) = MachInt (truncate f)
349 float2IntLit l = pprPanic "float2IntLit" (ppr l)
350 int2FloatLit (MachInt i) = MachFloat (fromInteger i)
351 int2FloatLit l = pprPanic "int2FloatLit" (ppr l)
352
353 double2IntLit (MachDouble f) = MachInt (truncate f)
354 double2IntLit l = pprPanic "double2IntLit" (ppr l)
355 int2DoubleLit (MachInt i) = MachDouble (fromInteger i)
356 int2DoubleLit l = pprPanic "int2DoubleLit" (ppr l)
357
358 float2DoubleLit (MachFloat f) = MachDouble f
359 float2DoubleLit l = pprPanic "float2DoubleLit" (ppr l)
360 double2FloatLit (MachDouble d) = MachFloat d
361 double2FloatLit l = pprPanic "double2FloatLit" (ppr l)
362
363 nullAddrLit :: Literal
364 nullAddrLit = MachNullAddr
365
366 {-
367 Predicates
368 ~~~~~~~~~~
369 -}
370
371 -- | True if there is absolutely no penalty to duplicating the literal.
372 -- False principally of strings.
373 --
374 -- "Why?", you say? I'm glad you asked. Well, for one duplicating strings would
375 -- blow up code sizes. Not only this, it's also unsafe.
376 --
377 -- Consider a program that wants to traverse a string. One way it might do this
378 -- is to first compute the Addr# pointing to the end of the string, and then,
379 -- starting from the beginning, bump a pointer using eqAddr# to determine the
380 -- end. For instance,
381 --
382 -- @
383 -- -- Given pointers to the start and end of a string, count how many zeros
384 -- -- the string contains.
385 -- countZeros :: Addr# -> Addr# -> -> Int
386 -- countZeros start end = go start 0
387 -- where
388 -- go off n
389 -- | off `addrEq#` end = n
390 -- | otherwise = go (off `plusAddr#` 1) n'
391 -- where n' | isTrue# (indexInt8OffAddr# off 0# ==# 0#) = n + 1
392 -- | otherwise = n
393 -- @
394 --
395 -- Consider what happens if we considered strings to be trivial (and therefore
396 -- duplicable) and emitted a call like @countZeros "hello"# ("hello"#
397 -- `plusAddr`# 5)@. The beginning and end pointers do not belong to the same
398 -- string, meaning that an iteration like the above would blow up terribly.
399 -- This is what happened in #12757.
400 --
401 -- Ultimately the solution here is to make primitive strings a bit more
402 -- structured, ensuring that the compiler can't inline in ways that will break
403 -- user code. One approach to this is described in #8472.
404 litIsTrivial :: Literal -> Bool
405 -- c.f. CoreUtils.exprIsTrivial
406 litIsTrivial (MachStr _) = False
407 litIsTrivial (LitInteger {}) = False
408 litIsTrivial _ = True
409
410 -- | True if code space does not go bad if we duplicate this literal
411 -- Currently we treat it just like 'litIsTrivial'
412 litIsDupable :: DynFlags -> Literal -> Bool
413 -- c.f. CoreUtils.exprIsDupable
414 litIsDupable _ (MachStr _) = False
415 litIsDupable dflags (LitInteger i _) = inIntRange dflags i
416 litIsDupable _ _ = True
417
418 litFitsInChar :: Literal -> Bool
419 litFitsInChar (MachInt i) = i >= toInteger (ord minBound)
420 && i <= toInteger (ord maxBound)
421 litFitsInChar _ = False
422
423 litIsLifted :: Literal -> Bool
424 litIsLifted (LitInteger {}) = True
425 litIsLifted _ = False
426
427 {-
428 Types
429 ~~~~~
430 -}
431
432 -- | Find the Haskell 'Type' the literal occupies
433 literalType :: Literal -> Type
434 literalType MachNullAddr = addrPrimTy
435 literalType (MachChar _) = charPrimTy
436 literalType (MachStr _) = addrPrimTy
437 literalType (MachInt _) = intPrimTy
438 literalType (MachWord _) = wordPrimTy
439 literalType (MachInt64 _) = int64PrimTy
440 literalType (MachWord64 _) = word64PrimTy
441 literalType (MachFloat _) = floatPrimTy
442 literalType (MachDouble _) = doublePrimTy
443 literalType (MachLabel _ _ _) = addrPrimTy
444 literalType (LitInteger _ t) = t
445
446 absentLiteralOf :: TyCon -> Maybe Literal
447 -- Return a literal of the appropriate primtive
448 -- TyCon, to use as a placeholder when it doesn't matter
449 absentLiteralOf tc = lookupUFM absent_lits (tyConName tc)
450
451 absent_lits :: UniqFM Literal
452 absent_lits = listToUFM [ (addrPrimTyConKey, MachNullAddr)
453 , (charPrimTyConKey, MachChar 'x')
454 , (intPrimTyConKey, MachInt 0)
455 , (int64PrimTyConKey, MachInt64 0)
456 , (floatPrimTyConKey, MachFloat 0)
457 , (doublePrimTyConKey, MachDouble 0)
458 , (wordPrimTyConKey, MachWord 0)
459 , (word64PrimTyConKey, MachWord64 0) ]
460
461 {-
462 Comparison
463 ~~~~~~~~~~
464 -}
465
466 cmpLit :: Literal -> Literal -> Ordering
467 cmpLit (MachChar a) (MachChar b) = a `compare` b
468 cmpLit (MachStr a) (MachStr b) = a `compare` b
469 cmpLit (MachNullAddr) (MachNullAddr) = EQ
470 cmpLit (MachInt a) (MachInt b) = a `compare` b
471 cmpLit (MachWord a) (MachWord b) = a `compare` b
472 cmpLit (MachInt64 a) (MachInt64 b) = a `compare` b
473 cmpLit (MachWord64 a) (MachWord64 b) = a `compare` b
474 cmpLit (MachFloat a) (MachFloat b) = a `compare` b
475 cmpLit (MachDouble a) (MachDouble b) = a `compare` b
476 cmpLit (MachLabel a _ _) (MachLabel b _ _) = a `compare` b
477 cmpLit (LitInteger a _) (LitInteger b _) = a `compare` b
478 cmpLit lit1 lit2 | litTag lit1 < litTag lit2 = LT
479 | otherwise = GT
480
481 litTag :: Literal -> Int
482 litTag (MachChar _) = 1
483 litTag (MachStr _) = 2
484 litTag (MachNullAddr) = 3
485 litTag (MachInt _) = 4
486 litTag (MachWord _) = 5
487 litTag (MachInt64 _) = 6
488 litTag (MachWord64 _) = 7
489 litTag (MachFloat _) = 8
490 litTag (MachDouble _) = 9
491 litTag (MachLabel _ _ _) = 10
492 litTag (LitInteger {}) = 11
493
494 {-
495 Printing
496 ~~~~~~~~
497 * See Note [Printing of literals in Core]
498 -}
499
500 pprLiteral :: (SDoc -> SDoc) -> Literal -> SDoc
501 pprLiteral _ (MachChar c) = pprPrimChar c
502 pprLiteral _ (MachStr s) = pprHsBytes s
503 pprLiteral _ (MachNullAddr) = text "__NULL"
504 pprLiteral _ (MachInt i) = pprPrimInt i
505 pprLiteral _ (MachInt64 i) = pprPrimInt64 i
506 pprLiteral _ (MachWord w) = pprPrimWord w
507 pprLiteral _ (MachWord64 w) = pprPrimWord64 w
508 pprLiteral _ (MachFloat f) = float (fromRat f) <> primFloatSuffix
509 pprLiteral _ (MachDouble d) = double (fromRat d) <> primDoubleSuffix
510 pprLiteral add_par (LitInteger i _) = pprIntegerVal add_par i
511 pprLiteral add_par (MachLabel l mb fod) = add_par (text "__label" <+> b <+> ppr fod)
512 where b = case mb of
513 Nothing -> pprHsString l
514 Just x -> doubleQuotes (text (unpackFS l ++ '@':show x))
515
516 pprIntegerVal :: (SDoc -> SDoc) -> Integer -> SDoc
517 -- See Note [Printing of literals in Core].
518 pprIntegerVal add_par i | i < 0 = add_par (integer i)
519 | otherwise = integer i
520
521 {-
522 Note [Printing of literals in Core]
523 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
524 The function `add_par` is used to wrap parenthesis around negative integers
525 (`LitInteger`) and labels (`MachLabel`), if they occur in a context requiring
526 an atomic thing (for example function application).
527
528 Although not all Core literals would be valid Haskell, we are trying to stay
529 as close as possible to Haskell syntax in the printing of Core, to make it
530 easier for a Haskell user to read Core.
531
532 To that end:
533 * We do print parenthesis around negative `LitInteger`, because we print
534 `LitInteger` using plain number literals (no prefix or suffix), and plain
535 number literals in Haskell require parenthesis in contexts like function
536 application (i.e. `1 - -1` is not valid Haskell).
537
538 * We don't print parenthesis around other (negative) literals, because they
539 aren't needed in GHC/Haskell either (i.e. `1# -# -1#` is accepted by GHC's
540 parser).
541
542 Literal Output Output if context requires
543 an atom (if different)
544 ------- ------- ----------------------
545 MachChar 'a'#
546 MachStr "aaa"#
547 MachNullAddr "__NULL"
548 MachInt -1#
549 MachInt64 -1L#
550 MachWord 1##
551 MachWord64 1L##
552 MachFloat -1.0#
553 MachDouble -1.0##
554 LitInteger -1 (-1)
555 MachLabel "__label" ... ("__label" ...)
556 -}