Add Haddock `/Since: 4.5.[01].0/` comments to symbols
[packages/base.git] / Text / Read / Lex.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE NoImplicitPrelude #-}
3
4 -----------------------------------------------------------------------------
5 -- |
6 -- Module : Text.Read.Lex
7 -- Copyright : (c) The University of Glasgow 2002
8 -- License : BSD-style (see the file libraries/base/LICENSE)
9 --
10 -- Maintainer : libraries@haskell.org
11 -- Stability : provisional
12 -- Portability : non-portable (uses Text.ParserCombinators.ReadP)
13 --
14 -- The cut-down Haskell lexer, used by Text.Read
15 --
16 -----------------------------------------------------------------------------
17
18 module Text.Read.Lex
19 -- lexing types
20 ( Lexeme(..)
21
22 , numberToInteger, numberToFixed, numberToRational, numberToRangedRational
23
24 -- lexer
25 , lex, expect
26 , hsLex
27 , lexChar
28
29 , readIntP
30 , readOctP
31 , readDecP
32 , readHexP
33 )
34 where
35
36 import Text.ParserCombinators.ReadP
37
38 import GHC.Base
39 import GHC.Char
40 import GHC.Num( Num(..), Integer )
41 import GHC.Show( Show(..) )
42 import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
43 import GHC.Real( Rational, (%), fromIntegral,
44 toInteger, (^) )
45 import GHC.List
46 import GHC.Enum( minBound, maxBound )
47 import Data.Maybe
48 import Control.Monad
49
50 -- -----------------------------------------------------------------------------
51 -- Lexing types
52
53 -- ^ Haskell lexemes.
54 data Lexeme
55 = Char Char -- ^ Character literal
56 | String String -- ^ String literal, with escapes interpreted
57 | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@
58 | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@
59 | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@
60 | Number Number -- ^ /Since: 4.6.0.0/
61 | EOF
62 deriving (Eq, Show)
63
64 -- | /Since: 4.6.0.0/
65 data Number = MkNumber Int -- Base
66 Digits -- Integral part
67 | MkDecimal Digits -- Integral part
68 (Maybe Digits) -- Fractional part
69 (Maybe Integer) -- Exponent
70 deriving (Eq, Show)
71
72 -- | /Since: 4.5.1.0/
73 numberToInteger :: Number -> Maybe Integer
74 numberToInteger (MkNumber base iPart) = Just (val (fromIntegral base) 0 iPart)
75 numberToInteger (MkDecimal iPart Nothing Nothing) = Just (val 10 0 iPart)
76 numberToInteger _ = Nothing
77
78 -- | /Since: 4.7.0.0/
79 numberToFixed :: Integer -> Number -> Maybe (Integer, Integer)
80 numberToFixed _ (MkNumber base iPart) = Just (val (fromIntegral base) 0 iPart, 0)
81 numberToFixed _ (MkDecimal iPart Nothing Nothing) = Just (val 10 0 iPart, 0)
82 numberToFixed p (MkDecimal iPart (Just fPart) Nothing)
83 = let i = val 10 0 iPart
84 f = val 10 0 (integerTake p (fPart ++ repeat 0))
85 -- Sigh, we really want genericTake, but that's above us in
86 -- the hierarchy, so we define our own version here (actually
87 -- specialised to Integer)
88 integerTake :: Integer -> [a] -> [a]
89 integerTake n _ | n <= 0 = []
90 integerTake _ [] = []
91 integerTake n (x:xs) = x : integerTake (n-1) xs
92 in Just (i, f)
93 numberToFixed _ _ = Nothing
94
95 -- This takes a floatRange, and if the Rational would be outside of
96 -- the floatRange then it may return Nothing. Not that it will not
97 -- /necessarily/ return Nothing, but it is good enough to fix the
98 -- space problems in #5688
99 -- Ways this is conservative:
100 -- * the floatRange is in base 2, but we pretend it is in base 10
101 -- * we pad the floateRange a bit, just in case it is very small
102 -- and we would otherwise hit an edge case
103 -- * We only worry about numbers that have an exponent. If they don't
104 -- have an exponent then the Rational won't be much larger than the
105 -- Number, so there is no problem
106 -- | /Since: 4.5.1.0/
107 numberToRangedRational :: (Int, Int) -> Number
108 -> Maybe Rational -- Nothing = Inf
109 numberToRangedRational (neg, pos) n@(MkDecimal iPart mFPart (Just exp))
110 -- if exp is out of integer bounds,
111 -- then the number is definitely out of range
112 | exp > fromIntegral (maxBound :: Int) ||
113 exp < fromIntegral (minBound :: Int)
114 = Nothing
115 | otherwise
116 = let mFirstDigit = case dropWhile (0 ==) iPart of
117 iPart'@(_ : _) -> Just (length iPart')
118 [] -> case mFPart of
119 Nothing -> Nothing
120 Just fPart ->
121 case span (0 ==) fPart of
122 (_, []) -> Nothing
123 (zeroes, _) ->
124 Just (negate (length zeroes))
125 in case mFirstDigit of
126 Nothing -> Just 0
127 Just firstDigit ->
128 let firstDigit' = firstDigit + fromInteger exp
129 in if firstDigit' > (pos + 3)
130 then Nothing
131 else if firstDigit' < (neg - 3)
132 then Just 0
133 else Just (numberToRational n)
134 numberToRangedRational _ n = Just (numberToRational n)
135
136 -- | /Since: 4.6.0.0/
137 numberToRational :: Number -> Rational
138 numberToRational (MkNumber base iPart) = val (fromIntegral base) 0 iPart % 1
139 numberToRational (MkDecimal iPart mFPart mExp)
140 = let i = val 10 0 iPart
141 in case (mFPart, mExp) of
142 (Nothing, Nothing) -> i % 1
143 (Nothing, Just exp)
144 | exp >= 0 -> (i * (10 ^ exp)) % 1
145 | otherwise -> i % (10 ^ (- exp))
146 (Just fPart, Nothing) -> fracExp 0 i fPart
147 (Just fPart, Just exp) -> fracExp exp i fPart
148 -- fracExp is a bit more efficient in calculating the Rational.
149 -- Instead of calculating the fractional part alone, then
150 -- adding the integral part and finally multiplying with
151 -- 10 ^ exp if an exponent was given, do it all at once.
152
153 -- -----------------------------------------------------------------------------
154 -- Lexing
155
156 lex :: ReadP Lexeme
157 lex = skipSpaces >> lexToken
158
159 -- | /Since: 4.7.0.0/
160 expect :: Lexeme -> ReadP ()
161 expect lexeme = do { skipSpaces
162 ; thing <- lexToken
163 ; if thing == lexeme then return () else pfail }
164
165 hsLex :: ReadP String
166 -- ^ Haskell lexer: returns the lexed string, rather than the lexeme
167 hsLex = do skipSpaces
168 (s,_) <- gather lexToken
169 return s
170
171 lexToken :: ReadP Lexeme
172 lexToken = lexEOF +++
173 lexLitChar +++
174 lexString +++
175 lexPunc +++
176 lexSymbol +++
177 lexId +++
178 lexNumber
179
180
181 -- ----------------------------------------------------------------------
182 -- End of file
183 lexEOF :: ReadP Lexeme
184 lexEOF = do s <- look
185 guard (null s)
186 return EOF
187
188 -- ---------------------------------------------------------------------------
189 -- Single character lexemes
190
191 lexPunc :: ReadP Lexeme
192 lexPunc =
193 do c <- satisfy isPuncChar
194 return (Punc [c])
195 where
196 isPuncChar c = c `elem` ",;()[]{}`"
197
198 -- ----------------------------------------------------------------------
199 -- Symbols
200
201 lexSymbol :: ReadP Lexeme
202 lexSymbol =
203 do s <- munch1 isSymbolChar
204 if s `elem` reserved_ops then
205 return (Punc s) -- Reserved-ops count as punctuation
206 else
207 return (Symbol s)
208 where
209 isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
210 reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
211
212 -- ----------------------------------------------------------------------
213 -- identifiers
214
215 lexId :: ReadP Lexeme
216 lexId = do c <- satisfy isIdsChar
217 s <- munch isIdfChar
218 return (Ident (c:s))
219 where
220 -- Identifiers can start with a '_'
221 isIdsChar c = isAlpha c || c == '_'
222 isIdfChar c = isAlphaNum c || c `elem` "_'"
223
224 -- ---------------------------------------------------------------------------
225 -- Lexing character literals
226
227 lexLitChar :: ReadP Lexeme
228 lexLitChar =
229 do _ <- char '\''
230 (c,esc) <- lexCharE
231 guard (esc || c /= '\'') -- Eliminate '' possibility
232 _ <- char '\''
233 return (Char c)
234
235 lexChar :: ReadP Char
236 lexChar = do { (c,_) <- lexCharE; return c }
237
238 lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
239 lexCharE =
240 do c1 <- get
241 if c1 == '\\'
242 then do c2 <- lexEsc; return (c2, True)
243 else do return (c1, False)
244 where
245 lexEsc =
246 lexEscChar
247 +++ lexNumeric
248 +++ lexCntrlChar
249 +++ lexAscii
250
251 lexEscChar =
252 do c <- get
253 case c of
254 'a' -> return '\a'
255 'b' -> return '\b'
256 'f' -> return '\f'
257 'n' -> return '\n'
258 'r' -> return '\r'
259 't' -> return '\t'
260 'v' -> return '\v'
261 '\\' -> return '\\'
262 '\"' -> return '\"'
263 '\'' -> return '\''
264 _ -> pfail
265
266 lexNumeric =
267 do base <- lexBaseChar <++ return 10
268 n <- lexInteger base
269 guard (n <= toInteger (ord maxBound))
270 return (chr (fromInteger n))
271
272 lexCntrlChar =
273 do _ <- char '^'
274 c <- get
275 case c of
276 '@' -> return '\^@'
277 'A' -> return '\^A'
278 'B' -> return '\^B'
279 'C' -> return '\^C'
280 'D' -> return '\^D'
281 'E' -> return '\^E'
282 'F' -> return '\^F'
283 'G' -> return '\^G'
284 'H' -> return '\^H'
285 'I' -> return '\^I'
286 'J' -> return '\^J'
287 'K' -> return '\^K'
288 'L' -> return '\^L'
289 'M' -> return '\^M'
290 'N' -> return '\^N'
291 'O' -> return '\^O'
292 'P' -> return '\^P'
293 'Q' -> return '\^Q'
294 'R' -> return '\^R'
295 'S' -> return '\^S'
296 'T' -> return '\^T'
297 'U' -> return '\^U'
298 'V' -> return '\^V'
299 'W' -> return '\^W'
300 'X' -> return '\^X'
301 'Y' -> return '\^Y'
302 'Z' -> return '\^Z'
303 '[' -> return '\^['
304 '\\' -> return '\^\'
305 ']' -> return '\^]'
306 '^' -> return '\^^'
307 '_' -> return '\^_'
308 _ -> pfail
309
310 lexAscii =
311 do choice
312 [ (string "SOH" >> return '\SOH') <++
313 (string "SO" >> return '\SO')
314 -- \SO and \SOH need maximal-munch treatment
315 -- See the Haskell report Sect 2.6
316
317 , string "NUL" >> return '\NUL'
318 , string "STX" >> return '\STX'
319 , string "ETX" >> return '\ETX'
320 , string "EOT" >> return '\EOT'
321 , string "ENQ" >> return '\ENQ'
322 , string "ACK" >> return '\ACK'
323 , string "BEL" >> return '\BEL'
324 , string "BS" >> return '\BS'
325 , string "HT" >> return '\HT'
326 , string "LF" >> return '\LF'
327 , string "VT" >> return '\VT'
328 , string "FF" >> return '\FF'
329 , string "CR" >> return '\CR'
330 , string "SI" >> return '\SI'
331 , string "DLE" >> return '\DLE'
332 , string "DC1" >> return '\DC1'
333 , string "DC2" >> return '\DC2'
334 , string "DC3" >> return '\DC3'
335 , string "DC4" >> return '\DC4'
336 , string "NAK" >> return '\NAK'
337 , string "SYN" >> return '\SYN'
338 , string "ETB" >> return '\ETB'
339 , string "CAN" >> return '\CAN'
340 , string "EM" >> return '\EM'
341 , string "SUB" >> return '\SUB'
342 , string "ESC" >> return '\ESC'
343 , string "FS" >> return '\FS'
344 , string "GS" >> return '\GS'
345 , string "RS" >> return '\RS'
346 , string "US" >> return '\US'
347 , string "SP" >> return '\SP'
348 , string "DEL" >> return '\DEL'
349 ]
350
351
352 -- ---------------------------------------------------------------------------
353 -- string literal
354
355 lexString :: ReadP Lexeme
356 lexString =
357 do _ <- char '"'
358 body id
359 where
360 body f =
361 do (c,esc) <- lexStrItem
362 if c /= '"' || esc
363 then body (f.(c:))
364 else let s = f "" in
365 return (String s)
366
367 lexStrItem = (lexEmpty >> lexStrItem)
368 +++ lexCharE
369
370 lexEmpty =
371 do _ <- char '\\'
372 c <- get
373 case c of
374 '&' -> do return ()
375 _ | isSpace c -> do skipSpaces; _ <- char '\\'; return ()
376 _ -> do pfail
377
378 -- ---------------------------------------------------------------------------
379 -- Lexing numbers
380
381 type Base = Int
382 type Digits = [Int]
383
384 lexNumber :: ReadP Lexeme
385 lexNumber
386 = lexHexOct <++ -- First try for hex or octal 0x, 0o etc
387 -- If that fails, try for a decimal number
388 lexDecNumber -- Start with ordinary digits
389
390 lexHexOct :: ReadP Lexeme
391 lexHexOct
392 = do _ <- char '0'
393 base <- lexBaseChar
394 digits <- lexDigits base
395 return (Number (MkNumber base digits))
396
397 lexBaseChar :: ReadP Int
398 -- Lex a single character indicating the base; fail if not there
399 lexBaseChar = do { c <- get;
400 case c of
401 'o' -> return 8
402 'O' -> return 8
403 'x' -> return 16
404 'X' -> return 16
405 _ -> pfail }
406
407 lexDecNumber :: ReadP Lexeme
408 lexDecNumber =
409 do xs <- lexDigits 10
410 mFrac <- lexFrac <++ return Nothing
411 mExp <- lexExp <++ return Nothing
412 return (Number (MkDecimal xs mFrac mExp))
413
414 lexFrac :: ReadP (Maybe Digits)
415 -- Read the fractional part; fail if it doesn't
416 -- start ".d" where d is a digit
417 lexFrac = do _ <- char '.'
418 fraction <- lexDigits 10
419 return (Just fraction)
420
421 lexExp :: ReadP (Maybe Integer)
422 lexExp = do _ <- char 'e' +++ char 'E'
423 exp <- signedExp +++ lexInteger 10
424 return (Just exp)
425 where
426 signedExp
427 = do c <- char '-' +++ char '+'
428 n <- lexInteger 10
429 return (if c == '-' then -n else n)
430
431 lexDigits :: Int -> ReadP Digits
432 -- Lex a non-empty sequence of digits in specified base
433 lexDigits base =
434 do s <- look
435 xs <- scan s id
436 guard (not (null xs))
437 return xs
438 where
439 scan (c:cs) f = case valDig base c of
440 Just n -> do _ <- get; scan cs (f.(n:))
441 Nothing -> do return (f [])
442 scan [] f = do return (f [])
443
444 lexInteger :: Base -> ReadP Integer
445 lexInteger base =
446 do xs <- lexDigits base
447 return (val (fromIntegral base) 0 xs)
448
449 val :: Num a => a -> a -> Digits -> a
450 -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
451 val _ y [] = y
452 val base y (x:xs) = y' `seq` val base y' xs
453 where
454 y' = y * base + fromIntegral x
455
456 -- Calculate a Rational from the exponent [of 10 to multiply with],
457 -- the integral part of the mantissa and the digits of the fractional
458 -- part. Leaving the calculation of the power of 10 until the end,
459 -- when we know the effective exponent, saves multiplications.
460 -- More importantly, this way we need at most one gcd instead of three.
461 --
462 -- frac was never used with anything but Integer and base 10, so
463 -- those are hardcoded now (trivial to change if necessary).
464 fracExp :: Integer -> Integer -> Digits -> Rational
465 fracExp exp mant []
466 | exp < 0 = mant % (10 ^ (-exp))
467 | otherwise = fromInteger (mant * 10 ^ exp)
468 fracExp exp mant (d:ds) = exp' `seq` mant' `seq` fracExp exp' mant' ds
469 where
470 exp' = exp - 1
471 mant' = mant * 10 + fromIntegral d
472
473 valDig :: (Eq a, Num a) => a -> Char -> Maybe Int
474 valDig 8 c
475 | '0' <= c && c <= '7' = Just (ord c - ord '0')
476 | otherwise = Nothing
477
478 valDig 10 c = valDecDig c
479
480 valDig 16 c
481 | '0' <= c && c <= '9' = Just (ord c - ord '0')
482 | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
483 | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
484 | otherwise = Nothing
485
486 valDig _ _ = error "valDig: Bad base"
487
488 valDecDig :: Char -> Maybe Int
489 valDecDig c
490 | '0' <= c && c <= '9' = Just (ord c - ord '0')
491 | otherwise = Nothing
492
493 -- ----------------------------------------------------------------------
494 -- other numeric lexing functions
495
496 readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
497 readIntP base isDigit valDigit =
498 do s <- munch1 isDigit
499 return (val base 0 (map valDigit s))
500
501 readIntP' :: (Eq a, Num a) => a -> ReadP a
502 readIntP' base = readIntP base isDigit valDigit
503 where
504 isDigit c = maybe False (const True) (valDig base c)
505 valDigit c = maybe 0 id (valDig base c)
506
507 readOctP, readDecP, readHexP :: (Eq a, Num a) => ReadP a
508 readOctP = readIntP' 8
509 readDecP = readIntP' 10
510 readHexP = readIntP' 16
511