SafeHaskell: Added SafeHaskell to base
[ghc.git] / libraries / base / Text / Read / Lex.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE CPP, NoImplicitPrelude #-}
3
4 -----------------------------------------------------------------------------
5 -- |
6 -- Module : Text.Read.Lex
7 -- Copyright : (c) The University of Glasgow 2002
8 -- License : BSD-style (see the file libraries/base/LICENSE)
9 --
10 -- Maintainer : libraries@haskell.org
11 -- Stability : provisional
12 -- Portability : non-portable (uses Text.ParserCombinators.ReadP)
13 --
14 -- The cut-down Haskell lexer, used by Text.Read
15 --
16 -----------------------------------------------------------------------------
17
18 module Text.Read.Lex
19 -- lexing types
20 ( Lexeme(..) -- :: *; Show, Eq
21
22 -- lexer
23 , lex -- :: ReadP Lexeme Skips leading spaces
24 , hsLex -- :: ReadP String
25 , lexChar -- :: ReadP Char Reads just one char, with H98 escapes
26
27 , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
28 , readOctP -- :: Num a => ReadP a
29 , readDecP -- :: Num a => ReadP a
30 , readHexP -- :: Num a => ReadP a
31 )
32 where
33
34 import Text.ParserCombinators.ReadP
35
36 #ifdef __GLASGOW_HASKELL__
37 import GHC.Base
38 import GHC.Num( Num(..), Integer )
39 import GHC.Show( Show(..) )
40 #ifndef __HADDOCK__
41 import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
42 #endif
43 import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral,
44 toInteger, (^), (^^), infinity, notANumber )
45 import GHC.List
46 import GHC.Enum( maxBound )
47 #else
48 import Prelude hiding ( lex )
49 import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
50 import Data.Ratio( Ratio, (%) )
51 #endif
52 #ifdef __HUGS__
53 import Hugs.Prelude( Ratio(..) )
54 #endif
55 import Data.Maybe
56 import Control.Monad
57
58 -- -----------------------------------------------------------------------------
59 -- Lexing types
60
61 -- ^ Haskell lexemes.
62 data Lexeme
63 = Char Char -- ^ Character literal
64 | String String -- ^ String literal, with escapes interpreted
65 | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@
66 | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@
67 | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@
68 | Int Integer -- ^ Integer literal
69 | Rat Rational -- ^ Floating point literal
70 | EOF
71 deriving (Eq, Show)
72
73 -- -----------------------------------------------------------------------------
74 -- Lexing
75
76 lex :: ReadP Lexeme
77 lex = skipSpaces >> lexToken
78
79 hsLex :: ReadP String
80 -- ^ Haskell lexer: returns the lexed string, rather than the lexeme
81 hsLex = do skipSpaces
82 (s,_) <- gather lexToken
83 return s
84
85 lexToken :: ReadP Lexeme
86 lexToken = lexEOF +++
87 lexLitChar +++
88 lexString +++
89 lexPunc +++
90 lexSymbol +++
91 lexId +++
92 lexNumber
93
94
95 -- ----------------------------------------------------------------------
96 -- End of file
97 lexEOF :: ReadP Lexeme
98 lexEOF = do s <- look
99 guard (null s)
100 return EOF
101
102 -- ---------------------------------------------------------------------------
103 -- Single character lexemes
104
105 lexPunc :: ReadP Lexeme
106 lexPunc =
107 do c <- satisfy isPuncChar
108 return (Punc [c])
109 where
110 isPuncChar c = c `elem` ",;()[]{}`"
111
112 -- ----------------------------------------------------------------------
113 -- Symbols
114
115 lexSymbol :: ReadP Lexeme
116 lexSymbol =
117 do s <- munch1 isSymbolChar
118 if s `elem` reserved_ops then
119 return (Punc s) -- Reserved-ops count as punctuation
120 else
121 return (Symbol s)
122 where
123 isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
124 reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
125
126 -- ----------------------------------------------------------------------
127 -- identifiers
128
129 lexId :: ReadP Lexeme
130 lexId = lex_nan <++ lex_id
131 where
132 -- NaN and Infinity look like identifiers, so
133 -- we parse them first.
134 lex_nan = (string "NaN" >> return (Rat notANumber)) +++
135 (string "Infinity" >> return (Rat infinity))
136
137 lex_id = do c <- satisfy isIdsChar
138 s <- munch isIdfChar
139 return (Ident (c:s))
140
141 -- Identifiers can start with a '_'
142 isIdsChar c = isAlpha c || c == '_'
143 isIdfChar c = isAlphaNum c || c `elem` "_'"
144
145 #ifndef __GLASGOW_HASKELL__
146 infinity, notANumber :: Rational
147 infinity = 1 :% 0
148 notANumber = 0 :% 0
149 #endif
150
151 -- ---------------------------------------------------------------------------
152 -- Lexing character literals
153
154 lexLitChar :: ReadP Lexeme
155 lexLitChar =
156 do _ <- char '\''
157 (c,esc) <- lexCharE
158 guard (esc || c /= '\'') -- Eliminate '' possibility
159 _ <- char '\''
160 return (Char c)
161
162 lexChar :: ReadP Char
163 lexChar = do { (c,_) <- lexCharE; return c }
164
165 lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
166 lexCharE =
167 do c1 <- get
168 if c1 == '\\'
169 then do c2 <- lexEsc; return (c2, True)
170 else do return (c1, False)
171 where
172 lexEsc =
173 lexEscChar
174 +++ lexNumeric
175 +++ lexCntrlChar
176 +++ lexAscii
177
178 lexEscChar =
179 do c <- get
180 case c of
181 'a' -> return '\a'
182 'b' -> return '\b'
183 'f' -> return '\f'
184 'n' -> return '\n'
185 'r' -> return '\r'
186 't' -> return '\t'
187 'v' -> return '\v'
188 '\\' -> return '\\'
189 '\"' -> return '\"'
190 '\'' -> return '\''
191 _ -> pfail
192
193 lexNumeric =
194 do base <- lexBaseChar <++ return 10
195 n <- lexInteger base
196 guard (n <= toInteger (ord maxBound))
197 return (chr (fromInteger n))
198
199 lexCntrlChar =
200 do _ <- char '^'
201 c <- get
202 case c of
203 '@' -> return '\^@'
204 'A' -> return '\^A'
205 'B' -> return '\^B'
206 'C' -> return '\^C'
207 'D' -> return '\^D'
208 'E' -> return '\^E'
209 'F' -> return '\^F'
210 'G' -> return '\^G'
211 'H' -> return '\^H'
212 'I' -> return '\^I'
213 'J' -> return '\^J'
214 'K' -> return '\^K'
215 'L' -> return '\^L'
216 'M' -> return '\^M'
217 'N' -> return '\^N'
218 'O' -> return '\^O'
219 'P' -> return '\^P'
220 'Q' -> return '\^Q'
221 'R' -> return '\^R'
222 'S' -> return '\^S'
223 'T' -> return '\^T'
224 'U' -> return '\^U'
225 'V' -> return '\^V'
226 'W' -> return '\^W'
227 'X' -> return '\^X'
228 'Y' -> return '\^Y'
229 'Z' -> return '\^Z'
230 '[' -> return '\^['
231 '\\' -> return '\^\'
232 ']' -> return '\^]'
233 '^' -> return '\^^'
234 '_' -> return '\^_'
235 _ -> pfail
236
237 lexAscii =
238 do choice
239 [ (string "SOH" >> return '\SOH') <++
240 (string "SO" >> return '\SO')
241 -- \SO and \SOH need maximal-munch treatment
242 -- See the Haskell report Sect 2.6
243
244 , string "NUL" >> return '\NUL'
245 , string "STX" >> return '\STX'
246 , string "ETX" >> return '\ETX'
247 , string "EOT" >> return '\EOT'
248 , string "ENQ" >> return '\ENQ'
249 , string "ACK" >> return '\ACK'
250 , string "BEL" >> return '\BEL'
251 , string "BS" >> return '\BS'
252 , string "HT" >> return '\HT'
253 , string "LF" >> return '\LF'
254 , string "VT" >> return '\VT'
255 , string "FF" >> return '\FF'
256 , string "CR" >> return '\CR'
257 , string "SI" >> return '\SI'
258 , string "DLE" >> return '\DLE'
259 , string "DC1" >> return '\DC1'
260 , string "DC2" >> return '\DC2'
261 , string "DC3" >> return '\DC3'
262 , string "DC4" >> return '\DC4'
263 , string "NAK" >> return '\NAK'
264 , string "SYN" >> return '\SYN'
265 , string "ETB" >> return '\ETB'
266 , string "CAN" >> return '\CAN'
267 , string "EM" >> return '\EM'
268 , string "SUB" >> return '\SUB'
269 , string "ESC" >> return '\ESC'
270 , string "FS" >> return '\FS'
271 , string "GS" >> return '\GS'
272 , string "RS" >> return '\RS'
273 , string "US" >> return '\US'
274 , string "SP" >> return '\SP'
275 , string "DEL" >> return '\DEL'
276 ]
277
278
279 -- ---------------------------------------------------------------------------
280 -- string literal
281
282 lexString :: ReadP Lexeme
283 lexString =
284 do _ <- char '"'
285 body id
286 where
287 body f =
288 do (c,esc) <- lexStrItem
289 if c /= '"' || esc
290 then body (f.(c:))
291 else let s = f "" in
292 return (String s)
293
294 lexStrItem = (lexEmpty >> lexStrItem)
295 +++ lexCharE
296
297 lexEmpty =
298 do _ <- char '\\'
299 c <- get
300 case c of
301 '&' -> do return ()
302 _ | isSpace c -> do skipSpaces; _ <- char '\\'; return ()
303 _ -> do pfail
304
305 -- ---------------------------------------------------------------------------
306 -- Lexing numbers
307
308 type Base = Int
309 type Digits = [Int]
310
311 lexNumber :: ReadP Lexeme
312 lexNumber
313 = lexHexOct <++ -- First try for hex or octal 0x, 0o etc
314 -- If that fails, try for a decimal number
315 lexDecNumber -- Start with ordinary digits
316
317 lexHexOct :: ReadP Lexeme
318 lexHexOct
319 = do _ <- char '0'
320 base <- lexBaseChar
321 digits <- lexDigits base
322 return (Int (val (fromIntegral base) 0 digits))
323
324 lexBaseChar :: ReadP Int
325 -- Lex a single character indicating the base; fail if not there
326 lexBaseChar = do { c <- get;
327 case c of
328 'o' -> return 8
329 'O' -> return 8
330 'x' -> return 16
331 'X' -> return 16
332 _ -> pfail }
333
334 lexDecNumber :: ReadP Lexeme
335 lexDecNumber =
336 do xs <- lexDigits 10
337 mFrac <- lexFrac <++ return Nothing
338 mExp <- lexExp <++ return Nothing
339 return (value xs mFrac mExp)
340 where
341 value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
342
343 valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
344 -> Lexeme
345 valueFracExp a Nothing Nothing
346 = Int a -- 43
347 valueFracExp a Nothing (Just exp)
348 | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7
349 | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
350 valueFracExp a (Just fs) mExp
351 = case mExp of
352 Nothing -> Rat rat -- 4.3
353 Just exp -> Rat (valExp rat exp) -- 4.3e-4
354 where
355 rat :: Rational
356 rat = fromInteger a + frac 10 0 1 fs
357
358 valExp :: Rational -> Integer -> Rational
359 valExp rat exp = rat * (10 ^^ exp)
360
361 lexFrac :: ReadP (Maybe Digits)
362 -- Read the fractional part; fail if it doesn't
363 -- start ".d" where d is a digit
364 lexFrac = do _ <- char '.'
365 fraction <- lexDigits 10
366 return (Just fraction)
367
368 lexExp :: ReadP (Maybe Integer)
369 lexExp = do _ <- char 'e' +++ char 'E'
370 exp <- signedExp +++ lexInteger 10
371 return (Just exp)
372 where
373 signedExp
374 = do c <- char '-' +++ char '+'
375 n <- lexInteger 10
376 return (if c == '-' then -n else n)
377
378 lexDigits :: Int -> ReadP Digits
379 -- Lex a non-empty sequence of digits in specified base
380 lexDigits base =
381 do s <- look
382 xs <- scan s id
383 guard (not (null xs))
384 return xs
385 where
386 scan (c:cs) f = case valDig base c of
387 Just n -> do _ <- get; scan cs (f.(n:))
388 Nothing -> do return (f [])
389 scan [] f = do return (f [])
390
391 lexInteger :: Base -> ReadP Integer
392 lexInteger base =
393 do xs <- lexDigits base
394 return (val (fromIntegral base) 0 xs)
395
396 val :: Num a => a -> a -> Digits -> a
397 -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
398 val _ y [] = y
399 val base y (x:xs) = y' `seq` val base y' xs
400 where
401 y' = y * base + fromIntegral x
402
403 frac :: Integral a => a -> a -> a -> Digits -> Ratio a
404 frac _ a b [] = a % b
405 frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
406 where
407 a' = a * base + fromIntegral x
408 b' = b * base
409
410 valDig :: Num a => a -> Char -> Maybe Int
411 valDig 8 c
412 | '0' <= c && c <= '7' = Just (ord c - ord '0')
413 | otherwise = Nothing
414
415 valDig 10 c = valDecDig c
416
417 valDig 16 c
418 | '0' <= c && c <= '9' = Just (ord c - ord '0')
419 | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
420 | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
421 | otherwise = Nothing
422
423 valDig _ _ = error "valDig: Bad base"
424
425 valDecDig :: Char -> Maybe Int
426 valDecDig c
427 | '0' <= c && c <= '9' = Just (ord c - ord '0')
428 | otherwise = Nothing
429
430 -- ----------------------------------------------------------------------
431 -- other numeric lexing functions
432
433 readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
434 readIntP base isDigit valDigit =
435 do s <- munch1 isDigit
436 return (val base 0 (map valDigit s))
437
438 readIntP' :: Num a => a -> ReadP a
439 readIntP' base = readIntP base isDigit valDigit
440 where
441 isDigit c = maybe False (const True) (valDig base c)
442 valDigit c = maybe 0 id (valDig base c)
443
444 readOctP, readDecP, readHexP :: Num a => ReadP a
445 readOctP = readIntP' 8
446 readDecP = readIntP' 10
447 readHexP = readIntP' 16