Remove Control.Parallel*, now in package parallel
[packages/random.git] / Text / Read / Lex.hs
1 {-# OPTIONS_GHC -fno-implicit-prelude #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Text.Read.Lex
5 -- Copyright : (c) The University of Glasgow 2002
6 -- License : BSD-style (see the file libraries/base/LICENSE)
7 --
8 -- Maintainer : libraries@haskell.org
9 -- Stability : provisional
10 -- Portability : non-portable (uses Text.ParserCombinators.ReadP)
11 --
12 -- The cut-down Haskell lexer, used by Text.Read
13 --
14 -----------------------------------------------------------------------------
15
16 module Text.Read.Lex
17 -- lexing types
18 ( Lexeme(..) -- :: *; Show, Eq
19
20 -- lexer
21 , lex -- :: ReadP Lexeme Skips leading spaces
22 , hsLex -- :: ReadP String
23 , lexChar -- :: ReadP Char Reads just one char, with H98 escapes
24
25 , readIntP -- :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
26 , readOctP -- :: Num a => ReadP a
27 , readDecP -- :: Num a => ReadP a
28 , readHexP -- :: Num a => ReadP a
29 )
30 where
31
32 import Text.ParserCombinators.ReadP
33
34 #ifdef __GLASGOW_HASKELL__
35 import GHC.Base
36 import GHC.Num( Num(..), Integer )
37 import GHC.Show( Show(..) )
38 #ifndef __HADDOCK__
39 import {-# SOURCE #-} GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
40 #endif
41 import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral,
42 toInteger, (^), (^^), infinity, notANumber )
43 import GHC.List
44 import GHC.Enum( maxBound )
45 #else
46 import Prelude hiding ( lex )
47 import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
48 import Data.Ratio( Ratio, (%) )
49 #endif
50 #ifdef __HUGS__
51 import Hugs.Prelude( Ratio(..) )
52 #endif
53 import Data.Maybe
54 import Control.Monad
55
56 -- -----------------------------------------------------------------------------
57 -- Lexing types
58
59 -- ^ Haskell lexemes.
60 data Lexeme
61 = Char Char -- ^ Character literal
62 | String String -- ^ String literal, with escapes interpreted
63 | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@
64 | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@
65 | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@
66 | Int Integer -- ^ Integer literal
67 | Rat Rational -- ^ Floating point literal
68 | EOF
69 deriving (Eq, Show)
70
71 -- -----------------------------------------------------------------------------
72 -- Lexing
73
74 lex :: ReadP Lexeme
75 lex = skipSpaces >> lexToken
76
77 hsLex :: ReadP String
78 -- ^ Haskell lexer: returns the lexed string, rather than the lexeme
79 hsLex = do skipSpaces
80 (s,_) <- gather lexToken
81 return s
82
83 lexToken :: ReadP Lexeme
84 lexToken = lexEOF +++
85 lexLitChar +++
86 lexString +++
87 lexPunc +++
88 lexSymbol +++
89 lexId +++
90 lexNumber
91
92
93 -- ----------------------------------------------------------------------
94 -- End of file
95 lexEOF :: ReadP Lexeme
96 lexEOF = do s <- look
97 guard (null s)
98 return EOF
99
100 -- ---------------------------------------------------------------------------
101 -- Single character lexemes
102
103 lexPunc :: ReadP Lexeme
104 lexPunc =
105 do c <- satisfy isPuncChar
106 return (Punc [c])
107 where
108 isPuncChar c = c `elem` ",;()[]{}`"
109
110 -- ----------------------------------------------------------------------
111 -- Symbols
112
113 lexSymbol :: ReadP Lexeme
114 lexSymbol =
115 do s <- munch1 isSymbolChar
116 if s `elem` reserved_ops then
117 return (Punc s) -- Reserved-ops count as punctuation
118 else
119 return (Symbol s)
120 where
121 isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
122 reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
123
124 -- ----------------------------------------------------------------------
125 -- identifiers
126
127 lexId :: ReadP Lexeme
128 lexId = lex_nan <++ lex_id
129 where
130 -- NaN and Infinity look like identifiers, so
131 -- we parse them first.
132 lex_nan = (string "NaN" >> return (Rat notANumber)) +++
133 (string "Infinity" >> return (Rat infinity))
134
135 lex_id = do c <- satisfy isIdsChar
136 s <- munch isIdfChar
137 return (Ident (c:s))
138
139 -- Identifiers can start with a '_'
140 isIdsChar c = isAlpha c || c == '_'
141 isIdfChar c = isAlphaNum c || c `elem` "_'"
142
143 #ifndef __GLASGOW_HASKELL__
144 infinity, notANumber :: Rational
145 infinity = 1 :% 0
146 notANumber = 0 :% 0
147 #endif
148
149 -- ---------------------------------------------------------------------------
150 -- Lexing character literals
151
152 lexLitChar :: ReadP Lexeme
153 lexLitChar =
154 do char '\''
155 (c,esc) <- lexCharE
156 guard (esc || c /= '\'') -- Eliminate '' possibility
157 char '\''
158 return (Char c)
159
160 lexChar :: ReadP Char
161 lexChar = do { (c,_) <- lexCharE; return c }
162
163 lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
164 lexCharE =
165 do c <- get
166 if c == '\\'
167 then do c <- lexEsc; return (c, True)
168 else do return (c, False)
169 where
170 lexEsc =
171 lexEscChar
172 +++ lexNumeric
173 +++ lexCntrlChar
174 +++ lexAscii
175
176 lexEscChar =
177 do c <- get
178 case c of
179 'a' -> return '\a'
180 'b' -> return '\b'
181 'f' -> return '\f'
182 'n' -> return '\n'
183 'r' -> return '\r'
184 't' -> return '\t'
185 'v' -> return '\v'
186 '\\' -> return '\\'
187 '\"' -> return '\"'
188 '\'' -> return '\''
189 _ -> pfail
190
191 lexNumeric =
192 do base <- lexBaseChar <++ return 10
193 n <- lexInteger base
194 guard (n <= toInteger (ord maxBound))
195 return (chr (fromInteger n))
196
197 lexCntrlChar =
198 do char '^'
199 c <- get
200 case c of
201 '@' -> return '\^@'
202 'A' -> return '\^A'
203 'B' -> return '\^B'
204 'C' -> return '\^C'
205 'D' -> return '\^D'
206 'E' -> return '\^E'
207 'F' -> return '\^F'
208 'G' -> return '\^G'
209 'H' -> return '\^H'
210 'I' -> return '\^I'
211 'J' -> return '\^J'
212 'K' -> return '\^K'
213 'L' -> return '\^L'
214 'M' -> return '\^M'
215 'N' -> return '\^N'
216 'O' -> return '\^O'
217 'P' -> return '\^P'
218 'Q' -> return '\^Q'
219 'R' -> return '\^R'
220 'S' -> return '\^S'
221 'T' -> return '\^T'
222 'U' -> return '\^U'
223 'V' -> return '\^V'
224 'W' -> return '\^W'
225 'X' -> return '\^X'
226 'Y' -> return '\^Y'
227 'Z' -> return '\^Z'
228 '[' -> return '\^['
229 '\\' -> return '\^\'
230 ']' -> return '\^]'
231 '^' -> return '\^^'
232 '_' -> return '\^_'
233 _ -> pfail
234
235 lexAscii =
236 do choice
237 [ (string "SOH" >> return '\SOH') <++
238 (string "SO" >> return '\SO')
239 -- \SO and \SOH need maximal-munch treatment
240 -- See the Haskell report Sect 2.6
241
242 , string "NUL" >> return '\NUL'
243 , string "STX" >> return '\STX'
244 , string "ETX" >> return '\ETX'
245 , string "EOT" >> return '\EOT'
246 , string "ENQ" >> return '\ENQ'
247 , string "ACK" >> return '\ACK'
248 , string "BEL" >> return '\BEL'
249 , string "BS" >> return '\BS'
250 , string "HT" >> return '\HT'
251 , string "LF" >> return '\LF'
252 , string "VT" >> return '\VT'
253 , string "FF" >> return '\FF'
254 , string "CR" >> return '\CR'
255 , string "SI" >> return '\SI'
256 , string "DLE" >> return '\DLE'
257 , string "DC1" >> return '\DC1'
258 , string "DC2" >> return '\DC2'
259 , string "DC3" >> return '\DC3'
260 , string "DC4" >> return '\DC4'
261 , string "NAK" >> return '\NAK'
262 , string "SYN" >> return '\SYN'
263 , string "ETB" >> return '\ETB'
264 , string "CAN" >> return '\CAN'
265 , string "EM" >> return '\EM'
266 , string "SUB" >> return '\SUB'
267 , string "ESC" >> return '\ESC'
268 , string "FS" >> return '\FS'
269 , string "GS" >> return '\GS'
270 , string "RS" >> return '\RS'
271 , string "US" >> return '\US'
272 , string "SP" >> return '\SP'
273 , string "DEL" >> return '\DEL'
274 ]
275
276
277 -- ---------------------------------------------------------------------------
278 -- string literal
279
280 lexString :: ReadP Lexeme
281 lexString =
282 do char '"'
283 body id
284 where
285 body f =
286 do (c,esc) <- lexStrItem
287 if c /= '"' || esc
288 then body (f.(c:))
289 else let s = f "" in
290 return (String s)
291
292 lexStrItem = (lexEmpty >> lexStrItem)
293 +++ lexCharE
294
295 lexEmpty =
296 do char '\\'
297 c <- get
298 case c of
299 '&' -> do return ()
300 _ | isSpace c -> do skipSpaces; char '\\'; return ()
301 _ -> do pfail
302
303 -- ---------------------------------------------------------------------------
304 -- Lexing numbers
305
306 type Base = Int
307 type Digits = [Int]
308
309 lexNumber :: ReadP Lexeme
310 lexNumber
311 = lexHexOct <++ -- First try for hex or octal 0x, 0o etc
312 -- If that fails, try for a decimal number
313 lexDecNumber -- Start with ordinary digits
314
315 lexHexOct :: ReadP Lexeme
316 lexHexOct
317 = do char '0'
318 base <- lexBaseChar
319 digits <- lexDigits base
320 return (Int (val (fromIntegral base) 0 digits))
321
322 lexBaseChar :: ReadP Int
323 -- Lex a single character indicating the base; fail if not there
324 lexBaseChar = do { c <- get;
325 case c of
326 'o' -> return 8
327 'O' -> return 8
328 'x' -> return 16
329 'X' -> return 16
330 _ -> pfail }
331
332 lexDecNumber :: ReadP Lexeme
333 lexDecNumber =
334 do xs <- lexDigits 10
335 mFrac <- lexFrac <++ return Nothing
336 mExp <- lexExp <++ return Nothing
337 return (value xs mFrac mExp)
338 where
339 value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
340
341 valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
342 -> Lexeme
343 valueFracExp a Nothing Nothing
344 = Int a -- 43
345 valueFracExp a Nothing (Just exp)
346 | exp >= 0 = Int (a * (10 ^ exp)) -- 43e7
347 | otherwise = Rat (valExp (fromInteger a) exp) -- 43e-7
348 valueFracExp a (Just fs) mExp
349 = case mExp of
350 Nothing -> Rat rat -- 4.3
351 Just exp -> Rat (valExp rat exp) -- 4.3e-4
352 where
353 rat :: Rational
354 rat = fromInteger a + frac 10 0 1 fs
355
356 valExp :: Rational -> Integer -> Rational
357 valExp rat exp = rat * (10 ^^ exp)
358
359 lexFrac :: ReadP (Maybe Digits)
360 -- Read the fractional part; fail if it doesn't
361 -- start ".d" where d is a digit
362 lexFrac = do char '.'
363 frac <- lexDigits 10
364 return (Just frac)
365
366 lexExp :: ReadP (Maybe Integer)
367 lexExp = do char 'e' +++ char 'E'
368 exp <- signedExp +++ lexInteger 10
369 return (Just exp)
370 where
371 signedExp
372 = do c <- char '-' +++ char '+'
373 n <- lexInteger 10
374 return (if c == '-' then -n else n)
375
376 lexDigits :: Int -> ReadP Digits
377 -- Lex a non-empty sequence of digits in specified base
378 lexDigits base =
379 do s <- look
380 xs <- scan s id
381 guard (not (null xs))
382 return xs
383 where
384 scan (c:cs) f = case valDig base c of
385 Just n -> do get; scan cs (f.(n:))
386 Nothing -> do return (f [])
387 scan [] f = do return (f [])
388
389 lexInteger :: Base -> ReadP Integer
390 lexInteger base =
391 do xs <- lexDigits base
392 return (val (fromIntegral base) 0 xs)
393
394 val :: Num a => a -> a -> Digits -> a
395 -- val base y [d1,..,dn] = y ++ [d1,..,dn], as it were
396 val base y [] = y
397 val base y (x:xs) = y' `seq` val base y' xs
398 where
399 y' = y * base + fromIntegral x
400
401 frac :: Integral a => a -> a -> a -> Digits -> Ratio a
402 frac base a b [] = a % b
403 frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
404 where
405 a' = a * base + fromIntegral x
406 b' = b * base
407
408 valDig :: Num a => a -> Char -> Maybe Int
409 valDig 8 c
410 | '0' <= c && c <= '7' = Just (ord c - ord '0')
411 | otherwise = Nothing
412
413 valDig 10 c = valDecDig c
414
415 valDig 16 c
416 | '0' <= c && c <= '9' = Just (ord c - ord '0')
417 | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
418 | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
419 | otherwise = Nothing
420
421 valDecDig c
422 | '0' <= c && c <= '9' = Just (ord c - ord '0')
423 | otherwise = Nothing
424
425 -- ----------------------------------------------------------------------
426 -- other numeric lexing functions
427
428 readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
429 readIntP base isDigit valDigit =
430 do s <- munch1 isDigit
431 return (val base 0 (map valDigit s))
432
433 readIntP' :: Num a => a -> ReadP a
434 readIntP' base = readIntP base isDigit valDigit
435 where
436 isDigit c = maybe False (const True) (valDig base c)
437 valDigit c = maybe 0 id (valDig base c)
438
439 readOctP, readDecP, readHexP :: Num a => ReadP a
440 readOctP = readIntP' 8
441 readDecP = readIntP' 10
442 readHexP = readIntP' 16