27bba5404e158530acabcad7fb3f799dc7f61129
[ghc.git] / libraries / base / Text / ParserCombinators / ReadP.hs
1 {-# LANGUAGE CPP, NoImplicitPrelude #-}
2 #ifndef __NHC__
3 {-# LANGUAGE Rank2Types #-}
4 #endif
5 #ifdef __GLASGOW_HASKELL__
6 {-# LANGUAGE MagicHash #-}
7 #endif
8
9 -----------------------------------------------------------------------------
10 -- |
11 -- Module : Text.ParserCombinators.ReadP
12 -- Copyright : (c) The University of Glasgow 2002
13 -- License : BSD-style (see the file libraries/base/LICENSE)
14 --
15 -- Maintainer : libraries@haskell.org
16 -- Stability : provisional
17 -- Portability : non-portable (local universal quantification)
18 --
19 -- This is a library of parser combinators, originally written by Koen Claessen.
20 -- It parses all alternatives in parallel, so it never keeps hold of
21 -- the beginning of the input string, a common source of space leaks with
22 -- other parsers. The '(+++)' choice combinator is genuinely commutative;
23 -- it makes no difference which branch is \"shorter\".
24
25 -----------------------------------------------------------------------------
26
27 module Text.ParserCombinators.ReadP
28 (
29 -- * The 'ReadP' type
30 #ifndef __NHC__
31 ReadP, -- :: * -> *; instance Functor, Monad, MonadPlus
32 #else
33 ReadPN, -- :: * -> * -> *; instance Functor, Monad, MonadPlus
34 #endif
35
36 -- * Primitive operations
37 get, -- :: ReadP Char
38 look, -- :: ReadP String
39 (+++), -- :: ReadP a -> ReadP a -> ReadP a
40 (<++), -- :: ReadP a -> ReadP a -> ReadP a
41 gather, -- :: ReadP a -> ReadP (String, a)
42
43 -- * Other operations
44 pfail, -- :: ReadP a
45 eof, -- :: ReadP ()
46 satisfy, -- :: (Char -> Bool) -> ReadP Char
47 char, -- :: Char -> ReadP Char
48 string, -- :: String -> ReadP String
49 munch, -- :: (Char -> Bool) -> ReadP String
50 munch1, -- :: (Char -> Bool) -> ReadP String
51 skipSpaces, -- :: ReadP ()
52 choice, -- :: [ReadP a] -> ReadP a
53 count, -- :: Int -> ReadP a -> ReadP [a]
54 between, -- :: ReadP open -> ReadP close -> ReadP a -> ReadP a
55 option, -- :: a -> ReadP a -> ReadP a
56 optional, -- :: ReadP a -> ReadP ()
57 many, -- :: ReadP a -> ReadP [a]
58 many1, -- :: ReadP a -> ReadP [a]
59 skipMany, -- :: ReadP a -> ReadP ()
60 skipMany1, -- :: ReadP a -> ReadP ()
61 sepBy, -- :: ReadP a -> ReadP sep -> ReadP [a]
62 sepBy1, -- :: ReadP a -> ReadP sep -> ReadP [a]
63 endBy, -- :: ReadP a -> ReadP sep -> ReadP [a]
64 endBy1, -- :: ReadP a -> ReadP sep -> ReadP [a]
65 chainr, -- :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
66 chainl, -- :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
67 chainl1, -- :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
68 chainr1, -- :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
69 manyTill, -- :: ReadP a -> ReadP end -> ReadP [a]
70
71 -- * Running a parser
72 ReadS, -- :: *; = String -> [(a,String)]
73 readP_to_S, -- :: ReadP a -> ReadS a
74 readS_to_P, -- :: ReadS a -> ReadP a
75
76 -- * Properties
77 -- $properties
78 )
79 where
80
81 import Control.Monad( MonadPlus(..), sequence, liftM2 )
82
83 #ifdef __GLASGOW_HASKELL__
84 #ifndef __HADDOCK__
85 import {-# SOURCE #-} GHC.Unicode ( isSpace )
86 #endif
87 import GHC.List ( replicate, null )
88 import GHC.Base
89 #else
90 import Data.Char( isSpace )
91 #endif
92
93 infixr 5 +++, <++
94
95 #ifdef __GLASGOW_HASKELL__
96 ------------------------------------------------------------------------
97 -- ReadS
98
99 -- | A parser for a type @a@, represented as a function that takes a
100 -- 'String' and returns a list of possible parses as @(a,'String')@ pairs.
101 --
102 -- Note that this kind of backtracking parser is very inefficient;
103 -- reading a large structure may be quite slow (cf 'ReadP').
104 type ReadS a = String -> [(a,String)]
105 #endif
106
107 -- ---------------------------------------------------------------------------
108 -- The P type
109 -- is representation type -- should be kept abstract
110
111 data P a
112 = Get (Char -> P a)
113 | Look (String -> P a)
114 | Fail
115 | Result a (P a)
116 | Final [(a,String)] -- invariant: list is non-empty!
117
118 -- Monad, MonadPlus
119
120 instance Monad P where
121 return x = Result x Fail
122
123 (Get f) >>= k = Get (\c -> f c >>= k)
124 (Look f) >>= k = Look (\s -> f s >>= k)
125 Fail >>= _ = Fail
126 (Result x p) >>= k = k x `mplus` (p >>= k)
127 (Final r) >>= k = final [ys' | (x,s) <- r, ys' <- run (k x) s]
128
129 fail _ = Fail
130
131 instance MonadPlus P where
132 mzero = Fail
133
134 -- most common case: two gets are combined
135 Get f1 `mplus` Get f2 = Get (\c -> f1 c `mplus` f2 c)
136
137 -- results are delivered as soon as possible
138 Result x p `mplus` q = Result x (p `mplus` q)
139 p `mplus` Result x q = Result x (p `mplus` q)
140
141 -- fail disappears
142 Fail `mplus` p = p
143 p `mplus` Fail = p
144
145 -- two finals are combined
146 -- final + look becomes one look and one final (=optimization)
147 -- final + sthg else becomes one look and one final
148 Final r `mplus` Final t = Final (r ++ t)
149 Final r `mplus` Look f = Look (\s -> Final (r ++ run (f s) s))
150 Final r `mplus` p = Look (\s -> Final (r ++ run p s))
151 Look f `mplus` Final r = Look (\s -> Final (run (f s) s ++ r))
152 p `mplus` Final r = Look (\s -> Final (run p s ++ r))
153
154 -- two looks are combined (=optimization)
155 -- look + sthg else floats upwards
156 Look f `mplus` Look g = Look (\s -> f s `mplus` g s)
157 Look f `mplus` p = Look (\s -> f s `mplus` p)
158 p `mplus` Look f = Look (\s -> p `mplus` f s)
159
160 -- ---------------------------------------------------------------------------
161 -- The ReadP type
162
163 #ifndef __NHC__
164 newtype ReadP a = R (forall b . (a -> P b) -> P b)
165 #else
166 #define ReadP (ReadPN b)
167 newtype ReadPN b a = R ((a -> P b) -> P b)
168 #endif
169
170 -- Functor, Monad, MonadPlus
171
172 instance Functor ReadP where
173 fmap h (R f) = R (\k -> f (k . h))
174
175 instance Monad ReadP where
176 return x = R (\k -> k x)
177 fail _ = R (\_ -> Fail)
178 R m >>= f = R (\k -> m (\a -> let R m' = f a in m' k))
179
180 instance MonadPlus ReadP where
181 mzero = pfail
182 mplus = (+++)
183
184 -- ---------------------------------------------------------------------------
185 -- Operations over P
186
187 final :: [(a,String)] -> P a
188 -- Maintains invariant for Final constructor
189 final [] = Fail
190 final r = Final r
191
192 run :: P a -> ReadS a
193 run (Get f) (c:s) = run (f c) s
194 run (Look f) s = run (f s) s
195 run (Result x p) s = (x,s) : run p s
196 run (Final r) _ = r
197 run _ _ = []
198
199 -- ---------------------------------------------------------------------------
200 -- Operations over ReadP
201
202 get :: ReadP Char
203 -- ^ Consumes and returns the next character.
204 -- Fails if there is no input left.
205 get = R Get
206
207 look :: ReadP String
208 -- ^ Look-ahead: returns the part of the input that is left, without
209 -- consuming it.
210 look = R Look
211
212 pfail :: ReadP a
213 -- ^ Always fails.
214 pfail = R (\_ -> Fail)
215
216 (+++) :: ReadP a -> ReadP a -> ReadP a
217 -- ^ Symmetric choice.
218 R f1 +++ R f2 = R (\k -> f1 k `mplus` f2 k)
219
220 #ifndef __NHC__
221 (<++) :: ReadP a -> ReadP a -> ReadP a
222 #else
223 (<++) :: ReadPN a a -> ReadPN a a -> ReadPN a a
224 #endif
225 -- ^ Local, exclusive, left-biased choice: If left parser
226 -- locally produces any result at all, then right parser is
227 -- not used.
228 #ifdef __GLASGOW_HASKELL__
229 R f0 <++ q =
230 do s <- look
231 probe (f0 return) s 0#
232 where
233 probe (Get f) (c:s) n = probe (f c) s (n+#1#)
234 probe (Look f) s n = probe (f s) s n
235 probe p@(Result _ _) _ n = discard n >> R (p >>=)
236 probe (Final r) _ _ = R (Final r >>=)
237 probe _ _ _ = q
238
239 discard 0# = return ()
240 discard n = get >> discard (n-#1#)
241 #else
242 R f <++ q =
243 do s <- look
244 probe (f return) s 0
245 where
246 probe (Get f) (c:s) n = probe (f c) s (n+1)
247 probe (Look f) s n = probe (f s) s n
248 probe p@(Result _ _) _ n = discard n >> R (p >>=)
249 probe (Final r) _ _ = R (Final r >>=)
250 probe _ _ _ = q
251
252 discard 0 = return ()
253 discard n = get >> discard (n-1)
254 #endif
255
256 #ifndef __NHC__
257 gather :: ReadP a -> ReadP (String, a)
258 #else
259 -- gather :: ReadPN (String->P b) a -> ReadPN (String->P b) (String, a)
260 #endif
261 -- ^ Transforms a parser into one that does the same, but
262 -- in addition returns the exact characters read.
263 -- IMPORTANT NOTE: 'gather' gives a runtime error if its first argument
264 -- is built using any occurrences of readS_to_P.
265 gather (R m)
266 = R (\k -> gath id (m (\a -> return (\s -> k (s,a)))))
267 where
268 gath :: (String -> String) -> P (String -> P b) -> P b
269 gath l (Get f) = Get (\c -> gath (l.(c:)) (f c))
270 gath _ Fail = Fail
271 gath l (Look f) = Look (\s -> gath l (f s))
272 gath l (Result k p) = k (l []) `mplus` gath l p
273 gath _ (Final _) = error "do not use readS_to_P in gather!"
274
275 -- ---------------------------------------------------------------------------
276 -- Derived operations
277
278 satisfy :: (Char -> Bool) -> ReadP Char
279 -- ^ Consumes and returns the next character, if it satisfies the
280 -- specified predicate.
281 satisfy p = do c <- get; if p c then return c else pfail
282
283 char :: Char -> ReadP Char
284 -- ^ Parses and returns the specified character.
285 char c = satisfy (c ==)
286
287 eof :: ReadP ()
288 -- ^ Succeeds iff we are at the end of input
289 eof = do { s <- look
290 ; if null s then return ()
291 else pfail }
292
293 string :: String -> ReadP String
294 -- ^ Parses and returns the specified string.
295 string this = do s <- look; scan this s
296 where
297 scan [] _ = do return this
298 scan (x:xs) (y:ys) | x == y = do _ <- get; scan xs ys
299 scan _ _ = do pfail
300
301 munch :: (Char -> Bool) -> ReadP String
302 -- ^ Parses the first zero or more characters satisfying the predicate.
303 -- Always succeds, exactly once having consumed all the characters
304 -- Hence NOT the same as (many (satisfy p))
305 munch p =
306 do s <- look
307 scan s
308 where
309 scan (c:cs) | p c = do _ <- get; s <- scan cs; return (c:s)
310 scan _ = do return ""
311
312 munch1 :: (Char -> Bool) -> ReadP String
313 -- ^ Parses the first one or more characters satisfying the predicate.
314 -- Fails if none, else succeeds exactly once having consumed all the characters
315 -- Hence NOT the same as (many1 (satisfy p))
316 munch1 p =
317 do c <- get
318 if p c then do s <- munch p; return (c:s)
319 else pfail
320
321 choice :: [ReadP a] -> ReadP a
322 -- ^ Combines all parsers in the specified list.
323 choice [] = pfail
324 choice [p] = p
325 choice (p:ps) = p +++ choice ps
326
327 skipSpaces :: ReadP ()
328 -- ^ Skips all whitespace.
329 skipSpaces =
330 do s <- look
331 skip s
332 where
333 skip (c:s) | isSpace c = do _ <- get; skip s
334 skip _ = do return ()
335
336 count :: Int -> ReadP a -> ReadP [a]
337 -- ^ @count n p@ parses @n@ occurrences of @p@ in sequence. A list of
338 -- results is returned.
339 count n p = sequence (replicate n p)
340
341 between :: ReadP open -> ReadP close -> ReadP a -> ReadP a
342 -- ^ @between open close p@ parses @open@, followed by @p@ and finally
343 -- @close@. Only the value of @p@ is returned.
344 between open close p = do _ <- open
345 x <- p
346 _ <- close
347 return x
348
349 option :: a -> ReadP a -> ReadP a
350 -- ^ @option x p@ will either parse @p@ or return @x@ without consuming
351 -- any input.
352 option x p = p +++ return x
353
354 optional :: ReadP a -> ReadP ()
355 -- ^ @optional p@ optionally parses @p@ and always returns @()@.
356 optional p = (p >> return ()) +++ return ()
357
358 many :: ReadP a -> ReadP [a]
359 -- ^ Parses zero or more occurrences of the given parser.
360 many p = return [] +++ many1 p
361
362 many1 :: ReadP a -> ReadP [a]
363 -- ^ Parses one or more occurrences of the given parser.
364 many1 p = liftM2 (:) p (many p)
365
366 skipMany :: ReadP a -> ReadP ()
367 -- ^ Like 'many', but discards the result.
368 skipMany p = many p >> return ()
369
370 skipMany1 :: ReadP a -> ReadP ()
371 -- ^ Like 'many1', but discards the result.
372 skipMany1 p = p >> skipMany p
373
374 sepBy :: ReadP a -> ReadP sep -> ReadP [a]
375 -- ^ @sepBy p sep@ parses zero or more occurrences of @p@, separated by @sep@.
376 -- Returns a list of values returned by @p@.
377 sepBy p sep = sepBy1 p sep +++ return []
378
379 sepBy1 :: ReadP a -> ReadP sep -> ReadP [a]
380 -- ^ @sepBy1 p sep@ parses one or more occurrences of @p@, separated by @sep@.
381 -- Returns a list of values returned by @p@.
382 sepBy1 p sep = liftM2 (:) p (many (sep >> p))
383
384 endBy :: ReadP a -> ReadP sep -> ReadP [a]
385 -- ^ @endBy p sep@ parses zero or more occurrences of @p@, separated and ended
386 -- by @sep@.
387 endBy p sep = many (do x <- p ; _ <- sep ; return x)
388
389 endBy1 :: ReadP a -> ReadP sep -> ReadP [a]
390 -- ^ @endBy p sep@ parses one or more occurrences of @p@, separated and ended
391 -- by @sep@.
392 endBy1 p sep = many1 (do x <- p ; _ <- sep ; return x)
393
394 chainr :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
395 -- ^ @chainr p op x@ parses zero or more occurrences of @p@, separated by @op@.
396 -- Returns a value produced by a /right/ associative application of all
397 -- functions returned by @op@. If there are no occurrences of @p@, @x@ is
398 -- returned.
399 chainr p op x = chainr1 p op +++ return x
400
401 chainl :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
402 -- ^ @chainl p op x@ parses zero or more occurrences of @p@, separated by @op@.
403 -- Returns a value produced by a /left/ associative application of all
404 -- functions returned by @op@. If there are no occurrences of @p@, @x@ is
405 -- returned.
406 chainl p op x = chainl1 p op +++ return x
407
408 chainr1 :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
409 -- ^ Like 'chainr', but parses one or more occurrences of @p@.
410 chainr1 p op = scan
411 where scan = p >>= rest
412 rest x = do f <- op
413 y <- scan
414 return (f x y)
415 +++ return x
416
417 chainl1 :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
418 -- ^ Like 'chainl', but parses one or more occurrences of @p@.
419 chainl1 p op = p >>= rest
420 where rest x = do f <- op
421 y <- p
422 rest (f x y)
423 +++ return x
424
425 #ifndef __NHC__
426 manyTill :: ReadP a -> ReadP end -> ReadP [a]
427 #else
428 manyTill :: ReadPN [a] a -> ReadPN [a] end -> ReadPN [a] [a]
429 #endif
430 -- ^ @manyTill p end@ parses zero or more occurrences of @p@, until @end@
431 -- succeeds. Returns a list of values returned by @p@.
432 manyTill p end = scan
433 where scan = (end >> return []) <++ (liftM2 (:) p scan)
434
435 -- ---------------------------------------------------------------------------
436 -- Converting between ReadP and Read
437
438 #ifndef __NHC__
439 readP_to_S :: ReadP a -> ReadS a
440 #else
441 readP_to_S :: ReadPN a a -> ReadS a
442 #endif
443 -- ^ Converts a parser into a Haskell ReadS-style function.
444 -- This is the main way in which you can \"run\" a 'ReadP' parser:
445 -- the expanded type is
446 -- @ readP_to_S :: ReadP a -> String -> [(a,String)] @
447 readP_to_S (R f) = run (f return)
448
449 readS_to_P :: ReadS a -> ReadP a
450 -- ^ Converts a Haskell ReadS-style function into a parser.
451 -- Warning: This introduces local backtracking in the resulting
452 -- parser, and therefore a possible inefficiency.
453 readS_to_P r =
454 R (\k -> Look (\s -> final [bs'' | (a,s') <- r s, bs'' <- run (k a) s']))
455
456 -- ---------------------------------------------------------------------------
457 -- QuickCheck properties that hold for the combinators
458
459 {- $properties
460 The following are QuickCheck specifications of what the combinators do.
461 These can be seen as formal specifications of the behavior of the
462 combinators.
463
464 We use bags to give semantics to the combinators.
465
466 > type Bag a = [a]
467
468 Equality on bags does not care about the order of elements.
469
470 > (=~) :: Ord a => Bag a -> Bag a -> Bool
471 > xs =~ ys = sort xs == sort ys
472
473 A special equality operator to avoid unresolved overloading
474 when testing the properties.
475
476 > (=~.) :: Bag (Int,String) -> Bag (Int,String) -> Bool
477 > (=~.) = (=~)
478
479 Here follow the properties:
480
481 > prop_Get_Nil =
482 > readP_to_S get [] =~ []
483 >
484 > prop_Get_Cons c s =
485 > readP_to_S get (c:s) =~ [(c,s)]
486 >
487 > prop_Look s =
488 > readP_to_S look s =~ [(s,s)]
489 >
490 > prop_Fail s =
491 > readP_to_S pfail s =~. []
492 >
493 > prop_Return x s =
494 > readP_to_S (return x) s =~. [(x,s)]
495 >
496 > prop_Bind p k s =
497 > readP_to_S (p >>= k) s =~.
498 > [ ys''
499 > | (x,s') <- readP_to_S p s
500 > , ys'' <- readP_to_S (k (x::Int)) s'
501 > ]
502 >
503 > prop_Plus p q s =
504 > readP_to_S (p +++ q) s =~.
505 > (readP_to_S p s ++ readP_to_S q s)
506 >
507 > prop_LeftPlus p q s =
508 > readP_to_S (p <++ q) s =~.
509 > (readP_to_S p s +<+ readP_to_S q s)
510 > where
511 > [] +<+ ys = ys
512 > xs +<+ _ = xs
513 >
514 > prop_Gather s =
515 > forAll readPWithoutReadS $ \p ->
516 > readP_to_S (gather p) s =~
517 > [ ((pre,x::Int),s')
518 > | (x,s') <- readP_to_S p s
519 > , let pre = take (length s - length s') s
520 > ]
521 >
522 > prop_String_Yes this s =
523 > readP_to_S (string this) (this ++ s) =~
524 > [(this,s)]
525 >
526 > prop_String_Maybe this s =
527 > readP_to_S (string this) s =~
528 > [(this, drop (length this) s) | this `isPrefixOf` s]
529 >
530 > prop_Munch p s =
531 > readP_to_S (munch p) s =~
532 > [(takeWhile p s, dropWhile p s)]
533 >
534 > prop_Munch1 p s =
535 > readP_to_S (munch1 p) s =~
536 > [(res,s') | let (res,s') = (takeWhile p s, dropWhile p s), not (null res)]
537 >
538 > prop_Choice ps s =
539 > readP_to_S (choice ps) s =~.
540 > readP_to_S (foldr (+++) pfail ps) s
541 >
542 > prop_ReadS r s =
543 > readP_to_S (readS_to_P r) s =~. r s
544 -}