a0e6e2206249bad4dc4e6c5f550548d4e09e008f
[ghc.git] / libraries / base / Text / ParserCombinators / ReadP.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE NoImplicitPrelude #-}
3 {-# LANGUAGE RankNTypes #-}
4 {-# LANGUAGE MagicHash #-}
5
6 -----------------------------------------------------------------------------
7 -- |
8 -- Module : Text.ParserCombinators.ReadP
9 -- Copyright : (c) The University of Glasgow 2002
10 -- License : BSD-style (see the file libraries/base/LICENSE)
11 --
12 -- Maintainer : libraries@haskell.org
13 -- Stability : provisional
14 -- Portability : non-portable (local universal quantification)
15 --
16 -- This is a library of parser combinators, originally written by Koen Claessen.
17 -- It parses all alternatives in parallel, so it never keeps hold of
18 -- the beginning of the input string, a common source of space leaks with
19 -- other parsers. The '(+++)' choice combinator is genuinely commutative;
20 -- it makes no difference which branch is \"shorter\".
21
22 -----------------------------------------------------------------------------
23
24 module Text.ParserCombinators.ReadP
25 (
26 -- * The 'ReadP' type
27 ReadP,
28
29 -- * Primitive operations
30 get,
31 look,
32 (+++),
33 (<++),
34 gather,
35
36 -- * Other operations
37 pfail,
38 eof,
39 satisfy,
40 char,
41 string,
42 munch,
43 munch1,
44 skipSpaces,
45 choice,
46 count,
47 between,
48 option,
49 optional,
50 many,
51 many1,
52 skipMany,
53 skipMany1,
54 sepBy,
55 sepBy1,
56 endBy,
57 endBy1,
58 chainr,
59 chainl,
60 chainl1,
61 chainr1,
62 manyTill,
63
64 -- * Running a parser
65 ReadS,
66 readP_to_S,
67 readS_to_P,
68
69 -- * Properties
70 -- $properties
71 )
72 where
73
74 import Control.Monad( MonadPlus(..), sequence, liftM2 )
75
76 import {-# SOURCE #-} GHC.Unicode ( isSpace )
77 import GHC.List ( replicate, null )
78 import GHC.Base
79
80 infixr 5 +++, <++
81
82 ------------------------------------------------------------------------
83 -- ReadS
84
85 -- | A parser for a type @a@, represented as a function that takes a
86 -- 'String' and returns a list of possible parses as @(a,'String')@ pairs.
87 --
88 -- Note that this kind of backtracking parser is very inefficient;
89 -- reading a large structure may be quite slow (cf 'ReadP').
90 type ReadS a = String -> [(a,String)]
91
92 -- ---------------------------------------------------------------------------
93 -- The P type
94 -- is representation type -- should be kept abstract
95
96 data P a
97 = Get (Char -> P a)
98 | Look (String -> P a)
99 | Fail
100 | Result a (P a)
101 | Final [(a,String)] -- invariant: list is non-empty!
102
103 -- Monad, MonadPlus
104
105 instance Monad P where
106 return x = Result x Fail
107
108 (Get f) >>= k = Get (\c -> f c >>= k)
109 (Look f) >>= k = Look (\s -> f s >>= k)
110 Fail >>= _ = Fail
111 (Result x p) >>= k = k x `mplus` (p >>= k)
112 (Final r) >>= k = final [ys' | (x,s) <- r, ys' <- run (k x) s]
113
114 fail _ = Fail
115
116 instance MonadPlus P where
117 mzero = Fail
118
119 -- most common case: two gets are combined
120 Get f1 `mplus` Get f2 = Get (\c -> f1 c `mplus` f2 c)
121
122 -- results are delivered as soon as possible
123 Result x p `mplus` q = Result x (p `mplus` q)
124 p `mplus` Result x q = Result x (p `mplus` q)
125
126 -- fail disappears
127 Fail `mplus` p = p
128 p `mplus` Fail = p
129
130 -- two finals are combined
131 -- final + look becomes one look and one final (=optimization)
132 -- final + sthg else becomes one look and one final
133 Final r `mplus` Final t = Final (r ++ t)
134 Final r `mplus` Look f = Look (\s -> Final (r ++ run (f s) s))
135 Final r `mplus` p = Look (\s -> Final (r ++ run p s))
136 Look f `mplus` Final r = Look (\s -> Final (run (f s) s ++ r))
137 p `mplus` Final r = Look (\s -> Final (run p s ++ r))
138
139 -- two looks are combined (=optimization)
140 -- look + sthg else floats upwards
141 Look f `mplus` Look g = Look (\s -> f s `mplus` g s)
142 Look f `mplus` p = Look (\s -> f s `mplus` p)
143 p `mplus` Look f = Look (\s -> p `mplus` f s)
144
145 -- ---------------------------------------------------------------------------
146 -- The ReadP type
147
148 newtype ReadP a = R (forall b . (a -> P b) -> P b)
149
150 -- Functor, Monad, MonadPlus
151
152 instance Functor ReadP where
153 fmap h (R f) = R (\k -> f (k . h))
154
155 instance Monad ReadP where
156 return x = R (\k -> k x)
157 fail _ = R (\_ -> Fail)
158 R m >>= f = R (\k -> m (\a -> let R m' = f a in m' k))
159
160 instance MonadPlus ReadP where
161 mzero = pfail
162 mplus = (+++)
163
164 -- ---------------------------------------------------------------------------
165 -- Operations over P
166
167 final :: [(a,String)] -> P a
168 -- Maintains invariant for Final constructor
169 final [] = Fail
170 final r = Final r
171
172 run :: P a -> ReadS a
173 run (Get f) (c:s) = run (f c) s
174 run (Look f) s = run (f s) s
175 run (Result x p) s = (x,s) : run p s
176 run (Final r) _ = r
177 run _ _ = []
178
179 -- ---------------------------------------------------------------------------
180 -- Operations over ReadP
181
182 get :: ReadP Char
183 -- ^ Consumes and returns the next character.
184 -- Fails if there is no input left.
185 get = R Get
186
187 look :: ReadP String
188 -- ^ Look-ahead: returns the part of the input that is left, without
189 -- consuming it.
190 look = R Look
191
192 pfail :: ReadP a
193 -- ^ Always fails.
194 pfail = R (\_ -> Fail)
195
196 (+++) :: ReadP a -> ReadP a -> ReadP a
197 -- ^ Symmetric choice.
198 R f1 +++ R f2 = R (\k -> f1 k `mplus` f2 k)
199
200 (<++) :: ReadP a -> ReadP a -> ReadP a
201 -- ^ Local, exclusive, left-biased choice: If left parser
202 -- locally produces any result at all, then right parser is
203 -- not used.
204 R f0 <++ q =
205 do s <- look
206 probe (f0 return) s 0#
207 where
208 probe (Get f) (c:s) n = probe (f c) s (n+#1#)
209 probe (Look f) s n = probe (f s) s n
210 probe p@(Result _ _) _ n = discard n >> R (p >>=)
211 probe (Final r) _ _ = R (Final r >>=)
212 probe _ _ _ = q
213
214 discard 0# = return ()
215 discard n = get >> discard (n-#1#)
216
217 gather :: ReadP a -> ReadP (String, a)
218 -- ^ Transforms a parser into one that does the same, but
219 -- in addition returns the exact characters read.
220 -- IMPORTANT NOTE: 'gather' gives a runtime error if its first argument
221 -- is built using any occurrences of readS_to_P.
222 gather (R m)
223 = R (\k -> gath id (m (\a -> return (\s -> k (s,a)))))
224 where
225 gath :: (String -> String) -> P (String -> P b) -> P b
226 gath l (Get f) = Get (\c -> gath (l.(c:)) (f c))
227 gath _ Fail = Fail
228 gath l (Look f) = Look (\s -> gath l (f s))
229 gath l (Result k p) = k (l []) `mplus` gath l p
230 gath _ (Final _) = error "do not use readS_to_P in gather!"
231
232 -- ---------------------------------------------------------------------------
233 -- Derived operations
234
235 satisfy :: (Char -> Bool) -> ReadP Char
236 -- ^ Consumes and returns the next character, if it satisfies the
237 -- specified predicate.
238 satisfy p = do c <- get; if p c then return c else pfail
239
240 char :: Char -> ReadP Char
241 -- ^ Parses and returns the specified character.
242 char c = satisfy (c ==)
243
244 eof :: ReadP ()
245 -- ^ Succeeds iff we are at the end of input
246 eof = do { s <- look
247 ; if null s then return ()
248 else pfail }
249
250 string :: String -> ReadP String
251 -- ^ Parses and returns the specified string.
252 string this = do s <- look; scan this s
253 where
254 scan [] _ = do return this
255 scan (x:xs) (y:ys) | x == y = do _ <- get; scan xs ys
256 scan _ _ = do pfail
257
258 munch :: (Char -> Bool) -> ReadP String
259 -- ^ Parses the first zero or more characters satisfying the predicate.
260 -- Always succeds, exactly once having consumed all the characters
261 -- Hence NOT the same as (many (satisfy p))
262 munch p =
263 do s <- look
264 scan s
265 where
266 scan (c:cs) | p c = do _ <- get; s <- scan cs; return (c:s)
267 scan _ = do return ""
268
269 munch1 :: (Char -> Bool) -> ReadP String
270 -- ^ Parses the first one or more characters satisfying the predicate.
271 -- Fails if none, else succeeds exactly once having consumed all the characters
272 -- Hence NOT the same as (many1 (satisfy p))
273 munch1 p =
274 do c <- get
275 if p c then do s <- munch p; return (c:s)
276 else pfail
277
278 choice :: [ReadP a] -> ReadP a
279 -- ^ Combines all parsers in the specified list.
280 choice [] = pfail
281 choice [p] = p
282 choice (p:ps) = p +++ choice ps
283
284 skipSpaces :: ReadP ()
285 -- ^ Skips all whitespace.
286 skipSpaces =
287 do s <- look
288 skip s
289 where
290 skip (c:s) | isSpace c = do _ <- get; skip s
291 skip _ = do return ()
292
293 count :: Int -> ReadP a -> ReadP [a]
294 -- ^ @count n p@ parses @n@ occurrences of @p@ in sequence. A list of
295 -- results is returned.
296 count n p = sequence (replicate n p)
297
298 between :: ReadP open -> ReadP close -> ReadP a -> ReadP a
299 -- ^ @between open close p@ parses @open@, followed by @p@ and finally
300 -- @close@. Only the value of @p@ is returned.
301 between open close p = do _ <- open
302 x <- p
303 _ <- close
304 return x
305
306 option :: a -> ReadP a -> ReadP a
307 -- ^ @option x p@ will either parse @p@ or return @x@ without consuming
308 -- any input.
309 option x p = p +++ return x
310
311 optional :: ReadP a -> ReadP ()
312 -- ^ @optional p@ optionally parses @p@ and always returns @()@.
313 optional p = (p >> return ()) +++ return ()
314
315 many :: ReadP a -> ReadP [a]
316 -- ^ Parses zero or more occurrences of the given parser.
317 many p = return [] +++ many1 p
318
319 many1 :: ReadP a -> ReadP [a]
320 -- ^ Parses one or more occurrences of the given parser.
321 many1 p = liftM2 (:) p (many p)
322
323 skipMany :: ReadP a -> ReadP ()
324 -- ^ Like 'many', but discards the result.
325 skipMany p = many p >> return ()
326
327 skipMany1 :: ReadP a -> ReadP ()
328 -- ^ Like 'many1', but discards the result.
329 skipMany1 p = p >> skipMany p
330
331 sepBy :: ReadP a -> ReadP sep -> ReadP [a]
332 -- ^ @sepBy p sep@ parses zero or more occurrences of @p@, separated by @sep@.
333 -- Returns a list of values returned by @p@.
334 sepBy p sep = sepBy1 p sep +++ return []
335
336 sepBy1 :: ReadP a -> ReadP sep -> ReadP [a]
337 -- ^ @sepBy1 p sep@ parses one or more occurrences of @p@, separated by @sep@.
338 -- Returns a list of values returned by @p@.
339 sepBy1 p sep = liftM2 (:) p (many (sep >> p))
340
341 endBy :: ReadP a -> ReadP sep -> ReadP [a]
342 -- ^ @endBy p sep@ parses zero or more occurrences of @p@, separated and ended
343 -- by @sep@.
344 endBy p sep = many (do x <- p ; _ <- sep ; return x)
345
346 endBy1 :: ReadP a -> ReadP sep -> ReadP [a]
347 -- ^ @endBy p sep@ parses one or more occurrences of @p@, separated and ended
348 -- by @sep@.
349 endBy1 p sep = many1 (do x <- p ; _ <- sep ; return x)
350
351 chainr :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
352 -- ^ @chainr p op x@ parses zero or more occurrences of @p@, separated by @op@.
353 -- Returns a value produced by a /right/ associative application of all
354 -- functions returned by @op@. If there are no occurrences of @p@, @x@ is
355 -- returned.
356 chainr p op x = chainr1 p op +++ return x
357
358 chainl :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
359 -- ^ @chainl p op x@ parses zero or more occurrences of @p@, separated by @op@.
360 -- Returns a value produced by a /left/ associative application of all
361 -- functions returned by @op@. If there are no occurrences of @p@, @x@ is
362 -- returned.
363 chainl p op x = chainl1 p op +++ return x
364
365 chainr1 :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
366 -- ^ Like 'chainr', but parses one or more occurrences of @p@.
367 chainr1 p op = scan
368 where scan = p >>= rest
369 rest x = do f <- op
370 y <- scan
371 return (f x y)
372 +++ return x
373
374 chainl1 :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
375 -- ^ Like 'chainl', but parses one or more occurrences of @p@.
376 chainl1 p op = p >>= rest
377 where rest x = do f <- op
378 y <- p
379 rest (f x y)
380 +++ return x
381
382 manyTill :: ReadP a -> ReadP end -> ReadP [a]
383 -- ^ @manyTill p end@ parses zero or more occurrences of @p@, until @end@
384 -- succeeds. Returns a list of values returned by @p@.
385 manyTill p end = scan
386 where scan = (end >> return []) <++ (liftM2 (:) p scan)
387
388 -- ---------------------------------------------------------------------------
389 -- Converting between ReadP and Read
390
391 readP_to_S :: ReadP a -> ReadS a
392 -- ^ Converts a parser into a Haskell ReadS-style function.
393 -- This is the main way in which you can \"run\" a 'ReadP' parser:
394 -- the expanded type is
395 -- @ readP_to_S :: ReadP a -> String -> [(a,String)] @
396 readP_to_S (R f) = run (f return)
397
398 readS_to_P :: ReadS a -> ReadP a
399 -- ^ Converts a Haskell ReadS-style function into a parser.
400 -- Warning: This introduces local backtracking in the resulting
401 -- parser, and therefore a possible inefficiency.
402 readS_to_P r =
403 R (\k -> Look (\s -> final [bs'' | (a,s') <- r s, bs'' <- run (k a) s']))
404
405 -- ---------------------------------------------------------------------------
406 -- QuickCheck properties that hold for the combinators
407
408 {- $properties
409 The following are QuickCheck specifications of what the combinators do.
410 These can be seen as formal specifications of the behavior of the
411 combinators.
412
413 We use bags to give semantics to the combinators.
414
415 > type Bag a = [a]
416
417 Equality on bags does not care about the order of elements.
418
419 > (=~) :: Ord a => Bag a -> Bag a -> Bool
420 > xs =~ ys = sort xs == sort ys
421
422 A special equality operator to avoid unresolved overloading
423 when testing the properties.
424
425 > (=~.) :: Bag (Int,String) -> Bag (Int,String) -> Bool
426 > (=~.) = (=~)
427
428 Here follow the properties:
429
430 > prop_Get_Nil =
431 > readP_to_S get [] =~ []
432 >
433 > prop_Get_Cons c s =
434 > readP_to_S get (c:s) =~ [(c,s)]
435 >
436 > prop_Look s =
437 > readP_to_S look s =~ [(s,s)]
438 >
439 > prop_Fail s =
440 > readP_to_S pfail s =~. []
441 >
442 > prop_Return x s =
443 > readP_to_S (return x) s =~. [(x,s)]
444 >
445 > prop_Bind p k s =
446 > readP_to_S (p >>= k) s =~.
447 > [ ys''
448 > | (x,s') <- readP_to_S p s
449 > , ys'' <- readP_to_S (k (x::Int)) s'
450 > ]
451 >
452 > prop_Plus p q s =
453 > readP_to_S (p +++ q) s =~.
454 > (readP_to_S p s ++ readP_to_S q s)
455 >
456 > prop_LeftPlus p q s =
457 > readP_to_S (p <++ q) s =~.
458 > (readP_to_S p s +<+ readP_to_S q s)
459 > where
460 > [] +<+ ys = ys
461 > xs +<+ _ = xs
462 >
463 > prop_Gather s =
464 > forAll readPWithoutReadS $ \p ->
465 > readP_to_S (gather p) s =~
466 > [ ((pre,x::Int),s')
467 > | (x,s') <- readP_to_S p s
468 > , let pre = take (length s - length s') s
469 > ]
470 >
471 > prop_String_Yes this s =
472 > readP_to_S (string this) (this ++ s) =~
473 > [(this,s)]
474 >
475 > prop_String_Maybe this s =
476 > readP_to_S (string this) s =~
477 > [(this, drop (length this) s) | this `isPrefixOf` s]
478 >
479 > prop_Munch p s =
480 > readP_to_S (munch p) s =~
481 > [(takeWhile p s, dropWhile p s)]
482 >
483 > prop_Munch1 p s =
484 > readP_to_S (munch1 p) s =~
485 > [(res,s') | let (res,s') = (takeWhile p s, dropWhile p s), not (null res)]
486 >
487 > prop_Choice ps s =
488 > readP_to_S (choice ps) s =~.
489 > readP_to_S (foldr (+++) pfail ps) s
490 >
491 > prop_ReadS r s =
492 > readP_to_S (readS_to_P r) s =~. r s
493 -}
494