missing case in parserPlus
[packages/parsec.git] / Text / Parsec / Prim.hs
1 -----------------------------------------------------------------------------
2 -- |
3 -- Module : Text.Parsec.Prim
4 -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
5 -- License : BSD-style (see the LICENSE file)
6 --
7 -- Maintainer : derek.a.elkins@gmail.com
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- The primitive parser combinators.
12 --
13 -----------------------------------------------------------------------------
14
15 {-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleContexts #-}
16
17 module Text.Parsec.Prim where
18
19 import qualified Control.Applicative as Applicative ( Applicative(..), Alternative(..) )
20 import Control.Monad
21 import Control.Monad.Trans
22 import Control.Monad.Identity
23
24 import Text.Parsec.Pos
25 import Text.Parsec.Error
26
27 unknownError state = newErrorUnknown (statePos state)
28
29 sysUnExpectError msg pos = Error (newErrorMessage (SysUnExpect msg) pos)
30
31 -- | The parser @unexpected msg@ always fails with an unexpected error
32 -- message @msg@ without consuming any input.
33 --
34 -- The parsers 'fail', ('<?>') and @unexpected@ are the three parsers
35 -- used to generate error messages. Of these, only ('<?>') is commonly
36 -- used. For an example of the use of @unexpected@, see the definition
37 -- of 'Text.Parsec.Combinator.notFollowedBy'.
38
39 unexpected :: (Stream s m t) => String -> ParsecT s u m a
40 unexpected msg
41 = ParsecT $ \s -> return $ Empty $ return $
42 Error (newErrorMessage (UnExpect msg) (statePos s))
43
44 -- | ParserT monad transformer and Parser type
45
46 -- | @ParsecT s u m a@ is a parser with stream type @s@, user state type @u@,
47 -- underlying monad @m@ and return type @a@
48
49 data ParsecT s u m a
50 = ParsecT { runParsecT :: State s u -> m (Consumed (m (Reply s u a))) }
51
52 type Parsec s u a = ParsecT s u Identity a
53
54 data Consumed a = Consumed a
55 | Empty !a
56
57 data Reply s u a = Ok !a !(State s u) ParseError
58 | Error ParseError
59
60 data State s u = State {
61 stateInput :: s,
62 statePos :: !SourcePos,
63 stateUser :: !u
64 }
65
66 instance Functor Consumed where
67 fmap f (Consumed x) = Consumed (f x)
68 fmap f (Empty x) = Empty (f x)
69
70 instance Functor (Reply s u) where
71 fmap f (Ok x s e) = Ok (f x) s e
72 fmap f (Error e) = Error e -- XXX
73
74 instance (Monad m) => Functor (ParsecT s u m) where
75 fmap f p = parsecMap f p
76
77 parsecMap :: (Monad m) => (a -> b) -> ParsecT s u m a -> ParsecT s u m b
78 parsecMap f p
79 = ParsecT $ \s -> liftM (fmap (liftM (fmap f))) (runParsecT p s)
80
81 instance (Monad m) => Applicative.Applicative (ParsecT s u m) where
82 pure = return
83 (<*>) = ap -- TODO: Can this be optimized?
84
85 instance (Monad m) => Applicative.Alternative (ParsecT s u m) where
86 empty = mzero
87 (<|>) = mplus
88
89 instance (Monad m) => Monad (ParsecT s u m) where
90 return x = parserReturn x
91 p >>= f = parserBind p f
92 fail msg = parserFail msg
93
94 parserReturn :: (Monad m) => a -> ParsecT s u m a
95 parserReturn x
96 = ParsecT $ \s -> return $ Empty $ return (Ok x s (unknownError s))
97
98 parserBind :: (Monad m)
99 => ParsecT s u m a -> (a -> ParsecT s u m b) -> ParsecT s u m b
100
101 parserBind p f
102 = ParsecT $ \s@(State _ u _) -> do
103 res1 <- runParsecT p s
104 case res1 of
105
106 Empty mReply1
107 -> do reply1 <- mReply1
108 case reply1 of
109 Ok x s' err1 -> do
110 res2 <- runParsecT (f x) s'
111 case res2 of
112 Empty mReply2
113 -> do reply2 <- mReply2
114 return $ Empty $
115 return $ mergeErrorReply err1 reply2
116 other
117 -> do return $ other
118 Error err1 -> return $ Empty $ return $ Error err1
119
120 Consumed mReply1
121 -> do reply1 <- mReply1
122 return $ Consumed $ -- `early' returning
123 case reply1 of
124 Ok x s' err1 -> do
125 res2 <- runParsecT (f x) s'
126 case res2 of
127 Empty mReply2
128 -> do reply2 <- mReply2
129 return $ mergeErrorReply err1 reply2
130 Consumed reply2 -> reply2
131 Error err1 -> return $ Error err1
132
133
134 mergeErrorReply err1 reply -- XXX where to put it?
135 = case reply of
136 Ok x state err2 -> Ok x state (mergeError err1 err2)
137 Error err2 -> Error (mergeError err1 err2)
138
139 parserFail :: (Monad m) => String -> ParsecT s u m a
140 parserFail msg
141 = ParsecT $ \s -> return $ Empty $ return $
142 Error (newErrorMessage (Message msg) (statePos s))
143
144 instance (Monad m) => MonadPlus (ParsecT s u m) where
145 mzero = parserZero
146 mplus p1 p2 = parserPlus p1 p2
147
148 -- | @parserZero@ always fails without consuming any input. @parserZero@ is defined
149 -- equal to the 'mzero' member of the 'MonadPlus' class and to the 'Control.Applicative.empty' member
150 -- of the 'Control.Applicative.Applicative' class.
151
152 parserZero :: (Monad m) => ParsecT s u m a
153 parserZero
154 = ParsecT $ \s -> return $ Empty $ return $ Error (unknownError s)
155
156 parserPlus :: (Monad m)
157 => ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
158 parserPlus (ParsecT p1) (ParsecT p2)
159 = ParsecT $ \s -> do
160 c1 <- p1 s
161 case c1 of
162 Empty mReply1
163 -> do r1 <- mReply1
164 case r1 of
165 Error err -> do
166 c2 <- p2 s
167 case c2 of
168 Empty mReply2
169 -> do reply2 <- mReply2
170 return $ Empty $ return (mergeErrorReply err reply2)
171 consumed
172 -> return $ consumed
173 other -> return $ Empty $ return $ other
174 other -> return $ other
175
176 instance MonadTrans (ParsecT s u) where
177 lift amb = ParsecT $ \s -> do
178 a <- amb
179 return $ Empty $ return $ Ok a s (unknownError s)
180
181 infix 0 <?>
182 infixr 1 <|>
183
184 -- | The parser @p <?> msg@ behaves as parser @p@, but whenever the
185 -- parser @p@ fails /without consuming any input/, it replaces expect
186 -- error messages with the expect error message @msg@.
187 --
188 -- This is normally used at the end of a set alternatives where we want
189 -- to return an error message in terms of a higher level construct
190 -- rather than returning all possible characters. For example, if the
191 -- @expr@ parser from the 'try' example would fail, the error
192 -- message is: '...: expecting expression'. Without the @(\<?>)@
193 -- combinator, the message would be like '...: expecting \"let\" or
194 -- letter', which is less friendly.
195
196 (<?>) :: (Monad m)
197 => (ParsecT s u m a) -> String -> (ParsecT s u m a)
198 p <?> msg = label p msg
199
200 -- | This combinator implements choice. The parser @p \<|> q@ first
201 -- applies @p@. If it succeeds, the value of @p@ is returned. If @p@
202 -- fails /without consuming any input/, parser @q@ is tried. This
203 -- combinator is defined equal to the 'mplus' member of the 'MonadPlus'
204 -- class and the ('Control.Applicative.<|>') member of 'Control.Applicative.Alternative'.
205 --
206 -- The parser is called /predictive/ since @q@ is only tried when
207 -- parser @p@ didn't consume any input (i.e.. the look ahead is 1).
208 -- This non-backtracking behaviour allows for both an efficient
209 -- implementation of the parser combinators and the generation of good
210 -- error messages.
211
212 (<|>) :: (Monad m)
213 => (ParsecT s u m a) -> (ParsecT s u m a) -> (ParsecT s u m a)
214 p1 <|> p2 = mplus p1 p2
215
216 label :: (Monad m) => ParsecT s u m a -> String -> ParsecT s u m a
217 label p msg
218 = labels p [msg]
219
220 labels :: (Monad m) => ParsecT s u m a -> [String] -> ParsecT s u m a
221 labels p msgs
222 = ParsecT $ \s -> do
223 r <- runParsecT p s
224 case r of
225 Empty mReply -> do
226 reply <- mReply
227 return $ Empty $ case reply of
228 Error err
229 -> return $ Error (setExpectErrors err msgs)
230 Ok x s' err
231 | errorIsUnknown err -> return $ reply
232 | otherwise -> return (Ok x s' (setExpectErrors err msgs))
233 other -> return $ other
234 where
235 setExpectErrors err [] = setErrorMessage (Expect "") err
236 setExpectErrors err [msg] = setErrorMessage (Expect msg) err
237 setExpectErrors err (msg:msgs)
238 = foldr (\msg err -> addErrorMessage (Expect msg) err)
239 (setErrorMessage (Expect msg) err) msgs
240
241 -- | An instance of @Stream@ has stream type @s@, underlying monad @m@ and token type @t@ determined by the stream
242 class (Monad m) => Stream s m t | s -> t where
243 uncons :: s -> m (Maybe (t,s))
244
245 tokens :: (Stream s m t, Eq t)
246 => ([t] -> String) -- Pretty print a list of tokens
247 -> (SourcePos -> [t] -> SourcePos)
248 -> [t] -- List of tokens to parse
249 -> ParsecT s u m [t]
250 tokens _ _ []
251 = ParsecT $ \s -> return $ Empty $ return $ Ok [] s (unknownError s)
252 tokens shows nextposs tts@(t:ts)
253 = ParsecT $ \s@(State input pos u) ->
254 let
255 errEof = return $ Error (setErrorMessage (Expect (shows tts))
256 (newErrorMessage (SysUnExpect "") pos))
257 errExpect x = return $ Error (setErrorMessage (Expect (shows tts))
258 (newErrorMessage (SysUnExpect (shows [x])) pos))
259 walk [] rs = return (ok rs)
260 walk (t:ts) rs = do
261 sr <- uncons rs
262 case sr of
263 Nothing -> errEof
264 Just (x,xs) | t == x -> walk ts xs
265 | otherwise -> errExpect x
266 ok rs = let pos' = nextposs pos tts
267 s' = State rs pos' u
268 in Ok tts s' (newErrorUnknown pos')
269 in do
270 sr <- uncons input
271 return $ case sr of
272 Nothing -> Empty $ errEof
273 Just (x,xs)
274 | t == x -> Consumed $ walk ts xs
275 | otherwise -> Empty $ errExpect x
276
277 -- | The parser @try p@ behaves like parser @p@, except that it
278 -- pretends that it hasn't consumed any input when an error occurs.
279 --
280 -- This combinator is used whenever arbitrary look ahead is needed.
281 -- Since it pretends that it hasn't consumed any input when @p@ fails,
282 -- the ('<|>') combinator will try its second alternative even when the
283 -- first parser failed while consuming input.
284 --
285 -- The @try@ combinator can for example be used to distinguish
286 -- identifiers and reserved words. Both reserved words and identifiers
287 -- are a sequence of letters. Whenever we expect a certain reserved
288 -- word where we can also expect an identifier we have to use the @try@
289 -- combinator. Suppose we write:
290 --
291 -- > expr = letExpr <|> identifier <?> "expression"
292 -- >
293 -- > letExpr = do{ string "let"; ... }
294 -- > identifier = many1 letter
295 --
296 -- If the user writes \"lexical\", the parser fails with: @unexpected
297 -- \'x\', expecting \'t\' in \"let\"@. Indeed, since the ('<|>') combinator
298 -- only tries alternatives when the first alternative hasn't consumed
299 -- input, the @identifier@ parser is never tried (because the prefix
300 -- \"le\" of the @string \"let\"@ parser is already consumed). The
301 -- right behaviour can be obtained by adding the @try@ combinator:
302 --
303 -- > expr = letExpr <|> identifier <?> "expression"
304 -- >
305 -- > letExpr = do{ try (string "let"); ... }
306 -- > identifier = many1 letter
307
308 try :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m a
309 try (ParsecT p)
310 = ParsecT $ \s@(State _ pos _) -> do
311 res <- p s
312 case res of
313 Consumed rep -> do r <- rep
314 case r of
315 Error err -> return $ Empty $ return $ Error
316 (setErrorPos pos err)
317 ok -> return $ Consumed $ return $ ok
318 empty -> return $ empty
319
320 -- | The parser @token showTok posFromTok testTok@ accepts a token @t@
321 -- with result @x@ when the function @testTok t@ returns @'Just' x@. The
322 -- source position of the @t@ should be returned by @posFromTok t@ and
323 -- the token can be shown using @showTok t@.
324 --
325 -- This combinator is expressed in terms of 'tokenPrim'.
326 -- It is used to accept user defined token streams. For example,
327 -- suppose that we have a stream of basic tokens tupled with source
328 -- positions. We can than define a parser that accepts single tokens as:
329 --
330 -- > mytoken x
331 -- > = token showTok posFromTok testTok
332 -- > where
333 -- > showTok (pos,t) = show t
334 -- > posFromTok (pos,t) = pos
335 -- > testTok (pos,t) = if x == t then Just t else Nothing
336
337 token :: (Stream s Identity t)
338 => (t -> String) -- ^ Token pretty-printing function.
339 -> (t -> SourcePos) -- ^ Computes the position of a token.
340 -> (t -> Maybe a) -- ^ Matching function for the token to parse.
341 -> Parsec s u a
342 token show tokpos test = tokenPrim show nextpos test
343 where
344 nextpos _ tok ts = case runIdentity (uncons ts) of
345 Nothing -> tokpos tok
346 Just (tok',_) -> tokpos tok'
347
348 -- | The parser @token showTok nextPos testTok@ accepts a token @t@
349 -- with result @x@ when the function @testTok t@ returns @'Just' x@. The
350 -- token can be shown using @showTok t@. The position of the /next/
351 -- token should be returned when @nextPos@ is called with the current
352 -- source position @pos@, the current token @t@ and the rest of the
353 -- tokens @toks@, @nextPos pos t toks@.
354 --
355 -- This is the most primitive combinator for accepting tokens. For
356 -- example, the 'Text.Parsec.Char.char' parser could be implemented as:
357 --
358 -- > char c
359 -- > = tokenPrim showChar nextPos testChar
360 -- > where
361 -- > showChar x = "'" ++ x ++ "'"
362 -- > testChar x = if x == c then Just x else Nothing
363 -- > nextPos pos x xs = updatePosChar pos x
364
365 tokenPrim :: (Stream s m t)
366 => (t -> String) -- ^ Token pretty-printing function.
367 -> (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function.
368 -> (t -> Maybe a) -- ^ Matching function for the token to parse.
369 -> ParsecT s u m a
370 tokenPrim show nextpos test = tokenPrimEx show nextpos Nothing test
371
372 tokenPrimEx :: (Stream s m t)
373 => (t -> String)
374 -> (SourcePos -> t -> s -> SourcePos)
375 -> Maybe (SourcePos -> t -> s -> u -> u)
376 -> (t -> Maybe a)
377 -> ParsecT s u m a
378 tokenPrimEx show nextpos mbNextState test
379 = case mbNextState of
380 Nothing
381 -> ParsecT $ \s@(State input pos user) -> do
382 r <- uncons input
383 case r of
384 Nothing -> return $ Empty $ return (sysUnExpectError "" pos)
385 Just (c,cs)
386 -> case test c of
387 Just x -> let newpos = nextpos pos c cs
388 newstate = State cs newpos user
389 in seq newpos $ seq newstate $
390 return $ Consumed $ return $
391 (Ok x newstate (newErrorUnknown newpos))
392 Nothing -> return $ Empty $ return $
393 (sysUnExpectError (show c) pos)
394 Just nextState
395 -> ParsecT $ \s@(State input pos user) -> do
396 r <- uncons input
397 case r of
398 Nothing -> return $ Empty $ return (sysUnExpectError "" pos)
399 Just (c,cs)
400 -> case test c of
401 Just x -> let newpos = nextpos pos c cs
402 newuser = nextState pos c cs user
403 newstate = State cs newpos newuser
404 in seq newpos $ seq newstate $
405 return $ Consumed $ return $
406 (Ok x newstate (newErrorUnknown newpos))
407 Nothing -> return $ Empty $ return $
408 (sysUnExpectError (show c) pos)
409
410 -- | @many p@ applies the parser @p@ /zero/ or more times. Returns a
411 -- list of the returned values of @p@.
412 --
413 -- > identifier = do{ c <- letter
414 -- > ; cs <- many (alphaNum <|> char '_')
415 -- > ; return (c:cs)
416 -- > }
417
418 many :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m [a]
419 many p
420 = do xs <- manyAccum (:) p
421 return (reverse xs)
422
423 -- | @skipMany p@ applies the parser @p@ /zero/ or more times, skipping
424 -- its result.
425 --
426 -- > spaces = skipMany space
427
428 skipMany :: (Stream s m t) => ParsecT s u m a -> ParsecT s u m ()
429 skipMany p
430 = do manyAccum (\x xs -> []) p
431 return ()
432
433 manyAccum :: (Stream s m t)
434 => (a -> [a] -> [a])
435 -> ParsecT s u m a
436 -> ParsecT s u m [a]
437 manyAccum accum p
438 = ParsecT $ \s ->
439 let walk xs state mr
440 = do r <- mr
441 case r of
442 Empty mReply
443 -> do reply <- mReply
444 case reply of
445 Error err -> return $ Ok xs state err
446 ok -> error "Text.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string."
447 Consumed mReply
448 -> do reply <- mReply
449 case reply of
450 Error err
451 -> return $ Error err
452 Ok x s' err
453 -> let ys = accum x xs
454 in seq ys (walk ys s' (runParsecT p s'))
455 in do r <- runParsecT p s
456 case r of
457 Empty mReply
458 -> do reply <- mReply
459 case reply of
460 Ok x s' err
461 -> error "Text.ParserCombinators.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string."
462 Error err
463 -> return $ Empty $ return (Ok [] s err)
464 consumed
465 -> return $ Consumed $ walk [] s (return consumed)
466
467
468 -- < Running a parser: monadic (runPT) and pure (runP)
469
470 runPT :: (Stream s m t)
471 => ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a)
472 runPT p u name s
473 = do res <- runParsecT p (State s (initialPos name) u)
474 r <- parserReply res
475 case r of
476 Ok x _ _ -> return (Right x)
477 Error err -> return (Left err)
478 where
479 parserReply res
480 = case res of
481 Consumed r -> r
482 Empty r -> r
483
484 runP :: (Stream s Identity t)
485 => Parsec s u a -> u -> SourceName -> s -> Either ParseError a
486 runP p u name s = runIdentity $ runPT p u name s
487
488 -- | The most general way to run a parser. @runParserT p state filePath
489 -- input@ runs parser @p@ on the input list of tokens @input@,
490 -- obtained from source @filePath@ with the initial user state @st@.
491 -- The @filePath@ is only used in error messages and may be the empty
492 -- string. Returns a computation in the underlying monad @m@ that return either a 'ParseError' ('Left') or a
493 -- value of type @a@ ('Right').
494
495 runParserT :: (Stream s m t)
496 => ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a)
497 runParserT = runPT
498
499 -- | The most general way to run a parser over the Identity monad. @runParser p state filePath
500 -- input@ runs parser @p@ on the input list of tokens @input@,
501 -- obtained from source @filePath@ with the initial user state @st@.
502 -- The @filePath@ is only used in error messages and may be the empty
503 -- string. Returns either a 'ParseError' ('Left') or a
504 -- value of type @a@ ('Right').
505 --
506 -- > parseFromFile p fname
507 -- > = do{ input <- readFile fname
508 -- > ; return (runParser p () fname input)
509 -- > }
510
511 runParser :: (Stream s Identity t)
512 => Parsec s u a -> u -> SourceName -> s -> Either ParseError a
513 runParser = runP
514
515 -- | @parse p filePath input@ runs a parser @p@ over Identity without user
516 -- state. The @filePath@ is only used in error messages and may be the
517 -- empty string. Returns either a 'ParseError' ('Left')
518 -- or a value of type @a@ ('Right').
519 --
520 -- > main = case (parse numbers "" "11, 2, 43") of
521 -- > Left err -> print err
522 -- > Right xs -> print (sum xs)
523 -- >
524 -- > numbers = commaSep integer
525
526 parse :: (Stream s Identity t)
527 => Parsec s () a -> SourceName -> s -> Either ParseError a
528 parse p = runP p ()
529
530 -- | The expression @parseTest p input@ applies a parser @p@ against
531 -- input @input@ and prints the result to stdout. Used for testing
532 -- parsers.
533
534 parseTest :: (Stream s Identity t, Show a)
535 => Parsec s () a -> s -> IO ()
536 parseTest p input
537 = case parse p "" input of
538 Left err -> do putStr "parse error at "
539 print err
540 Right x -> print x
541
542 -- < Parser state combinators
543
544 -- | Returns the current source position. See also 'SourcePos'.
545
546 getPosition :: (Monad m) => ParsecT s u m SourcePos
547 getPosition = do state <- getParserState
548 return (statePos state)
549
550 -- | Returns the current input
551
552 getInput :: (Monad m) => ParsecT s u m s
553 getInput = do state <- getParserState
554 return (stateInput state)
555
556 -- | @setPosition pos@ sets the current source position to @pos@.
557
558 setPosition :: (Monad m) => SourcePos -> ParsecT s u m ()
559 setPosition pos
560 = do updateParserState (\(State input _ user) -> State input pos user)
561 return ()
562
563 -- | @setInput input@ continues parsing with @input@. The 'getInput' and
564 -- @setInput@ functions can for example be used to deal with #include
565 -- files.
566
567 setInput :: (Monad m) => s -> ParsecT s u m ()
568 setInput input
569 = do updateParserState (\(State _ pos user) -> State input pos user)
570 return ()
571
572 -- | Returns the full parser state as a 'State' record.
573
574 getParserState :: (Monad m) => ParsecT s u m (State s u)
575 getParserState = updateParserState id
576
577 -- | @setParserState st@ set the full parser state to @st@.
578
579 setParserState :: (Monad m) => State s u -> ParsecT s u m (State s u)
580 setParserState st = updateParserState (const st)
581
582 -- | @updateParserState f@ applies function @f@ to the parser state.
583
584 updateParserState :: (Monad m)
585 => (State s u -> State s u) -> ParsecT s u m (State s u)
586 updateParserState f
587 = ParsecT $ \s -> let s' = f s
588 in return $ Empty $ return (Ok s' s' (unknownError s'))
589
590 -- < User state combinators
591
592 -- | Returns the current user state.
593
594 getState :: (Monad m) => ParsecT s u m u
595 getState = stateUser `liftM` getParserState
596
597 -- | @putState st@ set the user state to @st@.
598
599 putState :: (Monad m) => u -> ParsecT s u m ()
600 putState u = do updateParserState $ \s -> s { stateUser = u }
601 return ()
602
603 -- | @updateState f@ applies function @f@ to the user state. Suppose
604 -- that we want to count identifiers in a source, we could use the user
605 -- state as:
606 --
607 -- > expr = do{ x <- identifier
608 -- > ; updateState (+1)
609 -- > ; return (Id x)
610 -- > }
611
612 modifyState :: (Monad m) => (u -> u) -> ParsecT s u m ()
613 modifyState f = do updateParserState $ \s -> s { stateUser = f (stateUser s) }
614 return ()
615
616 -- XXX Compat
617
618 -- | An alias for putState for backwards compatibility.
619
620 setState :: (Monad m) => u -> ParsecT s u m ()
621 setState = putState
622
623 -- | An alias for modifyState for backwards compatibility.
624
625 updateState :: (Monad m) => (u -> u) -> ParsecT s u m ()
626 updateState = modifyState