YAML kills a kitten again...
[hsc2hs.git] / HSCParser.hs
1 module HSCParser where
2 import Control.Applicative hiding ( many )
3 import Control.Monad ( MonadPlus(..), liftM, liftM2, ap )
4 import Data.Char ( isAlpha, isAlphaNum, isSpace, isDigit )
5
6 ------------------------------------------------------------------------
7 -- A deterministic parser which remembers the text which has been parsed.
8
9 newtype Parser a = Parser (SourcePos -> String -> ParseResult a)
10
11 runParser :: Parser a -> String -> String -> ParseResult a
12 runParser (Parser p) file_name = p (SourcePos file_name 1 1)
13
14 data ParseResult a = Success !SourcePos String String a
15 | Failure !SourcePos String
16
17 data SourcePos = SourcePos String !Int !Int
18
19 updatePos :: SourcePos -> Char -> SourcePos
20 updatePos (SourcePos name line col) ch = case ch of
21 '\n' -> SourcePos name (line + 1) 1
22 _ -> SourcePos name line (col + 1)
23
24 instance Functor Parser where
25 fmap = liftM
26
27 instance Applicative Parser where
28 pure a = Parser $ \pos s -> Success pos [] s a
29 (<*>) = ap
30
31 instance Monad Parser where
32 return = pure
33 Parser m >>= k =
34 Parser $ \pos s -> case m pos s of
35 Success pos' out1 s' a -> case k a of
36 Parser k' -> case k' pos' s' of
37 Success pos'' out2 imp'' b ->
38 Success pos'' (out1++out2) imp'' b
39 Failure pos'' msg -> Failure pos'' msg
40 Failure pos' msg -> Failure pos' msg
41 fail msg = Parser $ \pos _ -> Failure pos msg
42
43 instance Alternative Parser where
44 empty = mzero
45 (<|>) = mplus
46
47 instance MonadPlus Parser where
48 mzero = fail "mzero"
49 Parser m `mplus` Parser n =
50 Parser $ \pos s -> case m pos s of
51 success@(Success _ _ _ _) -> success
52 Failure _ _ -> n pos s
53
54 getPos :: Parser SourcePos
55 getPos = Parser $ \pos s -> Success pos [] s pos
56
57 setPos :: SourcePos -> Parser ()
58 setPos pos = Parser $ \_ s -> Success pos [] s ()
59
60 message :: Parser a -> String -> Parser a
61 Parser m `message` msg =
62 Parser $ \pos s -> case m pos s of
63 success@(Success _ _ _ _) -> success
64 Failure pos' _ -> Failure pos' msg
65
66 catchOutput_ :: Parser a -> Parser String
67 catchOutput_ (Parser m) =
68 Parser $ \pos s -> case m pos s of
69 Success pos' out s' _ -> Success pos' [] s' out
70 Failure pos' msg -> Failure pos' msg
71
72 fakeOutput :: Parser a -> String -> Parser a
73 Parser m `fakeOutput` out =
74 Parser $ \pos s -> case m pos s of
75 Success pos' _ s' a -> Success pos' out s' a
76 Failure pos' msg -> Failure pos' msg
77
78 lookAhead :: Parser String
79 lookAhead = Parser $ \pos s -> Success pos [] s s
80
81 satisfy :: (Char -> Bool) -> Parser Char
82 satisfy p =
83 Parser $ \pos s -> case s of
84 c:cs | p c -> Success (updatePos pos c) [c] cs c
85 _ -> Failure pos "Bad character"
86
87 satisfy_ :: (Char -> Bool) -> Parser ()
88 satisfy_ p = satisfy p >> return ()
89
90 char_ :: Char -> Parser ()
91 char_ c = do
92 satisfy_ (== c) `message` (show c++" expected")
93
94 anyChar_ :: Parser ()
95 anyChar_ = do
96 satisfy_ (const True) `message` "Unexpected end of file"
97
98 any2Chars_ :: Parser ()
99 any2Chars_ = anyChar_ >> anyChar_
100
101 many :: Parser a -> Parser [a]
102 many p = many1 p `mplus` return []
103
104 many1 :: Parser a -> Parser [a]
105 many1 p = liftM2 (:) p (many p)
106
107 many_ :: Parser a -> Parser ()
108 many_ p = many1_ p `mplus` return ()
109
110 many1_ :: Parser a -> Parser ()
111 many1_ p = p >> many_ p
112
113 manySatisfy, manySatisfy1 :: (Char -> Bool) -> Parser String
114 manySatisfy = many . satisfy
115 manySatisfy1 = many1 . satisfy
116
117 manySatisfy_, manySatisfy1_ :: (Char -> Bool) -> Parser ()
118 manySatisfy_ = many_ . satisfy
119 manySatisfy1_ = many1_ . satisfy
120
121 ------------------------------------------------------------------------
122 -- Parser of hsc syntax.
123
124 data Token
125 = Text SourcePos String
126 | Special SourcePos String String
127
128 tokenIsSpecial :: Token -> Bool
129 tokenIsSpecial (Text {}) = False
130 tokenIsSpecial (Special {}) = True
131
132 parser :: Parser [Token]
133 parser = do
134 pos <- getPos
135 t <- catchOutput_ text
136 s <- lookAhead
137 rest <- case s of
138 [] -> return []
139 _:_ -> liftM2 (:) (special `fakeOutput` []) parser
140 return (if null t then rest else Text pos t : rest)
141
142 text :: Parser ()
143 text = do
144 s <- lookAhead
145 case s of
146 [] -> return ()
147 c:_ | isAlpha c || c == '_' -> do
148 anyChar_
149 manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'')
150 text
151 c:_ | isHsSymbol c -> do
152 symb <- catchOutput_ (manySatisfy_ isHsSymbol)
153 case symb of
154 "#" -> return ()
155 '-':'-':symb' | all (== '-') symb' -> do
156 return () `fakeOutput` symb
157 manySatisfy_ (/= '\n')
158 text
159 _ -> do
160 return () `fakeOutput` unescapeHashes symb
161 text
162 '\"':_ -> do anyChar_; hsString '\"'; text
163 '\'':_ -> do anyChar_; hsString '\''; text
164 '{':'-':_ -> do any2Chars_; linePragma `mplus`
165 columnPragma `mplus`
166 hsComment; text
167 _:_ -> do anyChar_; text
168
169 hsString :: Char -> Parser ()
170 hsString quote = do
171 s <- lookAhead
172 case s of
173 [] -> return ()
174 c:_ | c == quote -> anyChar_
175 '\\':c:_
176 | isSpace c -> do
177 anyChar_
178 manySatisfy_ isSpace
179 char_ '\\' `mplus` return ()
180 hsString quote
181 | otherwise -> do any2Chars_; hsString quote
182 _:_ -> do anyChar_; hsString quote
183
184 hsComment :: Parser ()
185 hsComment = do
186 s <- lookAhead
187 case s of
188 [] -> return ()
189 '-':'}':_ -> any2Chars_
190 '{':'-':_ -> do any2Chars_; hsComment; hsComment
191 _:_ -> do anyChar_; hsComment
192
193 linePragma :: Parser ()
194 linePragma = do
195 char_ '#'
196 manySatisfy_ isSpace
197 satisfy_ (\c -> c == 'L' || c == 'l')
198 satisfy_ (\c -> c == 'I' || c == 'i')
199 satisfy_ (\c -> c == 'N' || c == 'n')
200 satisfy_ (\c -> c == 'E' || c == 'e')
201 manySatisfy1_ isSpace
202 line <- liftM read $ manySatisfy1 isDigit
203 manySatisfy1_ isSpace
204 char_ '\"'
205 name <- manySatisfy (/= '\"')
206 char_ '\"'
207 manySatisfy_ isSpace
208 char_ '#'
209 char_ '-'
210 char_ '}'
211 setPos (SourcePos name (line - 1) 1)
212
213 columnPragma :: Parser ()
214 columnPragma = do
215 char_ '#'
216 manySatisfy_ isSpace
217 satisfy_ (\c -> c == 'C' || c == 'c')
218 satisfy_ (\c -> c == 'O' || c == 'o')
219 satisfy_ (\c -> c == 'L' || c == 'l')
220 satisfy_ (\c -> c == 'U' || c == 'u')
221 satisfy_ (\c -> c == 'M' || c == 'm')
222 satisfy_ (\c -> c == 'N' || c == 'n')
223 manySatisfy1_ isSpace
224 column <- liftM read $ manySatisfy1 isDigit
225 manySatisfy_ isSpace
226 char_ '#'
227 char_ '-'
228 char_ '}'
229 SourcePos name line _ <- getPos
230 setPos (SourcePos name line column)
231
232 isHsSymbol :: Char -> Bool
233 isHsSymbol '!' = True; isHsSymbol '#' = True; isHsSymbol '$' = True
234 isHsSymbol '%' = True; isHsSymbol '&' = True; isHsSymbol '*' = True
235 isHsSymbol '+' = True; isHsSymbol '.' = True; isHsSymbol '/' = True
236 isHsSymbol '<' = True; isHsSymbol '=' = True; isHsSymbol '>' = True
237 isHsSymbol '?' = True; isHsSymbol '@' = True; isHsSymbol '\\' = True
238 isHsSymbol '^' = True; isHsSymbol '|' = True; isHsSymbol '-' = True
239 isHsSymbol '~' = True
240 isHsSymbol _ = False
241
242 unescapeHashes :: String -> String
243 unescapeHashes [] = []
244 unescapeHashes ('#':'#':s) = '#' : unescapeHashes s
245 unescapeHashes (c:s) = c : unescapeHashes s
246
247 lookAheadC :: Parser String
248 lookAheadC = liftM joinLines lookAhead
249 where
250 joinLines [] = []
251 joinLines ('\\':'\n':s) = joinLines s
252 joinLines (c:s) = c : joinLines s
253
254 satisfyC :: (Char -> Bool) -> Parser Char
255 satisfyC p = do
256 s <- lookAhead
257 case s of
258 '\\':'\n':_ -> do any2Chars_ `fakeOutput` []; satisfyC p
259 _ -> satisfy p
260
261 satisfyC_ :: (Char -> Bool) -> Parser ()
262 satisfyC_ p = satisfyC p >> return ()
263
264 charC_ :: Char -> Parser ()
265 charC_ c = satisfyC_ (== c) `message` (show c++" expected")
266
267 anyCharC_ :: Parser ()
268 anyCharC_ = satisfyC_ (const True) `message` "Unexpected end of file"
269
270 any2CharsC_ :: Parser ()
271 any2CharsC_ = anyCharC_ >> anyCharC_
272
273 manySatisfyC :: (Char -> Bool) -> Parser String
274 manySatisfyC = many . satisfyC
275
276 manySatisfyC_ :: (Char -> Bool) -> Parser ()
277 manySatisfyC_ = many_ . satisfyC
278
279 special :: Parser Token
280 special = do
281 manySatisfyC_ (\c -> isSpace c && c /= '\n')
282 s <- lookAheadC
283 case s of
284 '{':_ -> do
285 anyCharC_
286 manySatisfyC_ isSpace
287 sp <- keyArg (== '\n')
288 charC_ '}'
289 return sp
290 _ -> keyArg (const False)
291
292 keyArg :: (Char -> Bool) -> Parser Token
293 keyArg eol = do
294 pos <- getPos
295 key <- keyword `message` "hsc keyword or '{' expected"
296 manySatisfyC_ (\c' -> isSpace c' && c' /= '\n' || eol c')
297 arg <- catchOutput_ (argument eol)
298 return (Special pos key arg)
299
300 keyword :: Parser String
301 keyword = do
302 c <- satisfyC (\c' -> isAlpha c' || c' == '_')
303 cs <- manySatisfyC (\c' -> isAlphaNum c' || c' == '_')
304 return (c:cs)
305
306 argument :: (Char -> Bool) -> Parser ()
307 argument eol = do
308 s <- lookAheadC
309 case s of
310 [] -> return ()
311 c:_ | eol c -> do anyCharC_; argument eol
312 '\n':_ -> return ()
313 '\"':_ -> do anyCharC_; cString '\"'; argument eol
314 '\'':_ -> do anyCharC_; cString '\''; argument eol
315 '(':_ -> do anyCharC_; nested ')'; argument eol
316 ')':_ -> return ()
317 '/':'*':_ -> do any2CharsC_; cComment; argument eol
318 '/':'/':_ -> do
319 any2CharsC_; manySatisfyC_ (/= '\n'); argument eol
320 '[':_ -> do anyCharC_; nested ']'; argument eol
321 ']':_ -> return ()
322 '{':_ -> do anyCharC_; nested '}'; argument eol
323 '}':_ -> return ()
324 _:_ -> do anyCharC_; argument eol
325
326 nested :: Char -> Parser ()
327 nested c = do argument (== '\n'); charC_ c
328
329 cComment :: Parser ()
330 cComment = do
331 s <- lookAheadC
332 case s of
333 [] -> return ()
334 '*':'/':_ -> do any2CharsC_
335 _:_ -> do anyCharC_; cComment
336
337 cString :: Char -> Parser ()
338 cString quote = do
339 s <- lookAheadC
340 case s of
341 [] -> return ()
342 c:_ | c == quote -> anyCharC_
343 '\\':_:_ -> do any2CharsC_; cString quote
344 _:_ -> do anyCharC_; cString quote
345