Follow changes in GHC build system
[hsc2hs.git] / HSCParser.hs
1 module HSCParser where
2 import Control.Applicative hiding ( many )
3 import Control.Monad ( MonadPlus(..), liftM, liftM2, ap )
4 import Data.Char ( isAlpha, isAlphaNum, isSpace, isDigit )
5
6 ------------------------------------------------------------------------
7 -- A deterministic parser which remembers the text which has been parsed.
8
9 newtype Parser a = Parser (SourcePos -> String -> ParseResult a)
10
11 runParser :: Parser a -> String -> String -> ParseResult a
12 runParser (Parser p) file_name = p (SourcePos file_name 1)
13
14 data ParseResult a = Success !SourcePos String String a
15 | Failure !SourcePos String
16
17 data SourcePos = SourcePos String !Int
18
19 updatePos :: SourcePos -> Char -> SourcePos
20 updatePos pos@(SourcePos name line) ch = case ch of
21 '\n' -> SourcePos name (line + 1)
22 _ -> pos
23
24 instance Functor Parser where
25 fmap = liftM
26
27 instance Applicative Parser where
28 pure = return
29 (<*>) = ap
30
31 instance Monad Parser where
32 return a = Parser $ \pos s -> Success pos [] s a
33 Parser m >>= k =
34 Parser $ \pos s -> case m pos s of
35 Success pos' out1 s' a -> case k a of
36 Parser k' -> case k' pos' s' of
37 Success pos'' out2 imp'' b ->
38 Success pos'' (out1++out2) imp'' b
39 Failure pos'' msg -> Failure pos'' msg
40 Failure pos' msg -> Failure pos' msg
41 fail msg = Parser $ \pos _ -> Failure pos msg
42
43 instance Alternative Parser where
44 empty = mzero
45 (<|>) = mplus
46
47 instance MonadPlus Parser where
48 mzero = fail "mzero"
49 Parser m `mplus` Parser n =
50 Parser $ \pos s -> case m pos s of
51 success@(Success _ _ _ _) -> success
52 Failure _ _ -> n pos s
53
54 getPos :: Parser SourcePos
55 getPos = Parser $ \pos s -> Success pos [] s pos
56
57 setPos :: SourcePos -> Parser ()
58 setPos pos = Parser $ \_ s -> Success pos [] s ()
59
60 message :: Parser a -> String -> Parser a
61 Parser m `message` msg =
62 Parser $ \pos s -> case m pos s of
63 success@(Success _ _ _ _) -> success
64 Failure pos' _ -> Failure pos' msg
65
66 catchOutput_ :: Parser a -> Parser String
67 catchOutput_ (Parser m) =
68 Parser $ \pos s -> case m pos s of
69 Success pos' out s' _ -> Success pos' [] s' out
70 Failure pos' msg -> Failure pos' msg
71
72 fakeOutput :: Parser a -> String -> Parser a
73 Parser m `fakeOutput` out =
74 Parser $ \pos s -> case m pos s of
75 Success pos' _ s' a -> Success pos' out s' a
76 Failure pos' msg -> Failure pos' msg
77
78 lookAhead :: Parser String
79 lookAhead = Parser $ \pos s -> Success pos [] s s
80
81 satisfy :: (Char -> Bool) -> Parser Char
82 satisfy p =
83 Parser $ \pos s -> case s of
84 c:cs | p c -> Success (updatePos pos c) [c] cs c
85 _ -> Failure pos "Bad character"
86
87 satisfy_ :: (Char -> Bool) -> Parser ()
88 satisfy_ p = satisfy p >> return ()
89
90 char_ :: Char -> Parser ()
91 char_ c = do
92 satisfy_ (== c) `message` (show c++" expected")
93
94 anyChar_ :: Parser ()
95 anyChar_ = do
96 satisfy_ (const True) `message` "Unexpected end of file"
97
98 any2Chars_ :: Parser ()
99 any2Chars_ = anyChar_ >> anyChar_
100
101 many :: Parser a -> Parser [a]
102 many p = many1 p `mplus` return []
103
104 many1 :: Parser a -> Parser [a]
105 many1 p = liftM2 (:) p (many p)
106
107 many_ :: Parser a -> Parser ()
108 many_ p = many1_ p `mplus` return ()
109
110 many1_ :: Parser a -> Parser ()
111 many1_ p = p >> many_ p
112
113 manySatisfy, manySatisfy1 :: (Char -> Bool) -> Parser String
114 manySatisfy = many . satisfy
115 manySatisfy1 = many1 . satisfy
116
117 manySatisfy_, manySatisfy1_ :: (Char -> Bool) -> Parser ()
118 manySatisfy_ = many_ . satisfy
119 manySatisfy1_ = many1_ . satisfy
120
121 ------------------------------------------------------------------------
122 -- Parser of hsc syntax.
123
124 data Token
125 = Text SourcePos String
126 | Special SourcePos String String
127
128 parser :: Parser [Token]
129 parser = do
130 pos <- getPos
131 t <- catchOutput_ text
132 s <- lookAhead
133 rest <- case s of
134 [] -> return []
135 _:_ -> liftM2 (:) (special `fakeOutput` []) parser
136 return (if null t then rest else Text pos t : rest)
137
138 text :: Parser ()
139 text = do
140 s <- lookAhead
141 case s of
142 [] -> return ()
143 c:_ | isAlpha c || c == '_' -> do
144 anyChar_
145 manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'')
146 text
147 c:_ | isHsSymbol c -> do
148 symb <- catchOutput_ (manySatisfy_ isHsSymbol)
149 case symb of
150 "#" -> return ()
151 '-':'-':symb' | all (== '-') symb' -> do
152 return () `fakeOutput` symb
153 manySatisfy_ (/= '\n')
154 text
155 _ -> do
156 return () `fakeOutput` unescapeHashes symb
157 text
158 '\"':_ -> do anyChar_; hsString '\"'; text
159 '\'':_ -> do anyChar_; hsString '\''; text
160 '{':'-':_ -> do any2Chars_; linePragma `mplus` hsComment; text
161 _:_ -> do anyChar_; text
162
163 hsString :: Char -> Parser ()
164 hsString quote = do
165 s <- lookAhead
166 case s of
167 [] -> return ()
168 c:_ | c == quote -> anyChar_
169 '\\':c:_
170 | isSpace c -> do
171 anyChar_
172 manySatisfy_ isSpace
173 char_ '\\' `mplus` return ()
174 hsString quote
175 | otherwise -> do any2Chars_; hsString quote
176 _:_ -> do anyChar_; hsString quote
177
178 hsComment :: Parser ()
179 hsComment = do
180 s <- lookAhead
181 case s of
182 [] -> return ()
183 '-':'}':_ -> any2Chars_
184 '{':'-':_ -> do any2Chars_; hsComment; hsComment
185 _:_ -> do anyChar_; hsComment
186
187 linePragma :: Parser ()
188 linePragma = do
189 char_ '#'
190 manySatisfy_ isSpace
191 satisfy_ (\c -> c == 'L' || c == 'l')
192 satisfy_ (\c -> c == 'I' || c == 'i')
193 satisfy_ (\c -> c == 'N' || c == 'n')
194 satisfy_ (\c -> c == 'E' || c == 'e')
195 manySatisfy1_ isSpace
196 line <- liftM read $ manySatisfy1 isDigit
197 manySatisfy1_ isSpace
198 char_ '\"'
199 name <- manySatisfy (/= '\"')
200 char_ '\"'
201 manySatisfy_ isSpace
202 char_ '#'
203 char_ '-'
204 char_ '}'
205 setPos (SourcePos name (line - 1))
206
207 isHsSymbol :: Char -> Bool
208 isHsSymbol '!' = True; isHsSymbol '#' = True; isHsSymbol '$' = True
209 isHsSymbol '%' = True; isHsSymbol '&' = True; isHsSymbol '*' = True
210 isHsSymbol '+' = True; isHsSymbol '.' = True; isHsSymbol '/' = True
211 isHsSymbol '<' = True; isHsSymbol '=' = True; isHsSymbol '>' = True
212 isHsSymbol '?' = True; isHsSymbol '@' = True; isHsSymbol '\\' = True
213 isHsSymbol '^' = True; isHsSymbol '|' = True; isHsSymbol '-' = True
214 isHsSymbol '~' = True
215 isHsSymbol _ = False
216
217 unescapeHashes :: String -> String
218 unescapeHashes [] = []
219 unescapeHashes ('#':'#':s) = '#' : unescapeHashes s
220 unescapeHashes (c:s) = c : unescapeHashes s
221
222 lookAheadC :: Parser String
223 lookAheadC = liftM joinLines lookAhead
224 where
225 joinLines [] = []
226 joinLines ('\\':'\n':s) = joinLines s
227 joinLines (c:s) = c : joinLines s
228
229 satisfyC :: (Char -> Bool) -> Parser Char
230 satisfyC p = do
231 s <- lookAhead
232 case s of
233 '\\':'\n':_ -> do any2Chars_ `fakeOutput` []; satisfyC p
234 _ -> satisfy p
235
236 satisfyC_ :: (Char -> Bool) -> Parser ()
237 satisfyC_ p = satisfyC p >> return ()
238
239 charC_ :: Char -> Parser ()
240 charC_ c = satisfyC_ (== c) `message` (show c++" expected")
241
242 anyCharC_ :: Parser ()
243 anyCharC_ = satisfyC_ (const True) `message` "Unexpected end of file"
244
245 any2CharsC_ :: Parser ()
246 any2CharsC_ = anyCharC_ >> anyCharC_
247
248 manySatisfyC :: (Char -> Bool) -> Parser String
249 manySatisfyC = many . satisfyC
250
251 manySatisfyC_ :: (Char -> Bool) -> Parser ()
252 manySatisfyC_ = many_ . satisfyC
253
254 special :: Parser Token
255 special = do
256 manySatisfyC_ (\c -> isSpace c && c /= '\n')
257 s <- lookAheadC
258 case s of
259 '{':_ -> do
260 anyCharC_
261 manySatisfyC_ isSpace
262 sp <- keyArg (== '\n')
263 charC_ '}'
264 return sp
265 _ -> keyArg (const False)
266
267 keyArg :: (Char -> Bool) -> Parser Token
268 keyArg eol = do
269 pos <- getPos
270 key <- keyword `message` "hsc keyword or '{' expected"
271 manySatisfyC_ (\c' -> isSpace c' && c' /= '\n' || eol c')
272 arg <- catchOutput_ (argument eol)
273 return (Special pos key arg)
274
275 keyword :: Parser String
276 keyword = do
277 c <- satisfyC (\c' -> isAlpha c' || c' == '_')
278 cs <- manySatisfyC (\c' -> isAlphaNum c' || c' == '_')
279 return (c:cs)
280
281 argument :: (Char -> Bool) -> Parser ()
282 argument eol = do
283 s <- lookAheadC
284 case s of
285 [] -> return ()
286 c:_ | eol c -> do anyCharC_; argument eol
287 '\n':_ -> return ()
288 '\"':_ -> do anyCharC_; cString '\"'; argument eol
289 '\'':_ -> do anyCharC_; cString '\''; argument eol
290 '(':_ -> do anyCharC_; nested ')'; argument eol
291 ')':_ -> return ()
292 '/':'*':_ -> do any2CharsC_; cComment; argument eol
293 '/':'/':_ -> do
294 any2CharsC_; manySatisfyC_ (/= '\n'); argument eol
295 '[':_ -> do anyCharC_; nested ']'; argument eol
296 ']':_ -> return ()
297 '{':_ -> do anyCharC_; nested '}'; argument eol
298 '}':_ -> return ()
299 _:_ -> do anyCharC_; argument eol
300
301 nested :: Char -> Parser ()
302 nested c = do argument (== '\n'); charC_ c
303
304 cComment :: Parser ()
305 cComment = do
306 s <- lookAheadC
307 case s of
308 [] -> return ()
309 '*':'/':_ -> do any2CharsC_
310 _:_ -> do anyCharC_; cComment
311
312 cString :: Char -> Parser ()
313 cString quote = do
314 s <- lookAheadC
315 case s of
316 [] -> return ()
317 c:_ | c == quote -> anyCharC_
318 '\\':_:_ -> do any2CharsC_; cString quote
319 _:_ -> do anyCharC_; cString quote
320