Recognize huge unsigned long long values as integers when cross compiling.
[hsc2hs.git] / HSCParser.hs
1 module HSCParser where
2
3 import Control.Monad ( MonadPlus(..), liftM, liftM2 )
4 import Data.Char ( isAlpha, isAlphaNum, isSpace, isDigit )
5
6 ------------------------------------------------------------------------
7 -- A deterministic parser which remembers the text which has been parsed.
8
9 newtype Parser a = Parser (SourcePos -> String -> ParseResult a)
10
11 runParser :: Parser a -> String -> String -> ParseResult a
12 runParser (Parser p) file_name = p (SourcePos file_name 1)
13
14 data ParseResult a = Success !SourcePos String String a
15 | Failure !SourcePos String
16
17 data SourcePos = SourcePos String !Int
18
19 updatePos :: SourcePos -> Char -> SourcePos
20 updatePos pos@(SourcePos name line) ch = case ch of
21 '\n' -> SourcePos name (line + 1)
22 _ -> pos
23
24 instance Monad Parser where
25 return a = Parser $ \pos s -> Success pos [] s a
26 Parser m >>= k =
27 Parser $ \pos s -> case m pos s of
28 Success pos' out1 s' a -> case k a of
29 Parser k' -> case k' pos' s' of
30 Success pos'' out2 imp'' b ->
31 Success pos'' (out1++out2) imp'' b
32 Failure pos'' msg -> Failure pos'' msg
33 Failure pos' msg -> Failure pos' msg
34 fail msg = Parser $ \pos _ -> Failure pos msg
35
36 instance MonadPlus Parser where
37 mzero = fail "mzero"
38 Parser m `mplus` Parser n =
39 Parser $ \pos s -> case m pos s of
40 success@(Success _ _ _ _) -> success
41 Failure _ _ -> n pos s
42
43 getPos :: Parser SourcePos
44 getPos = Parser $ \pos s -> Success pos [] s pos
45
46 setPos :: SourcePos -> Parser ()
47 setPos pos = Parser $ \_ s -> Success pos [] s ()
48
49 message :: Parser a -> String -> Parser a
50 Parser m `message` msg =
51 Parser $ \pos s -> case m pos s of
52 success@(Success _ _ _ _) -> success
53 Failure pos' _ -> Failure pos' msg
54
55 catchOutput_ :: Parser a -> Parser String
56 catchOutput_ (Parser m) =
57 Parser $ \pos s -> case m pos s of
58 Success pos' out s' _ -> Success pos' [] s' out
59 Failure pos' msg -> Failure pos' msg
60
61 fakeOutput :: Parser a -> String -> Parser a
62 Parser m `fakeOutput` out =
63 Parser $ \pos s -> case m pos s of
64 Success pos' _ s' a -> Success pos' out s' a
65 Failure pos' msg -> Failure pos' msg
66
67 lookAhead :: Parser String
68 lookAhead = Parser $ \pos s -> Success pos [] s s
69
70 satisfy :: (Char -> Bool) -> Parser Char
71 satisfy p =
72 Parser $ \pos s -> case s of
73 c:cs | p c -> Success (updatePos pos c) [c] cs c
74 _ -> Failure pos "Bad character"
75
76 satisfy_ :: (Char -> Bool) -> Parser ()
77 satisfy_ p = satisfy p >> return ()
78
79 char_ :: Char -> Parser ()
80 char_ c = do
81 satisfy_ (== c) `message` (show c++" expected")
82
83 anyChar_ :: Parser ()
84 anyChar_ = do
85 satisfy_ (const True) `message` "Unexpected end of file"
86
87 any2Chars_ :: Parser ()
88 any2Chars_ = anyChar_ >> anyChar_
89
90 many :: Parser a -> Parser [a]
91 many p = many1 p `mplus` return []
92
93 many1 :: Parser a -> Parser [a]
94 many1 p = liftM2 (:) p (many p)
95
96 many_ :: Parser a -> Parser ()
97 many_ p = many1_ p `mplus` return ()
98
99 many1_ :: Parser a -> Parser ()
100 many1_ p = p >> many_ p
101
102 manySatisfy, manySatisfy1 :: (Char -> Bool) -> Parser String
103 manySatisfy = many . satisfy
104 manySatisfy1 = many1 . satisfy
105
106 manySatisfy_, manySatisfy1_ :: (Char -> Bool) -> Parser ()
107 manySatisfy_ = many_ . satisfy
108 manySatisfy1_ = many1_ . satisfy
109
110 ------------------------------------------------------------------------
111 -- Parser of hsc syntax.
112
113 data Token
114 = Text SourcePos String
115 | Special SourcePos String String
116
117 parser :: Parser [Token]
118 parser = do
119 pos <- getPos
120 t <- catchOutput_ text
121 s <- lookAhead
122 rest <- case s of
123 [] -> return []
124 _:_ -> liftM2 (:) (special `fakeOutput` []) parser
125 return (if null t then rest else Text pos t : rest)
126
127 text :: Parser ()
128 text = do
129 s <- lookAhead
130 case s of
131 [] -> return ()
132 c:_ | isAlpha c || c == '_' -> do
133 anyChar_
134 manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'')
135 text
136 c:_ | isHsSymbol c -> do
137 symb <- catchOutput_ (manySatisfy_ isHsSymbol)
138 case symb of
139 "#" -> return ()
140 '-':'-':symb' | all (== '-') symb' -> do
141 return () `fakeOutput` symb
142 manySatisfy_ (/= '\n')
143 text
144 _ -> do
145 return () `fakeOutput` unescapeHashes symb
146 text
147 '\"':_ -> do anyChar_; hsString '\"'; text
148 '\'':_ -> do anyChar_; hsString '\''; text
149 '{':'-':_ -> do any2Chars_; linePragma `mplus` hsComment; text
150 _:_ -> do anyChar_; text
151
152 hsString :: Char -> Parser ()
153 hsString quote = do
154 s <- lookAhead
155 case s of
156 [] -> return ()
157 c:_ | c == quote -> anyChar_
158 '\\':c:_
159 | isSpace c -> do
160 anyChar_
161 manySatisfy_ isSpace
162 char_ '\\' `mplus` return ()
163 hsString quote
164 | otherwise -> do any2Chars_; hsString quote
165 _:_ -> do anyChar_; hsString quote
166
167 hsComment :: Parser ()
168 hsComment = do
169 s <- lookAhead
170 case s of
171 [] -> return ()
172 '-':'}':_ -> any2Chars_
173 '{':'-':_ -> do any2Chars_; hsComment; hsComment
174 _:_ -> do anyChar_; hsComment
175
176 linePragma :: Parser ()
177 linePragma = do
178 char_ '#'
179 manySatisfy_ isSpace
180 satisfy_ (\c -> c == 'L' || c == 'l')
181 satisfy_ (\c -> c == 'I' || c == 'i')
182 satisfy_ (\c -> c == 'N' || c == 'n')
183 satisfy_ (\c -> c == 'E' || c == 'e')
184 manySatisfy1_ isSpace
185 line <- liftM read $ manySatisfy1 isDigit
186 manySatisfy1_ isSpace
187 char_ '\"'
188 name <- manySatisfy (/= '\"')
189 char_ '\"'
190 manySatisfy_ isSpace
191 char_ '#'
192 char_ '-'
193 char_ '}'
194 setPos (SourcePos name (line - 1))
195
196 isHsSymbol :: Char -> Bool
197 isHsSymbol '!' = True; isHsSymbol '#' = True; isHsSymbol '$' = True
198 isHsSymbol '%' = True; isHsSymbol '&' = True; isHsSymbol '*' = True
199 isHsSymbol '+' = True; isHsSymbol '.' = True; isHsSymbol '/' = True
200 isHsSymbol '<' = True; isHsSymbol '=' = True; isHsSymbol '>' = True
201 isHsSymbol '?' = True; isHsSymbol '@' = True; isHsSymbol '\\' = True
202 isHsSymbol '^' = True; isHsSymbol '|' = True; isHsSymbol '-' = True
203 isHsSymbol '~' = True
204 isHsSymbol _ = False
205
206 unescapeHashes :: String -> String
207 unescapeHashes [] = []
208 unescapeHashes ('#':'#':s) = '#' : unescapeHashes s
209 unescapeHashes (c:s) = c : unescapeHashes s
210
211 lookAheadC :: Parser String
212 lookAheadC = liftM joinLines lookAhead
213 where
214 joinLines [] = []
215 joinLines ('\\':'\n':s) = joinLines s
216 joinLines (c:s) = c : joinLines s
217
218 satisfyC :: (Char -> Bool) -> Parser Char
219 satisfyC p = do
220 s <- lookAhead
221 case s of
222 '\\':'\n':_ -> do any2Chars_ `fakeOutput` []; satisfyC p
223 _ -> satisfy p
224
225 satisfyC_ :: (Char -> Bool) -> Parser ()
226 satisfyC_ p = satisfyC p >> return ()
227
228 charC_ :: Char -> Parser ()
229 charC_ c = satisfyC_ (== c) `message` (show c++" expected")
230
231 anyCharC_ :: Parser ()
232 anyCharC_ = satisfyC_ (const True) `message` "Unexpected end of file"
233
234 any2CharsC_ :: Parser ()
235 any2CharsC_ = anyCharC_ >> anyCharC_
236
237 manySatisfyC :: (Char -> Bool) -> Parser String
238 manySatisfyC = many . satisfyC
239
240 manySatisfyC_ :: (Char -> Bool) -> Parser ()
241 manySatisfyC_ = many_ . satisfyC
242
243 special :: Parser Token
244 special = do
245 manySatisfyC_ (\c -> isSpace c && c /= '\n')
246 s <- lookAheadC
247 case s of
248 '{':_ -> do
249 anyCharC_
250 manySatisfyC_ isSpace
251 sp <- keyArg (== '\n')
252 charC_ '}'
253 return sp
254 _ -> keyArg (const False)
255
256 keyArg :: (Char -> Bool) -> Parser Token
257 keyArg eol = do
258 pos <- getPos
259 key <- keyword `message` "hsc keyword or '{' expected"
260 manySatisfyC_ (\c' -> isSpace c' && c' /= '\n' || eol c')
261 arg <- catchOutput_ (argument eol)
262 return (Special pos key arg)
263
264 keyword :: Parser String
265 keyword = do
266 c <- satisfyC (\c' -> isAlpha c' || c' == '_')
267 cs <- manySatisfyC (\c' -> isAlphaNum c' || c' == '_')
268 return (c:cs)
269
270 argument :: (Char -> Bool) -> Parser ()
271 argument eol = do
272 s <- lookAheadC
273 case s of
274 [] -> return ()
275 c:_ | eol c -> do anyCharC_; argument eol
276 '\n':_ -> return ()
277 '\"':_ -> do anyCharC_; cString '\"'; argument eol
278 '\'':_ -> do anyCharC_; cString '\''; argument eol
279 '(':_ -> do anyCharC_; nested ')'; argument eol
280 ')':_ -> return ()
281 '/':'*':_ -> do any2CharsC_; cComment; argument eol
282 '/':'/':_ -> do
283 any2CharsC_; manySatisfyC_ (/= '\n'); argument eol
284 '[':_ -> do anyCharC_; nested ']'; argument eol
285 ']':_ -> return ()
286 '{':_ -> do anyCharC_; nested '}'; argument eol
287 '}':_ -> return ()
288 _:_ -> do anyCharC_; argument eol
289
290 nested :: Char -> Parser ()
291 nested c = do argument (== '\n'); charC_ c
292
293 cComment :: Parser ()
294 cComment = do
295 s <- lookAheadC
296 case s of
297 [] -> return ()
298 '*':'/':_ -> do any2CharsC_
299 _:_ -> do anyCharC_; cComment
300
301 cString :: Char -> Parser ()
302 cString quote = do
303 s <- lookAheadC
304 case s of
305 [] -> return ()
306 c:_ | c == quote -> anyCharC_
307 '\\':_:_ -> do any2CharsC_; cString quote
308 _:_ -> do anyCharC_; cString quote
309