remove tabs (+ ignore tabs option) from LexCore.hs
[ghc.git] / compiler / parser / LexCore.hs
1 module LexCore where
2
3 import ParserCoreUtils
4 import Panic
5 import Data.Char
6 import Numeric
7
8 isNameChar :: Char -> Bool
9 isNameChar c = isAlpha c || isDigit c || (c == '_') || (c == '\'')
10 || (c == '$') || (c == '-') || (c == '.')
11
12 isKeywordChar :: Char -> Bool
13 isKeywordChar c = isAlpha c || (c == '_')
14
15 lexer :: (Token -> P a) -> P a
16 lexer cont [] = cont TKEOF []
17 lexer cont ('\n':cs) = \line -> lexer cont cs (line+1)
18 lexer cont ('-':'>':cs) = cont TKrarrow cs
19
20 lexer cont (c:cs)
21 | isSpace c = lexer cont cs
22 | isLower c || (c == '_') = lexName cont TKname (c:cs)
23 | isUpper c = lexName cont TKcname (c:cs)
24 | isDigit c || (c == '-') = lexNum cont (c:cs)
25
26 lexer cont ('%':cs) = lexKeyword cont cs
27 lexer cont ('\'':cs) = lexChar cont cs
28 lexer cont ('\"':cs) = lexString [] cont cs
29 lexer cont ('#':cs) = cont TKhash cs
30 lexer cont ('(':cs) = cont TKoparen cs
31 lexer cont (')':cs) = cont TKcparen cs
32 lexer cont ('{':cs) = cont TKobrace cs
33 lexer cont ('}':cs) = cont TKcbrace cs
34 lexer cont ('=':cs) = cont TKeq cs
35 lexer cont (':':'=':':':cs) = cont TKcoloneqcolon cs
36 lexer cont (':':':':cs) = cont TKcoloncolon cs
37 lexer cont ('*':cs) = cont TKstar cs
38 lexer cont ('.':cs) = cont TKdot cs
39 lexer cont ('\\':cs) = cont TKlambda cs
40 lexer cont ('@':cs) = cont TKat cs
41 lexer cont ('?':cs) = cont TKquestion cs
42 lexer cont (';':cs) = cont TKsemicolon cs
43 -- 20060420 GHC spits out constructors with colon in them nowadays. jds
44 -- 20061103 but it's easier to parse if we split on the colon, and treat them
45 -- as several tokens
46 lexer cont (':':cs) = cont TKcolon cs
47 -- 20060420 Likewise does it create identifiers starting with dollar. jds
48 lexer cont ('$':cs) = lexName cont TKname ('$':cs)
49 lexer _ (c:_) = failP "invalid character" [c]
50
51 lexChar :: (Token -> String -> Int -> ParseResult a) -> String -> Int
52 -> ParseResult a
53 lexChar cont ('\\':'x':h1:h0:'\'':cs)
54 | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs
55 lexChar _ ('\\':cs) = failP "invalid char character" ('\\':(take 10 cs))
56 lexChar _ ('\'':_) = failP "invalid char character" ['\'']
57 lexChar _ ('\"':_) = failP "invalid char character" ['\"']
58 lexChar cont (c:'\'':cs) = cont (TKchar c) cs
59 lexChar _ cs = panic ("lexChar: " ++ show cs)
60
61 lexString :: String -> (Token -> [Char] -> Int -> ParseResult a)
62 -> String -> Int -> ParseResult a
63 lexString s cont ('\\':'x':h1:h0:cs)
64 | isHexEscape [h1,h0] = lexString (s++[hexToChar h1 h0]) cont cs
65 lexString _ _ ('\\':_) = failP "invalid string character" ['\\']
66 lexString _ _ ('\'':_) = failP "invalid string character" ['\'']
67 lexString s cont ('\"':cs) = cont (TKstring s) cs
68 lexString s cont (c:cs) = lexString (s++[c]) cont cs
69 lexString _ _ [] = panic "lexString []"
70
71 isHexEscape :: String -> Bool
72 isHexEscape = all (\c -> isHexDigit c && (isDigit c || isLower c))
73
74 hexToChar :: Char -> Char -> Char
75 hexToChar h1 h0 = chr (digitToInt h1 * 16 + digitToInt h0)
76
77 lexNum :: (Token -> String -> a) -> String -> a
78 lexNum cont cs =
79 case cs of
80 ('-':cs) -> f (-1) cs
81 _ -> f 1 cs
82 where f sgn cs =
83 case span isDigit cs of
84 (digits,'.':c:rest)
85 | isDigit c -> cont (TKrational (fromInteger sgn * r)) rest'
86 where ((r,rest'):_) = readFloat (digits ++ ('.':c:rest))
87 -- When reading a floating-point number, which is
88 -- a bit complicated, use the standard library function
89 -- "readFloat"
90 (digits,rest) -> cont (TKinteger (sgn * (read digits))) rest
91
92 lexName :: (a -> String -> b) -> (String -> a) -> String -> b
93 lexName cont cstr cs = cont (cstr name) rest
94 where (name,rest) = span isNameChar cs
95
96 lexKeyword :: (Token -> [Char] -> Int -> ParseResult a) -> String -> Int
97 -> ParseResult a
98 lexKeyword cont cs =
99 case span isKeywordChar cs of
100 ("module",rest) -> cont TKmodule rest
101 ("data",rest) -> cont TKdata rest
102 ("newtype",rest) -> cont TKnewtype rest
103 ("forall",rest) -> cont TKforall rest
104 ("rec",rest) -> cont TKrec rest
105 ("let",rest) -> cont TKlet rest
106 ("in",rest) -> cont TKin rest
107 ("case",rest) -> cont TKcase rest
108 ("of",rest) -> cont TKof rest
109 ("cast",rest) -> cont TKcast rest
110 ("note",rest) -> cont TKnote rest
111 ("external",rest) -> cont TKexternal rest
112 ("local",rest) -> cont TKlocal rest
113 ("_",rest) -> cont TKwild rest
114 _ -> failP "invalid keyword" ('%':cs)
115