b3d8d63fbd4beb370a4e713f8681199a6e8beefa
[ghc.git] / compiler / parser / LexCore.hs
1
2 {-# OPTIONS -fno-warn-tabs #-}
3 -- The above warning suppression flag is a temporary kludge.
4 -- While working on this module you are encouraged to remove it and
5 -- detab the module (please do the detabbing in a separate patch). See
6 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
7 -- for details
8
9 module LexCore where
10
11 import ParserCoreUtils
12 import Panic
13 import Data.Char
14 import Numeric
15
-- | Characters allowed inside an identifier: letters, ASCII digits, and
-- the punctuation characters @_@, @'@, @$@, @-@ and @.@.
isNameChar :: Char -> Bool
isNameChar c = isAlpha c || isDigit c || c `elem` "_'$-."
19
-- | Characters allowed inside a @%keyword@: letters and the underscore.
isKeywordChar :: Char -> Bool
isKeywordChar '_' = True
isKeywordChar c   = isAlpha c
22
-- | Read the next token from the input and hand it, together with the
-- remaining input, to the continuation @cont@.
--
-- Newlines bump the line counter; other whitespace is skipped.  Input that
-- does not start any known token is reported through 'failP' as an
-- \"invalid character\".
lexer :: (Token -> P a) -> P a
lexer cont []           = cont TKEOF []
lexer cont ('\n':cs)    = \line -> lexer cont cs (line+1)
lexer cont ('-':'>':cs) = cont TKrarrow cs

-- When none of the guards below fires, matching falls through to the
-- single-character equations that follow.
lexer cont (c:cs)
  | isSpace c               = lexer cont cs
  | isLower c || (c == '_') = lexName cont TKname (c:cs)
  | isUpper c               = lexName cont TKcname (c:cs)
  | isDigit c               = lexNum cont (c:cs)
    -- A minus sign only starts a number when a digit follows.  Handing a
    -- bare '-' to lexNum would make it 'read' an empty digit string and
    -- crash; letting it fall through yields "invalid character" instead.
  | c == '-' && any isDigit (take 1 cs) = lexNum cont (c:cs)

lexer cont ('%':cs)         = lexKeyword cont cs
lexer cont ('\'':cs)        = lexChar cont cs
lexer cont ('\"':cs)        = lexString [] cont cs
lexer cont ('#':cs)         = cont TKhash cs
lexer cont ('(':cs)         = cont TKoparen cs
lexer cont (')':cs)         = cont TKcparen cs
lexer cont ('{':cs)         = cont TKobrace cs
lexer cont ('}':cs)         = cont TKcbrace cs
lexer cont ('=':cs)         = cont TKeq cs
lexer cont (':':'=':':':cs) = cont TKcoloneqcolon cs
lexer cont (':':':':cs)     = cont TKcoloncolon cs
lexer cont ('*':cs)         = cont TKstar cs
lexer cont ('.':cs)         = cont TKdot cs
lexer cont ('\\':cs)        = cont TKlambda cs
lexer cont ('@':cs)         = cont TKat cs
lexer cont ('?':cs)         = cont TKquestion cs
lexer cont (';':cs)         = cont TKsemicolon cs
-- 20060420 GHC spits out constructors with colon in them nowadays. jds
-- 20061103 but it's easier to parse if we split on the colon, and treat them
-- as several tokens
lexer cont (':':cs)         = cont TKcolon cs
-- 20060420 Likewise does it create identifiers starting with dollar. jds
lexer cont ('$':cs)         = lexName cont TKname ('$':cs)
lexer _    (c:_)            = failP "invalid character" [c]
58
-- | Lex the remainder of a character literal; the opening quote has
-- already been consumed by the caller.
--
-- Accepted forms are a single plain character followed by @'@, or a
-- two-digit lower-case hex escape @\\xHH@ followed by @'@.  Anything else
-- is reported through 'failP'.
lexChar :: (Token -> String -> Int -> ParseResult a) -> String -> Int
        -> ParseResult a
lexChar cont ('\\':'x':h1:h0:'\'':cs)
  | isHexEscape [h1,h0]  = cont (TKchar (hexToChar h1 h0)) cs
lexChar _    ('\\':cs)   = failP "invalid char character" ('\\':(take 10 cs))
lexChar _    ('\'':_)    = failP "invalid char character" ['\'']
lexChar _    ('\"':_)    = failP "invalid char character" ['\"']
lexChar cont (c:'\'':cs) = cont (TKchar c) cs
-- Missing closing quote, too many characters, or end of input.  This is
-- bad input rather than an internal invariant violation, so report it as
-- an ordinary lexical error instead of panicking.
lexChar _    cs          = failP "malformed character literal" (take 10 cs)
68
-- | Lex the remainder of a string literal; the opening double quote has
-- already been consumed by the caller.
--
-- @s@ holds the characters collected so far (callers start with @[]@).
-- Two-digit lower-case hex escapes @\\xHH@ are decoded; any other
-- backslash, or a single quote, is rejected.  On the closing double quote
-- the continuation receives a 'TKstring' with the collected text.
lexString :: String -> (Token -> [Char] -> Int -> ParseResult a)
          -> String -> Int -> ParseResult a
lexString s cont = go (reverse s)
  where
    -- The accumulator is kept reversed so each character is consed on in
    -- constant time; it is flipped back once at the closing quote.
    go acc ('\\':'x':h1:h0:cs)
      | isHexEscape [h1,h0] = go (hexToChar h1 h0 : acc) cs
    go _   ('\\':_)         = failP "invalid string character" ['\\']
    go _   ('\'':_)         = failP "invalid string character" ['\'']
    go acc ('\"':cs)        = cont (TKstring (reverse acc)) cs
    go acc (c:cs)           = go (c : acc) cs
    -- Running out of input is a problem with the source text, not an
    -- internal error, so fail the parse rather than panic.
    go _   []               = failP "unterminated string literal" ""
78
-- | True when every character is a hex digit written with a decimal digit
-- or a lower-case letter (@0@-@9@, @a@-@f@); upper-case @A@-@F@ is not
-- accepted.  The empty string is trivially accepted.
isHexEscape :: String -> Bool
isHexEscape str = all lowerHexDigit str
  where
    lowerHexDigit ch = isDigit ch || (ch >= 'a' && ch <= 'f')
81
-- | Build the character whose code is given by two hex digits, @h1@ being
-- the high nibble and @h0@ the low nibble.
hexToChar :: Char -> Char -> Char
hexToChar h1 h0 = chr (16 * high + low)
  where
    high = digitToInt h1
    low  = digitToInt h0
84
-- | Lex an integer or rational literal with an optional leading minus
-- sign and pass the resulting token plus the remaining input to @cont@.
--
-- A run of digits followed by @.@ and at least one more digit is read as
-- a rational via 'readFloat'; otherwise the digit run is read as an
-- integer.  The input is expected to have at least one digit after the
-- optional sign: with no digits, 'read' has nothing to parse and fails
-- at runtime.
lexNum :: (Token -> String -> a) -> String -> a
lexNum cont input =
  case input of
    '-' : unsigned -> scan (-1) unsigned
    _              -> scan 1 input
  where
    scan sign str =
      case span isDigit str of
        (_, '.' : d : _)
          | isDigit d ->
              -- Floating-point syntax is a bit complicated, so leave it to
              -- the standard library; 'readFloat' re-reads the literal from
              -- its first digit.
              let (value, leftover) : _ = readFloat str
              in  cont (TKrational (fromInteger sign * value)) leftover
        (whole, leftover) ->
          cont (TKinteger (sign * read whole)) leftover
99
-- | Split off the longest prefix of name characters, wrap it with the
-- token constructor @cstr@, and pass the token and the remaining input
-- to @cont@.
lexName :: (a -> String -> b) -> (String -> a) -> String -> b
lexName cont cstr cs =
  let (ident, remainder) = span isNameChar cs
  in  cont (cstr ident) remainder
103
-- | Lex a keyword.  @cs@ is the input just after the @%@ marker; the
-- longest prefix of keyword characters is looked up in the keyword table
-- and the matching token is passed to @cont@ with the remaining input.
-- An unknown word is reported through 'failP', with the @%@ put back in
-- front of the offending input.
lexKeyword :: (Token -> [Char] -> Int -> ParseResult a) -> String -> Int
           -> ParseResult a
lexKeyword cont cs =
  case lookup word keywordTokens of
    Just tok -> cont tok rest
    Nothing  -> failP "invalid keyword" ('%' : cs)
  where
    (word, rest) = span isKeywordChar cs

    -- Keyword spelling (without the leading '%') paired with its token.
    keywordTokens =
      [ ("module",   TKmodule)
      , ("data",     TKdata)
      , ("newtype",  TKnewtype)
      , ("forall",   TKforall)
      , ("rec",      TKrec)
      , ("let",      TKlet)
      , ("in",       TKin)
      , ("case",     TKcase)
      , ("of",       TKof)
      , ("cast",     TKcast)
      , ("note",     TKnote)
      , ("external", TKexternal)
      , ("local",    TKlocal)
      , ("_",        TKwild)
      ]
123