a2409614d1f4cccc3e98a1340e0591d056f60b9a
[ghc.git] / compiler / basicTypes / Lexeme.hs
1 -- (c) The GHC Team
2 --
3 -- Functions to evaluate whether or not a string is a valid identifier.
4 -- There is considerable overlap between the logic here and the logic
-- in Lexer.x, but sadly there seems to be no way to merge them.
6
7 module Lexeme (
8 -- * Lexical characteristics of Haskell names
9
-- | Use these functions to figure out what kind of name a 'FastString'
11 -- represents; these functions do /not/ check that the identifier
12 -- is valid.
13
14 isLexCon, isLexVar, isLexId, isLexSym,
15 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
16 startsVarSym, startsVarId, startsConSym, startsConId,
17
18 -- * Validating identifiers
19
20 -- | These functions (working over plain old 'String's) check
21 -- to make sure that the identifier is valid.
22 okVarOcc, okConOcc, okTcOcc,
23 okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
24
25 -- Some of the exports above are not used within GHC, but may
26 -- be of value to GHC API users.
27
28 ) where
29
30 import FastString
31 import Util ((<||>))
32
33 import Data.Char
34 import qualified Data.Set as Set
35
36 {-
37
38 ************************************************************************
39 * *
40 Lexical categories
41 * *
42 ************************************************************************
43
44 These functions test strings to see if they fit the lexical categories
45 defined in the Haskell report.
46
47 Note [Classification of generated names]
48 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
49
50 Some names generated for internal use can show up in debugging output,
51 e.g. when using -ddump-simpl. These generated names start with a $
52 but should still be pretty-printed using prefix notation. We make sure
53 this is the case in isLexVarSym by only classifying a name as a symbol
54 if all its characters are symbols, not just its first one.
55 -}
56
-- | Does the name fall in either constructor category (prefix or infix)?
-- Like all the @isLex*@ classifiers, this does not check validity.
isLexCon :: FastString -> Bool
isLexCon name
  | isLexConId name = True
  | otherwise       = isLexConSym name

-- | Does the name fall in either variable category (prefix or infix)?
isLexVar :: FastString -> Bool
isLexVar name
  | isLexVarId name = True
  | otherwise       = isLexVarSym name

-- | Is the name a prefix name, be it a constructor or a variable?
isLexId :: FastString -> Bool
isLexId name
  | isLexConId name = True
  | otherwise       = isLexVarId name

-- | Is the name an infix name, be it a constructor or a variable?
isLexSym :: FastString -> Bool
isLexSym name
  | isLexConSym name = True
  | otherwise        = isLexVarSym name

-- Classifiers for the four individual lexical categories.
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
65
66 -------------
-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)".
-- The empty name is never a constructor; "[]" is special-cased because
-- its first character is not recognised by 'startsConId'.
isLexConId cs
  | nullFS cs = False
  | otherwise = cs == fsLit "[]" || startsConId (headFS cs)
71
-- Ordinary prefix identifiers, e.g. "x", "_x".
-- Only the first character is inspected; the empty name is rejected.
isLexVarId cs = not (nullFS cs) && startsVarId (headFS cs)
75
-- Infix type or data constructors, e.g. ":-:", ":", "->".
-- "->" is special-cased because it does not start with a colon.
isLexConSym cs =
  if nullFS cs
    then False
    else cs == fsLit "->" || startsConSym (headFS cs)
80
-- Infix identifiers, e.g. "+".
-- Every character (not just the first) must be a symbol character;
-- see Note [Classification of generated names].
isLexVarSym fs
  | fs == fsLit "~R#" = True
  | nullFS fs         = False
  | otherwise         = allSymbolic (unpackFS fs)
  where
    allSymbolic []           = False
    allSymbolic (hd : rest)  = startsVarSym hd && all isVarSymChar rest
88
89 -------------
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool

-- Infix Ids: an ASCII operator character, or a non-ASCII Unicode symbol.
startsVarSym c
  | startsVarSymASCII c = True
  | otherwise           = ord c > 0x7f && isSymbol c

-- Infix data constructors start with a colon.
startsConSym = (== ':')

-- Ordinary Ids: an underscore, a lowercase letter, or a caseless letter.
startsVarId '_' = True
startsVarId c   = generalCategory c `elem` [ LowercaseLetter
                                           , OtherLetter      -- See #1103
                                           ]

-- Ordinary type constructors and data constructors
-- (an opening parenthesis is also accepted).
startsConId c = c == '(' || isUpper c
98
-- | Is this one of the ASCII characters that may begin a symbolic
-- variable name?  Note that the colon is not among them.
startsVarSymASCII :: Char -> Bool
startsVarSymASCII c = any (== c) "!#$%&*+./<=>?@\\^|~-"
101
-- | May this character appear in a symbolic variable name?  That is a
-- colon, or anything accepted by 'startsVarSym'.
isVarSymChar :: Char -> Bool
isVarSymChar ':' = True
isVarSymChar c   = startsVarSym c
104
105 {-
106
107 ************************************************************************
108 * *
109 Detecting valid names for Template Haskell
110 * *
111 ************************************************************************
112
113 -}
114
115 ----------------------
116 -- External interface
117 ----------------------
118
-- | Is this an acceptable variable name?  The first character selects
-- which validator (alphanumeric or symbolic) is applied to the whole
-- string; the empty string and any other first character are rejected.
okVarOcc :: String -> Bool
okVarOcc str = case str of
  c : _
    | startsVarId c  -> okVarIdOcc str
    | startsVarSym c -> okVarSymOcc str
  _ -> False
127
-- | Is this an acceptable constructor name?  The first character selects
-- which validator (alphanumeric or symbolic) is applied; failing both,
-- only the list constructor @[]@ is accepted.
okConOcc :: String -> Bool
okConOcc str = case str of
  [] -> False
  c : _
    | startsConId c  -> okConIdOcc str
    | startsConSym c -> okConSymOcc str
    | otherwise      -> str == "[]"
138
-- | Is this an acceptable type name?  The built-in names @[]@, @->@ and
-- @~@ are always accepted; otherwise the first character selects the
-- validator to run over the whole string.
okTcOcc :: String -> Bool
okTcOcc str
  | str `elem` ["[]", "->", "~"] = True
  | otherwise = case str of
      c : _
        | startsConId c  -> okConIdOcc str
        | startsConSym c -> okConSymOcc str
        | startsVarSym c -> okVarSymOcc str
      _ -> False
152
-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?  Reserved identifiers are rejected.
okVarIdOcc :: String -> Bool
okVarIdOcc str
  | okIdOcc str = not (Set.member str reservedIds)
  | otherwise   = False
158
-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?  Reserved operators are rejected, as is
-- a run of two or more dashes.
okVarSymOcc :: String -> Bool
okVarSymOcc str
  | not (all okSymChar str)      = False
  | str `Set.member` reservedOps = False
  | otherwise                    = not (isDashes str)
165
-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?  Tuple constructor names are
-- accepted as well.
okConIdOcc :: String -> Bool
okConIdOcc str = okIdOcc str || isTupleName str
  where
    -- A tuple name is an opening parenthesis, then any mix of commas
    -- and whitespace, then exactly one closing parenthesis.
    isTupleName ('(' : inner) = dropWhile isFiller inner == ")"
    isTupleName _             = False

    isFiller ch = ch == ',' || isSpace ch
182
-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?  A lone colon is accepted even
-- though it is listed among the reserved operators.
okConSymOcc :: String -> Bool
okConSymOcc str
  | str == ":" = True
  | otherwise  = all okSymChar str && not (Set.member str reservedOps)
189
190 ----------------------
191 -- Internal functions
192 ----------------------
193
-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc str = all (== '#') trailing
    -- -XMagicHash allows a suffix of hashes; when nothing is left over,
    -- `all` is trivially True.
  where
    -- Whatever remains after the leading run of identifier characters.
    trailing = dropWhile idBodyChar str

    -- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
    idBodyChar ch = okIdChar ch || okIdSuffixChar ch
202
-- | Is this character acceptable in an identifier (after the first letter)?
-- Accepts an apostrophe, an underscore, and any character in one of the
-- letter or number categories listed below.
-- See alexGetByte in Lexer.x
okIdChar :: Char -> Bool
okIdChar '\'' = True
okIdChar '_'  = True
okIdChar c    = generalCategory c `elem` idCategories
  where
    idCategories =
      [ UppercaseLetter
      , LowercaseLetter
      , OtherLetter
      , TitlecaseLetter
      , DecimalNumber
      , OtherNumber
      ]
214
-- | Is this character acceptable in the suffix of an identifier?
-- Only Unicode modifier letters qualify (see #10196).
-- See alexGetByte in Lexer.x
okIdSuffixChar :: Char -> Bool
okIdSuffixChar c = generalCategory c == ModifierLetter
221
-- | Is this character acceptable in a symbol (after the first char)?
-- Special punctuation, underscores and quote characters never are; any
-- other character qualifies when its Unicode category is one of the
-- punctuation or symbol categories listed below.
-- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
okSymChar c
  | c `elem` excluded = False
  | otherwise         = generalCategory c `elem` symCategories
  where
    excluded = specialSymbols ++ "_\"'"

    symCategories =
      [ ConnectorPunctuation
      , DashPunctuation
      , OtherPunctuation
      , MathSymbol
      , CurrencySymbol
      , ModifierSymbol
      , OtherSymbol
      ]
240
-- | All reserved identifiers.  Taken from section 2.4 of the 2010 Report.
-- The lone underscore counts as reserved too.
reservedIds :: Set.Set String
reservedIds = Set.fromList
  [ "case", "class", "data", "default", "deriving", "do", "else", "foreign"
  , "if", "import", "in", "infix", "infixl", "infixr", "instance", "let"
  , "module", "newtype", "of", "then", "type", "where"
  , "_"
  ]
248
-- | The punctuation characters that can never be part of a symbol.
-- See $special in Lexer.x.
specialSymbols :: String
specialSymbols = "(),;[]`{}"
252
-- | All reserved operators.  Taken from section 2.4 of the 2010 Report.
reservedOps :: Set.Set String
reservedOps = Set.fromList
  [ "..", ":", "::", "="
  , "\\", "|", "<-", "->"
  , "@", "~", "=>"
  ]
257
-- | Does this string consist solely of dashes, with at least two of them?
isDashes :: String -> Bool
isDashes str = case span (== '-') str of
  (_ : _ : _, []) -> True   -- two or more dashes and nothing after them
  _               -> False