Fix #7484, checking for good binder names in Convert.
[ghc.git] / compiler / basicTypes / Lexeme.hs
1 -- (c) The GHC Team
2 --
3 -- Functions to evaluate whether or not a string is a valid identifier.
4 -- There is considerable overlap between the logic here and the logic
-- in Lexer.x, but sadly there seems to be no way to merge them.
6
7 module Lexeme (
8 -- * Lexical characteristics of Haskell names
9
-- | Use these functions to figure out what kind of name a 'FastString'
11 -- represents; these functions do /not/ check that the identifier
12 -- is valid.
13
14 isLexCon, isLexVar, isLexId, isLexSym,
15 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
16 startsVarSym, startsVarId, startsConSym, startsConId,
17
18 -- * Validating identifiers
19
20 -- | These functions (working over plain old 'String's) check
21 -- to make sure that the identifier is valid.
22 okVarOcc, okConOcc, okTcOcc,
23 okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
24
25 -- Some of the exports above are not used within GHC, but may
26 -- be of value to GHC API users.
27
28 ) where
29
30 import FastString
31
32 import Data.Char
33 import qualified Data.Set as Set
34
35 {-
36
37 ************************************************************************
38 * *
39 Lexical categories
40 * *
41 ************************************************************************
42
43 These functions test strings to see if they fit the lexical categories
44 defined in the Haskell report.
45
46 Note [Classification of generated names]
47 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48
49 Some names generated for internal use can show up in debugging output,
50 e.g. when using -ddump-simpl. These generated names start with a $
51 but should still be pretty-printed using prefix notation. We make sure
52 this is the case in isLexVarSym by only classifying a name as a symbol
53 if all its characters are symbols, not just its first one.
54 -}
55
-- Shared signatures for the whole family of lexical classifiers.  The
-- four coarse predicates defined right here are each the disjunction of
-- two of the finer-grained ones.
isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

-- Constructor-like names (alphanumeric or symbolic).
isLexCon name = or [isLexConId name, isLexConSym name]

-- Variable-like names (alphanumeric or symbolic).
isLexVar name = or [isLexVarId name, isLexVarSym name]

-- Alphanumeric names (constructor or variable).
isLexId name = or [isLexConId name, isLexVarId name]

-- Symbolic names (constructor or variable).
isLexSym name = or [isLexConSym name, isLexVarSym name]
64
65 -------------
-- Prefix type or data constructors, e.g. "Foo" or "(,)".  The empty
-- string is rejected, "[]" is accepted outright, and anything else is
-- judged by its first character alone.
isLexConId name
  | nullFS name = False
  | otherwise   = name == fsLit "[]" || startsConId (headFS name)
70
-- Ordinary prefix identifiers, e.g. "x" or "_x".  Only the first
-- character is inspected; the empty string is never a variable.
isLexVarId name = not (nullFS name) && startsVarId (headFS name)
74
-- Infix type or data constructors, e.g. ":-:" or ":".  The empty string
-- is rejected, "->" is accepted outright, and anything else is judged
-- by its first character alone.
isLexConSym name
  | nullFS name = False
  | otherwise   = name == fsLit "->" || startsConSym (headFS name)
79
-- Infix identifiers, e.g. "+".  "~R#" is accepted outright.  Otherwise
-- every character must be symbolic, not just the first one; see
-- Note [Classification of generated names].
isLexVarSym name
  | name == fsLit "~R#" = True
  | nullFS name         = False
  | otherwise           = allSymbolic (unpackFS name)
  where
    allSymbolic (first : rest) = startsVarSym first && all isVarSymChar rest
    allSymbolic []             = False
87
88 -------------
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool

-- Infix identifiers: one of the ASCII operator characters, or a
-- non-ASCII character that 'isSymbol' classifies as a symbol.
startsVarSym ch
  | startsVarSymASCII ch = True
  | otherwise            = ord ch > 0x7f && isSymbol ch

-- Infix data constructors: these always begin with a colon.
startsConSym = (== ':')

-- Ordinary identifiers: an underscore, a lowercase letter, or a letter
-- in the OtherLetter category (see #1103).
startsVarId ch
  | ch == '_' = True
  | otherwise = generalCategory ch `elem` [LowercaseLetter, OtherLetter]

-- Ordinary type constructors and data constructors: an uppercase
-- letter, or an opening parenthesis.
startsConId ch = ch == '(' || isUpper ch
97
-- The ASCII characters that may begin a symbolic variable name.
startsVarSymASCII :: Char -> Bool
startsVarSymASCII = (`elem` "!#$%&*+./<=>?@\\^|~-")
100
-- A character allowed inside a symbolic variable name: a colon, or
-- anything that could also start such a name.
isVarSymChar :: Char -> Bool
isVarSymChar ch
  | ch == ':' = True
  | otherwise = startsVarSym ch
103
104 {-
105
106 ************************************************************************
107 * *
108 Detecting valid names for Template Haskell
109 * *
110 ************************************************************************
111
112 -}
113
114 ----------------------
115 -- External interface
116 ----------------------
117
-- | Is this an acceptable variable name?  The first character selects
-- between the alphanumeric and the symbolic check; the empty string and
-- any other leading character are rejected.
okVarOcc :: String -> Bool
okVarOcc name = case name of
  first : _
    | startsVarId first  -> okVarIdOcc name
    | startsVarSym first -> okVarSymOcc name
  _ -> False
126
-- | Is this an acceptable constructor name?  The first character
-- selects between the alphanumeric and the symbolic check; @"[]"@ is
-- accepted as a special case, and everything else is rejected.
okConOcc :: String -> Bool
okConOcc name = case name of
  first : _
    | startsConId first  -> okConIdOcc name
    | startsConSym first -> okConSymOcc name
  "[]" -> True
  _    -> False
137
-- | Is this an acceptable type name?  @"[]"@, @"->"@ and @"~"@ are
-- accepted outright.  Any other name is checked as a constructor
-- (alphanumeric or symbolic) or as a symbolic variable, depending on
-- its first character.
okTcOcc :: String -> Bool
okTcOcc name
  | name `elem` builtinSyntax = True
  | otherwise = case name of
      first : _
        | startsConId first  -> okConIdOcc name
        | startsConSym first -> okConSymOcc name
        | startsVarSym first -> okVarSymOcc name
      _ -> False
  where
    builtinSyntax = ["[]", "->", "~"]
151
152 -- | Is this an acceptable alphanumeric variable name, assuming it starts
153 -- with an acceptable letter?
okVarIdOcc :: String -> Bool
okVarIdOcc name
  | okIdOcc name = Set.notMember name reservedIds  -- reserved words are out
  | otherwise    = False
157
158 -- | Is this an acceptable symbolic variable name, assuming it starts
159 -- with an acceptable character?
okVarSymOcc :: String -> Bool
okVarSymOcc name = and
  [ all okSymChar name              -- every character is symbolic
  , Set.notMember name reservedOps  -- not a reserved operator
  , not (isDashes name)             -- not a run of two or more dashes
  ]
164
165 -- | Is this an acceptable alphanumeric constructor name, assuming it
166 -- starts with an acceptable letter?
okConIdOcc :: String -> Bool
okConIdOcc name = okIdOcc name || isTupleName name
  where
    -- A tuple name opens with a parenthesis ...
    isTupleName ('(' : body) = tupleTail body
    isTupleName _            = False

    -- ... and then holds only commas and whitespace up to the final
    -- closing parenthesis.
    tupleTail ")" = True
    tupleTail (ch : more)
      | ch == ',' || isSpace ch = tupleTail more
    tupleTail _ = False
181
182 -- | Is this an acceptable symbolic constructor name, assuming it
183 -- starts with an acceptable character?
okConSymOcc :: String -> Bool
okConSymOcc name
  | name == ":" = True  -- a lone colon is allowed even though it is reserved
  | otherwise   = all okSymChar name && Set.notMember name reservedOps
188
189 ----------------------
190 -- Internal functions
191 ----------------------
192
193 -- | Is this string an acceptable id, possibly with a suffix of hashes,
194 -- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc = all (== '#') . dropWhile okIdChar
  -- Skip the leading identifier characters; whatever is left must be
  -- nothing but hashes (-XMagicHash allows a suffix of hashes).  An
  -- empty remainder passes, because `all` is True on the empty list.
200
201 -- | Is this character acceptable in an identifier (after the first letter)?
202 -- See alexGetByte in Lexer.x
okIdChar :: Char -> Bool
okIdChar ch
  | ch == '\'' || ch == '_' = True
  | otherwise               = generalCategory ch `elem` identCategories
  where
    -- Letter and number categories accepted inside an identifier.
    identCategories =
      [ UppercaseLetter
      , LowercaseLetter
      , OtherLetter
      , TitlecaseLetter
      , DecimalNumber
      , OtherNumber
      ]
212
213 -- | Is this character acceptable in a symbol (after the first char)?
214 -- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
okSymChar ch = not excluded && generalCategory ch `elem` symbolCategories
  where
    -- Special punctuation, plus underscore and the two quote characters,
    -- are rejected whatever their Unicode category.
    excluded = ch `elem` specialSymbols || ch `elem` "_\"'"

    -- Punctuation and symbol categories accepted inside an operator.
    symbolCategories =
      [ ConnectorPunctuation
      , DashPunctuation
      , OtherPunctuation
      , MathSymbol
      , CurrencySymbol
      , ModifierSymbol
      , OtherSymbol
      ]
231
-- | The reserved identifiers listed in section 2.4 of the Haskell 2010
-- Report, including the lone underscore.
reservedIds :: Set.Set String
reservedIds = Set.fromList
  [ "_"
  , "case", "of"
  , "class", "instance"
  , "data", "newtype", "type"
  , "default", "deriving"
  , "do"
  , "foreign"
  , "if", "then", "else"
  , "import", "module"
  , "infix", "infixl", "infixr"
  , "let", "in", "where"
  ]
239
-- | Punctuation characters that can never be part of a symbol.
-- See $special in Lexer.x.
specialSymbols :: [Char]
specialSymbols = ['(', ')', ',', ';', '[', ']', '`', '{', '}']
243
-- | The reserved operators listed in section 2.4 of the Haskell 2010
-- Report.
reservedOps :: Set.Set String
reservedOps = Set.fromList
  [ ".."
  , ":", "::"
  , "="
  , "\\"
  , "|"
  , "<-", "->", "=>"
  , "@"
  , "~"
  ]
248
-- | Does this string consist only of dashes, and have at least two of
-- them?
isDashes :: String -> Bool
isDashes str = case span (== '-') str of
  (_ : _ : _, []) -> True   -- two or more dashes and nothing after them
  _               -> False