2049e001bf5a4eb5547fd99f83353a3517bbe2d9
[ghc.git] / compiler / basicTypes / Lexeme.hs
1 -- (c) The GHC Team
2 --
3 -- Functions to evaluate whether or not a string is a valid identifier.
4 -- There is considerable overlap between the logic here and the logic
5 -- in Lexer.x, but sadly there seems to be no way to merge them.
6
7 module Lexeme (
8 -- * Lexical characteristics of Haskell names
9
10 -- | Use these functions to figure out what kind of name a 'FastString'
11 -- represents; these functions do /not/ check that the identifier
12 -- is valid.
13
14 isLexCon, isLexVar, isLexId, isLexSym,
15 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
16 startsVarSym, startsVarId, startsConSym, startsConId,
17
18 -- * Validating identifiers
19
20 -- | These functions (working over plain old 'String's) check
21 -- to make sure that the identifier is valid.
22 okVarOcc, okConOcc, okTcOcc,
23 okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
24
25 -- Some of the exports above are not used within GHC, but may
26 -- be of value to GHC API users.
27
28 ) where
29
30 import FastString
31 import Util ((<||>))
32
33 import Data.Char
34 import qualified Data.Set as Set
35
36 import GHC.Lexeme
37
38 {-
39
40 ************************************************************************
41 * *
42 Lexical categories
43 * *
44 ************************************************************************
45
46 These functions test strings to see if they fit the lexical categories
47 defined in the Haskell report.
48
49 Note [Classification of generated names]
50 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
51
52 Some names generated for internal use can show up in debugging output,
53 e.g. when using -ddump-simpl. These generated names start with a $
54 but should still be pretty-printed using prefix notation. We make sure
55 this is the case in isLexVarSym by only classifying a name as a symbol
56 if all its characters are symbols, not just its first one.
57 -}
58
isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

-- Constructor-like name: either a prefix or an infix constructor.
isLexCon name
  | isLexConId name = True
  | otherwise       = isLexConSym name

-- Variable-like name: either a prefix or an infix variable.
isLexVar name
  | isLexVarId name = True
  | otherwise       = isLexVarSym name

-- Prefix (alphanumeric) name: constructor or variable.
isLexId name
  | isLexConId name = True
  | otherwise       = isLexVarId name

-- Infix (symbolic) name: constructor or variable.
isLexSym name
  | isLexConSym name = True
  | otherwise        = isLexVarSym name
67
68 -------------
-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)".
-- The empty name is never a constructor; "[]" is accepted outright;
-- anything else is decided by its first character alone.
isLexConId name =
  not (nullFS name)
    && (name == fsLit "[]" || startsConId (headFS name))
73
-- Ordinary prefix identifiers, e.g. "x", "_x".
-- Only the first character is inspected; the empty name is rejected.
isLexVarId name = not (nullFS name) && startsVarId (headFS name)
77
-- Infix type or data constructors, e.g. ":-:", ":", "->".
-- The empty name is rejected, "->" is accepted outright, and every other
-- name is classified by its first character.
isLexConSym name =
  if nullFS name
    then False
    else name == fsLit "->" || startsConSym (headFS name)
82
-- Infix identifiers, e.g. "+".
-- The literal name "~R#" is always accepted. Otherwise the name must be
-- non-empty, start with a variable-symbol character, and consist solely of
-- symbol characters after that.
isLexVarSym name
  | name == fsLit "~R#" = True
  | nullFS name         = False
  | otherwise           = allSymbolic (unpackFS name)
  where
    -- Checking every character, not just the first, is deliberate:
    -- see Note [Classification of generated names]
    allSymbolic []             = False
    allSymbolic (first : rest) = startsVarSym first && all isVarSymChar rest
90
91 {-
92
93 ************************************************************************
94 * *
95 Detecting valid names for Template Haskell
96 * *
97 ************************************************************************
98
99 -}
100
101 ----------------------
102 -- External interface
103 ----------------------
104
-- | Is this an acceptable variable name?
--
-- The first character selects which validator runs: alphanumeric names go
-- to 'okVarIdOcc', symbolic names to 'okVarSymOcc'. The empty string, and
-- any name whose first character starts neither kind, is rejected.
okVarOcc :: String -> Bool
okVarOcc name = case name of
  firstChar : _
    | startsVarId firstChar  -> okVarIdOcc name
    | startsVarSym firstChar -> okVarSymOcc name
  _ -> False
113
-- | Is this an acceptable constructor name?
--
-- Alphanumeric constructors are checked by 'okConIdOcc' and symbolic ones
-- by 'okConSymOcc'. Of the remaining names only the list constructor
-- @"[]"@ is accepted.
okConOcc :: String -> Bool
okConOcc name = case name of
  [] -> False
  firstChar : _
    | startsConId firstChar  -> okConIdOcc name
    | startsConSym firstChar -> okConSymOcc name
    | otherwise              -> name == "[]"
124
-- | Is this an acceptable type name?
--
-- The names @"[]"@, @"->"@ and @"~"@ are accepted outright. Any other name
-- is dispatched on its first character to the constructor-id,
-- constructor-symbol or variable-symbol validator; names that fit none of
-- these (including the empty string) are rejected.
okTcOcc :: String -> Bool
okTcOcc name
  | name `elem` ["[]", "->", "~"] = True
okTcOcc [] = False
okTcOcc name@(firstChar : _)
  | startsConId firstChar  = okConIdOcc name
  | startsConSym firstChar = okConSymOcc name
  | startsVarSym firstChar = okVarSymOcc name
  | otherwise              = False
138
-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?
--
-- The name must be a well-formed identifier and must not be one of the
-- 'reservedIds'.
okVarIdOcc :: String -> Bool
okVarIdOcc name
  | okIdOcc name = not (Set.member name reservedIds)
  | otherwise    = False
144
-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?
--
-- Every character must be a symbol character, and the name must be neither
-- a reserved operator nor a run of two or more dashes.
okVarSymOcc :: String -> Bool
okVarSymOcc name =
  and
    [ all okSymChar name
    , not (Set.member name reservedOps)
    , not (isDashes name)
    ]
151
-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?
--
-- Besides ordinary identifiers this also accepts tuple names: an opening
-- parenthesis, any mixture of commas and whitespace, then a single closing
-- parenthesis that ends the string.
okConIdOcc :: String -> Bool
okConIdOcc name = okIdOcc name || looksLikeTuple name
  where
    -- A tuple name must open with '(' ...
    looksLikeTuple ('(' : inner) = dropWhile tupleFiller inner == ")"
    looksLikeTuple _             = False

    -- ... and may hold only commas and whitespace before the final ')'.
    tupleFiller ch = ch == ',' || isSpace ch
168
-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?
--
-- The bare colon is accepted even though it is listed in 'reservedOps';
-- every other name must consist of symbol characters and not be reserved.
okConSymOcc :: String -> Bool
okConSymOcc name
  | name == ":" = True
  | otherwise   = all okSymChar name && not (Set.member name reservedOps)
175
176 ----------------------
177 -- Internal functions
178 ----------------------
179
-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc name = all (== '#') trailing
  -- -XMagicHash allows a suffix of hashes; when nothing is left over,
  -- `all` is trivially True for the empty list.
  where
    -- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
    -- Whatever remains after the leading run of identifier characters.
    trailing = dropWhile (okIdChar <||> okIdSuffixChar) name
188
-- | Is this character acceptable in an identifier (after the first letter)?
-- See alexGetByte in Lexer.x
--
-- Accepts letters (upper-, lower-, title-case and "other"), decimal and
-- "other" numbers, plus the apostrophe and the underscore.
okIdChar :: Char -> Bool
okIdChar c = generalCategory c `elem` identCategories || c `elem` "'_"
  where
    identCategories =
      [ UppercaseLetter
      , LowercaseLetter
      , OtherLetter
      , TitlecaseLetter
      , DecimalNumber
      , OtherNumber
      ]
200
-- | Is this character acceptable in the suffix of an identifier.
-- See alexGetByte in Lexer.x
--
-- Only Unicode modifier letters qualify. See #10196
okIdSuffixChar :: Char -> Bool
okIdSuffixChar c = generalCategory c == ModifierLetter
207
-- | Is this character acceptable in a symbol (after the first char)?
-- See alexGetByte in Lexer.x
--
-- A character qualifies when it is not one of the 'specialSymbols', is not
-- an underscore or a quote character, and belongs to one of the Unicode
-- punctuation or symbol categories listed below.
okSymChar :: Char -> Bool
okSymChar c = not excluded && generalCategory c `elem` symbolCategories
  where
    -- Characters ruled out regardless of their Unicode category.
    excluded = c `elem` specialSymbols || c `elem` "_\"'"

    symbolCategories =
      [ ConnectorPunctuation
      , DashPunctuation
      , OtherPunctuation
      , MathSymbol
      , CurrencySymbol
      , ModifierSymbol
      , OtherSymbol
      ]
226
-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
-- The lone underscore is included as well.
reservedIds :: Set.Set String
reservedIds =
  Set.fromList $
    concatMap
      words
      [ "case class data default deriving do else foreign if import in"
      , "infix infixl infixr instance let module newtype of then type where"
      , "_"
      ]
234
-- | All punctuation that cannot appear in symbols. See $special in Lexer.x.
specialSymbols :: [Char]
specialSymbols = ['(', ')', ',', ';', '[', ']', '`', '{', '}']
238
-- | All reserved operators. Taken from section 2.4 of the 2010 Report.
reservedOps :: Set.Set String
reservedOps =
  Set.fromList
    [ ".."
    , ":"
    , "::"
    , "="
    , "\\"
    , "|"
    , "<-"
    , "->"
    , "@"
    , "~"
    , "=>"
    ]
243
-- | Does this string contain only dashes and has at least 2 of them?
isDashes :: String -> Bool
isDashes str = all (== '-') str && atLeastTwo str
  where
    -- True when the list has two or more elements.
    atLeastTwo (_ : _ : _) = True
    atLeastTwo _           = False