Add typed holes support in Template Haskell.
[ghc.git] / compiler / basicTypes / Lexeme.hs
1 -- (c) The GHC Team
2 --
3 -- Functions to evaluate whether or not a string is a valid identifier.
4 -- There is considerable overlap between the logic here and the logic
5 -- in Lexer.x, but sadly there seems to be no way to merge them.
6
7 module Lexeme (
8 -- * Lexical characteristics of Haskell names
9
10 -- | Use these functions to figure out what kind of name a 'FastString'
11 -- represents; these functions do /not/ check that the identifier
12 -- is valid.
13
14 isLexCon, isLexVar, isLexId, isLexSym,
15 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
16 startsVarSym, startsVarId, startsConSym, startsConId,
17
18 -- * Validating identifiers
19
20 -- | These functions (working over plain old 'String's) check
21 -- to make sure that the identifier is valid.
22 okVarOcc, okConOcc, okTcOcc,
23 okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
24
25 -- Some of the exports above are not used within GHC, but may
26 -- be of value to GHC API users.
27
28 ) where
29
30 import FastString
31 import Util ((<||>))
32
33 import Data.Char
34 import qualified Data.Set as Set
35
36 import GHC.Lexeme
37
38 {-
39
40 ************************************************************************
41 * *
42 Lexical categories
43 * *
44 ************************************************************************
45
46 These functions test strings to see if they fit the lexical categories
47 defined in the Haskell report.
48
49 Note [Classification of generated names]
50 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
51
52 Some names generated for internal use can show up in debugging output,
53 e.g. when using -ddump-simpl. These generated names start with a $
54 but should still be pretty-printed using prefix notation. We make sure
55 this is the case in isLexVarSym by only classifying a name as a symbol
56 if all its characters are symbols, not just its first one.
57 -}
58
-- Coarse lexical classification of names.  Each of the four combinators
-- below is simply the disjunction of two of the finer-grained predicates.
isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

-- Constructor-like / variable-like names, alphanumeric or symbolic.
isLexCon = isLexConId <||> isLexConSym
isLexVar = isLexVarId <||> isLexVarSym

-- Alphanumeric / symbolic names, constructor or variable.
isLexId  = isLexConId  <||> isLexVarId
isLexSym = isLexConSym <||> isLexVarSym
67
68 -------------
-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)".
-- The empty name is rejected; "[]" is accepted as a special case; anything
-- else is decided by its first character.
isLexConId fs =
  not (nullFS fs)
    && (fs == fsLit "[]" || startsConId (headFS fs))
73
-- Ordinary prefix identifiers, e.g. "x", "_x".
-- The empty name is rejected; otherwise only the first character matters.
isLexVarId fs = not (nullFS fs) && startsVarId (headFS fs)
77
-- Infix type or data constructors, e.g. ":-:", ":", "->".
-- The empty name is rejected; "->" is accepted as a special case; anything
-- else is decided by its first character.
isLexConSym fs =
  not (nullFS fs)
    && (fs == fsLit "->" || startsConSym (headFS fs))
82
-- Infix identifiers, e.g. "+".
-- "~R#" is accepted as a special case.  Otherwise the name must be
-- non-empty, start with a symbol-start character, and consist solely of
-- symbol characters after that.
-- See Note [Classification of generated names]
isLexVarSym fs
  | fs == fsLit "~R#" = True
  | nullFS fs         = False
  | otherwise         = allSymbolic (unpackFS fs)
  where
    allSymbolic (c : rest) = startsVarSym c && all isVarSymChar rest
    allSymbolic []         = False
90
91 {-
92
93 ************************************************************************
94 * *
95 Detecting valid names for Template Haskell
96 * *
97 ************************************************************************
98
99 -}
100
101 ----------------------
102 -- External interface
103 ----------------------
104
-- | Is this an acceptable variable name?
--
-- Dispatches on the first character: alphanumeric-looking names go to
-- 'okVarIdOcc', symbolic-looking names go to 'okVarSymOcc'.  The empty
-- string, and any other leading character, is rejected.
okVarOcc :: String -> Bool
okVarOcc str = case str of
  c : _
    | startsVarId c  -> okVarIdOcc str
    | startsVarSym c -> okVarSymOcc str
  _ -> False
113
-- | Is this an acceptable constructor name?
--
-- Dispatches on the first character: alphanumeric-looking names go to
-- 'okConIdOcc', symbolic-looking names go to 'okConSymOcc', and the
-- literal @"[]"@ is accepted as-is.  Everything else, including the empty
-- string, is rejected.
okConOcc :: String -> Bool
okConOcc str = case str of
  c : _
    | startsConId c  -> okConIdOcc str
    | startsConSym c -> okConSymOcc str
    | str == "[]"    -> True
  _ -> False
124
-- | Is this an acceptable type name?
--
-- The names @"[]"@, @"->"@ and @"~"@ are accepted verbatim.  Otherwise the
-- first character selects which validator to apply; the empty string and
-- any unrecognised leading character are rejected.
okTcOcc :: String -> Bool
okTcOcc str
  | str `elem` ["[]", "->", "~"] = True
  | otherwise = case str of
      c : _
        | startsConId c  -> okConIdOcc str
        | startsConSym c -> okConSymOcc str
        | startsVarSym c -> okVarSymOcc str
      _ -> False
138
-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?
--
-- The name must pass 'okIdOcc' and must not be a reserved identifier.
okVarIdOcc :: String -> Bool
okVarIdOcc str
  | not (okIdOcc str) = False
    -- "_" is listed in 'reservedIds' but is admitted here as a valid
    -- identifier: required to support typed holes in Template Haskell.
    -- See #10267
  | str == "_"        = True
  | otherwise         = not (str `Set.member` reservedIds)
146
-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?
--
-- Every character must be a symbol character, and the name must be neither
-- a reserved operator nor a run of two or more dashes.
okVarSymOcc :: String -> Bool
okVarSymOcc str
  | not (all okSymChar str)      = False
  | str `Set.member` reservedOps = False
  | otherwise                    = not (isDashes str)
153
-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?
--
-- Besides ordinary identifiers (see 'okIdOcc'), tuple names are accepted:
-- an opening parenthesis, then any mix of commas and whitespace, then a
-- single closing parenthesis and nothing more.
okConIdOcc :: String -> Bool
okConIdOcc str = okIdOcc str || isTupleName str
  where
    -- a tuple name must open with '('
    isTupleName ('(' : rest) = dropWhile isTupleFiller rest == ")"
    isTupleName _            = False

    -- characters allowed between the parentheses
    isTupleFiller ch = ch == ',' || isSpace ch
170
-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?
--
-- @":"@ is accepted even though it is listed in 'reservedOps'; any other
-- name must consist of symbol characters and not be a reserved operator.
okConSymOcc :: String -> Bool
okConSymOcc str =
  str == ":"
    || (all okSymChar str && not (str `Set.member` reservedOps))
177
178 ----------------------
179 -- Internal functions
180 ----------------------
181
-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc str = all (== '#') trailing
  -- -XMagicHash allows a suffix of hashes; note that `all` is True for an
  -- empty remainder, so names without any hashes pass too.
  where
    -- whatever is left once the leading identifier characters are stripped
    trailing = dropWhile isIdentChar str

    -- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
    isIdentChar ch = okIdChar ch || okIdSuffixChar ch
190
-- | Is this character acceptable in an identifier (after the first letter)?
-- Letters and numbers of the listed Unicode categories are accepted, as are
-- the apostrophe and the underscore.
-- See alexGetByte in Lexer.x
okIdChar :: Char -> Bool
okIdChar c
  | c == '\'' || c == '_' = True
  | otherwise             = generalCategory c `elem` identCategories
  where
    identCategories =
      [ UppercaseLetter
      , LowercaseLetter
      , OtherLetter
      , TitlecaseLetter
      , DecimalNumber
      , OtherNumber
      ]
202
-- | Is this character acceptable in the suffix of an identifier.
-- Only Unicode modifier letters qualify (see #10196).
-- See alexGetByte in Lexer.x
okIdSuffixChar :: Char -> Bool
okIdSuffixChar c = generalCategory c == ModifierLetter
209
-- | Is this character acceptable in a symbol (after the first char)?
-- Special punctuation, the underscore and both quote characters are
-- rejected outright; otherwise the character must belong to one of the
-- punctuation or symbol Unicode categories listed below.
-- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
okSymChar c
  | c `elem` excluded = False
  | otherwise         = generalCategory c `elem` symbolCategories
  where
    -- characters that are never part of a symbol, whatever their category
    excluded = specialSymbols ++ "_\"'"

    symbolCategories =
      [ ConnectorPunctuation
      , DashPunctuation
      , OtherPunctuation
      , MathSymbol
      , CurrencySymbol
      , ModifierSymbol
      , OtherSymbol
      ]
228
229
-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
-- The lone underscore is included alongside the keywords.
reservedIds :: Set.Set String
reservedIds = Set.fromList ("_" : keywords)
  where
    keywords = concatMap words
      [ "case class data default deriving do else foreign"
      , "if import in infix infixl infixr instance let"
      , "module newtype of then type where"
      ]
237
-- | All punctuation that cannot appear in symbols. See $special in Lexer.x.
specialSymbols :: [Char]
specialSymbols =
  [ '(', ')'   -- parentheses
  , ',', ';'   -- separators
  , '[', ']'   -- square brackets
  , '`'        -- backquote
  , '{', '}'   -- braces
  ]
241
-- | All reserved operators. Taken from section 2.4 of the 2010 Report.
reservedOps :: Set.Set String
reservedOps = Set.fromList (arrows ++ others)
  where
    arrows = ["<-", "->", "=>"]
    others = ["..", ":", "::", "=", "\\", "|", "@", "~"]
246
-- | Does this string contain only dashes and has at least 2 of them?
isDashes :: String -> Bool
isDashes str = all (== '-') str && hasAtLeastTwo
  where
    -- dropping one character must still leave something behind
    hasAtLeastTwo = not (null (drop 1 str))