Spelling fixes in comments [ci skip]
[ghc.git] / compiler / basicTypes / Lexeme.hs
1 -- (c) The GHC Team
2 --
3 -- Functions to evaluate whether or not a string is a valid identifier.
4 -- There is considerable overlap between the logic here and the logic
5 -- in Lexer.x, but sadly there seems to be way to merge them.
6
7 module Lexeme (
8 -- * Lexical characteristics of Haskell names
9
10 -- | Use these functions to figure what kind of name a 'FastString'
11 -- represents; these functions do /not/ check that the identifier
12 -- is valid.
13
14 isLexCon, isLexVar, isLexId, isLexSym,
15 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
16 startsVarSym, startsVarId, startsConSym, startsConId,
17
18 -- * Validating identifiers
19
20 -- | These functions (working over plain old 'String's) check
21 -- to make sure that the identifier is valid.
22 okVarOcc, okConOcc, okTcOcc,
23 okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
24
25 -- Some of the exports above are not used within GHC, but may
26 -- be of value to GHC API users.
27
28 ) where
29
30 import FastString
31
32 import Data.Char
33 import qualified Data.Set as Set
34
35 import GHC.Lexeme
36
37 {-
38
39 ************************************************************************
40 * *
41 Lexical categories
42 * *
43 ************************************************************************
44
45 These functions test strings to see if they fit the lexical categories
46 defined in the Haskell report.
47
48 Note [Classification of generated names]
49 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
50
51 Some names generated for internal use can show up in debugging output,
52 e.g. when using -ddump-simpl. These generated names start with a $
53 but should still be pretty-printed using prefix notation. We make sure
54 this is the case in isLexVarSym by only classifying a name as a symbol
55 if all its characters are symbols, not just its first one.
56 -}
57
58 isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
59 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
60
61 isLexCon cs = isLexConId cs || isLexConSym cs
62 isLexVar cs = isLexVarId cs || isLexVarSym cs
63
64 isLexId cs = isLexConId cs || isLexVarId cs
65 isLexSym cs = isLexConSym cs || isLexVarSym cs
66
67 -------------
68 isLexConId cs -- Prefix type or data constructors
69 | nullFS cs = False -- e.g. "Foo", "[]", "(,)"
70 | cs == (fsLit "[]") = True
71 | otherwise = startsConId (headFS cs)
72
73 isLexVarId cs -- Ordinary prefix identifiers
74 | nullFS cs = False -- e.g. "x", "_x"
75 | otherwise = startsVarId (headFS cs)
76
77 isLexConSym cs -- Infix type or data constructors
78 | nullFS cs = False -- e.g. ":-:", ":", "->"
79 | cs == (fsLit "->") = True
80 | otherwise = startsConSym (headFS cs)
81
82 isLexVarSym fs -- Infix identifiers e.g. "+"
83 | fs == (fsLit "~R#") = True
84 | otherwise
85 = case (if nullFS fs then [] else unpackFS fs) of
86 [] -> False
87 (c:cs) -> startsVarSym c && all isVarSymChar cs
88 -- See Note [Classification of generated names]
89
90 {-
91
92 ************************************************************************
93 * *
94 Detecting valid names for Template Haskell
95 * *
96 ************************************************************************
97
98 -}
99
100 ----------------------
101 -- External interface
102 ----------------------
103
104 -- | Is this an acceptable variable name?
105 okVarOcc :: String -> Bool
106 okVarOcc str@(c:_)
107 | startsVarId c
108 = okVarIdOcc str
109 | startsVarSym c
110 = okVarSymOcc str
111 okVarOcc _ = False
112
113 -- | Is this an acceptable constructor name?
114 okConOcc :: String -> Bool
115 okConOcc str@(c:_)
116 | startsConId c
117 = okConIdOcc str
118 | startsConSym c
119 = okConSymOcc str
120 | str == "[]"
121 = True
122 okConOcc _ = False
123
124 -- | Is this an acceptable type name?
125 okTcOcc :: String -> Bool
126 okTcOcc "[]" = True
127 okTcOcc "->" = True
128 okTcOcc "~" = True
129 okTcOcc str@(c:_)
130 | startsConId c
131 = okConIdOcc str
132 | startsConSym c
133 = okConSymOcc str
134 | startsVarSym c
135 = okVarSymOcc str
136 okTcOcc _ = False
137
138 -- | Is this an acceptable alphanumeric variable name, assuming it starts
139 -- with an acceptable letter?
140 okVarIdOcc :: String -> Bool
141 okVarIdOcc str = okIdOcc str &&
142 -- admit "_" as a valid identifier. Required to support typed
143 -- holes in Template Haskell. See #10267
144 (str == "_" || not (str `Set.member` reservedIds))
145
146 -- | Is this an acceptable symbolic variable name, assuming it starts
147 -- with an acceptable character?
148 okVarSymOcc :: String -> Bool
149 okVarSymOcc str = all okSymChar str &&
150 not (str `Set.member` reservedOps) &&
151 not (isDashes str)
152
153 -- | Is this an acceptable alphanumeric constructor name, assuming it
154 -- starts with an acceptable letter?
155 okConIdOcc :: String -> Bool
156 okConIdOcc str = okIdOcc str ||
157 is_tuple_name1 True str ||
158 -- Is it a boxed tuple...
159 is_tuple_name1 False str ||
160 -- ...or an unboxed tuple (Trac #12407)...
161 is_sum_name1 str
162 -- ...or an unboxed sum (Trac #12514)?
163 where
164 -- check for tuple name, starting at the beginning
165 is_tuple_name1 True ('(' : rest) = is_tuple_name2 True rest
166 is_tuple_name1 False ('(' : '#' : rest) = is_tuple_name2 False rest
167 is_tuple_name1 _ _ = False
168
169 -- check for tuple tail
170 is_tuple_name2 True ")" = True
171 is_tuple_name2 False "#)" = True
172 is_tuple_name2 boxed (',' : rest) = is_tuple_name2 boxed rest
173 is_tuple_name2 boxed (ws : rest)
174 | isSpace ws = is_tuple_name2 boxed rest
175 is_tuple_name2 _ _ = False
176
177 -- check for sum name, starting at the beginning
178 is_sum_name1 ('(' : '#' : rest) = is_sum_name2 False rest
179 is_sum_name1 _ = False
180
181 -- check for sum tail, only allowing at most one underscore
182 is_sum_name2 _ "#)" = True
183 is_sum_name2 underscore ('|' : rest) = is_sum_name2 underscore rest
184 is_sum_name2 False ('_' : rest) = is_sum_name2 True rest
185 is_sum_name2 underscore (ws : rest)
186 | isSpace ws = is_sum_name2 underscore rest
187 is_sum_name2 _ _ = False
188
189 -- | Is this an acceptable symbolic constructor name, assuming it
190 -- starts with an acceptable character?
191 okConSymOcc :: String -> Bool
192 okConSymOcc ":" = True
193 okConSymOcc str = all okSymChar str &&
194 not (str `Set.member` reservedOps)
195
196 ----------------------
197 -- Internal functions
198 ----------------------
199
200 -- | Is this string an acceptable id, possibly with a suffix of hashes,
201 -- but not worrying about case or clashing with reserved words?
202 okIdOcc :: String -> Bool
203 okIdOcc str
204 = let hashes = dropWhile okIdChar str in
205 all (== '#') hashes -- -XMagicHash allows a suffix of hashes
206 -- of course, `all` says "True" to an empty list
207
208 -- | Is this character acceptable in an identifier (after the first letter)?
209 -- See alexGetByte in Lexer.x
210 okIdChar :: Char -> Bool
211 okIdChar c = case generalCategory c of
212 UppercaseLetter -> True
213 LowercaseLetter -> True
214 TitlecaseLetter -> True
215 ModifierLetter -> True -- See #10196
216 OtherLetter -> True -- See #1103
217 NonSpacingMark -> True -- See #7650
218 DecimalNumber -> True
219 OtherNumber -> True -- See #4373
220 _ -> c == '\'' || c == '_'
221
222 -- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
223 reservedIds :: Set.Set String
224 reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
225 , "do", "else", "foreign", "if", "import", "in"
226 , "infix", "infixl", "infixr", "instance", "let"
227 , "module", "newtype", "of", "then", "type", "where"
228 , "_" ]
229
230 -- | All reserved operators. Taken from section 2.4 of the 2010 Report.
231 reservedOps :: Set.Set String
232 reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
233 , "@", "~", "=>" ]
234
235 -- | Does this string contain only dashes and has at least 2 of them?
236 isDashes :: String -> Bool
237 isDashes ('-' : '-' : rest) = all (== '-') rest
238 isDashes _ = False