d397deaea8af642d77515141532a17d475b08ec4
[ghc.git] / compiler / basicTypes / Lexeme.hs
1 -- (c) The GHC Team
2 --
3 -- Functions to evaluate whether or not a string is a valid identifier.
4 -- There is considerable overlap between the logic here and the logic
5 -- in Lexer.x, but sadly there seems to be no way to merge them.
6
7 module Lexeme (
8 -- * Lexical characteristics of Haskell names
9
10 -- | Use these functions to figure what kind of name a 'FastString'
11 -- represents; these functions do /not/ check that the identifier
12 -- is valid.
13
14 isLexCon, isLexVar, isLexId, isLexSym,
15 isLexConId, isLexConSym, isLexVarId, isLexVarSym,
16 startsVarSym, startsVarId, startsConSym, startsConId,
17
18 -- * Validating identifiers
19
20 -- | These functions (working over plain old 'String's) check
21 -- to make sure that the identifier is valid.
22 okVarOcc, okConOcc, okTcOcc,
23 okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
24
25 -- Some of the exports above are not used within GHC, but may
26 -- be of value to GHC API users.
27
28 ) where
29
30 import GhcPrelude
31
32 import FastString
33
34 import Data.Char
35 import qualified Data.Set as Set
36
37 import GHC.Lexeme
38
39 {-
40
41 ************************************************************************
42 * *
43 Lexical categories
44 * *
45 ************************************************************************
46
47 These functions test strings to see if they fit the lexical categories
48 defined in the Haskell report.
49
50 Note [Classification of generated names]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
52
53 Some names generated for internal use can show up in debugging output,
54 e.g. when using -ddump-simpl. These generated names start with a $
55 but should still be pretty-printed using prefix notation. We make sure
56 this is the case in isLexVarSym by only classifying a name as a symbol
57 if all its characters are symbols, not just its first one.
58 -}
59
60 isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
61 isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
62
63 isLexCon cs = isLexConId cs || isLexConSym cs
64 isLexVar cs = isLexVarId cs || isLexVarSym cs
65
66 isLexId cs = isLexConId cs || isLexVarId cs
67 isLexSym cs = isLexConSym cs || isLexVarSym cs
68
69 -------------
70 isLexConId cs -- Prefix type or data constructors
71 | nullFS cs = False -- e.g. "Foo", "[]", "(,)"
72 | cs == (fsLit "[]") = True
73 | otherwise = startsConId (headFS cs)
74
75 isLexVarId cs -- Ordinary prefix identifiers
76 | nullFS cs = False -- e.g. "x", "_x"
77 | otherwise = startsVarId (headFS cs)
78
79 isLexConSym cs -- Infix type or data constructors
80 | nullFS cs = False -- e.g. ":-:", ":", "->"
81 | cs == (fsLit "->") = True
82 | otherwise = startsConSym (headFS cs)
83
84 isLexVarSym fs -- Infix identifiers e.g. "+"
85 | fs == (fsLit "~R#") = True
86 | otherwise
87 = case (if nullFS fs then [] else unpackFS fs) of
88 [] -> False
89 (c:cs) -> startsVarSym c && all isVarSymChar cs
90 -- See Note [Classification of generated names]
91
92 {-
93
94 ************************************************************************
95 * *
96 Detecting valid names for Template Haskell
97 * *
98 ************************************************************************
99
100 -}
101
102 ----------------------
103 -- External interface
104 ----------------------
105
106 -- | Is this an acceptable variable name?
107 okVarOcc :: String -> Bool
108 okVarOcc str@(c:_)
109 | startsVarId c
110 = okVarIdOcc str
111 | startsVarSym c
112 = okVarSymOcc str
113 okVarOcc _ = False
114
115 -- | Is this an acceptable constructor name?
116 okConOcc :: String -> Bool
117 okConOcc str@(c:_)
118 | startsConId c
119 = okConIdOcc str
120 | startsConSym c
121 = okConSymOcc str
122 | str == "[]"
123 = True
124 okConOcc _ = False
125
126 -- | Is this an acceptable type name?
127 okTcOcc :: String -> Bool
128 okTcOcc "[]" = True
129 okTcOcc "->" = True
130 okTcOcc "~" = True
131 okTcOcc str@(c:_)
132 | startsConId c
133 = okConIdOcc str
134 | startsConSym c
135 = okConSymOcc str
136 | startsVarSym c
137 = okVarSymOcc str
138 okTcOcc _ = False
139
140 -- | Is this an acceptable alphanumeric variable name, assuming it starts
141 -- with an acceptable letter?
142 okVarIdOcc :: String -> Bool
143 okVarIdOcc str = okIdOcc str &&
144 -- admit "_" as a valid identifier. Required to support typed
145 -- holes in Template Haskell. See #10267
146 (str == "_" || not (str `Set.member` reservedIds))
147
148 -- | Is this an acceptable symbolic variable name, assuming it starts
149 -- with an acceptable character?
150 okVarSymOcc :: String -> Bool
151 okVarSymOcc str = all okSymChar str &&
152 not (str `Set.member` reservedOps) &&
153 not (isDashes str)
154
155 -- | Is this an acceptable alphanumeric constructor name, assuming it
156 -- starts with an acceptable letter?
157 okConIdOcc :: String -> Bool
158 okConIdOcc str = okIdOcc str ||
159 is_tuple_name1 True str ||
160 -- Is it a boxed tuple...
161 is_tuple_name1 False str ||
162 -- ...or an unboxed tuple (Trac #12407)...
163 is_sum_name1 str
164 -- ...or an unboxed sum (Trac #12514)?
165 where
166 -- check for tuple name, starting at the beginning
167 is_tuple_name1 True ('(' : rest) = is_tuple_name2 True rest
168 is_tuple_name1 False ('(' : '#' : rest) = is_tuple_name2 False rest
169 is_tuple_name1 _ _ = False
170
171 -- check for tuple tail
172 is_tuple_name2 True ")" = True
173 is_tuple_name2 False "#)" = True
174 is_tuple_name2 boxed (',' : rest) = is_tuple_name2 boxed rest
175 is_tuple_name2 boxed (ws : rest)
176 | isSpace ws = is_tuple_name2 boxed rest
177 is_tuple_name2 _ _ = False
178
179 -- check for sum name, starting at the beginning
180 is_sum_name1 ('(' : '#' : rest) = is_sum_name2 False rest
181 is_sum_name1 _ = False
182
183 -- check for sum tail, only allowing at most one underscore
184 is_sum_name2 _ "#)" = True
185 is_sum_name2 underscore ('|' : rest) = is_sum_name2 underscore rest
186 is_sum_name2 False ('_' : rest) = is_sum_name2 True rest
187 is_sum_name2 underscore (ws : rest)
188 | isSpace ws = is_sum_name2 underscore rest
189 is_sum_name2 _ _ = False
190
191 -- | Is this an acceptable symbolic constructor name, assuming it
192 -- starts with an acceptable character?
193 okConSymOcc :: String -> Bool
194 okConSymOcc ":" = True
195 okConSymOcc str = all okSymChar str &&
196 not (str `Set.member` reservedOps)
197
198 ----------------------
199 -- Internal functions
200 ----------------------
201
202 -- | Is this string an acceptable id, possibly with a suffix of hashes,
203 -- but not worrying about case or clashing with reserved words?
204 okIdOcc :: String -> Bool
205 okIdOcc str
206 = let hashes = dropWhile okIdChar str in
207 all (== '#') hashes -- -XMagicHash allows a suffix of hashes
208 -- of course, `all` says "True" to an empty list
209
210 -- | Is this character acceptable in an identifier (after the first letter)?
211 -- See alexGetByte in Lexer.x
212 okIdChar :: Char -> Bool
213 okIdChar c = case generalCategory c of
214 UppercaseLetter -> True
215 LowercaseLetter -> True
216 TitlecaseLetter -> True
217 ModifierLetter -> True -- See #10196
218 OtherLetter -> True -- See #1103
219 NonSpacingMark -> True -- See #7650
220 DecimalNumber -> True
221 OtherNumber -> True -- See #4373
222 _ -> c == '\'' || c == '_'
223
224 -- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
225 reservedIds :: Set.Set String
226 reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
227 , "do", "else", "foreign", "if", "import", "in"
228 , "infix", "infixl", "infixr", "instance", "let"
229 , "module", "newtype", "of", "then", "type", "where"
230 , "_" ]
231
232 -- | All reserved operators. Taken from section 2.4 of the 2010 Report.
233 reservedOps :: Set.Set String
234 reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
235 , "@", "~", "=>" ]
236
237 -- | Does this string contain only dashes and has at least 2 of them?
238 isDashes :: String -> Bool
239 isDashes ('-' : '-' : rest) = all (== '-') rest
240 isDashes _ = False