Fail fast on pattern synonyms
[ghc.git] / compiler / parser / Ctype.hs
1 -- Character classification
2 {-# LANGUAGE CPP #-}
3 module Ctype
4 ( is_ident -- Char# -> Bool
5 , is_symbol -- Char# -> Bool
6 , is_any -- Char# -> Bool
7 , is_space -- Char# -> Bool
8 , is_lower -- Char# -> Bool
9 , is_upper -- Char# -> Bool
10 , is_digit -- Char# -> Bool
11 , is_alphanum -- Char# -> Bool
12
13 , is_decdigit, is_hexdigit, is_octdigit, is_bindigit
14 , hexDigit, octDecDigit
15 ) where
16
17 #include "HsVersions.h"
18
19 import GhcPrelude
20
21 import Data.Int ( Int32 )
22 import Data.Bits ( Bits((.&.)) )
23 import Data.Char ( ord, chr )
24 import Panic
25
26 -- Bit masks
27
28 cIdent, cSymbol, cAny, cSpace, cLower, cUpper, cDigit :: Int
29 cIdent = 1
30 cSymbol = 2
31 cAny = 4
32 cSpace = 8
33 cLower = 16
34 cUpper = 32
35 cDigit = 64
36
37 -- | The predicates below look costly, but aren't, GHC+GCC do a great job
38 -- at the big case below.
39
40 {-# INLINE is_ctype #-}
41 is_ctype :: Int -> Char -> Bool
42 is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32)
43
44 is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit,
45 is_alphanum :: Char -> Bool
46 is_ident = is_ctype cIdent
47 is_symbol = is_ctype cSymbol
48 is_any = is_ctype cAny
49 is_space = is_ctype cSpace
50 is_lower = is_ctype cLower
51 is_upper = is_ctype cUpper
52 is_digit = is_ctype cDigit
53 is_alphanum = is_ctype (cLower+cUpper+cDigit)
54
55 -- Utils
56
57 hexDigit :: Char -> Int
58 hexDigit c | is_decdigit c = ord c - ord '0'
59 | otherwise = ord (to_lower c) - ord 'a' + 10
60
61 octDecDigit :: Char -> Int
62 octDecDigit c = ord c - ord '0'
63
64 is_decdigit :: Char -> Bool
65 is_decdigit c
66 = c >= '0' && c <= '9'
67
68 is_hexdigit :: Char -> Bool
69 is_hexdigit c
70 = is_decdigit c
71 || (c >= 'a' && c <= 'f')
72 || (c >= 'A' && c <= 'F')
73
74 is_octdigit :: Char -> Bool
75 is_octdigit c = c >= '0' && c <= '7'
76
77 is_bindigit :: Char -> Bool
78 is_bindigit c = c == '0' || c == '1'
79
80 to_lower :: Char -> Char
81 to_lower c
82 | c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
83 | otherwise = c
84
85 -- | We really mean .|. instead of + below, but GHC currently doesn't do
86 -- any constant folding with bitops. *sigh*
87
88 charType :: Char -> Int
89 charType c = case c of
90 '\0' -> 0 -- \000
91 '\1' -> 0 -- \001
92 '\2' -> 0 -- \002
93 '\3' -> 0 -- \003
94 '\4' -> 0 -- \004
95 '\5' -> 0 -- \005
96 '\6' -> 0 -- \006
97 '\7' -> 0 -- \007
98 '\8' -> 0 -- \010
99 '\9' -> cSpace -- \t (not allowed in strings, so !cAny)
100 '\10' -> cSpace -- \n (ditto)
101 '\11' -> cSpace -- \v (ditto)
102 '\12' -> cSpace -- \f (ditto)
103 '\13' -> cSpace -- ^M (ditto)
104 '\14' -> 0 -- \016
105 '\15' -> 0 -- \017
106 '\16' -> 0 -- \020
107 '\17' -> 0 -- \021
108 '\18' -> 0 -- \022
109 '\19' -> 0 -- \023
110 '\20' -> 0 -- \024
111 '\21' -> 0 -- \025
112 '\22' -> 0 -- \026
113 '\23' -> 0 -- \027
114 '\24' -> 0 -- \030
115 '\25' -> 0 -- \031
116 '\26' -> 0 -- \032
117 '\27' -> 0 -- \033
118 '\28' -> 0 -- \034
119 '\29' -> 0 -- \035
120 '\30' -> 0 -- \036
121 '\31' -> 0 -- \037
122 '\32' -> cAny + cSpace --
123 '\33' -> cAny + cSymbol -- !
124 '\34' -> cAny -- "
125 '\35' -> cAny + cSymbol -- #
126 '\36' -> cAny + cSymbol -- $
127 '\37' -> cAny + cSymbol -- %
128 '\38' -> cAny + cSymbol -- &
129 '\39' -> cAny + cIdent -- '
130 '\40' -> cAny -- (
131 '\41' -> cAny -- )
132 '\42' -> cAny + cSymbol -- *
133 '\43' -> cAny + cSymbol -- +
134 '\44' -> cAny -- ,
135 '\45' -> cAny + cSymbol -- -
136 '\46' -> cAny + cSymbol -- .
137 '\47' -> cAny + cSymbol -- /
138 '\48' -> cAny + cIdent + cDigit -- 0
139 '\49' -> cAny + cIdent + cDigit -- 1
140 '\50' -> cAny + cIdent + cDigit -- 2
141 '\51' -> cAny + cIdent + cDigit -- 3
142 '\52' -> cAny + cIdent + cDigit -- 4
143 '\53' -> cAny + cIdent + cDigit -- 5
144 '\54' -> cAny + cIdent + cDigit -- 6
145 '\55' -> cAny + cIdent + cDigit -- 7
146 '\56' -> cAny + cIdent + cDigit -- 8
147 '\57' -> cAny + cIdent + cDigit -- 9
148 '\58' -> cAny + cSymbol -- :
149 '\59' -> cAny -- ;
150 '\60' -> cAny + cSymbol -- <
151 '\61' -> cAny + cSymbol -- =
152 '\62' -> cAny + cSymbol -- >
153 '\63' -> cAny + cSymbol -- ?
154 '\64' -> cAny + cSymbol -- @
155 '\65' -> cAny + cIdent + cUpper -- A
156 '\66' -> cAny + cIdent + cUpper -- B
157 '\67' -> cAny + cIdent + cUpper -- C
158 '\68' -> cAny + cIdent + cUpper -- D
159 '\69' -> cAny + cIdent + cUpper -- E
160 '\70' -> cAny + cIdent + cUpper -- F
161 '\71' -> cAny + cIdent + cUpper -- G
162 '\72' -> cAny + cIdent + cUpper -- H
163 '\73' -> cAny + cIdent + cUpper -- I
164 '\74' -> cAny + cIdent + cUpper -- J
165 '\75' -> cAny + cIdent + cUpper -- K
166 '\76' -> cAny + cIdent + cUpper -- L
167 '\77' -> cAny + cIdent + cUpper -- M
168 '\78' -> cAny + cIdent + cUpper -- N
169 '\79' -> cAny + cIdent + cUpper -- O
170 '\80' -> cAny + cIdent + cUpper -- P
171 '\81' -> cAny + cIdent + cUpper -- Q
172 '\82' -> cAny + cIdent + cUpper -- R
173 '\83' -> cAny + cIdent + cUpper -- S
174 '\84' -> cAny + cIdent + cUpper -- T
175 '\85' -> cAny + cIdent + cUpper -- U
176 '\86' -> cAny + cIdent + cUpper -- V
177 '\87' -> cAny + cIdent + cUpper -- W
178 '\88' -> cAny + cIdent + cUpper -- X
179 '\89' -> cAny + cIdent + cUpper -- Y
180 '\90' -> cAny + cIdent + cUpper -- Z
181 '\91' -> cAny -- [
182 '\92' -> cAny + cSymbol -- backslash
183 '\93' -> cAny -- ]
184 '\94' -> cAny + cSymbol -- ^
185 '\95' -> cAny + cIdent + cLower -- _
186 '\96' -> cAny -- `
187 '\97' -> cAny + cIdent + cLower -- a
188 '\98' -> cAny + cIdent + cLower -- b
189 '\99' -> cAny + cIdent + cLower -- c
190 '\100' -> cAny + cIdent + cLower -- d
191 '\101' -> cAny + cIdent + cLower -- e
192 '\102' -> cAny + cIdent + cLower -- f
193 '\103' -> cAny + cIdent + cLower -- g
194 '\104' -> cAny + cIdent + cLower -- h
195 '\105' -> cAny + cIdent + cLower -- i
196 '\106' -> cAny + cIdent + cLower -- j
197 '\107' -> cAny + cIdent + cLower -- k
198 '\108' -> cAny + cIdent + cLower -- l
199 '\109' -> cAny + cIdent + cLower -- m
200 '\110' -> cAny + cIdent + cLower -- n
201 '\111' -> cAny + cIdent + cLower -- o
202 '\112' -> cAny + cIdent + cLower -- p
203 '\113' -> cAny + cIdent + cLower -- q
204 '\114' -> cAny + cIdent + cLower -- r
205 '\115' -> cAny + cIdent + cLower -- s
206 '\116' -> cAny + cIdent + cLower -- t
207 '\117' -> cAny + cIdent + cLower -- u
208 '\118' -> cAny + cIdent + cLower -- v
209 '\119' -> cAny + cIdent + cLower -- w
210 '\120' -> cAny + cIdent + cLower -- x
211 '\121' -> cAny + cIdent + cLower -- y
212 '\122' -> cAny + cIdent + cLower -- z
213 '\123' -> cAny -- {
214 '\124' -> cAny + cSymbol -- |
215 '\125' -> cAny -- }
216 '\126' -> cAny + cSymbol -- ~
217 '\127' -> 0 -- \177
218 _ -> panic ("charType: " ++ show c)