Make sum and product INLINABLE
[packages/base.git] / Data / Char.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE CPP, NoImplicitPrelude #-}
3
4 -----------------------------------------------------------------------------
5 -- |
6 -- Module : Data.Char
7 -- Copyright : (c) The University of Glasgow 2001
8 -- License : BSD-style (see the file libraries/base/LICENSE)
9 --
10 -- Maintainer : libraries@haskell.org
11 -- Stability : stable
12 -- Portability : portable
13 --
14 -- The Char type and associated operations.
15 --
16 -----------------------------------------------------------------------------
17
18 module Data.Char
19 (
20 Char
21
22 -- * Character classification
23 -- | Unicode characters are divided into letters, numbers, marks,
24 -- punctuation, symbols, separators (including spaces) and others
25 -- (including control characters).
26 , isControl, isSpace
27 , isLower, isUpper, isAlpha, isAlphaNum, isPrint
28 , isDigit, isOctDigit, isHexDigit
29 , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
30
31 -- ** Subranges
32 , isAscii, isLatin1
33 , isAsciiUpper, isAsciiLower
34
35 -- ** Unicode general categories
36 , GeneralCategory(..), generalCategory
37
38 -- * Case conversion
39 , toUpper, toLower, toTitle
40
41 -- * Single digit characters
42 , digitToInt
43 , intToDigit
44
45 -- * Numeric representations
46 , ord
47 , chr
48
49 -- * String representations
50 , showLitChar
51 , lexLitChar
52 , readLitChar
53 ) where
54
55 #ifdef __GLASGOW_HASKELL__
56 import GHC.Base
57 import GHC.Arr (Ix)
58 import GHC.Char
59 import GHC.Real (fromIntegral)
60 import GHC.Show
61 import GHC.Read (Read, readLitChar, lexLitChar)
62 import GHC.Unicode
63 import GHC.Num
64 import GHC.Enum
65 #endif
66
67 #ifdef __HUGS__
68 import Hugs.Prelude (Ix)
69 import Hugs.Char
70 #endif
71
72 #ifdef __NHC__
73 import Prelude
74 import Prelude(Char,String)
75 import Char
76 import Ix
77 import NHC.FFI (CInt)
78 foreign import ccall unsafe "WCsubst.h u_gencat" wgencat :: CInt -> CInt
79 #endif
80
-- | Map a single hexadecimal digit 'Char' to its numeric value as an 'Int'.
-- Both upper- and lower-case letters are accepted, i.e. the valid inputs
-- are @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@ and @\'A\'@..@\'F\'@.
-- For any other character the function fails with a call to 'error'.
digitToInt :: Char -> Int
digitToInt c
  | isDigit c      = offsetFrom '0'
  | within 'a' 'f' = offsetFrom 'a' + 10
  | within 'A' 'F' = offsetFrom 'A' + 10
  | otherwise      = error ("Char.digitToInt: not a digit " ++ show c) -- sigh
  where
    -- Distance of the argument from the first character of its range.
    offsetFrom base = ord c - ord base
    -- Inclusive range test on the argument.
    within lo hi = lo <= c && c <= hi
91
92 #ifndef __GLASGOW_HASKELL__
-- | 'isAsciiLower' selects the ASCII lower-case letters @\'a\'@..@\'z\'@;
-- 'isAsciiUpper' selects the ASCII upper-case letters @\'A\'@..@\'Z\'@.
isAsciiLower, isAsciiUpper :: Char -> Bool
isAsciiUpper ch = 'A' <= ch && ch <= 'Z'
isAsciiLower ch = 'a' <= ch && ch <= 'z'
96 #endif
97
-- | Unicode General Categories (column 2 of the UnicodeData table).
-- The constructors appear in the order in which the categories are listed
-- in the Unicode standard; the derived 'Enum', 'Ord', 'Bounded' and 'Ix'
-- instances follow that order.
data GeneralCategory
  -- Letters
  = UppercaseLetter       -- ^ @Lu@: Letter, Uppercase
  | LowercaseLetter       -- ^ @Ll@: Letter, Lowercase
  | TitlecaseLetter       -- ^ @Lt@: Letter, Titlecase
  | ModifierLetter        -- ^ @Lm@: Letter, Modifier
  | OtherLetter           -- ^ @Lo@: Letter, Other
  -- Marks
  | NonSpacingMark        -- ^ @Mn@: Mark, Non-Spacing
  | SpacingCombiningMark  -- ^ @Mc@: Mark, Spacing Combining
  | EnclosingMark         -- ^ @Me@: Mark, Enclosing
  -- Numbers
  | DecimalNumber         -- ^ @Nd@: Number, Decimal
  | LetterNumber          -- ^ @Nl@: Number, Letter
  | OtherNumber           -- ^ @No@: Number, Other
  -- Punctuation
  | ConnectorPunctuation  -- ^ @Pc@: Punctuation, Connector
  | DashPunctuation       -- ^ @Pd@: Punctuation, Dash
  | OpenPunctuation       -- ^ @Ps@: Punctuation, Open
  | ClosePunctuation      -- ^ @Pe@: Punctuation, Close
  | InitialQuote          -- ^ @Pi@: Punctuation, Initial quote
  | FinalQuote            -- ^ @Pf@: Punctuation, Final quote
  | OtherPunctuation      -- ^ @Po@: Punctuation, Other
  -- Symbols
  | MathSymbol            -- ^ @Sm@: Symbol, Math
  | CurrencySymbol        -- ^ @Sc@: Symbol, Currency
  | ModifierSymbol        -- ^ @Sk@: Symbol, Modifier
  | OtherSymbol           -- ^ @So@: Symbol, Other
  -- Separators
  | Space                 -- ^ @Zs@: Separator, Space
  | LineSeparator         -- ^ @Zl@: Separator, Line
  | ParagraphSeparator    -- ^ @Zp@: Separator, Paragraph
  -- Others
  | Control               -- ^ @Cc@: Other, Control
  | Format                -- ^ @Cf@: Other, Format
  | Surrogate             -- ^ @Cs@: Other, Surrogate
  | PrivateUse            -- ^ @Co@: Other, Private Use
  | NotAssigned           -- ^ @Cn@: Other, Not Assigned
  deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
133
-- | The Unicode general category of the character.
--
-- The category number obtained from the implementation-specific primitive
-- is converted to a 'GeneralCategory' with 'toEnum'.
generalCategory :: Char -> GeneralCategory
#if defined(__GLASGOW_HASKELL__) || defined(__NHC__)
generalCategory c = toEnum (fromIntegral categoryCode)
  where
    -- Raw category number reported by @wgencat@ for the code point of @c@.
    categoryCode = wgencat (fromIntegral (ord c))
#endif
#ifdef __HUGS__
generalCategory c = toEnum $ primUniGenCat c
#endif
142
143 -- derived character classifiers
144
-- | Selects alphabetic Unicode characters: lower-case, upper-case and
-- title-case letters, plus letters of caseless scripts and modifier letters.
-- This function is equivalent to 'Data.Char.isAlpha'.
isLetter :: Char -> Bool
isLetter c = letterCategory (generalCategory c)
  where
    -- True exactly for the five letter categories.
    letterCategory UppercaseLetter = True
    letterCategory LowercaseLetter = True
    letterCategory TitlecaseLetter = True
    letterCategory ModifierLetter  = True
    letterCategory OtherLetter     = True
    letterCategory _               = False
156
-- | Selects Unicode mark characters, such as accents and similar signs
-- that combine with the letters preceding them.
isMark :: Char -> Bool
isMark c = markCategory (generalCategory c)
  where
    -- True exactly for the three mark categories.
    markCategory NonSpacingMark       = True
    markCategory SpacingCombiningMark = True
    markCategory EnclosingMark        = True
    markCategory _                    = False
165
-- | Selects Unicode numeric characters: digits from the various scripts,
-- Roman numerals, and so on.
isNumber :: Char -> Bool
isNumber c = numberCategory (generalCategory c)
  where
    -- True exactly for the three number categories.
    numberCategory DecimalNumber = True
    numberCategory LetterNumber  = True
    numberCategory OtherNumber   = True
    numberCategory _             = False
174
-- | Selects Unicode punctuation characters, which covers the various
-- kinds of connectors, dashes, brackets and quotes.
isPunctuation :: Char -> Bool
isPunctuation c = punctuationCategory (generalCategory c)
  where
    -- True exactly for the seven punctuation categories.
    punctuationCategory ConnectorPunctuation = True
    punctuationCategory DashPunctuation      = True
    punctuationCategory OpenPunctuation      = True
    punctuationCategory ClosePunctuation     = True
    punctuationCategory InitialQuote         = True
    punctuationCategory FinalQuote           = True
    punctuationCategory OtherPunctuation     = True
    punctuationCategory _                    = False
187
-- | Selects Unicode symbol characters, among them mathematical and
-- currency symbols.
isSymbol :: Char -> Bool
isSymbol c = symbolCategory (generalCategory c)
  where
    -- True exactly for the four symbol categories.
    symbolCategory MathSymbol     = True
    symbolCategory CurrencySymbol = True
    symbolCategory ModifierSymbol = True
    symbolCategory OtherSymbol    = True
    symbolCategory _              = False
197
-- | Selects Unicode space and separator characters, i.e. those in the
-- space, line separator and paragraph separator categories.
isSeparator :: Char -> Bool
isSeparator c = separatorCategory (generalCategory c)
  where
    -- True exactly for the three separator categories.
    separatorCategory Space              = True
    separatorCategory LineSeparator      = True
    separatorCategory ParagraphSeparator = True
    separatorCategory _                  = False
205
206 #ifdef __NHC__
-- Dummy implementation: title-casing simply delegates to 'toUpper'.
toTitle :: Char -> Char
toTitle c = toUpper c
210 #endif
211