Remove Hugs98 specific code
[packages/base.git] / Data / Char.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE CPP, NoImplicitPrelude #-}
3
4 -----------------------------------------------------------------------------
5 -- |
6 -- Module : Data.Char
7 -- Copyright : (c) The University of Glasgow 2001
8 -- License : BSD-style (see the file libraries/base/LICENSE)
9 --
10 -- Maintainer : libraries@haskell.org
11 -- Stability : stable
12 -- Portability : portable
13 --
14 -- The Char type and associated operations.
15 --
16 -----------------------------------------------------------------------------
17
18 module Data.Char
19 (
20 Char
21
22 -- * Character classification
23 -- | Unicode characters are divided into letters, numbers, marks,
24 -- punctuation, symbols, separators (including spaces) and others
25 -- (including control characters).
26 , isControl, isSpace
27 , isLower, isUpper, isAlpha, isAlphaNum, isPrint
28 , isDigit, isOctDigit, isHexDigit
29 , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
30
31 -- ** Subranges
32 , isAscii, isLatin1
33 , isAsciiUpper, isAsciiLower
34
35 -- ** Unicode general categories
36 , GeneralCategory(..), generalCategory
37
38 -- * Case conversion
39 , toUpper, toLower, toTitle
40
41 -- * Single digit characters
42 , digitToInt
43 , intToDigit
44
45 -- * Numeric representations
46 , ord
47 , chr
48
49 -- * String representations
50 , showLitChar
51 , lexLitChar
52 , readLitChar
53 ) where
54
55 #ifdef __GLASGOW_HASKELL__
56 import GHC.Base
57 import GHC.Arr (Ix)
58 import GHC.Char
59 import GHC.Real (fromIntegral)
60 import GHC.Show
61 import GHC.Read (Read, readLitChar, lexLitChar)
62 import GHC.Unicode
63 import GHC.Num
64 import GHC.Enum
65 #endif
66
-- | Map a hexadecimal digit character to its numeric value.
--
-- Accepts @\'0\'@..@\'9\'@ (yielding 0..9) as well as @\'a\'@..@\'f\'@ and
-- @\'A\'@..@\'F\'@ (both yielding 10..15).  Any other character is rejected
-- by calling 'error' with a message that includes the offending character.
digitToInt :: Char -> Int
digitToInt c
  | isDigit c      = offsetFrom '0'
  | within 'a' 'f' = offsetFrom 'a' + 10
  | within 'A' 'F' = offsetFrom 'A' + 10
  | otherwise      = error ("Char.digitToInt: not a digit " ++ show c)
  where
    -- Code point of the argument, computed once and shared by all guards.
    code = ord c
    -- Distance of the argument from the first character of its range.
    offsetFrom base = code - ord base
    -- Inclusive range test on the argument.
    within lo hi = lo <= c && c <= hi
77
78 #ifndef __GLASGOW_HASKELL__
-- | Selects the lower-case ASCII letters, i.e. characters in the
-- inclusive range @\'a\'@..@\'z\'@.
isAsciiLower :: Char -> Bool
isAsciiLower c = 'a' <= c && c <= 'z'

-- | Selects the upper-case ASCII letters, i.e. characters in the
-- inclusive range @\'A\'@..@\'Z\'@.
isAsciiUpper :: Char -> Bool
isAsciiUpper c = 'A' <= c && c <= 'Z'
82 #endif
83
-- | The Unicode general categories (column 2 of the UnicodeData table).
--
-- Constructors are declared in the order in which the categories are
-- listed in the Unicode standard.  The derived 'Enum', 'Ord', 'Bounded'
-- and 'Ix' instances all follow this declaration order, so the
-- constructors must not be reordered.
data GeneralCategory
  = -- | Lu: Letter, Uppercase
    UppercaseLetter
  | -- | Ll: Letter, Lowercase
    LowercaseLetter
  | -- | Lt: Letter, Titlecase
    TitlecaseLetter
  | -- | Lm: Letter, Modifier
    ModifierLetter
  | -- | Lo: Letter, Other
    OtherLetter
  | -- | Mn: Mark, Non-Spacing
    NonSpacingMark
  | -- | Mc: Mark, Spacing Combining
    SpacingCombiningMark
  | -- | Me: Mark, Enclosing
    EnclosingMark
  | -- | Nd: Number, Decimal
    DecimalNumber
  | -- | Nl: Number, Letter
    LetterNumber
  | -- | No: Number, Other
    OtherNumber
  | -- | Pc: Punctuation, Connector
    ConnectorPunctuation
  | -- | Pd: Punctuation, Dash
    DashPunctuation
  | -- | Ps: Punctuation, Open
    OpenPunctuation
  | -- | Pe: Punctuation, Close
    ClosePunctuation
  | -- | Pi: Punctuation, Initial quote
    InitialQuote
  | -- | Pf: Punctuation, Final quote
    FinalQuote
  | -- | Po: Punctuation, Other
    OtherPunctuation
  | -- | Sm: Symbol, Math
    MathSymbol
  | -- | Sc: Symbol, Currency
    CurrencySymbol
  | -- | Sk: Symbol, Modifier
    ModifierSymbol
  | -- | So: Symbol, Other
    OtherSymbol
  | -- | Zs: Separator, Space
    Space
  | -- | Zl: Separator, Line
    LineSeparator
  | -- | Zp: Separator, Paragraph
    ParagraphSeparator
  | -- | Cc: Other, Control
    Control
  | -- | Cf: Other, Format
    Format
  | -- | Cs: Other, Surrogate
    Surrogate
  | -- | Co: Other, Private Use
    PrivateUse
  | -- | Cn: Other, Not Assigned
    NotAssigned
  deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
119
-- | The Unicode general category of the character.
--
-- The character's code point is handed to @wgencat@, and the number it
-- returns is turned into a constructor with 'toEnum'; that number is
-- therefore interpreted as a position in the declaration order of
-- 'GeneralCategory'.
generalCategory :: Char -> GeneralCategory
#if defined(__GLASGOW_HASKELL__)
generalCategory c = toEnum categoryIndex
  where
    -- Code point converted to the argument type expected by wgencat.
    codePoint     = fromIntegral (ord c)
    -- wgencat's result converted to the Int expected by toEnum.
    categoryIndex = fromIntegral (wgencat codePoint)
#endif
125
126 -- derived character classifiers
127
-- | Selects alphabetic Unicode characters: lower-case, upper-case and
-- title-case letters, together with letters of caseless scripts and
-- modifier letters.
-- This function is equivalent to 'Data.Char.isAlpha'.
isLetter :: Char -> Bool
isLetter c = letterCategory (generalCategory c)
  where
    -- True exactly for the five letter (L*) categories.
    letterCategory UppercaseLetter = True
    letterCategory LowercaseLetter = True
    letterCategory TitlecaseLetter = True
    letterCategory ModifierLetter  = True
    letterCategory OtherLetter     = True
    letterCategory _               = False
139
-- | Selects Unicode mark characters, such as accents, which combine
-- with the letters that precede them.
isMark :: Char -> Bool
isMark c = markCategory (generalCategory c)
  where
    -- True exactly for the three mark (M*) categories.
    markCategory NonSpacingMark       = True
    markCategory SpacingCombiningMark = True
    markCategory EnclosingMark        = True
    markCategory _                    = False
148
-- | Selects Unicode numeric characters: decimal digits of any script,
-- letter-like numbers such as Roman numerals, and other numeric forms.
isNumber :: Char -> Bool
isNumber c = numberCategory (generalCategory c)
  where
    -- True exactly for the three number (N*) categories.
    numberCategory DecimalNumber = True
    numberCategory LetterNumber  = True
    numberCategory OtherNumber   = True
    numberCategory _             = False
157
-- | Selects Unicode punctuation characters: connectors, dashes,
-- opening and closing brackets, initial and final quotes, and other
-- punctuation.
isPunctuation :: Char -> Bool
isPunctuation c = punctuationCategory (generalCategory c)
  where
    -- True exactly for the seven punctuation (P*) categories.
    punctuationCategory ConnectorPunctuation = True
    punctuationCategory DashPunctuation      = True
    punctuationCategory OpenPunctuation      = True
    punctuationCategory ClosePunctuation     = True
    punctuationCategory InitialQuote         = True
    punctuationCategory FinalQuote           = True
    punctuationCategory OtherPunctuation     = True
    punctuationCategory _                    = False
170
-- | Selects Unicode symbol characters: mathematical, currency,
-- modifier and other symbols.
isSymbol :: Char -> Bool
isSymbol c = symbolCategory (generalCategory c)
  where
    -- True exactly for the four symbol (S*) categories.
    symbolCategory MathSymbol     = True
    symbolCategory CurrencySymbol = True
    symbolCategory ModifierSymbol = True
    symbolCategory OtherSymbol    = True
    symbolCategory _              = False
180
-- | Selects Unicode separator characters: spaces, line separators and
-- paragraph separators.
isSeparator :: Char -> Bool
isSeparator c = separatorCategory (generalCategory c)
  where
    -- True exactly for the three separator (Z*) categories.
    separatorCategory Space              = True
    separatorCategory LineSeparator      = True
    separatorCategory ParagraphSeparator = True
    separatorCategory _                  = False
188