--------------------------------
[haskell-report.git] / report / lib-code / Char.hs
1 module Char (
2 isAscii, isLatin1, isControl, isPrint, isSpace, isUpper, isLower,
3 isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum,
4 digitToInt, intToDigit,
5 toUpper, toLower,
6 ord, chr,
7 readLitChar, showLitChar, lexLitChar,
8
9 -- ...and what the Prelude exports
10 Char, String
11 ) where
12
13 import Array -- Used for character name table.
14 import Numeric (readDec, readOct, lexDigits, readHex)
15 import UnicodePrims -- Source of primitive Unicode functions.
16
17 -- Character-testing operations
18 isAscii, isLatin1, isControl, isPrint, isSpace, isUpper, isLower,
19 isAlpha, isDigit, isOctDigit, isHexDigit, isAlphaNum :: Char -> Bool
20
21 isAscii c = c < '\x80'
22
23 isLatin1 c = c <= '\xff'
24
25 isControl c = c < ' ' || c >= '\DEL' && c <= '\x9f'
26
27 isPrint = primUnicodeIsPrint
28
29 isSpace c = c `elem` " \t\n\r\f\v\xA0"
30 -- Only Latin-1 spaces recognized
31
32 isUpper = primUnicodeIsUpper -- 'A'..'Z'
33
34 isLower = primUnicodeIsLower -- 'a'..'z'
35
36 isAlpha c = isUpper c || isLower c
37
38 isDigit c = c >= '0' && c <= '9'
39
40 isOctDigit c = c >= '0' && c <= '7'
41
42 isHexDigit c = isDigit c || c >= 'A' && c <= 'F' ||
43 c >= 'a' && c <= 'f'
44
45 isAlphaNum = primUnicodeIsAlphaNum
46
47
48 -- Digit conversion operations
49 digitToInt :: Char -> Int
50 digitToInt c
51 | isDigit c = fromEnum c - fromEnum '0'
52 | c >= 'a' && c <= 'f' = fromEnum c - fromEnum 'a' + 10
53 | c >= 'A' && c <= 'F' = fromEnum c - fromEnum 'A' + 10
54 | otherwise = error "Char.digitToInt: not a digit"
55
56 intToDigit :: Int -> Char
57 intToDigit i
58 | i >= 0 && i <= 9 = toEnum (fromEnum '0' + i)
59 | i >= 10 && i <= 15 = toEnum (fromEnum 'a' + i - 10)
60 | otherwise = error "Char.intToDigit: not a digit"
61
62
63 -- Case-changing operations
64 toUpper :: Char -> Char
65 toUpper = primUnicodeToUpper
66
67 toLower :: Char -> Char
68 toLower = primUnicodeToLower
69
70 -- Character code functions
71 ord :: Char -> Int
72 ord = fromEnum
73
74 chr :: Int -> Char
75 chr = toEnum
76
77 -- Text functions
78 readLitChar :: ReadS Char
79 readLitChar ('\\':s) = readEsc s
80 readLitChar (c:s) = [(c,s)]
81
82 readEsc :: ReadS Char
83 readEsc ('a':s) = [('\a',s)]
84 readEsc ('b':s) = [('\b',s)]
85 readEsc ('f':s) = [('\f',s)]
86 readEsc ('n':s) = [('\n',s)]
87 readEsc ('r':s) = [('\r',s)]
88 readEsc ('t':s) = [('\t',s)]
89 readEsc ('v':s) = [('\v',s)]
90 readEsc ('\\':s) = [('\\',s)]
91 readEsc ('"':s) = [('"',s)]
92 readEsc ('\'':s) = [('\'',s)]
93 readEsc ('^':c:s) | c >= '@' && c <= '_'
94 = [(chr (ord c - ord '@'), s)]
95 readEsc s@(d:_) | isDigit d
96 = [(chr n, t) | (n,t) <- readDec s]
97 readEsc ('o':s) = [(chr n, t) | (n,t) <- readOct s]
98 readEsc ('x':s) = [(chr n, t) | (n,t) <- readHex s]
99 readEsc s@(c:_) | isUpper c
100 = let table = ('\DEL', "DEL") : assocs asciiTab
101 in case [(c,s') | (c, mne) <- table,
102 ([],s') <- [match mne s]]
103 of (pr:_) -> [pr]
104 [] -> []
105 readEsc _ = []
106
107 match :: (Eq a) => [a] -> [a] -> ([a],[a])
108 match (x:xs) (y:ys) | x == y = match xs ys
109 match xs ys = (xs,ys)
110
111 showLitChar :: Char -> ShowS
112 showLitChar c | c > '\DEL' = showChar '\\' .
113 protectEsc isDigit (shows (ord c))
114 showLitChar '\DEL' = showString "\\DEL"
115 showLitChar '\\' = showString "\\\\"
116 showLitChar c | c >= ' ' = showChar c
117 showLitChar '\a' = showString "\\a"
118 showLitChar '\b' = showString "\\b"
119 showLitChar '\f' = showString "\\f"
120 showLitChar '\n' = showString "\\n"
121 showLitChar '\r' = showString "\\r"
122 showLitChar '\t' = showString "\\t"
123 showLitChar '\v' = showString "\\v"
124 showLitChar '\SO' = protectEsc (== 'H') (showString "\\SO")
125 showLitChar c = showString ('\\' : asciiTab!c)
126
127 protectEsc p f = f . cont
128 where cont s@(c:_) | p c = "\\&" ++ s
129 cont s = s
130 asciiTab = listArray ('\NUL', ' ')
131 ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
132 "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
133 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
134 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
135 "SP"]
136
137 lexLitChar :: ReadS String
138 lexLitChar ('\\':s) = map (prefix '\\') (lexEsc s)
139 where
140 lexEsc (c:s) | c `elem` "abfnrtv\\\"'" = [([c],s)]
141 lexEsc ('^':c:s) | c >= '@' && c <= '_' = [(['^',c],s)]
142
143 -- Numeric escapes
144 lexEsc ('o':s) = [prefix 'o' (span isOctDigit s)]
145 lexEsc ('x':s) = [prefix 'x' (span isHexDigit s)]
146 lexEsc s@(d:_) | isDigit d = [span isDigit s]
147
148 -- Very crude approximation to \XYZ.
149 lexEsc s@(c:_) | isUpper c = [span isCharName s]
150 lexEsc _ = []
151
152 isCharName c = isUpper c || isDigit c
153 prefix c (t,s) = (c:t, s)
154
155 lexLitChar (c:s) = [([c],s)]
156 lexLitChar "" = []
157