Update base for latest Safe Haskell.
[packages/base.git] / GHC / IO / Encoding / CodePage.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE CPP, BangPatterns, ForeignFunctionInterface, NoImplicitPrelude,
3 NondecreasingIndentation, MagicHash #-}
4
5 module GHC.IO.Encoding.CodePage(
6 #if !defined(mingw32_HOST_OS)
7 ) where
8 #else
9 codePageEncoding, mkCodePageEncoding,
10 localeEncoding, mkLocaleEncoding
11 ) where
12
13 import GHC.Base
14 import GHC.Show
15 import GHC.Num
16 import GHC.Enum
17 import GHC.Word
18 import GHC.IO (unsafePerformIO)
19 import GHC.IO.Encoding.Failure
20 import GHC.IO.Encoding.Types
21 import GHC.IO.Buffer
22 import Data.Bits
23 import Data.Maybe
24 import Data.List (lookup)
25
26 import GHC.IO.Encoding.CodePage.Table
27
28 import GHC.IO.Encoding.Latin1 (mkLatin1)
29 import GHC.IO.Encoding.UTF8 (mkUTF8)
30 import GHC.IO.Encoding.UTF16 (mkUTF16le, mkUTF16be)
31 import GHC.IO.Encoding.UTF32 (mkUTF32le, mkUTF32be)
32
33 -- note CodePage = UInt which might not work on Win64. But the Win32 package
34 -- also has this issue.
35 getCurrentCodePage :: IO Word32
36 getCurrentCodePage = do
37 conCP <- getConsoleCP
38 if conCP > 0
39 then return conCP
40 else getACP
41
42 -- Since the Win32 package depends on base, we have to import these ourselves:
43 foreign import stdcall unsafe "windows.h GetConsoleCP"
44 getConsoleCP :: IO Word32
45
46 foreign import stdcall unsafe "windows.h GetACP"
47 getACP :: IO Word32
48
49 {-# NOINLINE currentCodePage #-}
50 currentCodePage :: Word32
51 currentCodePage = unsafePerformIO getCurrentCodePage
52
53 localeEncoding :: TextEncoding
54 localeEncoding = mkLocaleEncoding ErrorOnCodingFailure
55
56 mkLocaleEncoding :: CodingFailureMode -> TextEncoding
57 mkLocaleEncoding cfm = mkCodePageEncoding cfm currentCodePage
58
59
60 codePageEncoding :: Word32 -> TextEncoding
61 codePageEncoding = mkCodePageEncoding ErrorOnCodingFailure
62
63 mkCodePageEncoding :: CodingFailureMode -> Word32 -> TextEncoding
64 mkCodePageEncoding cfm 65001 = mkUTF8 cfm
65 mkCodePageEncoding cfm 1200 = mkUTF16le cfm
66 mkCodePageEncoding cfm 1201 = mkUTF16be cfm
67 mkCodePageEncoding cfm 12000 = mkUTF32le cfm
68 mkCodePageEncoding cfm 12001 = mkUTF32be cfm
69 mkCodePageEncoding cfm cp = maybe (mkLatin1 cfm) (buildEncoding cfm cp) (lookup cp codePageMap)
70
71 buildEncoding :: CodingFailureMode -> Word32 -> CodePageArrays -> TextEncoding
72 buildEncoding cfm cp SingleByteCP {decoderArray = dec, encoderArray = enc}
73 = TextEncoding {
74 textEncodingName = "CP" ++ show cp
75 , mkTextDecoder = return $ simpleCodec (recoverDecode cfm) $ decodeFromSingleByte dec
76 , mkTextEncoder = return $ simpleCodec (recoverEncode cfm) $ encodeToSingleByte enc
77 }
78
79 simpleCodec :: (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
80 -> (Buffer from -> Buffer to -> IO (CodingProgress, Buffer from, Buffer to))
81 -> BufferCodec from to ()
82 simpleCodec r f = BufferCodec {
83 encode = f,
84 recover = r,
85 close = return (),
86 getState = return (),
87 setState = return
88 }
89
90 decodeFromSingleByte :: ConvArray Char -> DecodeBuffer
91 decodeFromSingleByte convArr
92 input@Buffer { bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
93 output@Buffer { bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
94 = let
95 done why !ir !ow = return (why,
96 if ir==iw then input{ bufL=0, bufR=0}
97 else input{ bufL=ir},
98 output {bufR=ow})
99 loop !ir !ow
100 | ow >= os = done OutputUnderflow ir ow
101 | ir >= iw = done InputUnderflow ir ow
102 | otherwise = do
103 b <- readWord8Buf iraw ir
104 let c = lookupConv convArr b
105 if c=='\0' && b /= 0 then invalid else do
106 ow' <- writeCharBuf oraw ow c
107 loop (ir+1) ow'
108 where
109 invalid = done InvalidSequence ir ow
110 in loop ir0 ow0
111
112 encodeToSingleByte :: CompactArray Char Word8 -> EncodeBuffer
113 encodeToSingleByte CompactArray { encoderMax = maxChar,
114 encoderIndices = indices,
115 encoderValues = values }
116 input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
117 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
118 = let
119 done why !ir !ow = return (why,
120 if ir==iw then input { bufL=0, bufR=0 }
121 else input { bufL=ir },
122 output {bufR=ow})
123 loop !ir !ow
124 | ow >= os = done OutputUnderflow ir ow
125 | ir >= iw = done InputUnderflow ir ow
126 | otherwise = do
127 (c,ir') <- readCharBuf iraw ir
128 case lookupCompact maxChar indices values c of
129 Nothing -> invalid
130 Just 0 | c /= '\0' -> invalid
131 Just b -> do
132 writeWord8Buf oraw ow b
133 loop ir' (ow+1)
134 where
135 invalid = done InvalidSequence ir ow
136 in
137 loop ir0 ow0
138
139
140 --------------------------------------------
141 -- Array access functions
142
143 -- {-# INLINE lookupConv #-}
144 lookupConv :: ConvArray Char -> Word8 -> Char
145 lookupConv a = indexChar a . fromEnum
146
147 {-# INLINE lookupCompact #-}
148 lookupCompact :: Char -> ConvArray Int -> ConvArray Word8 -> Char -> Maybe Word8
149 lookupCompact maxVal indexes values x
150 | x > maxVal = Nothing
151 | otherwise = Just $ indexWord8 values $ j + (i .&. mask)
152 where
153 i = fromEnum x
154 mask = (1 `shiftL` n) - 1
155 k = i `shiftR` n
156 j = indexInt indexes k
157 n = blockBitSize
158
159 {-# INLINE indexInt #-}
160 indexInt :: ConvArray Int -> Int -> Int
161 indexInt (ConvArray p) (I# i) = I# (indexInt16OffAddr# p i)
162
163 {-# INLINE indexWord8 #-}
164 indexWord8 :: ConvArray Word8 -> Int -> Word8
165 indexWord8 (ConvArray p) (I# i) = W8# (indexWord8OffAddr# p i)
166
167 {-# INLINE indexChar #-}
168 indexChar :: ConvArray Char -> Int -> Char
169 indexChar (ConvArray p) (I# i) = C# (chr# (indexInt16OffAddr# p i))
170
171 #endif
172