Rewrite of the IO library, including Unicode support
[packages/base.git] / GHC / IO / Encoding.hs
1 {-# OPTIONS_GHC -fno-implicit-prelude -funbox-strict-fields #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : GHC.IO.Encoding
5 -- Copyright : (c) The University of Glasgow, 2008-2009
6 -- License : see libraries/base/LICENSE
7 --
8 -- Maintainer : libraries@haskell.org
9 -- Stability : internal
10 -- Portability : non-portable
11 --
12 -- Text codecs for I/O
13 --
14 -----------------------------------------------------------------------------
15
16 module GHC.IO.Encoding (
17 BufferCodec(..), TextEncoding(..), TextEncoder, TextDecoder,
18 latin1, latin1_encode, latin1_decode,
19 utf8,
20 utf16, utf16le, utf16be,
21 utf32, utf32le, utf32be,
22 localeEncoding,
23 mkTextEncoding,
24 ) where
25
26 import GHC.Base
27 import GHC.IO
28 import GHC.IO.Buffer
29 import GHC.IO.Encoding.Types
30 import GHC.Word
31 #if !defined(mingw32_HOST_OS)
32 import qualified GHC.IO.Encoding.Iconv as Iconv
33 #endif
34 import qualified GHC.IO.Encoding.Latin1 as Latin1
35 import qualified GHC.IO.Encoding.UTF8 as UTF8
36 import qualified GHC.IO.Encoding.UTF16 as UTF16
37 import qualified GHC.IO.Encoding.UTF32 as UTF32
38
39 #if defined(mingw32_HOST_OS)
40 import Data.Maybe
41 import GHC.IO.Exception
42 #endif
43
44 -- -----------------------------------------------------------------------------
45
-- | The Latin1 (ISO8859-1) encoding.  Bytes correspond one-to-one with
-- the first 256 Unicode code points, so this is not a complete Unicode
-- encoding.
latin1 :: TextEncoding
latin1 = Latin1.latin1_checked

-- | The UTF-8 Unicode encoding.
utf8 :: TextEncoding
utf8 = UTF8.utf8

-- | The UTF-16 Unicode encoding.  A byte-order mark should be used to
-- indicate endianness.
utf16 :: TextEncoding
utf16 = UTF16.utf16

-- | The UTF-16 Unicode encoding (little-endian).
utf16le :: TextEncoding
utf16le = UTF16.utf16le

-- | The UTF-16 Unicode encoding (big-endian).
utf16be :: TextEncoding
utf16be = UTF16.utf16be

-- | The UTF-32 Unicode encoding.  A byte-order mark should be used to
-- indicate endianness.
utf32 :: TextEncoding
utf32 = UTF32.utf32

-- | The UTF-32 Unicode encoding (little-endian).
utf32le :: TextEncoding
utf32le = UTF32.utf32le

-- | The UTF-32 Unicode encoding (big-endian).
utf32be :: TextEncoding
utf32be = UTF32.utf32be

-- | The text encoding of the current locale.
localeEncoding :: TextEncoding
#if defined(mingw32_HOST_OS)
-- The Iconv module is not imported on Windows, so Latin1 is used instead.
-- NOTE(review): this is Latin1.latin1, not the latin1_checked variant
-- that backs the exported latin1 value; confirm that is intended.
localeEncoding = Latin1.latin1
#else
localeEncoding = Iconv.localeEncoding
#endif
83
-- | Acquire the text encoding with the given name.
--
-- On Windows (mingw32) only the names @UTF-8@, @UTF-16@, @UTF-16LE@,
-- @UTF-16BE@, @UTF-32@, @UTF-32LE@ and @UTF-32BE@ are recognised, and
-- they are matched exactly as written.  Any other name fails through
-- 'ioException' with an 'IOError' of type 'InvalidArgument' whose
-- description contains the rejected name.
--
-- On every other platform the lookup is handed to
-- @Iconv.mkTextEncoding@ unchanged.
mkTextEncoding :: String -> IO TextEncoding
#if defined(mingw32_HOST_OS)
mkTextEncoding name = case name of
  "UTF-8"    -> return utf8
  "UTF-16"   -> return utf16
  "UTF-16LE" -> return utf16le
  "UTF-16BE" -> return utf16be
  "UTF-32"   -> return utf32
  "UTF-32LE" -> return utf32le
  "UTF-32BE" -> return utf32be
  -- Keep a space after the colon so the offending name is readable in
  -- the rendered error message.
  _          -> ioException
                  (IOError Nothing InvalidArgument "mkTextEncoding"
                     ("unknown encoding: " ++ name) Nothing Nothing)
#else
mkTextEncoding = Iconv.mkTextEncoding
#endif
100
-- | Latin1 encoder working directly on buffers: takes a source
-- 'CharBuffer' and a destination byte buffer and yields both buffers
-- back in 'IO'.
--
-- This is a plain alias for @Latin1.latin1_encode@, the unchecked
-- encoder, which is the one used for binary I\/O.
latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
latin1_encode = Latin1.latin1_encode
-- Disabled alternative that built the encoder from the Iconv codec:
--latin1_encode = unsafePerformIO $ do mkTextEncoder Iconv.latin1 >>= return.encode
104
-- | Latin1 decoder working directly on buffers: takes a source byte
-- buffer and a destination 'CharBuffer' and yields both buffers back
-- in 'IO'.
--
-- This is a plain alias for @Latin1.latin1_decode@.
latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
latin1_decode = Latin1.latin1_decode
-- Disabled alternative that built the decoder from the Iconv codec:
--latin1_decode = unsafePerformIO $ do mkTextDecoder Iconv.latin1 >>= return.encode