SafeHaskell: Added SafeHaskell to base
[packages/base.git] / GHC / IO / Encoding / Iconv.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE CPP
3 , NoImplicitPrelude
4 , ForeignFunctionInterface
5 , NondecreasingIndentation
6 #-}
7
8 -----------------------------------------------------------------------------
9 -- |
10 -- Module : GHC.IO.Encoding.Iconv
11 -- Copyright : (c) The University of Glasgow, 2008-2009
12 -- License : see libraries/base/LICENSE
13 --
14 -- Maintainer : libraries@haskell.org
15 -- Stability : internal
16 -- Portability : non-portable
17 --
18 -- This module provides text encoding/decoding using iconv
19 --
20 -----------------------------------------------------------------------------
21
22 -- #hide
23 module GHC.IO.Encoding.Iconv (
24 #if !defined(mingw32_HOST_OS)
25 iconvEncoding, mkIconvEncoding,
26 localeEncoding, mkLocaleEncoding
27 #endif
28 ) where
29
30 #include "MachDeps.h"
31 #include "HsBaseConfig.h"
32
33 #if !defined(mingw32_HOST_OS)
34
35 import Foreign.Safe
36 import Foreign.C
37 import Data.Maybe
38 import GHC.Base
39 import GHC.IO.Buffer
40 import GHC.IO.Encoding.Failure
41 import GHC.IO.Encoding.Types
42 import GHC.List (span)
43 import GHC.Num
44 import GHC.Show
45 import GHC.Real
46 import System.IO.Unsafe (unsafePerformIO)
47 import System.Posix.Internals
48
49 c_DEBUG_DUMP :: Bool
50 c_DEBUG_DUMP = False
51
52 iconv_trace :: String -> IO ()
53 iconv_trace s
54 | c_DEBUG_DUMP = puts s
55 | otherwise = return ()
56
57 -- -----------------------------------------------------------------------------
58 -- iconv encoders/decoders
59
60 {-# NOINLINE localeEncodingName #-}
61 localeEncodingName :: String
62 localeEncodingName = unsafePerformIO $ do
63 -- Use locale_charset() or nl_langinfo(CODESET) to get the encoding
64 -- if we have either of them.
65 cstr <- c_localeEncoding
66 peekCAString cstr -- Assume charset names are ASCII
67
68 localeEncoding :: TextEncoding
69 localeEncoding = mkLocaleEncoding ErrorOnCodingFailure
70
71 mkLocaleEncoding :: CodingFailureMode -> TextEncoding
72 mkLocaleEncoding cfm = unsafePerformIO $ mkIconvEncoding cfm localeEncodingName
73
74 -- We hope iconv_t is a storable type. It should be, since it has at least the
75 -- value -1, which is a possible return value from iconv_open.
76 type IConv = CLong -- ToDo: (#type iconv_t)
77
78 foreign import ccall unsafe "hs_iconv_open"
79 hs_iconv_open :: CString -> CString -> IO IConv
80
81 foreign import ccall unsafe "hs_iconv_close"
82 hs_iconv_close :: IConv -> IO CInt
83
84 foreign import ccall unsafe "hs_iconv"
85 hs_iconv :: IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize
86 -> IO CSize
87
88 foreign import ccall unsafe "localeEncoding"
89 c_localeEncoding :: IO CString
90
91 haskellChar :: String
92 #ifdef WORDS_BIGENDIAN
93 haskellChar | charSize == 2 = "UTF-16BE"
94 | otherwise = "UTF-32BE"
95 #else
96 haskellChar | charSize == 2 = "UTF-16LE"
97 | otherwise = "UTF-32LE"
98 #endif
99
100 char_shift :: Int
101 char_shift | charSize == 2 = 1
102 | otherwise = 2
103
104 iconvEncoding :: String -> IO TextEncoding
105 iconvEncoding = mkIconvEncoding ErrorOnCodingFailure
106
107 mkIconvEncoding :: CodingFailureMode -> String -> IO TextEncoding
108 mkIconvEncoding cfm charset = do
109 return (TextEncoding {
110 textEncodingName = charset,
111 mkTextDecoder = newIConv raw_charset (haskellChar ++ suffix) (recoverDecode cfm) iconvDecode,
112 mkTextEncoder = newIConv haskellChar charset (recoverEncode cfm) iconvEncode})
113 where
114 -- An annoying feature of GNU iconv is that the //PREFIXES only take
115 -- effect when they appear on the tocode parameter to iconv_open:
116 (raw_charset, suffix) = span (/= '/') charset
117
118 newIConv :: String -> String
119 -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
120 -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
121 -> IO (BufferCodec a b ())
122 newIConv from to rec fn =
123 -- Assume charset names are ASCII
124 withCAString from $ \ from_str ->
125 withCAString to $ \ to_str -> do
126 iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ hs_iconv_open to_str from_str
127 let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
128 return BufferCodec{
129 encode = fn iconvt,
130 recover = rec,
131 close = iclose,
132 -- iconv doesn't supply a way to save/restore the state
133 getState = return (),
134 setState = const $ return ()
135 }
136
137 iconvDecode :: IConv -> DecodeBuffer
138 iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift
139
140 iconvEncode :: IConv -> EncodeBuffer
141 iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0
142
143 iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int
144 -> IO (CodingProgress, Buffer a, Buffer b)
145 iconvRecode iconv_t
146 input@Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ } iscale
147 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os } oscale
148 = do
149 iconv_trace ("haskelChar=" ++ show haskellChar)
150 iconv_trace ("iconvRecode before, input=" ++ show (summaryBuffer input))
151 iconv_trace ("iconvRecode before, output=" ++ show (summaryBuffer output))
152 withRawBuffer iraw $ \ piraw -> do
153 withRawBuffer oraw $ \ poraw -> do
154 with (piraw `plusPtr` (ir `shiftL` iscale)) $ \ p_inbuf -> do
155 with (poraw `plusPtr` (ow `shiftL` oscale)) $ \ p_outbuf -> do
156 with (fromIntegral ((iw-ir) `shiftL` iscale)) $ \ p_inleft -> do
157 with (fromIntegral ((os-ow) `shiftL` oscale)) $ \ p_outleft -> do
158 res <- hs_iconv iconv_t p_inbuf p_inleft p_outbuf p_outleft
159 new_inleft <- peek p_inleft
160 new_outleft <- peek p_outleft
161 let
162 new_inleft' = fromIntegral new_inleft `shiftR` iscale
163 new_outleft' = fromIntegral new_outleft `shiftR` oscale
164 new_input
165 | new_inleft == 0 = input { bufL = 0, bufR = 0 }
166 | otherwise = input { bufL = iw - new_inleft' }
167 new_output = output{ bufR = os - new_outleft' }
168 iconv_trace ("iconv res=" ++ show res)
169 iconv_trace ("iconvRecode after, input=" ++ show (summaryBuffer new_input))
170 iconv_trace ("iconvRecode after, output=" ++ show (summaryBuffer new_output))
171 if (res /= -1)
172 then do -- all input translated
173 return (InputUnderflow, new_input, new_output)
174 else do
175 errno <- getErrno
176 case errno of
177 e | e == e2BIG -> return (OutputUnderflow, new_input, new_output)
178 | e == eINVAL -> return (InputUnderflow, new_input, new_output)
179 -- Sometimes iconv reports EILSEQ for a
180 -- character in the input even when there is no room
181 -- in the output; in this case we might be about to
182 -- change the encoding anyway, so the following bytes
183 -- could very well be in a different encoding.
184 --
185 -- Because we can only say InvalidSequence if there is at least
186 -- one element left in the output, we have to special case this.
187 | e == eILSEQ -> return (if new_outleft' == 0 then OutputUnderflow else InvalidSequence, new_input, new_output)
188 | otherwise -> do
189 iconv_trace ("iconv returned error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
190 throwErrno "iconvRecoder"
191
192 #endif /* !mingw32_HOST_OS */