Update base for new Safe Haskell design
[ghc.git] / libraries / base / GHC / Foreign.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE CPP, NoImplicitPrelude #-}
3
4 -----------------------------------------------------------------------------
5 -- |
6 -- Module : GHC.Foreign
7 -- Copyright : (c) The University of Glasgow, 2008-2011
8 -- License : see libraries/base/LICENSE
9 --
10 -- Maintainer : libraries@haskell.org
11 -- Stability : internal
12 -- Portability : non-portable
13 --
14 -- Foreign marshalling support for CStrings with configurable encodings
15 --
16 -----------------------------------------------------------------------------
17
18 module GHC.Foreign (
19 -- * C strings with a configurable encoding
20
21 -- conversion of C strings into Haskell strings
22 --
23 peekCString, -- :: TextEncoding -> CString -> IO String
24 peekCStringLen, -- :: TextEncoding -> CStringLen -> IO String
25
26 -- conversion of Haskell strings into C strings
27 --
28 newCString, -- :: TextEncoding -> String -> IO CString
29 newCStringLen, -- :: TextEncoding -> String -> IO CStringLen
30
31 -- conversion of Haskell strings into C strings using temporary storage
32 --
33 withCString, -- :: TextEncoding -> String -> (CString -> IO a) -> IO a
34 withCStringLen, -- :: TextEncoding -> String -> (CStringLen -> IO a) -> IO a
35
36 charIsRepresentable, -- :: TextEncoding -> Char -> IO Bool
37 ) where
38
39 import Foreign.Marshal.Array
40 import Foreign.C.Types
41 import Foreign.Ptr
42 import Foreign.Storable
43
44 import Data.Word
45
46 -- Imports for the locale-encoding version of marshallers
47 import Control.Monad
48
49 import Data.Tuple (fst)
50 import Data.Maybe
51
52 import {-# SOURCE #-} System.Posix.Internals (puts)
53 import GHC.Show ( show )
54
55 import Foreign.Marshal.Alloc
56 import Foreign.ForeignPtr
57
58 import GHC.Err (undefined)
59 import GHC.List
60 import GHC.Num
61 import GHC.Base
62
63 import GHC.IO
64 import GHC.IO.Exception
65 import GHC.IO.Buffer
66 import GHC.IO.Encoding.Failure (surrogatifyRoundtripCharacter, desurrogatifyRoundtripCharacter)
67 import GHC.IO.Encoding.Types
68
69
70 c_DEBUG_DUMP :: Bool
71 c_DEBUG_DUMP = False
72
73 putDebugMsg :: String -> IO ()
74 putDebugMsg | c_DEBUG_DUMP = puts
75 | otherwise = const (return ())
76
77
78 -- These definitions are identical to those in Foreign.C.String, but copied in here to avoid a cycle:
79 type CString = Ptr CChar
80 type CStringLen = (Ptr CChar, Int)
81
82 -- exported functions
83 -- ------------------
84
85 -- | Marshal a NUL terminated C string into a Haskell string.
86 --
87 peekCString :: TextEncoding -> CString -> IO String
88 peekCString enc cp = do
89 sz <- lengthArray0 nUL cp
90 peekEncodedCString enc (cp, sz * cCharSize)
91
92 -- | Marshal a C string with explicit length into a Haskell string.
93 --
94 peekCStringLen :: TextEncoding -> CStringLen -> IO String
95 peekCStringLen = peekEncodedCString
96
97 -- | Marshal a Haskell string into a NUL terminated C string.
98 --
99 -- * the Haskell string may /not/ contain any NUL characters
100 --
101 -- * new storage is allocated for the C string and must be
102 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
103 -- 'Foreign.Marshal.Alloc.finalizerFree'.
104 --
105 newCString :: TextEncoding -> String -> IO CString
106 newCString enc = liftM fst . newEncodedCString enc True
107
108 -- | Marshal a Haskell string into a C string (ie, character array) with
109 -- explicit length information.
110 --
111 -- * new storage is allocated for the C string and must be
112 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
113 -- 'Foreign.Marshal.Alloc.finalizerFree'.
114 --
115 newCStringLen :: TextEncoding -> String -> IO CStringLen
116 newCStringLen enc = newEncodedCString enc False
117
118 -- | Marshal a Haskell string into a NUL terminated C string using temporary
119 -- storage.
120 --
121 -- * the Haskell string may /not/ contain any NUL characters
122 --
123 -- * the memory is freed when the subcomputation terminates (either
124 -- normally or via an exception), so the pointer to the temporary
125 -- storage must /not/ be used after this.
126 --
127 withCString :: TextEncoding -> String -> (CString -> IO a) -> IO a
128 withCString enc s act = withEncodedCString enc True s $ \(cp, _sz) -> act cp
129
130 -- | Marshal a Haskell string into a C string (ie, character array)
131 -- in temporary storage, with explicit length information.
132 --
133 -- * the memory is freed when the subcomputation terminates (either
134 -- normally or via an exception), so the pointer to the temporary
135 -- storage must /not/ be used after this.
136 --
137 withCStringLen :: TextEncoding -> String -> (CStringLen -> IO a) -> IO a
138 withCStringLen enc = withEncodedCString enc False
139
140
141 -- | Determines whether a character can be accurately encoded in a 'CString'.
142 --
143 -- Pretty much anyone who uses this function is in a state of sin because
144 -- whether or not a character is encodable will, in general, depend on the
145 -- context in which it occurs.
146 charIsRepresentable :: TextEncoding -> Char -> IO Bool
147 charIsRepresentable enc c = withCString enc [c] (fmap (== [c]) . peekCString enc) `catchException` (\e -> let _ = e :: IOException in return False)
148
149 -- auxiliary definitions
150 -- ----------------------
151
152 -- C's end of string character
153 nUL :: CChar
154 nUL = 0
155
156 -- Size of a CChar in bytes
157 cCharSize :: Int
158 cCharSize = sizeOf (undefined :: CChar)
159
160
161 {-# INLINE peekEncodedCString #-}
162 peekEncodedCString :: TextEncoding -- ^ Encoding of CString
163 -> CStringLen
164 -> IO String -- ^ String in Haskell terms
165 peekEncodedCString (TextEncoding { mkTextDecoder = mk_decoder }) (p, sz_bytes)
166 = bracket mk_decoder close $ \decoder -> do
167 let chunk_size = sz_bytes `max` 1 -- Decode buffer chunk size in characters: one iteration only for ASCII
168 from0 <- fmap (\fp -> bufferAdd sz_bytes (emptyBuffer fp sz_bytes ReadBuffer)) $ newForeignPtr_ (castPtr p)
169 to <- newCharBuffer chunk_size WriteBuffer
170
171 let go iteration from = do
172 (why, from', to') <- encode decoder from to
173 if isEmptyBuffer from'
174 then
175 -- No input remaining: @why@ will be InputUnderflow, but we don't care
176 fmap (map desurrogatifyRoundtripCharacter) $ withBuffer to' $ peekArray (bufferElems to')
177 else do
178 -- Input remaining: what went wrong?
179 putDebugMsg ("peekEncodedCString: " ++ show iteration ++ " " ++ show why)
180 (from'', to'') <- case why of InvalidSequence -> recover decoder from' to' -- These conditions are equally bad because
181 InputUnderflow -> recover decoder from' to' -- they indicate malformed/truncated input
182 OutputUnderflow -> return (from', to') -- We will have more space next time round
183 putDebugMsg ("peekEncodedCString: from " ++ summaryBuffer from ++ " " ++ summaryBuffer from' ++ " " ++ summaryBuffer from'')
184 putDebugMsg ("peekEncodedCString: to " ++ summaryBuffer to ++ " " ++ summaryBuffer to' ++ " " ++ summaryBuffer to'')
185 to_chars <- withBuffer to'' $ peekArray (bufferElems to'')
186 fmap (map desurrogatifyRoundtripCharacter to_chars++) $ go (iteration + 1) from''
187
188 go (0 :: Int) from0
189
190 {-# INLINE withEncodedCString #-}
191 withEncodedCString :: TextEncoding -- ^ Encoding of CString to create
192 -> Bool -- ^ Null-terminate?
193 -> String -- ^ String to encode
194 -> (CStringLen -> IO a) -- ^ Worker that can safely use the allocated memory
195 -> IO a
196 withEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s act
197 = bracket mk_encoder close $ \encoder -> withArrayLen (map surrogatifyRoundtripCharacter s) $ \sz p -> do
198 from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p
199
200 let go iteration to_sz_bytes = do
201 putDebugMsg ("withEncodedCString: " ++ show iteration)
202 allocaBytes to_sz_bytes $ \to_p -> do
203 mb_res <- tryFillBufferAndCall encoder null_terminate from to_p to_sz_bytes act
204 case mb_res of
205 Nothing -> go (iteration + 1) (to_sz_bytes * 2)
206 Just res -> return res
207
208 -- If the input string is ASCII, this value will ensure we only allocate once
209 go (0 :: Int) (cCharSize * (sz + 1))
210
211 {-# INLINE newEncodedCString #-}
212 newEncodedCString :: TextEncoding -- ^ Encoding of CString to create
213 -> Bool -- ^ Null-terminate?
214 -> String -- ^ String to encode
215 -> IO CStringLen
216 newEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s
217 = bracket mk_encoder close $ \encoder -> withArrayLen (map surrogatifyRoundtripCharacter s) $ \sz p -> do
218 from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p
219
220 let go iteration to_p to_sz_bytes = do
221 putDebugMsg ("newEncodedCString: " ++ show iteration)
222 mb_res <- tryFillBufferAndCall encoder null_terminate from to_p to_sz_bytes return
223 case mb_res of
224 Nothing -> do
225 let to_sz_bytes' = to_sz_bytes * 2
226 to_p' <- reallocBytes to_p to_sz_bytes'
227 go (iteration + 1) to_p' to_sz_bytes'
228 Just res -> return res
229
230 -- If the input string is ASCII, this value will ensure we only allocate once
231 let to_sz_bytes = cCharSize * (sz + 1)
232 to_p <- mallocBytes to_sz_bytes
233 go (0 :: Int) to_p to_sz_bytes
234
235
236 tryFillBufferAndCall :: TextEncoder dstate -> Bool -> Buffer Char -> Ptr Word8 -> Int
237 -> (CStringLen -> IO a) -> IO (Maybe a)
238 tryFillBufferAndCall encoder null_terminate from0 to_p to_sz_bytes act = do
239 to_fp <- newForeignPtr_ to_p
240 go (0 :: Int) (from0, emptyBuffer to_fp to_sz_bytes WriteBuffer)
241 where
242 go iteration (from, to) = do
243 (why, from', to') <- encode encoder from to
244 putDebugMsg ("tryFillBufferAndCall: " ++ show iteration ++ " " ++ show why ++ " " ++ summaryBuffer from ++ " " ++ summaryBuffer from')
245 if isEmptyBuffer from'
246 then if null_terminate && bufferAvailable to' == 0
247 then return Nothing -- We had enough for the string but not the terminator: ask the caller for more buffer
248 else do
249 -- Awesome, we had enough buffer
250 let bytes = bufferElems to'
251 withBuffer to' $ \to_ptr -> do
252 when null_terminate $ pokeElemOff to_ptr (bufR to') 0
253 fmap Just $ act (castPtr to_ptr, bytes) -- NB: the length information is specified as being in *bytes*
254 else case why of -- We didn't consume all of the input
255 InputUnderflow -> recover encoder from' to' >>= go (iteration + 1) -- These conditions are equally bad
256 InvalidSequence -> recover encoder from' to' >>= go (iteration + 1) -- since the input was truncated/invalid
257 OutputUnderflow -> return Nothing -- Oops, out of buffer during decoding: ask the caller for more