e5c6c8739e872db59e58c3eefc26a4d8b68b4d2e
[packages/base.git] / Foreign / C / String.hs
1 {-# OPTIONS_GHC -XNoImplicitPrelude #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Foreign.C.String
5 -- Copyright : (c) The FFI task force 2001
6 -- License : BSD-style (see the file libraries/base/LICENSE)
7 --
8 -- Maintainer : ffi@haskell.org
9 -- Stability : provisional
10 -- Portability : portable
11 --
12 -- Utilities for primitive marshalling of C strings.
13 --
14 -- The marshalling converts each Haskell character, representing a Unicode
15 -- code point, to one or more bytes in a manner that, by default, is
16 -- determined by the current locale. As a consequence, no guarantees
17 -- can be made about the relative length of a Haskell string and its
18 -- corresponding C string, and therefore all the marshalling routines
19 -- include memory allocation. The translation between Unicode and the
20 -- encoding of the current locale may be lossy.
21 --
22 -----------------------------------------------------------------------------
23
24 module Foreign.C.String ( -- representation of strings in C
25
26 -- * C strings
27
28 CString, -- = Ptr CChar
29 CStringLen, -- = (Ptr CChar, Int)
30
31 -- ** Using a locale-dependent encoding
32
33 -- | Currently these functions are identical to their @CAString@ counterparts;
34 -- eventually they will use an encoding determined by the current locale.
35
36 -- conversion of C strings into Haskell strings
37 --
38 peekCString, -- :: CString -> IO String
39 peekCStringLen, -- :: CStringLen -> IO String
40
41 -- conversion of Haskell strings into C strings
42 --
43 newCString, -- :: String -> IO CString
44 newCStringLen, -- :: String -> IO CStringLen
45
46 -- conversion of Haskell strings into C strings using temporary storage
47 --
48 withCString, -- :: String -> (CString -> IO a) -> IO a
49 withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
50
51 charIsRepresentable, -- :: Char -> IO Bool
52
53 -- ** Using 8-bit characters
54
55 -- | These variants of the above functions are for use with C libraries
56 -- that are ignorant of Unicode. These functions should be used with
57 -- care, as a loss of information can occur.
58
59 castCharToCChar, -- :: Char -> CChar
60 castCCharToChar, -- :: CChar -> Char
61
62 peekCAString, -- :: CString -> IO String
63 peekCAStringLen, -- :: CStringLen -> IO String
64 newCAString, -- :: String -> IO CString
65 newCAStringLen, -- :: String -> IO CStringLen
66 withCAString, -- :: String -> (CString -> IO a) -> IO a
67 withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
68
69 -- * C wide strings
70
71 -- | These variants of the above functions are for use with C libraries
72 -- that encode Unicode using the C @wchar_t@ type in a system-dependent
73 -- way. The only encodings supported are
74 --
75 -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
76 --
77 -- * UTF-16 (as used on Windows systems).
78
79 CWString, -- = Ptr CWchar
80 CWStringLen, -- = (Ptr CWchar, Int)
81
82 peekCWString, -- :: CWString -> IO String
83 peekCWStringLen, -- :: CWStringLen -> IO String
84 newCWString, -- :: String -> IO CWString
85 newCWStringLen, -- :: String -> IO CWStringLen
86 withCWString, -- :: String -> (CWString -> IO a) -> IO a
87 withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
88
89 ) where
90
91 import Foreign.Marshal.Array
92 import Foreign.C.Types
93 import Foreign.Ptr
94 import Foreign.Storable
95
96 import Data.Word
97
98 #ifdef __GLASGOW_HASKELL__
99 import GHC.List
100 import GHC.Real
101 import GHC.Num
102 import GHC.IOBase
103 import GHC.Base
104 #else
105 import Data.Char ( chr, ord )
106 #define unsafeChr chr
107 #endif
108
109 -----------------------------------------------------------------------------
110 -- Strings
111
112 -- representation of strings in C
113 -- ------------------------------
114
115 -- | A C string is a reference to an array of C characters terminated by NUL.
116 type CString = Ptr CChar
117
118 -- | A string with explicit length information in bytes instead of a
119 -- terminating NUL (allowing NUL characters in the middle of the string).
120 type CStringLen = (Ptr CChar, Int)
121
122 -- exported functions
123 -- ------------------
124 --
125 -- * the following routines apply the default conversion when converting the
126 -- C-land character encoding into the Haskell-land character encoding
127
128 -- | Marshal a NUL terminated C string into a Haskell string.
129 --
130 peekCString :: CString -> IO String
131 peekCString = peekCAString
132
133 -- | Marshal a C string with explicit length into a Haskell string.
134 --
135 peekCStringLen :: CStringLen -> IO String
136 peekCStringLen = peekCAStringLen
137
138 -- | Marshal a Haskell string into a NUL terminated C string.
139 --
140 -- * the Haskell string may /not/ contain any NUL characters
141 --
142 -- * new storage is allocated for the C string and must be
143 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
144 -- 'Foreign.Marshal.Alloc.finalizerFree'.
145 --
146 newCString :: String -> IO CString
147 newCString = newCAString
148
149 -- | Marshal a Haskell string into a C string (ie, character array) with
150 -- explicit length information.
151 --
152 -- * new storage is allocated for the C string and must be
153 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
154 -- 'Foreign.Marshal.Alloc.finalizerFree'.
155 --
156 newCStringLen :: String -> IO CStringLen
157 newCStringLen = newCAStringLen
158
159 -- | Marshal a Haskell string into a NUL terminated C string using temporary
160 -- storage.
161 --
162 -- * the Haskell string may /not/ contain any NUL characters
163 --
164 -- * the memory is freed when the subcomputation terminates (either
165 -- normally or via an exception), so the pointer to the temporary
166 -- storage must /not/ be used after this.
167 --
168 withCString :: String -> (CString -> IO a) -> IO a
169 withCString = withCAString
170
171 -- | Marshal a Haskell string into a C string (ie, character array)
172 -- in temporary storage, with explicit length information.
173 --
174 -- * the memory is freed when the subcomputation terminates (either
175 -- normally or via an exception), so the pointer to the temporary
176 -- storage must /not/ be used after this.
177 --
178 withCStringLen :: String -> (CStringLen -> IO a) -> IO a
179 withCStringLen = withCAStringLen
180
181 -- | Determines whether a character can be accurately encoded in a 'CString'.
182 -- Unrepresentable characters are converted to @\'?\'@.
183 --
184 -- Currently only Latin-1 characters are representable.
185 charIsRepresentable :: Char -> IO Bool
186 charIsRepresentable c = return (ord c < 256)
187
188 -- single byte characters
189 -- ----------------------
190 --
191 -- ** NOTE: These routines don't handle conversions! **
192
193 -- | Convert a C byte, representing a Latin-1 character, to the corresponding
194 -- Haskell character.
195 castCCharToChar :: CChar -> Char
196 castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
197
198 -- | Convert a Haskell character to a C character.
199 -- This function is only safe on the first 256 characters.
200 castCharToCChar :: Char -> CChar
201 castCharToCChar ch = fromIntegral (ord ch)
202
203 -- | Marshal a NUL terminated C string into a Haskell string.
204 --
205 peekCAString :: CString -> IO String
206 #ifndef __GLASGOW_HASKELL__
207 peekCAString cp = do
208 cs <- peekArray0 nUL cp
209 return (cCharsToChars cs)
210 #else
211 peekCAString cp = do
212 l <- lengthArray0 nUL cp
213 if l <= 0 then return "" else loop "" (l-1)
214 where
215 loop s i = do
216 xval <- peekElemOff cp i
217 let val = castCCharToChar xval
218 val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
219 #endif
220
221 -- | Marshal a C string with explicit length into a Haskell string.
222 --
223 peekCAStringLen :: CStringLen -> IO String
224 #ifndef __GLASGOW_HASKELL__
225 peekCAStringLen (cp, len) = do
226 cs <- peekArray len cp
227 return (cCharsToChars cs)
228 #else
229 peekCAStringLen (cp, len)
230 | len <= 0 = return "" -- being (too?) nice.
231 | otherwise = loop [] (len-1)
232 where
233 loop acc i = do
234 xval <- peekElemOff cp i
235 let val = castCCharToChar xval
236 -- blow away the coercion ASAP.
237 if (val `seq` (i == 0))
238 then return (val:acc)
239 else loop (val:acc) (i-1)
240 #endif
241
242 -- | Marshal a Haskell string into a NUL terminated C string.
243 --
244 -- * the Haskell string may /not/ contain any NUL characters
245 --
246 -- * new storage is allocated for the C string and must be
247 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
248 -- 'Foreign.Marshal.Alloc.finalizerFree'.
249 --
250 newCAString :: String -> IO CString
251 #ifndef __GLASGOW_HASKELL__
252 newCAString = newArray0 nUL . charsToCChars
253 #else
254 newCAString str = do
255 ptr <- mallocArray0 (length str)
256 let
257 go [] n = pokeElemOff ptr n nUL
258 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
259 go str 0
260 return ptr
261 #endif
262
263 -- | Marshal a Haskell string into a C string (ie, character array) with
264 -- explicit length information.
265 --
266 -- * new storage is allocated for the C string and must be
267 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
268 -- 'Foreign.Marshal.Alloc.finalizerFree'.
269 --
270 newCAStringLen :: String -> IO CStringLen
271 #ifndef __GLASGOW_HASKELL__
272 newCAStringLen str = newArrayLen (charsToCChars str)
273 #else
274 newCAStringLen str = do
275 ptr <- mallocArray0 len
276 let
277 go [] n = n `seq` return () -- make it strict in n
278 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
279 go str 0
280 return (ptr, len)
281 where
282 len = length str
283 #endif
284
285 -- | Marshal a Haskell string into a NUL terminated C string using temporary
286 -- storage.
287 --
288 -- * the Haskell string may /not/ contain any NUL characters
289 --
290 -- * the memory is freed when the subcomputation terminates (either
291 -- normally or via an exception), so the pointer to the temporary
292 -- storage must /not/ be used after this.
293 --
294 withCAString :: String -> (CString -> IO a) -> IO a
295 #ifndef __GLASGOW_HASKELL__
296 withCAString = withArray0 nUL . charsToCChars
297 #else
298 withCAString str f =
299 allocaArray0 (length str) $ \ptr ->
300 let
301 go [] n = pokeElemOff ptr n nUL
302 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
303 in do
304 go str 0
305 f ptr
306 #endif
307
308 -- | Marshal a Haskell string into a C string (ie, character array)
309 -- in temporary storage, with explicit length information.
310 --
311 -- * the memory is freed when the subcomputation terminates (either
312 -- normally or via an exception), so the pointer to the temporary
313 -- storage must /not/ be used after this.
314 --
315 withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
316 withCAStringLen str f =
317 #ifndef __GLASGOW_HASKELL__
318 withArrayLen (charsToCChars str) $ \ len ptr -> f (ptr, len)
319 #else
320 allocaArray len $ \ptr ->
321 let
322 go [] n = n `seq` return () -- make it strict in n
323 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
324 in do
325 go str 0
326 f (ptr,len)
327 where
328 len = length str
329 #endif
330
331 -- auxiliary definitions
332 -- ----------------------
333
334 -- C's end of string character
335 --
336 nUL :: CChar
337 nUL = 0
338
339 -- allocate an array to hold the list and pair it with the number of elements
340 newArrayLen :: Storable a => [a] -> IO (Ptr a, Int)
341 newArrayLen xs = do
342 a <- newArray xs
343 return (a, length xs)
344
345 #ifndef __GLASGOW_HASKELL__
346 -- cast [CChar] to [Char]
347 --
348 cCharsToChars :: [CChar] -> [Char]
349 cCharsToChars xs = map castCCharToChar xs
350
351 -- cast [Char] to [CChar]
352 --
353 charsToCChars :: [Char] -> [CChar]
354 charsToCChars xs = map castCharToCChar xs
355 #endif
356
357 -----------------------------------------------------------------------------
358 -- Wide strings
359
360 -- representation of wide strings in C
361 -- -----------------------------------
362
363 -- | A C wide string is a reference to an array of C wide characters
364 -- terminated by NUL.
365 type CWString = Ptr CWchar
366
367 -- | A wide character string with explicit length information in 'CWchar's
368 -- instead of a terminating NUL (allowing NUL characters in the middle
369 -- of the string).
370 type CWStringLen = (Ptr CWchar, Int)
371
372 -- | Marshal a NUL terminated C wide string into a Haskell string.
373 --
374 peekCWString :: CWString -> IO String
375 peekCWString cp = do
376 cs <- peekArray0 wNUL cp
377 return (cWcharsToChars cs)
378
379 -- | Marshal a C wide string with explicit length into a Haskell string.
380 --
381 peekCWStringLen :: CWStringLen -> IO String
382 peekCWStringLen (cp, len) = do
383 cs <- peekArray len cp
384 return (cWcharsToChars cs)
385
386 -- | Marshal a Haskell string into a NUL terminated C wide string.
387 --
388 -- * the Haskell string may /not/ contain any NUL characters
389 --
390 -- * new storage is allocated for the C wide string and must
391 -- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
392 -- 'Foreign.Marshal.Alloc.finalizerFree'.
393 --
394 newCWString :: String -> IO CWString
395 newCWString = newArray0 wNUL . charsToCWchars
396
397 -- | Marshal a Haskell string into a C wide string (ie, wide character array)
398 -- with explicit length information.
399 --
400 -- * new storage is allocated for the C wide string and must
401 -- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
402 -- 'Foreign.Marshal.Alloc.finalizerFree'.
403 --
404 newCWStringLen :: String -> IO CWStringLen
405 newCWStringLen str = newArrayLen (charsToCWchars str)
406
407 -- | Marshal a Haskell string into a NUL terminated C wide string using
408 -- temporary storage.
409 --
410 -- * the Haskell string may /not/ contain any NUL characters
411 --
412 -- * the memory is freed when the subcomputation terminates (either
413 -- normally or via an exception), so the pointer to the temporary
414 -- storage must /not/ be used after this.
415 --
416 withCWString :: String -> (CWString -> IO a) -> IO a
417 withCWString = withArray0 wNUL . charsToCWchars
418
419 -- | Marshal a Haskell string into a NUL terminated C wide string using
420 -- temporary storage.
421 --
422 -- * the Haskell string may /not/ contain any NUL characters
423 --
424 -- * the memory is freed when the subcomputation terminates (either
425 -- normally or via an exception), so the pointer to the temporary
426 -- storage must /not/ be used after this.
427 --
428 withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
429 withCWStringLen str f =
430 withArrayLen (charsToCWchars str) $ \ len ptr -> f (ptr, len)
431
432 -- auxiliary definitions
433 -- ----------------------
434
435 wNUL :: CWchar
436 wNUL = 0
437
438 cWcharsToChars :: [CWchar] -> [Char]
439 charsToCWchars :: [Char] -> [CWchar]
440
441 #ifdef mingw32_HOST_OS
442
443 -- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
444
445 -- coding errors generate Chars in the surrogate range
446 cWcharsToChars = map chr . fromUTF16 . map fromIntegral
447 where
448 fromUTF16 (c1:c2:wcs)
449 | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
450 ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
451 fromUTF16 (c:wcs) = c : fromUTF16 wcs
452 fromUTF16 [] = []
453
454 charsToCWchars = foldr utf16Char [] . map ord
455 where
456 utf16Char c wcs
457 | c < 0x10000 = fromIntegral c : wcs
458 | otherwise = let c' = c - 0x10000 in
459 fromIntegral (c' `div` 0x400 + 0xd800) :
460 fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
461
462 #else /* !mingw32_HOST_OS */
463
464 cWcharsToChars xs = map castCWcharToChar xs
465 charsToCWchars xs = map castCharToCWchar xs
466
467 -- These conversions only make sense if __STDC_ISO_10646__ is defined
468 -- (meaning that wchar_t is ISO 10646, aka Unicode)
469
470 castCWcharToChar :: CWchar -> Char
471 castCWcharToChar ch = chr (fromIntegral ch )
472
473 castCharToCWchar :: Char -> CWchar
474 castCharToCWchar ch = fromIntegral (ord ch)
475
476 #endif /* !mingw32_HOST_OS */