Use explicit language extensions & remove extension fields from base.cabal
[packages/base.git] / Foreign / C / String.hs
1 {-# LANGUAGE CPP, NoImplicitPrelude #-}
2
3 -----------------------------------------------------------------------------
4 -- |
5 -- Module : Foreign.C.String
6 -- Copyright : (c) The FFI task force 2001
7 -- License : BSD-style (see the file libraries/base/LICENSE)
8 --
9 -- Maintainer : ffi@haskell.org
10 -- Stability : provisional
11 -- Portability : portable
12 --
13 -- Utilities for primitive marshalling of C strings.
14 --
15 -- The marshalling converts each Haskell character, representing a Unicode
16 -- code point, to one or more bytes in a manner that, by default, is
17 -- determined by the current locale. As a consequence, no guarantees
18 -- can be made about the relative length of a Haskell string and its
19 -- corresponding C string, and therefore all the marshalling routines
20 -- include memory allocation. The translation between Unicode and the
21 -- encoding of the current locale may be lossy.
22 --
23 -----------------------------------------------------------------------------
24
25 module Foreign.C.String ( -- representation of strings in C
26
27 -- * C strings
28
29 CString, -- = Ptr CChar
30 CStringLen, -- = (Ptr CChar, Int)
31
32 -- ** Using a locale-dependent encoding
33
34 -- | Currently these functions are identical to their @CAString@ counterparts;
35 -- eventually they will use an encoding determined by the current locale.
36
37 -- conversion of C strings into Haskell strings
38 --
39 peekCString, -- :: CString -> IO String
40 peekCStringLen, -- :: CStringLen -> IO String
41
42 -- conversion of Haskell strings into C strings
43 --
44 newCString, -- :: String -> IO CString
45 newCStringLen, -- :: String -> IO CStringLen
46
47 -- conversion of Haskell strings into C strings using temporary storage
48 --
49 withCString, -- :: String -> (CString -> IO a) -> IO a
50 withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
51
52 charIsRepresentable, -- :: Char -> IO Bool
53
54 -- ** Using 8-bit characters
55
56 -- | These variants of the above functions are for use with C libraries
57 -- that are ignorant of Unicode. These functions should be used with
58 -- care, as a loss of information can occur.
59
60 castCharToCChar, -- :: Char -> CChar
61 castCCharToChar, -- :: CChar -> Char
62
63 castCharToCUChar, -- :: Char -> CUChar
64 castCUCharToChar, -- :: CUChar -> Char
65 castCharToCSChar, -- :: Char -> CSChar
66 castCSCharToChar, -- :: CSChar -> Char
67
68 peekCAString, -- :: CString -> IO String
69 peekCAStringLen, -- :: CStringLen -> IO String
70 newCAString, -- :: String -> IO CString
71 newCAStringLen, -- :: String -> IO CStringLen
72 withCAString, -- :: String -> (CString -> IO a) -> IO a
73 withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
74
75 -- * C wide strings
76
77 -- | These variants of the above functions are for use with C libraries
78 -- that encode Unicode using the C @wchar_t@ type in a system-dependent
79 -- way. The only encodings supported are
80 --
81 -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
82 --
83 -- * UTF-16 (as used on Windows systems).
84
85 CWString, -- = Ptr CWchar
86 CWStringLen, -- = (Ptr CWchar, Int)
87
88 peekCWString, -- :: CWString -> IO String
89 peekCWStringLen, -- :: CWStringLen -> IO String
90 newCWString, -- :: String -> IO CWString
91 newCWStringLen, -- :: String -> IO CWStringLen
92 withCWString, -- :: String -> (CWString -> IO a) -> IO a
93 withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
94
95 ) where
96
97 import Foreign.Marshal.Array
98 import Foreign.C.Types
99 import Foreign.Ptr
100 import Foreign.Storable
101
102 import Data.Word
103
104 #ifdef __GLASGOW_HASKELL__
105 import GHC.List
106 import GHC.Real
107 import GHC.Num
108 import GHC.Base
109 #else
110 import Data.Char ( chr, ord )
111 #define unsafeChr chr
112 #endif
113
114 -----------------------------------------------------------------------------
115 -- Strings
116
117 -- representation of strings in C
118 -- ------------------------------
119
120 -- | A C string is a reference to an array of C characters terminated by NUL.
121 type CString = Ptr CChar
122
123 -- | A string with explicit length information in bytes instead of a
124 -- terminating NUL (allowing NUL characters in the middle of the string).
125 type CStringLen = (Ptr CChar, Int)
126
127 -- exported functions
128 -- ------------------
129 --
130 -- * the following routines apply the default conversion when converting the
131 -- C-land character encoding into the Haskell-land character encoding
132
133 -- | Marshal a NUL terminated C string into a Haskell string.
134 --
135 peekCString :: CString -> IO String
136 peekCString = peekCAString
137
138 -- | Marshal a C string with explicit length into a Haskell string.
139 --
140 peekCStringLen :: CStringLen -> IO String
141 peekCStringLen = peekCAStringLen
142
143 -- | Marshal a Haskell string into a NUL terminated C string.
144 --
145 -- * the Haskell string may /not/ contain any NUL characters
146 --
147 -- * new storage is allocated for the C string and must be
148 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
149 -- 'Foreign.Marshal.Alloc.finalizerFree'.
150 --
151 newCString :: String -> IO CString
152 newCString = newCAString
153
154 -- | Marshal a Haskell string into a C string (ie, character array) with
155 -- explicit length information.
156 --
157 -- * new storage is allocated for the C string and must be
158 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
159 -- 'Foreign.Marshal.Alloc.finalizerFree'.
160 --
161 newCStringLen :: String -> IO CStringLen
162 newCStringLen = newCAStringLen
163
164 -- | Marshal a Haskell string into a NUL terminated C string using temporary
165 -- storage.
166 --
167 -- * the Haskell string may /not/ contain any NUL characters
168 --
169 -- * the memory is freed when the subcomputation terminates (either
170 -- normally or via an exception), so the pointer to the temporary
171 -- storage must /not/ be used after this.
172 --
173 withCString :: String -> (CString -> IO a) -> IO a
174 withCString = withCAString
175
176 -- | Marshal a Haskell string into a C string (ie, character array)
177 -- in temporary storage, with explicit length information.
178 --
179 -- * the memory is freed when the subcomputation terminates (either
180 -- normally or via an exception), so the pointer to the temporary
181 -- storage must /not/ be used after this.
182 --
183 withCStringLen :: String -> (CStringLen -> IO a) -> IO a
184 withCStringLen = withCAStringLen
185
186 -- | Determines whether a character can be accurately encoded in a 'CString'.
187 -- Unrepresentable characters are converted to @\'?\'@.
188 --
189 -- Currently only Latin-1 characters are representable.
190 charIsRepresentable :: Char -> IO Bool
191 charIsRepresentable c = return (ord c < 256)
192
193 -- single byte characters
194 -- ----------------------
195 --
196 -- ** NOTE: These routines don't handle conversions! **
197
198 -- | Convert a C byte, representing a Latin-1 character, to the corresponding
199 -- Haskell character.
200 castCCharToChar :: CChar -> Char
201 castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
202
203 -- | Convert a Haskell character to a C character.
204 -- This function is only safe on the first 256 characters.
205 castCharToCChar :: Char -> CChar
206 castCharToCChar ch = fromIntegral (ord ch)
207
208 -- | Convert a C @unsigned char@, representing a Latin-1 character, to
209 -- the corresponding Haskell character.
210 castCUCharToChar :: CUChar -> Char
211 castCUCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
212
213 -- | Convert a Haskell character to a C @unsigned char@.
214 -- This function is only safe on the first 256 characters.
215 castCharToCUChar :: Char -> CUChar
216 castCharToCUChar ch = fromIntegral (ord ch)
217
218 -- | Convert a C @signed char@, representing a Latin-1 character, to the
219 -- corresponding Haskell character.
220 castCSCharToChar :: CSChar -> Char
221 castCSCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
222
223 -- | Convert a Haskell character to a C @signed char@.
224 -- This function is only safe on the first 256 characters.
225 castCharToCSChar :: Char -> CSChar
226 castCharToCSChar ch = fromIntegral (ord ch)
227
228 -- | Marshal a NUL terminated C string into a Haskell string.
229 --
230 peekCAString :: CString -> IO String
231 #ifndef __GLASGOW_HASKELL__
232 peekCAString cp = do
233 cs <- peekArray0 nUL cp
234 return (cCharsToChars cs)
235 #else
236 peekCAString cp = do
237 l <- lengthArray0 nUL cp
238 if l <= 0 then return "" else loop "" (l-1)
239 where
240 loop s i = do
241 xval <- peekElemOff cp i
242 let val = castCCharToChar xval
243 val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
244 #endif
245
246 -- | Marshal a C string with explicit length into a Haskell string.
247 --
248 peekCAStringLen :: CStringLen -> IO String
249 #ifndef __GLASGOW_HASKELL__
250 peekCAStringLen (cp, len) = do
251 cs <- peekArray len cp
252 return (cCharsToChars cs)
253 #else
254 peekCAStringLen (cp, len)
255 | len <= 0 = return "" -- being (too?) nice.
256 | otherwise = loop [] (len-1)
257 where
258 loop acc i = do
259 xval <- peekElemOff cp i
260 let val = castCCharToChar xval
261 -- blow away the coercion ASAP.
262 if (val `seq` (i == 0))
263 then return (val:acc)
264 else loop (val:acc) (i-1)
265 #endif
266
267 -- | Marshal a Haskell string into a NUL terminated C string.
268 --
269 -- * the Haskell string may /not/ contain any NUL characters
270 --
271 -- * new storage is allocated for the C string and must be
272 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
273 -- 'Foreign.Marshal.Alloc.finalizerFree'.
274 --
275 newCAString :: String -> IO CString
276 #ifndef __GLASGOW_HASKELL__
277 newCAString = newArray0 nUL . charsToCChars
278 #else
279 newCAString str = do
280 ptr <- mallocArray0 (length str)
281 let
282 go [] n = pokeElemOff ptr n nUL
283 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
284 go str 0
285 return ptr
286 #endif
287
288 -- | Marshal a Haskell string into a C string (ie, character array) with
289 -- explicit length information.
290 --
291 -- * new storage is allocated for the C string and must be
292 -- explicitly freed using 'Foreign.Marshal.Alloc.free' or
293 -- 'Foreign.Marshal.Alloc.finalizerFree'.
294 --
295 newCAStringLen :: String -> IO CStringLen
296 #ifndef __GLASGOW_HASKELL__
297 newCAStringLen str = newArrayLen (charsToCChars str)
298 #else
299 newCAStringLen str = do
300 ptr <- mallocArray0 len
301 let
302 go [] n = n `seq` return () -- make it strict in n
303 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
304 go str 0
305 return (ptr, len)
306 where
307 len = length str
308 #endif
309
310 -- | Marshal a Haskell string into a NUL terminated C string using temporary
311 -- storage.
312 --
313 -- * the Haskell string may /not/ contain any NUL characters
314 --
315 -- * the memory is freed when the subcomputation terminates (either
316 -- normally or via an exception), so the pointer to the temporary
317 -- storage must /not/ be used after this.
318 --
319 withCAString :: String -> (CString -> IO a) -> IO a
320 #ifndef __GLASGOW_HASKELL__
321 withCAString = withArray0 nUL . charsToCChars
322 #else
323 withCAString str f =
324 allocaArray0 (length str) $ \ptr ->
325 let
326 go [] n = pokeElemOff ptr n nUL
327 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
328 in do
329 go str 0
330 f ptr
331 #endif
332
333 -- | Marshal a Haskell string into a C string (ie, character array)
334 -- in temporary storage, with explicit length information.
335 --
336 -- * the memory is freed when the subcomputation terminates (either
337 -- normally or via an exception), so the pointer to the temporary
338 -- storage must /not/ be used after this.
339 --
340 withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
341 withCAStringLen str f =
342 #ifndef __GLASGOW_HASKELL__
343 withArrayLen (charsToCChars str) $ \ len ptr -> f (ptr, len)
344 #else
345 allocaArray len $ \ptr ->
346 let
347 go [] n = n `seq` return () -- make it strict in n
348 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
349 in do
350 go str 0
351 f (ptr,len)
352 where
353 len = length str
354 #endif
355
356 -- auxiliary definitions
357 -- ----------------------
358
359 -- C's end of string character
360 --
361 nUL :: CChar
362 nUL = 0
363
364 -- allocate an array to hold the list and pair it with the number of elements
365 newArrayLen :: Storable a => [a] -> IO (Ptr a, Int)
366 newArrayLen xs = do
367 a <- newArray xs
368 return (a, length xs)
369
370 #ifndef __GLASGOW_HASKELL__
371 -- cast [CChar] to [Char]
372 --
373 cCharsToChars :: [CChar] -> [Char]
374 cCharsToChars xs = map castCCharToChar xs
375
376 -- cast [Char] to [CChar]
377 --
378 charsToCChars :: [Char] -> [CChar]
379 charsToCChars xs = map castCharToCChar xs
380 #endif
381
382 -----------------------------------------------------------------------------
383 -- Wide strings
384
385 -- representation of wide strings in C
386 -- -----------------------------------
387
388 -- | A C wide string is a reference to an array of C wide characters
389 -- terminated by NUL.
390 type CWString = Ptr CWchar
391
392 -- | A wide character string with explicit length information in 'CWchar's
393 -- instead of a terminating NUL (allowing NUL characters in the middle
394 -- of the string).
395 type CWStringLen = (Ptr CWchar, Int)
396
397 -- | Marshal a NUL terminated C wide string into a Haskell string.
398 --
399 peekCWString :: CWString -> IO String
400 peekCWString cp = do
401 cs <- peekArray0 wNUL cp
402 return (cWcharsToChars cs)
403
404 -- | Marshal a C wide string with explicit length into a Haskell string.
405 --
406 peekCWStringLen :: CWStringLen -> IO String
407 peekCWStringLen (cp, len) = do
408 cs <- peekArray len cp
409 return (cWcharsToChars cs)
410
411 -- | Marshal a Haskell string into a NUL terminated C wide string.
412 --
413 -- * the Haskell string may /not/ contain any NUL characters
414 --
415 -- * new storage is allocated for the C wide string and must
416 -- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
417 -- 'Foreign.Marshal.Alloc.finalizerFree'.
418 --
419 newCWString :: String -> IO CWString
420 newCWString = newArray0 wNUL . charsToCWchars
421
422 -- | Marshal a Haskell string into a C wide string (ie, wide character array)
423 -- with explicit length information.
424 --
425 -- * new storage is allocated for the C wide string and must
426 -- be explicitly freed using 'Foreign.Marshal.Alloc.free' or
427 -- 'Foreign.Marshal.Alloc.finalizerFree'.
428 --
429 newCWStringLen :: String -> IO CWStringLen
430 newCWStringLen str = newArrayLen (charsToCWchars str)
431
432 -- | Marshal a Haskell string into a NUL terminated C wide string using
433 -- temporary storage.
434 --
435 -- * the Haskell string may /not/ contain any NUL characters
436 --
437 -- * the memory is freed when the subcomputation terminates (either
438 -- normally or via an exception), so the pointer to the temporary
439 -- storage must /not/ be used after this.
440 --
441 withCWString :: String -> (CWString -> IO a) -> IO a
442 withCWString = withArray0 wNUL . charsToCWchars
443
444 -- | Marshal a Haskell string into a NUL terminated C wide string using
445 -- temporary storage.
446 --
447 -- * the Haskell string may /not/ contain any NUL characters
448 --
449 -- * the memory is freed when the subcomputation terminates (either
450 -- normally or via an exception), so the pointer to the temporary
451 -- storage must /not/ be used after this.
452 --
453 withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
454 withCWStringLen str f =
455 withArrayLen (charsToCWchars str) $ \ len ptr -> f (ptr, len)
456
457 -- auxiliary definitions
458 -- ----------------------
459
460 wNUL :: CWchar
461 wNUL = 0
462
463 cWcharsToChars :: [CWchar] -> [Char]
464 charsToCWchars :: [Char] -> [CWchar]
465
466 #ifdef mingw32_HOST_OS
467
468 -- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
469
470 -- coding errors generate Chars in the surrogate range
471 cWcharsToChars = map chr . fromUTF16 . map fromIntegral
472 where
473 fromUTF16 (c1:c2:wcs)
474 | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
475 ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
476 fromUTF16 (c:wcs) = c : fromUTF16 wcs
477 fromUTF16 [] = []
478
479 charsToCWchars = foldr utf16Char [] . map ord
480 where
481 utf16Char c wcs
482 | c < 0x10000 = fromIntegral c : wcs
483 | otherwise = let c' = c - 0x10000 in
484 fromIntegral (c' `div` 0x400 + 0xd800) :
485 fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
486
487 #else /* !mingw32_HOST_OS */
488
489 cWcharsToChars xs = map castCWcharToChar xs
490 charsToCWchars xs = map castCharToCWchar xs
491
492 -- These conversions only make sense if __STDC_ISO_10646__ is defined
493 -- (meaning that wchar_t is ISO 10646, aka Unicode)
494
495 castCWcharToChar :: CWchar -> Char
496 castCWcharToChar ch = chr (fromIntegral ch )
497
498 castCharToCWchar :: Char -> CWchar
499 castCharToCWchar ch = fromIntegral (ord ch)
500
501 #endif /* !mingw32_HOST_OS */