[project @ 2004-06-22 12:45:55 by ross]
[packages/old-time.git] / Foreign / C / String.hs
1 {-# OPTIONS -fno-implicit-prelude #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Foreign.C.String
5 -- Copyright : (c) The FFI task force 2001
6 -- License : BSD-style (see the file libraries/base/LICENSE)
7 --
8 -- Maintainer : ffi@haskell.org
9 -- Stability : provisional
10 -- Portability : portable
11 --
12 -- Utilities for primitive marshalling of C strings.
13 --
14 -- The marshalling converts each Haskell character, representing a Unicode
15 -- code point, to one or more bytes in a manner that, by default, is
16 -- determined by the current locale. As a consequence, no guarantees
17 -- can be made about the relative length of a Haskell string and its
18 -- corresponding C string, and therefore all the marshalling routines
19 -- include memory allocation. The translation between Unicode and the
20 -- encoding of the current locale may be lossy.
21 --
22 -----------------------------------------------------------------------------
23
24 module Foreign.C.String ( -- representation of strings in C
25
26 -- * C strings
27
28 CString, -- = Ptr CChar
29 CStringLen, -- = (Ptr CChar, Int)
30
31 -- ** Using a locale-dependent encoding
32
33 -- | Currently these functions are identical to their @CAString@ counterparts;
34 -- eventually they will use an encoding determined by the current locale.
35
36 -- conversion of C strings into Haskell strings
37 --
38 peekCString, -- :: CString -> IO String
39 peekCStringLen, -- :: CStringLen -> IO String
40
41 -- conversion of Haskell strings into C strings
42 --
43 newCString, -- :: String -> IO CString
44 newCStringLen, -- :: String -> IO CStringLen
45
46 -- conversion of Haskell strings into C strings using temporary storage
47 --
48 withCString, -- :: String -> (CString -> IO a) -> IO a
49 withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
50
51 charIsRepresentable, -- :: Char -> IO Bool
52
53 -- ** Using 8-bit characters
54
55 -- | These variants of the above functions are for use with C libraries
56 -- that are ignorant of Unicode. These functions should be used with
57 -- care, as a loss of information can occur.
58
59 castCharToCChar, -- :: Char -> CChar
60 castCCharToChar, -- :: CChar -> Char
61
62 peekCAString, -- :: CString -> IO String
63 peekCAStringLen, -- :: CStringLen -> IO String
64 newCAString, -- :: String -> IO CString
65 newCAStringLen, -- :: String -> IO CStringLen
66 withCAString, -- :: String -> (CString -> IO a) -> IO a
67 withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
68
69 -- * C wide strings
70
71 -- | These variants of the above functions are for use with C libraries
72 -- that encode Unicode using the C @wchar_t@ type in a system-dependent
73 -- way. The only encodings supported are
74 --
75 -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
76 --
77 -- * UTF-16 (as used on Windows systems).
78
79 CWString, -- = Ptr CWchar
80 CWStringLen, -- = (Ptr CWchar, Int)
81
82 peekCWString, -- :: CWString -> IO String
83 peekCWStringLen, -- :: CWStringLen -> IO String
84 newCWString, -- :: String -> IO CWString
85 newCWStringLen, -- :: String -> IO CWStringLen
86 withCWString, -- :: String -> (CWString -> IO a) -> IO a
87 withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
88
89 ) where
90
91 import Foreign.Marshal.Array
92 import Foreign.C.Types
93 import Foreign.Ptr
94 import Foreign.Storable
95
96 import Data.Word
97
98 #ifdef __GLASGOW_HASKELL__
99 import GHC.List
100 import GHC.Real
101 import GHC.Num
102 import GHC.IOBase
103 import GHC.Base
104 #else
105 import Data.Char ( chr, ord )
106 #define unsafeChr chr
107 #endif
108
109 -----------------------------------------------------------------------------
110 -- Strings
111
112 -- representation of strings in C
113 -- ------------------------------
114
115 -- | A C string is a reference to an array of C characters terminated by NUL.
116 type CString = Ptr CChar
117
118 -- | A string with explicit length information in bytes instead of a
119 -- terminating NUL (allowing NUL characters in the middle of the string).
120 type CStringLen = (Ptr CChar, Int)
121
122 -- exported functions
123 -- ------------------
124 --
125 -- * the following routines apply the default conversion when converting the
126 -- C-land character encoding into the Haskell-land character encoding
127
128 -- | Marshal a NUL terminated C string into a Haskell string.
129 --
130 peekCString :: CString -> IO String
131 peekCString = peekCAString
132
133 -- | Marshal a C string with explicit length into a Haskell string.
134 --
135 peekCStringLen :: CStringLen -> IO String
136 peekCStringLen = peekCAStringLen
137
138 -- | Marshal a Haskell string into a NUL terminated C string.
139 --
140 -- * the Haskell string may /not/ contain any NUL characters
141 --
142 -- * new storage is allocated for the C string and must be explicitly freed
143 --
144 newCString :: String -> IO CString
145 newCString = newCAString
146
147 -- | Marshal a Haskell string into a C string (ie, character array) with
148 -- explicit length information.
149 --
150 -- * new storage is allocated for the C string and must be explicitly freed
151 --
152 newCStringLen :: String -> IO CStringLen
153 newCStringLen = newCAStringLen
154
155 -- | Marshal a Haskell string into a NUL terminated C string using temporary
156 -- storage.
157 --
158 -- * the Haskell string may /not/ contain any NUL characters
159 --
160 -- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
161 --
162 withCString :: String -> (CString -> IO a) -> IO a
163 withCString = withCAString
164
165 -- | Marshal a Haskell string into a NUL terminated C string using temporary
166 -- storage.
167 --
168 -- * the Haskell string may /not/ contain any NUL characters
169 --
170 -- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
171 --
172 withCStringLen :: String -> (CStringLen -> IO a) -> IO a
173 withCStringLen = withCAStringLen
174
175 -- | Determines whether a character can be accurately encoded in a 'CString'.
176 -- Unrepresentable characters are converted to @\'?\'@.
177 --
178 -- Currently only Latin-1 characters are representable.
179 charIsRepresentable :: Char -> IO Bool
180 charIsRepresentable c = return (ord c < 256)
181
182 -- single byte characters
183 -- ----------------------
184 --
185 -- ** NOTE: These routines don't handle conversions! **
186
187 -- | Convert a C byte, representing a Latin-1 character, to the corresponding
188 -- Haskell character.
189 castCCharToChar :: CChar -> Char
190 castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
191
192 -- | Convert a Haskell character to a C character.
193 -- This function is only safe on the first 256 characters.
194 castCharToCChar :: Char -> CChar
195 castCharToCChar ch = fromIntegral (ord ch)
196
197 -- | Marshal a NUL terminated C string into a Haskell string.
198 --
199 peekCAString :: CString -> IO String
200 #ifndef __GLASGOW_HASKELL__
201 peekCAString cp = do
202 cs <- peekArray0 nUL cp
203 return (cCharsToChars cs)
204 #else
205 peekCAString cp = do
206 l <- lengthArray0 nUL cp
207 if l <= 0 then return "" else loop "" (l-1)
208 where
209 loop s i = do
210 xval <- peekElemOff cp i
211 let val = castCCharToChar xval
212 val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
213 #endif
214
215 -- | Marshal a C string with explicit length into a Haskell string.
216 --
217 peekCAStringLen :: CStringLen -> IO String
218 #ifndef __GLASGOW_HASKELL__
219 peekCAStringLen (cp, len) = do
220 cs <- peekArray len cp
221 return (cCharsToChars cs)
222 #else
223 peekCAStringLen (cp, len)
224 | len <= 0 = return "" -- being (too?) nice.
225 | otherwise = loop [] (len-1)
226 where
227 loop acc i = do
228 xval <- peekElemOff cp i
229 let val = castCCharToChar xval
230 -- blow away the coercion ASAP.
231 if (val `seq` (i == 0))
232 then return (val:acc)
233 else loop (val:acc) (i-1)
234 #endif
235
236 -- | Marshal a Haskell string into a NUL terminated C string.
237 --
238 -- * the Haskell string may /not/ contain any NUL characters
239 --
240 -- * new storage is allocated for the C string and must be explicitly freed
241 --
242 newCAString :: String -> IO CString
243 #ifndef __GLASGOW_HASKELL__
244 newCAString = newArray0 nUL . charsToCChars
245 #else
246 newCAString str = do
247 ptr <- mallocArray0 (length str)
248 let
249 go [] n = pokeElemOff ptr n nUL
250 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
251 go str 0
252 return ptr
253 #endif
254
255 -- | Marshal a Haskell string into a C string (ie, character array) with
256 -- explicit length information.
257 --
258 -- * new storage is allocated for the C string and must be explicitly freed
259 --
260 newCAStringLen :: String -> IO CStringLen
261 #ifndef __GLASGOW_HASKELL__
262 newCAStringLen str = do
263 a <- newArray (charsToCChars str)
264 return (pairLength str a)
265 #else
266 newCAStringLen str = do
267 ptr <- mallocArray0 len
268 let
269 go [] n = n `seq` return () -- make it strict in n
270 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
271 go str 0
272 return (ptr, len)
273 where
274 len = length str
275 #endif
276
277 -- | Marshal a Haskell string into a NUL terminated C string using temporary
278 -- storage.
279 --
280 -- * the Haskell string may /not/ contain any NUL characters
281 --
282 -- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
283 --
284 withCAString :: String -> (CString -> IO a) -> IO a
285 #ifndef __GLASGOW_HASKELL__
286 withCAString = withArray0 nUL . charsToCChars
287 #else
288 withCAString str f =
289 allocaArray0 (length str) $ \ptr ->
290 let
291 go [] n = pokeElemOff ptr n nUL
292 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
293 in do
294 go str 0
295 f ptr
296 #endif
297
298 -- | Marshal a Haskell string into a NUL terminated C string using temporary
299 -- storage.
300 --
301 -- * the Haskell string may /not/ contain any NUL characters
302 --
303 -- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
304 --
305 withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
306 #ifndef __GLASGOW_HASKELL__
307 withCAStringLen str act = withArray (charsToCChars str) $ act . pairLength str
308 #else
309 withCAStringLen str f =
310 allocaArray len $ \ptr ->
311 let
312 go [] n = n `seq` return () -- make it strict in n
313 go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
314 in do
315 go str 0
316 f (ptr,len)
317 where
318 len = length str
319 #endif
320
321 -- auxiliary definitions
322 -- ----------------------
323
324 -- C's end of string character
325 --
326 nUL :: CChar
327 nUL = 0
328
329 -- pair a C string with the length of the given Haskell string
330 --
331 pairLength :: String -> a -> (a, Int)
332 pairLength = flip (,) . length
333
334 #ifndef __GLASGOW_HASKELL__
335 -- cast [CChar] to [Char]
336 --
337 cCharsToChars :: [CChar] -> [Char]
338 cCharsToChars xs = map castCCharToChar xs
339
340 -- cast [Char] to [CChar]
341 --
342 charsToCChars :: [Char] -> [CChar]
343 charsToCChars xs = map castCharToCChar xs
344 #endif
345
346 -----------------------------------------------------------------------------
347 -- Wide strings
348
349 -- representation of wide strings in C
350 -- -----------------------------------
351
352 -- | A C wide string is a reference to an array of C wide characters
353 -- terminated by NUL.
354 type CWString = Ptr CWchar
355
356 -- | A wide character string with explicit length information in bytes
357 -- instead of a terminating NUL (allowing NUL characters in the middle
358 -- of the string).
359 type CWStringLen = (Ptr CWchar, Int)
360
361 -- | Marshal a NUL terminated C wide string into a Haskell string.
362 --
363 peekCWString :: CWString -> IO String
364 peekCWString cp = do
365 cs <- peekArray0 wNUL cp
366 return (cWcharsToChars cs)
367
368 -- | Marshal a C wide string with explicit length into a Haskell string.
369 --
370 peekCWStringLen :: CWStringLen -> IO String
371 peekCWStringLen (cp, len) = do
372 cs <- peekArray len cp
373 return (cWcharsToChars cs)
374
375 -- | Marshal a Haskell string into a NUL terminated C wide string.
376 --
377 -- * the Haskell string may /not/ contain any NUL characters
378 --
379 -- * new storage is allocated for the C string and must be explicitly freed
380 --
381 newCWString :: String -> IO CWString
382 newCWString = newArray0 wNUL . charsToCWchars
383
384 -- | Marshal a Haskell string into a C wide string (ie, wide character array)
385 -- with explicit length information.
386 --
387 -- * new storage is allocated for the C string and must be explicitly freed
388 --
389 newCWStringLen :: String -> IO CWStringLen
390 newCWStringLen str = do
391 a <- newArray (charsToCWchars str)
392 return (pairLength str a)
393
394 -- | Marshal a Haskell string into a NUL terminated C wide string using
395 -- temporary storage.
396 --
397 -- * the Haskell string may /not/ contain any NUL characters
398 --
399 -- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
400 --
401 withCWString :: String -> (CWString -> IO a) -> IO a
402 withCWString = withArray0 wNUL . charsToCWchars
403
404 -- | Marshal a Haskell string into a NUL terminated C wide string using
405 -- temporary storage.
406 --
407 -- * the Haskell string may /not/ contain any NUL characters
408 --
409 -- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
410 --
411 withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
412 withCWStringLen str act = withArray (charsToCWchars str) $ act . pairLength str
413
414 -- auxiliary definitions
415 -- ----------------------
416
417 wNUL :: CWchar
418 wNUL = 0
419
420 cWcharsToChars :: [CWchar] -> [Char]
421 charsToCWchars :: [Char] -> [CWchar]
422
423 #ifdef mingw32_TARGET_OS
424
425 -- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
426
427 -- coding errors generate Chars in the surrogate range
428 cWcharsToChars = map chr . fromUTF16 . map fromIntegral
429 where
430 fromUTF16 (c1:c2:wcs)
431 | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
432 ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
433 fromUTF16 (c:wcs) = c : fromUTF16 wcs
434 fromUTF16 [] = []
435
436 charsToCWchars = foldr utf16Char [] . map ord
437 where
438 utf16Char c wcs
439 | c < 0x10000 = fromIntegral c : wcs
440 | otherwise = let c' = c - 0x10000 in
441 fromIntegral (c' `div` 0x400 + 0xd800) :
442 fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
443
444 #else /* !mingw32_TARGET_OS */
445
446 cWcharsToChars xs = map castCWcharToChar xs
447 charsToCWchars xs = map castCharToCWchar xs
448
449 -- These conversions only make sense if __STDC_ISO_10646__ is defined
450 -- (meaning that wchar_t is ISO 10646, aka Unicode)
451
452 castCWcharToChar :: CWchar -> Char
453 castCWcharToChar ch = chr (fromIntegral ch )
454
455 castCharToCWchar :: Char -> CWchar
456 castCharToCWchar ch = fromIntegral (ord ch)
457
458 #endif /* !mingw32_TARGET_OS */