add install-includes: field
[packages/pretty.git] / Data / ByteString / Base.hs
1 {-# OPTIONS_GHC -cpp -fglasgow-exts #-}
2 -- |
3 -- Module : Data.ByteString.Base
4 -- License : BSD-style
5 -- Maintainer : dons@cse.unsw.edu.au
6 -- Stability : experimental
7 -- Portability : portable
8 --
9 -- A module containing semi-public 'ByteString' internals. This exposes
10 -- the 'ByteString' representation and low level construction functions.
11 -- Modules which extend the 'ByteString' system will need to use this module
12 -- while ideally most users will be able to make do with the public interface
13 -- modules.
14 --
15 module Data.ByteString.Base (
16
17 -- * The @ByteString@ type and representation
18 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
19 LazyByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
20
21 -- * Unchecked access
22 unsafeHead, -- :: ByteString -> Word8
23 unsafeTail, -- :: ByteString -> ByteString
24 unsafeIndex, -- :: ByteString -> Int -> Word8
25 unsafeTake, -- :: Int -> ByteString -> ByteString
26 unsafeDrop, -- :: Int -> ByteString -> ByteString
27
28 -- * Low level introduction and elimination
29 empty, -- :: ByteString
30 create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
31 createAndTrim, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
32 createAndTrim', -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
33 mallocByteString, -- :: Int -> IO (ForeignPtr a)
34
35 unsafeCreate, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
36 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
37 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
38
39 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
40 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
41
42 #if defined(__GLASGOW_HASKELL__)
43 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
44 packAddress, -- :: Addr# -> ByteString
45 unsafePackAddress, -- :: Int -> Addr# -> ByteString
46 unsafeFinalize, -- :: ByteString -> IO ()
47 #endif
48
49 -- * Utilities
50 inlinePerformIO, -- :: IO a -> a
51 nullForeignPtr, -- :: ForeignPtr Word8
52
53 countOccurrences, -- :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO ()
54
55 -- * Standard C Functions
56 c_strlen, -- :: CString -> IO CInt
57 c_malloc, -- :: CInt -> IO (Ptr Word8)
58 c_free, -- :: Ptr Word8 -> IO ()
59 c_free_finalizer, -- :: FunPtr (Ptr Word8 -> IO ())
60
61 memchr, -- :: Ptr Word8 -> Word8 -> CSize -> IO Ptr Word8
62 memcmp, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
63 memcpy, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
64 memmove, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
65 memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
66
67 -- * cbits functions
68 c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
69 c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
70 c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8
71 c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8
72 c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt
73
74 -- * Internal GHC magic
75 #if defined(__GLASGOW_HASKELL__)
76 memcpy_ptr_baoff, -- :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
77 #endif
78
79 -- * Chars
80 w2c, c2w, isSpaceWord8
81
82 ) where
83
84 import Foreign.ForeignPtr (ForeignPtr, newForeignPtr_, withForeignPtr)
85 import Foreign.Ptr (Ptr, FunPtr, plusPtr, castPtr)
86 import Foreign.Storable (Storable(..))
87 import Foreign.C.Types (CInt, CSize, CULong)
88 import Foreign.C.String (CString, CStringLen)
89
90 import Control.Exception (assert)
91
92 import Data.Char (ord)
93 import Data.Word (Word8)
94
95 #if defined(__GLASGOW_HASKELL__)
96 import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr)
97 import qualified Foreign.Concurrent as FC (newForeignPtr)
98
99 import Data.Generics (Data(..), Typeable(..))
100 import GHC.Prim (Addr#)
101 import GHC.Ptr (Ptr(..))
102 import GHC.Base (realWorld#,unsafeChr)
103 import GHC.IOBase (IO(IO), unsafePerformIO, RawBuffer)
104 #else
105 import Data.Char (chr)
106 import System.IO.Unsafe (unsafePerformIO)
107 #endif
108
109 #if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR)
110 import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
111 #else
112 import Foreign.ForeignPtr (mallocForeignPtrBytes)
113 #endif
114
115 #if __GLASGOW_HASKELL__>=605
116 import GHC.ForeignPtr (ForeignPtr(ForeignPtr))
117 import GHC.Base (nullAddr#)
118 #else
119 import Foreign.Ptr (nullPtr)
120 #endif
121
122 -- CFILES stuff is Hugs only
123 {-# CFILES cbits/fpstring.c #-}
124
125 -- -----------------------------------------------------------------------------
126 --
127 -- Useful macros, until we have bang patterns
128 --
129
130 #define STRICT1(f) f a | a `seq` False = undefined
131 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
132 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
133 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
134 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
135
136 -- -----------------------------------------------------------------------------
137
138 -- | A space-efficient representation of a Word8 vector, supporting many
139 -- efficient operations. A 'ByteString' contains 8-bit characters only.
140 --
141 -- Instances of Eq, Ord, Read, Show, Data, Typeable
142 --
143 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
144 {-# UNPACK #-} !Int -- offset
145 {-# UNPACK #-} !Int -- length
146
147 #if defined(__GLASGOW_HASKELL__)
148 deriving (Data, Typeable)
149 #endif
150
151 instance Show ByteString where
152 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
153
154 instance Read ByteString where
155 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
156
157 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
158 unpackWith :: (Word8 -> a) -> ByteString -> [a]
159 unpackWith _ (PS _ _ 0) = []
160 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
161 go (p `plusPtr` s) (l - 1) []
162 where
163 STRICT3(go)
164 go p 0 acc = peek p >>= \e -> return (k e : acc)
165 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
166 {-# INLINE unpackWith #-}
167 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
168
169 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
170 -- conversion function
171 packWith :: (a -> Word8) -> [a] -> ByteString
172 packWith k str = unsafeCreate (length str) $ \p -> go p str
173 where
174 STRICT2(go)
175 go _ [] = return ()
176 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
177 {-# INLINE packWith #-}
178 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
179
180 ------------------------------------------------------------------------
181
182 -- | A space-efficient representation of a Word8 vector, supporting many
183 -- efficient operations. A 'ByteString' contains 8-bit characters only.
184 --
185 -- Instances of Eq, Ord, Read, Show, Data, Typeable
186 --
187 newtype LazyByteString = LPS [ByteString] -- LPS for lazy packed string
188 deriving (Show,Read
189 #if defined(__GLASGOW_HASKELL__)
190 ,Data, Typeable
191 #endif
192 )
193
194 ------------------------------------------------------------------------
195
196 -- | /O(1)/ The empty 'ByteString'
197 empty :: ByteString
198 empty = PS nullForeignPtr 0 0
199
200 nullForeignPtr :: ForeignPtr Word8
201 #if __GLASGOW_HASKELL__>=605
202 nullForeignPtr = ForeignPtr nullAddr# undefined --TODO: should ForeignPtrContents be strict?
203 #else
204 nullForeignPtr = unsafePerformIO $ newForeignPtr_ nullPtr
205 {-# NOINLINE nullForeignPtr #-}
206 #endif
207
208 -- ---------------------------------------------------------------------
209 --
210 -- Extensions to the basic interface
211 --
212
213 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
214 -- check for the empty case, so there is an obligation on the programmer
215 -- to provide a proof that the ByteString is non-empty.
216 unsafeHead :: ByteString -> Word8
217 unsafeHead (PS x s l) = assert (l > 0) $
218 inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
219 {-# INLINE unsafeHead #-}
220
221 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
222 -- check for the empty case. As with 'unsafeHead', the programmer must
223 -- provide a separate proof that the ByteString is non-empty.
224 unsafeTail :: ByteString -> ByteString
225 unsafeTail (PS ps s l) = assert (l > 0) $ PS ps (s+1) (l-1)
226 {-# INLINE unsafeTail #-}
227
228 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
229 -- This omits the bounds check, which means there is an accompanying
230 -- obligation on the programmer to ensure the bounds are checked in some
231 -- other way.
232 unsafeIndex :: ByteString -> Int -> Word8
233 unsafeIndex (PS x s l) i = assert (i >= 0 && i < l) $
234 inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
235 {-# INLINE unsafeIndex #-}
236
237 -- | A variety of 'take' which omits the checks on @n@ so there is an
238 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
239 unsafeTake :: Int -> ByteString -> ByteString
240 unsafeTake n (PS x s l) = assert (0 <= n && n <= l) $ PS x s n
241 {-# INLINE unsafeTake #-}
242
243 -- | A variety of 'drop' which omits the checks on @n@ so there is an
244 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
245 unsafeDrop :: Int -> ByteString -> ByteString
246 unsafeDrop n (PS x s l) = assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
247 {-# INLINE unsafeDrop #-}
248
249 -- ---------------------------------------------------------------------
250 -- Low level constructors
251
252 -- | /O(1)/ Build a ByteString from a ForeignPtr
253 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
254 fromForeignPtr fp l = PS fp 0 l
255
256 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
257 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
258 toForeignPtr (PS ps s l) = (ps, s, l)
259
260 -- | A way of creating ByteStrings outside the IO monad. The @Int@
261 -- argument gives the final size of the ByteString. Unlike
262 -- 'createAndTrim' the ByteString is not reallocated if the final size
263 -- is less than the estimated size.
264 unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString
265 unsafeCreate l f = unsafePerformIO (create l f)
266 {-# INLINE unsafeCreate #-}
267
268 -- | Create ByteString of size @l@ and use action @f@ to fill it's contents.
269 create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
270 create l f = do
271 fp <- mallocByteString l
272 withForeignPtr fp $ \p -> f p
273 return $! PS fp 0 l
274
275 -- | Given the maximum size needed and a function to make the contents
276 -- of a ByteString, createAndTrim makes the 'ByteString'. The generating
277 -- function is required to return the actual final size (<= the maximum
278 -- size), and the resulting byte array is realloced to this size.
279 --
280 -- createAndTrim is the main mechanism for creating custom, efficient
281 -- ByteString functions, using Haskell or C functions to fill the space.
282 --
283 createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
284 createAndTrim l f = do
285 fp <- mallocByteString l
286 withForeignPtr fp $ \p -> do
287 l' <- f p
288 if assert (l' <= l) $ l' >= l
289 then return $! PS fp 0 l
290 else create l' $ \p' -> memcpy p' p (fromIntegral l')
291
292 createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
293 createAndTrim' l f = do
294 fp <- mallocByteString l
295 withForeignPtr fp $ \p -> do
296 (off, l', res) <- f p
297 if assert (l' <= l) $ l' >= l
298 then return $! (PS fp 0 l, res)
299 else do ps <- create l' $ \p' ->
300 memcpy p' (p `plusPtr` off) (fromIntegral l')
301 return $! (ps, res)
302
303 -- | Wrapper of mallocForeignPtrBytes with faster implementation
304 -- for GHC 6.5 builds newer than 06/06/06
305 mallocByteString :: Int -> IO (ForeignPtr a)
306 mallocByteString l = do
307 #if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR)
308 mallocPlainForeignPtrBytes l
309 #else
310 mallocForeignPtrBytes l
311 #endif
312
313 #if defined(__GLASGOW_HASKELL__)
314 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
315 -- Addr\# (an arbitrary machine address assumed to point outside the
316 -- garbage-collected heap) into a @ByteString@. A much faster way to
317 -- create an Addr\# is with an unboxed string literal, than to pack a
318 -- boxed string. A unboxed string literal is compiled to a static @char
319 -- []@ by GHC. Establishing the length of the string requires a call to
320 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
321 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
322 -- if you know the length of the string statically.
323 --
324 -- An example:
325 --
326 -- > literalFS = packAddress "literal"#
327 --
328 packAddress :: Addr# -> ByteString
329 packAddress addr# = inlinePerformIO $ do
330 p <- newForeignPtr_ cstr
331 l <- c_strlen cstr
332 return $ PS p 0 (fromIntegral l)
333 where
334 cstr = Ptr addr#
335 {-# INLINE packAddress #-}
336
337 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
338 -- 'ByteStrings' -- which is ideal for string literals. It packs a
339 -- null-terminated sequence of bytes into a 'ByteString', given a raw
340 -- 'Addr\#' to the string, and the length of the string. Make sure the
341 -- length is correct, otherwise use the safer 'packAddress' (where the
342 -- length will be calculated once at runtime).
343 unsafePackAddress :: Int -> Addr# -> ByteString
344 unsafePackAddress len addr# = inlinePerformIO $ do
345 p <- newForeignPtr_ cstr
346 return $ PS p 0 len
347 where cstr = Ptr addr#
348
349 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
350 -- length, and an IO action representing a finalizer. This function is
351 -- not available on Hugs.
352 --
353 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
354 packCStringFinalizer p l f = do
355 fp <- FC.newForeignPtr p f
356 return $ PS fp 0 l
357
358 -- | Explicitly run the finaliser associated with a 'ByteString'.
359 -- Further references to this value may generate invalid memory
360 -- references. This operation is unsafe, as there may be other
361 -- 'ByteStrings' referring to the same underlying pages. If you use
362 -- this, you need to have a proof of some kind that all 'ByteString's
363 -- ever generated from the underlying byte array are no longer live.
364 unsafeFinalize :: ByteString -> IO ()
365 unsafeFinalize (PS p _ _) = FC.finalizeForeignPtr p
366
367 #endif
368
369 ------------------------------------------------------------------------
370
371 -- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
372 w2c :: Word8 -> Char
373 #if !defined(__GLASGOW_HASKELL__)
374 w2c = chr . fromIntegral
375 #else
376 w2c = unsafeChr . fromIntegral
377 #endif
378 {-# INLINE w2c #-}
379
380 -- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
381 -- silently truncates to 8 bits Chars > '\255'. It is provided as
382 -- convenience for ByteString construction.
383 c2w :: Char -> Word8
384 c2w = fromIntegral . ord
385 {-# INLINE c2w #-}
386
387 -- Selects white-space characters in the Latin-1 range
388 -- ordered by frequency
389 -- Idea from Ketil
390 isSpaceWord8 :: Word8 -> Bool
391 isSpaceWord8 w = case w of
392 0x20 -> True -- SPACE
393 0x0A -> True -- LF, \n
394 0x09 -> True -- HT, \t
395 0x0C -> True -- FF, \f
396 0x0D -> True -- CR, \r
397 0x0B -> True -- VT, \v
398 0xA0 -> True -- spotted by QC..
399 _ -> False
400 {-# INLINE isSpaceWord8 #-}
401
402 ------------------------------------------------------------------------
403 -- | Just like unsafePerformIO, but we inline it. Big performance gains as
404 -- it exposes lots of things to further inlining
405 --
406 {-# INLINE inlinePerformIO #-}
407 inlinePerformIO :: IO a -> a
408 #if defined(__GLASGOW_HASKELL__)
409 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
410 #else
411 inlinePerformIO = unsafePerformIO
412 #endif
413
414 -- | Count the number of occurrences of each byte.
415 --
416 {-# SPECIALIZE countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO () #-}
417 countOccurrences :: (Storable a, Num a) => Ptr a -> Ptr Word8 -> Int -> IO ()
418 STRICT3(countOccurrences)
419 countOccurrences counts str l = go 0
420 where
421 STRICT1(go)
422 go i | i == l = return ()
423 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
424 x <- peekElemOff counts k
425 pokeElemOff counts k (x + 1)
426 go (i + 1)
427
428 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a
429 -- @CString@. Warning: modifying the @CString@ will affect the
430 -- @ByteString@. Why is this function unsafe? It relies on the null
431 -- byte at the end of the ByteString to be there. Unless you can
432 -- guarantee the null byte, you should use the safe version, which will
433 -- copy the string first.
434 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
435 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
436
437 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a
438 -- @CStringLen@.
439 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
440 unsafeUseAsCStringLen (PS ps s l) f = withForeignPtr ps $ \p -> f (castPtr p `plusPtr` s,l)
441
442 -- ---------------------------------------------------------------------
443 --
444 -- Standard C functions
445 --
446
447 foreign import ccall unsafe "string.h strlen" c_strlen
448 :: CString -> IO CSize
449
450 foreign import ccall unsafe "stdlib.h malloc" c_malloc
451 :: CSize -> IO (Ptr Word8)
452
453 foreign import ccall unsafe "static stdlib.h free" c_free
454 :: Ptr Word8 -> IO ()
455
456 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
457 :: FunPtr (Ptr Word8 -> IO ())
458
459 foreign import ccall unsafe "string.h memchr" c_memchr
460 :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
461
462 memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
463 memchr p w s = c_memchr p (fromIntegral w) s
464
465 foreign import ccall unsafe "string.h memcmp" memcmp
466 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
467
468 foreign import ccall unsafe "string.h memcpy" c_memcpy
469 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
470
471 memcpy :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
472 memcpy p q s = do c_memcpy p q s
473 return ()
474
475 foreign import ccall unsafe "string.h memmove" c_memmove
476 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
477
478 memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
479 memmove p q s = do c_memmove p q s
480 return ()
481
482 foreign import ccall unsafe "string.h memset" c_memset
483 :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
484
485 memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
486 memset p w s = c_memset p (fromIntegral w) s
487
488 -- ---------------------------------------------------------------------
489 --
490 -- Uses our C code
491 --
492
493 foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
494 :: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
495
496 foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
497 :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
498
499 foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
500 :: Ptr Word8 -> CULong -> IO Word8
501
502 foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
503 :: Ptr Word8 -> CULong -> IO Word8
504
505 foreign import ccall unsafe "static fpstring.h fps_count" c_count
506 :: Ptr Word8 -> CULong -> Word8 -> IO CULong
507
508 -- ---------------------------------------------------------------------
509 -- Internal GHC Haskell magic
510
511 #if defined(__GLASGOW_HASKELL__)
512 foreign import ccall unsafe "__hscore_memcpy_src_off"
513 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
514 #endif