Sync with FPS head
[packages/old-time.git] / Data / ByteString.hs
1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
2 --
3 -- Module : ByteString
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
9 -- License : BSD-style
10 --
11 -- Maintainer : dons@cse.unsw.edu.au
12 -- Stability : experimental
13 -- Portability : portable, requires ffi and cpp
14 -- Tested with : GHC 6.4.1 and Hugs March 2005
15 --
16
17 --
18 -- | A time and space-efficient implementation of byte vectors using
19 -- packed Word8 arrays, suitable for high performance use, both in terms
20 -- of large data quantities, or high speed requirements. Byte vectors
21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
22 -- be passed between C and Haskell with little effort.
23 --
24 -- This module is intended to be imported @qualified@, to avoid name
25 -- clashes with Prelude functions. eg.
26 --
27 -- > import qualified Data.ByteString as B
28 --
29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
30 -- UArray by Simon Marlow. Rewritten to support slices and use
31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
32 --
33
34 module Data.ByteString (
35
36 -- * The @ByteString@ type
37 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
38
39 -- * Introducing and eliminating 'ByteString's
40 empty, -- :: ByteString
41 packByte, -- :: Word8 -> ByteString
42 pack, -- :: [Word8] -> ByteString
43 unpack, -- :: ByteString -> [Word8]
44 packWith, -- :: (a -> Word8) -> [a] -> ByteString
45 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
46
47 -- * Basic interface
48 cons, -- :: Word8 -> ByteString -> ByteString
49 snoc, -- :: ByteString -> Word8 -> ByteString
50 null, -- :: ByteString -> Bool
51 length, -- :: ByteString -> Int
52 head, -- :: ByteString -> Word8
53 tail, -- :: ByteString -> ByteString
54 last, -- :: ByteString -> Word8
55 init, -- :: ByteString -> ByteString
56 append, -- :: ByteString -> ByteString -> ByteString
57
58 -- * Special ByteStrings
59 inits, -- :: ByteString -> [ByteString]
60 tails, -- :: ByteString -> [ByteString]
61 elems, -- :: ByteString -> [ByteString]
62
63 -- * Transforming ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 reverse, -- :: ByteString -> ByteString
66 intersperse, -- :: Word8 -> ByteString -> ByteString
67 transpose, -- :: [ByteString] -> [ByteString]
68
69 -- * Reducing 'ByteString's
70 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
71 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
72 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
74
75 -- ** Special folds
76 concat, -- :: [ByteString] -> ByteString
77 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
78 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
79 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
80 maximum, -- :: ByteString -> Word8
81 minimum, -- :: ByteString -> Word8
82 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
83
84 -- * Generating and unfolding ByteStrings
85 replicate, -- :: Int -> Word8 -> ByteString
86 unfoldrN, -- :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
87
88 -- * Substrings
89
90 -- ** Breaking strings
91 take, -- :: Int -> ByteString -> ByteString
92 drop, -- :: Int -> ByteString -> ByteString
93 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
94 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
95 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
96 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
97 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
98 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
99
100 -- ** Breaking and dropping on specific bytes
101 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
102 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
103 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
104 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
105
106 -- ** Breaking into many substrings
107 split, -- :: Word8 -> ByteString -> [ByteString]
108 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
109 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
110 group, -- :: ByteString -> [ByteString]
111 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
112
113 -- ** Joining strings
114 join, -- :: ByteString -> [ByteString] -> ByteString
115 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
116
117 -- * Indexing ByteStrings
118 index, -- :: ByteString -> Int -> Word8
119 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
120 elemIndices, -- :: Word8 -> ByteString -> [Int]
121 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
122 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
123 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
124 count, -- :: Word8 -> ByteString -> Int
125
126 -- * Ordered ByteStrings
127 sort, -- :: ByteString -> ByteString
128
129 -- * Searching ByteStrings
130
131 -- ** Searching by equality
132 -- | These functions use memchr(3) to efficiently search the ByteString
133
134 elem, -- :: Word8 -> ByteString -> Bool
135 notElem, -- :: Word8 -> ByteString -> Bool
136 filterByte, -- :: Word8 -> ByteString -> ByteString
137 filterNotByte, -- :: Word8 -> ByteString -> ByteString
138
139 -- ** Searching with a predicate
140 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
141 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
142
143 -- ** Prefixes and suffixes
144 -- | These functions use memcmp(3) to efficiently compare substrings
145 isPrefixOf, -- :: ByteString -> ByteString -> Bool
146 isSuffixOf, -- :: ByteString -> ByteString -> Bool
147
148 -- ** Search for arbitrary substrings
149 isSubstringOf, -- :: ByteString -> ByteString -> Bool
150 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
151 findSubstrings, -- :: ByteString -> ByteString -> [Int]
152
153 -- * Zipping and unzipping ByteStrings
154 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
155 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
156 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
157
158 -- * Unchecked access
159 unsafeHead, -- :: ByteString -> Word8
160 unsafeTail, -- :: ByteString -> ByteString
161 unsafeIndex, -- :: ByteString -> Int -> Word8
162
163 -- * Low level introduction and elimination
164 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
165 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
166 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
167 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
168 skipIndex, -- :: ByteString -> Int
169
170 -- ** Packing CStrings and pointers
171 packCString, -- :: CString -> ByteString
172 packCStringLen, -- :: CString -> ByteString
173 packMallocCString, -- :: CString -> ByteString
174
175 #if defined(__GLASGOW_HASKELL__)
176 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
177 packAddress, -- :: Addr# -> ByteString
178 unsafePackAddress, -- :: Int -> Addr# -> ByteString
179 unsafeFinalize, -- :: ByteString -> IO ()
180 #endif
181
182 -- ** Using ByteStrings as CStrings
183 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
184 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
185 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
186
187 -- ** Copying ByteStrings
188 -- | These functions perform memcpy(3) operations
189 copy, -- :: ByteString -> ByteString
190 copyCString, -- :: CString -> ByteString
191 copyCStringLen, -- :: CStringLen -> ByteString
192
193 -- * I\/O with @ByteString@s
194
195 -- ** Standard input and output
196
197 #if defined(__GLASGOW_HASKELL__)
198 getLine, -- :: IO ByteString
199 #endif
200 getContents, -- :: IO ByteString
201 putStr, -- :: ByteString -> IO ()
202 putStrLn, -- :: ByteString -> IO ()
203
204 -- ** Files
205 readFile, -- :: FilePath -> IO ByteString
206 writeFile, -- :: FilePath -> ByteString -> IO ()
207 -- mmapFile, -- :: FilePath -> IO ByteString
208
209 -- ** I\/O with Handles
210 #if defined(__GLASGOW_HASKELL__)
211 getArgs, -- :: IO [ByteString]
212 hGetLine, -- :: Handle -> IO ByteString
213 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
214 #endif
215 hGetContents, -- :: Handle -> IO ByteString
216 hGet, -- :: Handle -> Int -> IO ByteString
217 hPut, -- :: Handle -> ByteString -> IO ()
218
219 #if defined(__GLASGOW_HASKELL__)
220 -- * Miscellaneous
221 unpackList, -- eek, otherwise it gets thrown away by the simplifier
222 #endif
223
224 ) where
225
226 import qualified Prelude as P
227 import Prelude hiding (reverse,head,tail,last,init,null
228 ,length,map,lines,foldl,foldr,unlines
229 ,concat,any,take,drop,splitAt,takeWhile
230 ,dropWhile,span,break,elem,filter,maximum
231 ,minimum,all,concatMap,foldl1,foldr1
232 ,readFile,writeFile,replicate
233 ,getContents,getLine,putStr,putStrLn
234 ,zip,zipWith,unzip,notElem)
235
236 import qualified Data.List as List
237
238 import Data.Char
239 import Data.Word (Word8)
240 import Data.Maybe (listToMaybe)
241 import Data.Array (listArray)
242 import qualified Data.Array as Array ((!))
243
244 -- Control.Exception.bracket not available in yhc or nhc
245 import Control.Exception (bracket)
246 import Control.Monad (when)
247
248 import Foreign.C.String (CString, CStringLen)
249 import Foreign.C.Types (CSize, CInt)
250 import Foreign.ForeignPtr
251 import Foreign.Marshal.Array
252 import Foreign.Ptr
253 import Foreign.Storable (Storable(..))
254
255 -- hGetBuf and hPutBuf not available in yhc or nhc
256 import System.IO (stdin,stdout,hClose,hFileSize
257 ,hGetBuf,hPutBuf,openBinaryFile
258 ,Handle,IOMode(..))
259
260 #if !defined(__GLASGOW_HASKELL__)
261 import System.IO.Unsafe
262 #endif
263
264 #if defined(__GLASGOW_HASKELL__)
265
266 import Data.Generics (Data(..), Typeable(..))
267
268 import System.IO (hGetBufNonBlocking)
269 import System.IO.Error (isEOFError)
270
271 import Foreign.Marshal (alloca)
272 import qualified Foreign.Concurrent as FC (newForeignPtr)
273
274 import GHC.Handle
275 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
276 import GHC.Base (build, unsafeChr)
277 import GHC.Word hiding (Word8)
278 import GHC.Ptr (Ptr(..))
279 import GHC.ST (ST(..))
280 import GHC.IOBase
281
282 #endif
283
284 -- CFILES stuff is Hugs only
285 {-# CFILES cbits/fpstring.c #-}
286
287 -- -----------------------------------------------------------------------------
288 --
289 -- Useful macros, until we have bang patterns
-- Each macro below takes a function name and expands to one extra
-- equation for it, binding 1 to 5 arguments. The guard of that equation
-- applies `seq` to every argument and then yields False, so the equation
-- itself is never selected (its right-hand side, undefined, is never
-- used); its only effect is that the arguments are evaluated before the
-- equations that follow the macro are tried. The macro must therefore
-- be placed immediately before the real equations of the function.
290 --
291
292 #define STRICT1(f) f a | a `seq` False = undefined
293 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
294 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
295 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
296 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
297
298 -- -----------------------------------------------------------------------------
299
-- | A packed vector of 'Word8' values, supporting many efficient
-- operations. Every element of a 'ByteString' is a single 8-bit value.
--
-- Instances: 'Eq', 'Ord', 'Read', 'Show' and (GHC only) 'Data', 'Typeable'.
--
data ByteString
    = PS {-# UNPACK #-} !(ForeignPtr Word8)  -- buffer holding the bytes
         {-# UNPACK #-} !Int                 -- offset of the first byte within the buffer
         {-# UNPACK #-} !Int                 -- number of bytes in this string

#if defined(__GLASGOW_HASKELL__)
    deriving (Data, Typeable)
#endif
312
-- Equality is decided by 'eq'.
instance Eq ByteString where
    a == b = eq a b
315
-- Ordering is decided by 'compareBytes'.
instance Ord ByteString where
    compare a b = compareBytes a b
318
-- A 'ByteString' is shown the same way as the list obtained by
-- unpacking it with 'w2c'.
instance Show ByteString where
    showsPrec prec bytes rest = showsPrec prec unpacked rest
      where
        unpacked = unpackWith w2c bytes
321
-- A 'ByteString' is read by reading a list and packing every element
-- of each successful parse with 'c2w'.
instance Read ByteString where
    readsPrec prec input = do
        (parsed, rest) <- readsPrec prec input
        return (packWith c2w parsed, rest)
324
325 {-
326 instance Arbitrary PackedString where
327 arbitrary = P.pack `fmap` arbitrary
328 coarbitrary s = coarbitrary (P.unpack s)
329 -}
330
-- | /O(n)/ Equality on the 'ByteString' type.
--
-- Strings of different length can never be equal, so the two lengths
-- are compared first; the bytes themselves are only examined (through
-- 'compareBytes') when the lengths agree.
eq :: ByteString -> ByteString -> Bool
eq a@(PS _ _ lenA) b@(PS _ _ lenB)
    | lenA /= lenB = False                      -- cheap rejection, no byte comparison
    | otherwise    = compareBytes a b == EQ
{-# INLINE eq #-}
335
-- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteString's,
-- taking each string's offset and length into account so that slices
-- compare correctly.
--
-- The common prefix (the first @min l1 l2@ bytes) is compared with
-- @memcmp@; if that finds no difference the shorter string sorts first.
compareBytes :: ByteString -> ByteString -> Ordering
compareBytes (PS _ _ 0) (PS _ _ 0) = EQ   -- two empty strings: nothing to inspect
compareBytes (PS fpA offA lenA) (PS fpB offB lenB) =
    inlinePerformIO $
        withForeignPtr fpA $ \baseA ->
        withForeignPtr fpB $ \baseB -> do
            r <- memcmp (baseA `plusPtr` offA) (baseB `plusPtr` offB) (min lenA lenB)
            return (verdict r)
  where
    -- Turn the sign of the memcmp result into an Ordering, falling
    -- back to the lengths when the common prefix is identical.
    verdict r
        | r < 0     = LT
        | r > 0     = GT
        | otherwise = compare lenA lenB
{-# INLINE compareBytes #-}
347
348 {-
349 --
350 -- About 4x slower over 32M
351 --
352 compareBytes :: ByteString -> ByteString -> Ordering
353 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
354 withForeignPtr fp1 $ \p1 ->
355 withForeignPtr fp2 $ \p2 ->
356 cmp (p1 `plusPtr` off1)
357 (p2 `plusPtr` off2) 0 len1 len2
358
359 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
360 STRICT5(cmp)
361 cmp p1 p2 n len1 len2
362 | n == len1 = if n == len2 then return EQ else return LT
363 | n == len2 = return GT
364 | otherwise = do
365 (a :: Word8) <- peekByteOff p1 n
366 (b :: Word8) <- peekByteOff p2 n
367 case a `compare` b of
368 EQ -> cmp p1 p2 (n+1) len1 len2
369 LT -> return LT
370 GT -> return GT
371 {-# INLINE compareBytes #-}
372 -}
373
374 -- -----------------------------------------------------------------------------
375 -- Introducing and eliminating 'ByteString's
376
-- | /O(1)/ The empty 'ByteString': a zero-length string over a freshly
-- allocated buffer.
empty :: ByteString
empty = inlinePerformIO $ do
    fp <- mallocByteString 1
    return (PS fp 0 0)
{-# NOINLINE empty #-}
381
-- | /O(1)/ Build a one-byte 'ByteString' holding the given 'Word8'.
packByte :: Word8 -> ByteString
packByte w = inlinePerformIO $ do
    fp <- mallocByteString 2
    withForeignPtr fp (\dst -> poke dst w)
    return (PS fp 0 1)
{-# NOINLINE packByte #-}
388
-- | /O(n)/ Convert a list of 'Word8' into a 'ByteString'.
--
-- The list is traversed twice: once to measure it and once to store its
-- elements. For programs with many string literals this can become a
-- bottleneck; consider packAddress (GHC only) in that case.
pack :: [Word8] -> ByteString

#if !defined(__GLASGOW_HASKELL__)

pack ws = create (P.length ws) $ \dst -> fill dst ws
  where
    -- Store one byte and advance the pointer, which needs less space
    -- than pokeElemOff with a running index.
    fill _   []          = return ()
    fill out (b:rest) = do
        poke out b
        fill (out `plusPtr` 1) rest

#else /* GHC: store the bytes with primops */

pack ws = create (P.length ws) $ \(Ptr addr) -> stToIO (fill addr 0# ws)
  where
    -- Walk the list, writing each unboxed byte at an increasing offset.
    fill _    _   []             = return ()
    fill base off (W8# b : rest) = store base off b >> fill base (off +# 1#) rest

    -- A single raw write, wrapped up as an ST action.
    store base off b = ST $ \st ->
        case writeWord8OffAddr# base off b st of
            st' -> (# st', () #)

#endif
413
-- | /O(n)/ Convert a 'ByteString' into a list of 'Word8'.
unpack :: ByteString -> [Word8]

#if !defined(__GLASGOW_HASKELL__)

unpack (PS _ _ 0) = []
unpack (PS fp off len) = inlinePerformIO $
    withForeignPtr fp $ \base -> collect (base `plusPtr` off) (len - 1) []
  where
    -- Read from the last byte down to the first, consing onto the
    -- accumulator so the list comes out in order.
    STRICT3(collect)
    collect src 0 acc = do
        w <- peek src
        return (w : acc)
    collect src i acc = do
        w <- peekByteOff src i
        collect src (i - 1) (w : acc)
{-# INLINE unpack #-}

#else

-- Expressed through build so that the result is a good producer;
-- the actual traversal is done by unpackFoldr.
unpack bs = build (unpackFoldr bs)
{-# INLINE unpack #-}
432
-- Plain list-producing traversal: reads the bytes from the last one
-- down to the first, consing each onto the accumulator. The rewrite
-- rule below substitutes it for unpackFoldr applied to (:) and [].
unpackList :: ByteString -> [Word8]
unpackList (PS fp off len) =
    withPtr fp $ \base -> walk (base `plusPtr` off) (len - 1) []
  where
    STRICT3(walk)
    walk _   (-1) acc = return acc
    walk src i    acc = do
        w <- peekByteOff src i
        walk src (i - 1) (w : acc)

{-# RULES
"unpack-list"  [1]  forall bs . unpackFoldr bs (:) [] = unpackList bs
 #-}
445
-- Right fold over the bytes of a ByteString: the bytes are read from
-- the last one down to the first, and each is combined with the
-- accumulated result through the supplied function, starting from the
-- supplied seed.
unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
unpackFoldr (PS fp off len) f ch =
    withPtr fp $ \base -> walk (base `plusPtr` off) (len - 1) ch
  where
    STRICT3(walk)
    walk _   (-1) acc = return acc
    walk src i    acc = do
        w <- peekByteOff src i
        walk src (i - 1) (w `f` acc)
{-# INLINE [0] unpackFoldr #-}
455
456 #endif
457
458 ------------------------------------------------------------------------
459
-- | /O(n)/ Convert a list into a 'ByteString', turning every element
-- into a byte with the supplied conversion function.
packWith :: (a -> Word8) -> [a] -> ByteString
packWith k str = create (P.length str) $ \dst -> fill dst str
  where
    -- Convert and store one element, then advance the pointer (less
    -- space than pokeElemOff with a running index).
    STRICT2(fill)
    fill _   []          = return ()
    fill out (e:rest) = do
        poke out (k e)
        fill (out `plusPtr` 1) rest
{-# INLINE packWith #-}
{-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
470
-- | /O(n)/ Convert a 'ByteString' into a list, applying the supplied
-- conversion function to every byte.
unpackWith :: (Word8 -> a) -> ByteString -> [a]
unpackWith _ (PS _ _ 0) = []
unpackWith k (PS fp off len) = inlinePerformIO $
    withForeignPtr fp $ \base -> collect (base `plusPtr` off) (len - 1) []
  where
    -- Read from the last byte down to the first, consing the converted
    -- values so the list comes out in order.
    STRICT3(collect)
    collect src 0 acc = do
        w <- peek src
        return (k w : acc)
    collect src i acc = do
        w <- peekByteOff src i
        collect src (i - 1) (k w : acc)
{-# INLINE unpackWith #-}
{-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
482
483 -- ---------------------------------------------------------------------
484 -- Basic interface
485
-- | /O(1)/ Is the 'ByteString' empty, i.e. is its length zero?
null :: ByteString -> Bool
null (PS _ _ 0) = True
null _          = False
{-# INLINE null #-}
490
-- | /O(1)/ The number of bytes in a 'ByteString', as an 'Int'. The
-- length is stored in the string, so nothing is traversed.
length :: ByteString -> Int
length (PS _ _ len) = len
{-# INLINE length #-}
495
-- | /O(n)/ Prepend a byte to a 'ByteString'. Unlike (:) on lists this
-- is linear: the existing bytes are copied with memcpy into a new
-- string one byte longer.
cons :: Word8 -> ByteString -> ByteString
cons w (PS fp off len) =
    create (len + 1) $ \dst ->
        withForeignPtr fp $ \src -> do
            -- old contents go after the first slot ...
            memcpy (dst `plusPtr` 1) (src `plusPtr` off) len
            -- ... and the new byte fills the first slot
            poke dst w
{-# INLINE cons #-}
503
-- | /O(n)/ Append a byte to the end of a 'ByteString'. The existing
-- bytes are copied with memcpy into a new string one byte longer.
snoc :: ByteString -> Word8 -> ByteString
snoc (PS fp off len) w =
    create (len + 1) $ \dst ->
        withForeignPtr fp $ \src -> do
            memcpy dst (src `plusPtr` off) len
            -- the new byte goes right after the copied contents
            poke (dst `plusPtr` len) w
{-# INLINE snoc #-}
510
-- | /O(1)/ The first byte of a 'ByteString'. The string must be
-- non-empty; an empty one is reported through errorEmptyList.
head :: ByteString -> Word8
head (PS fp off len)
    | len == 0  = errorEmptyList "head"
    | otherwise = inlinePerformIO $
        withForeignPtr fp $ \base -> peekByteOff base off
{-# INLINE head #-}
517
-- | /O(1)/ Everything after the first byte of a 'ByteString'. The
-- string must be non-empty; an empty one is reported through
-- errorEmptyList. The result shares the buffer of the argument.
tail :: ByteString -> ByteString
tail (PS fp off len) =
    if len <= 0
        then errorEmptyList "tail"
        else PS fp (off + 1) (len - 1)
{-# INLINE tail #-}
524
-- | /O(1)/ The final byte of a 'ByteString'. The string must be
-- non-empty; an empty one is reported through errorEmptyList.
last :: ByteString -> Word8
last (PS fp off len)
    | len == 0  = errorEmptyList "last"
    | otherwise = inlinePerformIO $
        withForeignPtr fp $ \base -> peekByteOff base (off + len - 1)
{-# INLINE last #-}
531
-- | /O(1)/ All bytes of a 'ByteString' except the last one. The string
-- must be non-empty; an empty one is reported through errorEmptyList.
-- The result shares the buffer of the argument.
init :: ByteString -> ByteString
init (PS fp off len) =
    if len <= 0
        then errorEmptyList "init"
        else PS fp off (len - 1)
{-# INLINE init #-}
538
-- | /O(n)/ Join two 'ByteString's end to end. When either argument is
-- empty the other one is returned as is; otherwise the work is done
-- by 'concat'.
append :: ByteString -> ByteString -> ByteString
append xs@(PS _ _ lenX) ys@(PS _ _ lenY)
    | lenX == 0 = ys
    | lenY == 0 = xs
    | otherwise = concat [xs, ys]
{-# INLINE append #-}
545
546 {-
547 --
548 -- About 30% faster, but allocating in a big chunk isn't good for memory use
549 --
550 append :: ByteString -> ByteString -> ByteString
551 append xs@(PS ffp s l) ys@(PS fgp t m)
552 | null xs = ys
553 | null ys = xs
554 | otherwise = create len $ \ptr ->
555 withForeignPtr ffp $ \fp ->
556 withForeignPtr fgp $ \gp -> do
557 memcpy ptr (fp `plusPtr` s) l
558 memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
559 where len = length xs + length ys
560 -}
561
562 -- ---------------------------------------------------------------------
563 -- Transformations
564
-- | /O(n)/ @'map' f xs@ is the 'ByteString' whose bytes are the results
-- of applying @f@ to the corresponding bytes of @xs@. The result lives
-- in a newly allocated buffer of the same length.
--
map :: (Word8 -> Word8) -> ByteString -> ByteString
map f (PS srcFp off len) = inlinePerformIO $
    withForeignPtr srcFp $ \src -> do
        dstFp <- mallocByteString len
        withForeignPtr dstFp $ \dst ->
            map_ f (len - 1) (src `plusPtr` off) dst
        return (PS dstFp 0 len)
{-# INLINE map #-}
575
-- Worker for 'map': starting at the given index and counting down to
-- zero, read a byte from the source, apply the function and store the
-- result at the same index of the destination. A negative index does
-- nothing.
map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
STRICT4(map_)
map_ f i src dst = when (i >= 0) $ do
    w <- peekByteOff src i
    pokeByteOff dst i (f w)
    map_ f (i - 1) src dst
{-# INLINE map_ #-}
585
-- | /O(n)/ @'reverse' xs@ is @xs@ with its bytes in the opposite order.
-- The reversal itself is carried out by the foreign routine c_reverse,
-- writing into a new string of the same length.
reverse :: ByteString -> ByteString
reverse (PS fp off len) =
    create len $ \dst ->
        withForeignPtr fp $ \src ->
            c_reverse dst (src `plusPtr` off) len
590
591 {-
592 reverse = pack . P.reverse . unpack
593 -}
594
-- | /O(n)/ 'intersperse' places the given 'Word8' between every two
-- adjacent bytes of the 'ByteString', like intersperse on lists.
-- Strings of fewer than two bytes are returned unchanged; otherwise
-- the result has @2*l-1@ bytes and is filled in by the foreign routine
-- c_intersperse.
intersperse :: Word8 -> ByteString -> ByteString
intersperse w bs@(PS fp off len)
    | len < 2   = bs
    | otherwise =
        create (2 * len - 1) $ \dst ->
            withForeignPtr fp $ \src ->
                c_intersperse dst (src `plusPtr` off) len w
604
605 {-
606 intersperse c = pack . List.intersperse c . unpack
607 -}
608
-- | 'transpose' swaps the rows and columns of a list of 'ByteString's.
-- It goes through lists: every string is unpacked, the lists are
-- transposed with the list transpose, and the rows are packed again.
transpose :: [ByteString] -> [ByteString]
transpose rows = P.map pack columns
  where
    columns = List.transpose (P.map unpack rows)
613
614 -- ---------------------------------------------------------------------
615 -- Reducing 'ByteString's
616
-- | 'foldl' reduces a 'ByteString' from left to right with a binary
-- operator, starting from the given value (typically the left identity
-- of the operator).
foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
foldl f v (PS fp off len) = inlinePerformIO $
    withForeignPtr fp $ \base ->
        walk v (base `plusPtr` off) (base `plusPtr` (off + len))
  where
    -- Advance a cursor from the first byte up to the end pointer,
    -- feeding each byte into the accumulator.
    STRICT3(walk)
    walk acc cur end =
        if cur == end
            then return acc
            else do
                w <- peek cur
                walk (f acc w) (cur `plusPtr` 1) end
628
-- | 'foldr' reduces a 'ByteString' from right to left with a binary
-- operator, starting from the given value (typically the right
-- identity of the operator).
foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
foldr k z (PS fp off len) = inlinePerformIO $
    withForeignPtr fp $ \base ->
        walk (base `plusPtr` off) (base `plusPtr` (off + len))
  where
    -- Recurse to the end pointer first, then combine each byte with
    -- the result for the bytes that follow it.
    STRICT2(walk)
    walk cur end =
        if cur == end
            then return z
            else do
                w    <- peek cur
                rest <- walk (cur `plusPtr` 1) end
                return (w `k` rest)
641
-- | 'foldl1' is 'foldl' without a starting value: the first byte is
-- used as the seed and the remaining bytes are folded into it. The
-- 'ByteString' must be non-empty; an empty one is reported through
-- errorEmptyList.
foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldl1 f ps =
    if null ps
        then errorEmptyList "foldl1"
        else foldl f seed rest
  where
    seed = unsafeHead ps
    rest = unsafeTail ps
648
-- | 'foldr1' is 'foldr' without a starting value: the last byte is
-- used as the seed and the bytes before it are folded in from the
-- right. The 'ByteString' must be non-empty; an empty one is reported
-- through errorEmptyList.
foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
foldr1 f ps =
    if null ps
        then errorEmptyList "foldr1"
        else foldr f seed front
  where
    seed  = last ps
    front = init ps
655
656 -- ---------------------------------------------------------------------
657 -- Special folds
658
-- | /O(n)/ Concatenate a list of 'ByteString's.
--
-- The empty list gives 'empty' and a singleton list gives its element
-- unchanged. Otherwise the pieces are copied one after another into a
-- growable array that starts at 1024 elements and is enlarged with
-- reallocArray whenever the next piece does not fit.
concat :: [ByteString] -> ByteString
concat []     = empty
concat [only] = only
concat pieces = inlinePerformIO $ do
    let startSize = 1024
    buf <- mallocArray startSize
    fill buf 0 startSize pieces
  where
    -- fill buffer bytesUsed bytesFree remainingPieces
    --
    -- Nothing left: trim the buffer to the used size plus one, store a
    -- trailing zero byte (XXX so CStrings work) and wrap the result.
    fill buf used _ [] = do
        buf' <- reallocArray buf (used + 1)
        poke (buf' `plusPtr` used) (0 :: Word8)
        fp <- newForeignFreePtr buf'
        return (PS fp 0 used)

    fill buf used room todo@(PS fp off len : rest)
        -- the next piece fits: copy it in and move on
        | len <= room = do
            withForeignPtr fp $ \src ->
                memcpy (buf `plusPtr` used) (src `plusPtr` off) len
            fill buf (used + len) (room - len) rest

        -- it does not fit: at least double the capacity, and make sure
        -- the piece fits, then retry the same piece
        | otherwise = do
            let capacity = ((used + room) * 2) `max` (used + len)
            buf' <- reallocArray buf capacity
            fill buf' used (capacity - used) todo
683
-- | Apply a function to every byte of a 'ByteString' and join the
-- resulting 'ByteString's, in order, with 'append'.
concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
concatMap f bs = foldr step empty bs
  where
    step w acc = f w `append` acc
687
-- | /O(n)/ Does at least one byte of the 'ByteString' satisfy the
-- predicate? The scan stops at the first byte that does; an empty
-- string gives 'False'.
any :: (Word8 -> Bool) -> ByteString -> Bool
any _    (PS _ _ 0)      = False
any test (PS fp off len) = inlinePerformIO $
    withForeignPtr fp $ \base ->
        scan (base `plusPtr` off) (base `plusPtr` (off + len))
  where
    STRICT2(scan)
    scan cur end
        | cur == end = return False
        | otherwise  = do
            w <- peek cur
            if test w
                then return True
                else scan (cur `plusPtr` 1) end
700
-- | /O(n)/ Do all bytes of the 'ByteString' satisfy the predicate? The
-- scan stops at the first byte that does not; an empty string gives
-- 'True'.
all :: (Word8 -> Bool) -> ByteString -> Bool
all _    (PS _ _ 0)      = True
all test (PS fp off len) = inlinePerformIO $
    withForeignPtr fp $ \base ->
        scan (base `plusPtr` off) (base `plusPtr` (off + len))
  where
    STRICT2(scan)
    scan cur end
        | cur == end = return True      -- ran off the end: every byte passed
        | otherwise  = do
            w <- peek cur
            if test w
                then scan (cur `plusPtr` 1) end
                else return False
714
-- | /O(n)/ The largest byte of a 'ByteString', computed by the foreign
-- routine c_maximum. The string must be non-empty; an empty one is
-- reported through errorEmptyList.
maximum :: ByteString -> Word8
maximum (PS fp off len)
    | len == 0  = errorEmptyList "maximum"
    | otherwise = inlinePerformIO $
        withForeignPtr fp $ \base ->
            return (c_maximum (base `plusPtr` off) len)
{-# INLINE maximum #-}
722
-- | /O(n)/ The smallest byte of a 'ByteString', computed by the foreign
-- routine c_minimum. The string must be non-empty; an empty one is
-- reported through errorEmptyList.
minimum :: ByteString -> Word8
minimum (PS fp off len)
    | len == 0  = errorEmptyList "minimum"
    | otherwise = inlinePerformIO $
        withForeignPtr fp $ \base ->
            return (c_minimum (base `plusPtr` off) len)
{-# INLINE minimum #-}
730
731 {-
732 maximum xs@(PS x s l)
733 | null xs = errorEmptyList "maximum"
734 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
735 w <- peek p
736 maximum_ (p `plusPtr` s) 0 l w
737
738 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
739 STRICT4(maximum_)
740 maximum_ ptr n m c
741 | n >= m = return c
742 | otherwise = do w <- peekByteOff ptr n
743 maximum_ ptr (n+1) m (if w > c then w else c)
744
745 minimum xs@(PS x s l)
746 | null xs = errorEmptyList "minimum"
747 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
748 w <- peek p
749 minimum_ (p `plusPtr` s) 0 l w
750
751 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
752 STRICT4(minimum_)
753 minimum_ ptr n m c
754 | n >= m = return c
755 | otherwise = do w <- peekByteOff ptr n
756 minimum_ ptr (n+1) m (if w < c then w else c)
757 -}
758
-- | /O(n)/ Like 'map', but the function also receives the zero-based
-- position of each byte.
mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
mapIndexed k (PS fp off len) =
    create len $ \dst ->
        withForeignPtr fp $ \base ->
            let first = base `plusPtr` off
            in  walk 0 first dst (first `plusPtr` len)
  where
    -- walk index source destination sourceEnd
    walk :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
    STRICT4(walk)
    walk i src out end
        | src == end = return ()
        | otherwise  = do
            w <- peek src
            poke out (k i w)
            walk (i + 1) (src `plusPtr` 1) (out `plusPtr` 1) end
770
771 -- ---------------------------------------------------------------------
772 -- Unfolds and replicates
773
-- | /O(n)/ 'replicate' @n x@ is a 'ByteString' of length @n@ in which
-- every byte is @x@. A count of zero or less gives 'empty'. The
-- following holds:
--
-- > replicate w c = unfoldrN w (\u -> Just (u,u)) c
--
-- This implementation uses @memset(3)@.
replicate :: Int -> Word8 -> ByteString
replicate w c
    -- Never hand a non-positive count to the allocator or to memset:
    -- a negative Int would be converted by fromIntegral before being
    -- used as the fill size.
    | w <= 0    = empty
    | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
782
783 {-
784 -- About 5x slower
785 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
786 where
787 STRICT2(go)
788 go _ 0 = return w
789 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
790 -}
791
-- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
-- 'unfoldrN' builds a ByteString from a seed value. The function takes
-- the current seed and returns 'Nothing' if it is done producing the
-- ByteString, or 'Just' @(a,b)@, in which case @a@ is the next byte of
-- the ByteString and @b@ is the seed for the next step.
--
-- To prevent unfoldrN having /O(n^2)/ complexity (as adding a byte to
-- a ByteString with 'cons' is /O(n)/), this unfoldr takes the maximum
-- final size of the ByteString as its first argument. Each produced
-- byte is then a single 'poke', and the unfold as a whole is linear.
-- At most that many bytes are produced, but possibly fewer. A maximum
-- size of zero or less gives 'empty'. For a lazy, unbounded unfoldr,
-- use the list version from "Data.List".
--
-- Examples:
--
-- > unfoldrN 10 (\x -> Just (x, x + 1)) 48 == pack [48..57]
--
-- The following equation connects the depth-limited unfoldr to the List unfoldr:
--
-- > unfoldrN n == take n $ List.unfoldr
unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
unfoldrN i f w
    -- The loop below stops when its counter equals i; counting up from
    -- zero it can never reach a negative i, so such a limit must be
    -- rejected before any byte is written.
    | i <= 0    = empty
    | otherwise = inlinePerformIO $ generate i $ \p -> go p w 0
  where
    -- go destination seed bytesWritten; the result is the number of
    -- bytes actually produced.
    STRICT3(go)
    go q c n
        | n == i    = return n      -- stop if we reach `i'
        | otherwise = case f c of
            Nothing         -> return n
            Just (a, new_c) -> do
                poke q a
                go (q `plusPtr` 1) new_c (n + 1)
824
825 -- ---------------------------------------------------------------------
826 -- Substrings
827
-- | /O(1)/ @'take' n xs@ is the prefix of @xs@ of length @n@, or @xs@
-- itself if @n >= 'length' xs@. A negative @n@ gives 'empty'. A proper
-- prefix shares the buffer of @xs@.
take :: Int -> ByteString -> ByteString
take n bs@(PS fp off len)
    | n < 0     = empty
    | n < len   = PS fp off n
    | otherwise = bs
{-# INLINE take #-}
836
-- | /O(1)/ @'drop' n xs@ is what remains of @xs@ after its first @n@
-- bytes, or 'empty' if @n > 'length' xs@. A non-positive @n@ gives @xs@
-- itself. A proper suffix shares the buffer of @xs@.
drop :: Int -> ByteString -> ByteString
drop n bs@(PS fp off len)
    | n <= 0    = bs
    | n <= len  = PS fp (off + n) (len - n)
    | otherwise = empty
{-# INLINE drop #-}
845
-- | /O(1)/ @'splitAt' n xs@ cuts @xs@ at position @n@; it is the pair
-- @('take' n xs, 'drop' n xs)@.
splitAt :: Int -> ByteString -> (ByteString, ByteString)
splitAt n ps = (front, back)
  where
    front = take n ps
    back  = drop n ps
{-# INLINE splitAt #-}
850
-- | @'takeWhile' p xs@ is the longest (possibly empty) prefix of @xs@
-- all of whose bytes satisfy @p@.
takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
takeWhile f ps = take stop ps
  where
    -- position of the first byte failing the predicate (see findIndexOrEnd)
    stop = findIndexOrEnd (not . f) ps
{-# INLINE takeWhile #-}
857
-- | @'dropWhile' p xs@ is what is left of @xs@ once the prefix selected
-- by @'takeWhile' p xs@ has been removed.
dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhile f ps = drop stop ps
  where
    -- position of the first byte failing the predicate (see findIndexOrEnd)
    stop = findIndexOrEnd (not . f) ps
{-# INLINE dropWhile #-}
862
-- | @'break' p@ is equivalent to @'span' ('not' . p)@: the string is cut
-- at the position reported by findIndexOrEnd for the predicate.
break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
break p ps = splitAt (findIndexOrEnd p ps) ps
{-# INLINE break #-}
867
-- | 'breakByte' cuts a 'ByteString' at the first occurrence of the given
-- byte, which stays at the front of the second part. If the byte does
-- not occur, the result is the whole string paired with 'empty'. It is
-- more efficient than 'break' as it is implemented with @memchr(3)@.
-- I.e.
--
-- > break (=='c') "abcd" == breakByte 'c' "abcd"
--
breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
breakByte c p = maybe (p, empty) cutAt (elemIndex c p)
  where
    cutAt n = splitAt n p
{-# INLINE breakByte #-}
879
-- | 'spanByte' splits its ByteString argument at the first byte that
-- differs from the given byte: the first component is the longest
-- prefix consisting solely of that byte, the second is the remainder.
-- It is a more efficient form of @'span' (== c)@, i.e.
--
-- > span (=='c') "abcd" == spanByte 'c' "abcd"
--
spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
spanByte c ps@(PS fp off len) =
    inlinePerformIO $ withForeignPtr fp $ \base -> scan (base `plusPtr` off) 0
  where
    -- scan start i: bytes 0..i-1 of the slice are all equal to c
    STRICT2(scan)
    scan start i
        | i < len   = do b <- peekByteOff start i
                         if b == c
                             then scan start (i + 1)
                             else return (take i ps, drop i ps)
        | otherwise = return (ps, empty)
{-# INLINE spanByte #-}
897
-- | /O(n)/ 'breakFirst' splits the given ByteString around the first
-- occurrence of @w@. It behaves like 'break', except that the delimiter
-- itself is dropped, and @Nothing@ is returned when the delimiter does
-- not occur in the ByteString. I.e.
--
-- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
--
-- > breakFirst c xs ==
-- >    let (x,y) = break (== c) xs
-- >    in if null y then Nothing else Just (x, drop 1 y)
--
breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
breakFirst c p = fmap around (elemIndex c p)
  where
    -- everything before index n, and everything after it
    around n = (take n p, drop (n + 1) p)
{-# INLINE breakFirst #-}
914
-- | /O(n)/ 'breakLast' behaves like 'breakFirst', but splits around the
-- last occurrence of the byte instead of the first.
--
-- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
--
-- and the following are equivalent:
--
-- > breakLast 'c' "abcdef"
-- > let (x,y) = break (=='c') (reverse "abcdef")
-- > in if null y then Nothing else Just (reverse (drop 1 y), reverse x)
--
breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
breakLast c p = fmap around (elemIndexLast c p)
  where
    -- everything before index n, and everything after it
    around n = (take n p, drop (n + 1) p)
{-# INLINE breakLast #-}
931
-- | @'span' p xs@ splits @xs@ just before the first byte that fails
-- @p@. It is equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@.
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
span p ps = break rejected ps
  where
    rejected = not . p
{-# INLINE span #-}
937
-- | 'spanEnd' behaves like 'span', but works from the end of the
-- 'ByteString'. We have
--
-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
--
-- and
--
-- > spanEnd (not . isSpace) ps
-- >    ==
-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
--
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p ps = splitAt cut ps
  where
    cut = findFromEndUntil (not . p) ps
951
-- | /O(n)/ Splits a 'ByteString' into components delimited by
-- separators, where the predicate returns True for a separator element.
-- The resulting components do not contain the separators. Two adjacent
-- separators result in an empty component in the output, and an empty
-- input gives an empty list. eg.
--
-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
-- > splitWith (=='a') []        == []
--
splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]

#if defined(__GLASGOW_HASKELL__)
-- GHC version: reads the buffer directly and hands the predicate
-- unboxed bytes.
splitWith _ (PS _ _ 0) = []
splitWith isSep (PS fp off len) = pieces rawSep off len fp
  where rawSep w# = isSep (W8# w#)

        -- all pieces of the slice that starts at o and is n bytes long
        pieces sep o n buf = withPtr fp $ \p ->
            scan sep p 0 o n buf

        scan :: (Word# -> Bool)
             -> Ptr Word8
             -> Int -> Int -> Int
             -> ForeignPtr Word8
             -> IO [ByteString]

        -- i counts the bytes already accepted into the current piece;
        -- the first guard only forces the arguments and never succeeds
        scan sep p i o n buf
            | sep `seq` p `seq` i `seq` o `seq` n `seq` buf `seq` False = undefined
            | i >= n    = return [PS buf o i]
            | otherwise = do
                w <- peekElemOff p (o + i)
                if sep (case w of W8# w# -> w#)
                   then return (PS buf o i : pieces sep (o + i + 1) (n - i - 1) buf)
                   else scan sep p (i + 1) o n buf
{-# INLINE splitWith #-}

#else
-- Portable version, expressed with 'break'.
splitWith _ (PS _ _ 0) = []
splitWith p ps = pieces p ps
  where
    STRICT2(pieces)
    pieces q qs
        | null rest = [chunk]
        | otherwise = chunk : pieces q (unsafeTail rest)
      where (chunk, rest) = break q qs
#endif
996
-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
-- argument, consuming the delimiter. An empty input gives an empty
-- list; a delimiter at either end produces an empty piece there. I.e.
--
-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
-- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
-- > split 'x'  "x"          == ["",""]
--
-- and
--
-- > join [c] . split c == id
-- > split == splitWith . (==)
--
-- As for all splitting functions in this library, this function does
-- not copy the substrings, it just constructs new 'ByteStrings' that
-- are slices of the original.
--
split :: Word8 -> ByteString -> [ByteString]
split _ (PS _ _ 0) = []
split w (PS fp off len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
    let base = p `plusPtr` off

        -- go from: the pieces of the slice beginning at index from
        STRICT1(go)
        go from = do
            let hit = memchr (base `plusPtr` from) w (fromIntegral (len - from))
            if hit /= nullPtr
                then do let at = hit `minusPtr` base
                        rest <- go (at + 1)
                        return $! PS fp (off + from) (at - from) : rest
                else return [PS fp (off + from) (len - from)]
    go 0
{-# INLINE split #-}
1028
1029 {-
1030 -- slower. but stays inside Haskell.
1031 split _ (PS _ _ 0) = []
1032 split (W8# w#) (PS fp off len) = splitWith' off len fp
1033 where
1034 splitWith' off' len' fp' = withPtr fp $ \p ->
1035 splitLoop p 0 off' len' fp'
1036
1037 splitLoop :: Ptr Word8
1038 -> Int -> Int -> Int
1039 -> ForeignPtr Word8
1040 -> IO [ByteString]
1041
1042 STRICT5(splitLoop)
1043 splitLoop p idx' off' len' fp'
1044 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1045 | idx' >= len' = return [PS fp' off' idx']
1046 | otherwise = do
1047 (W8# x#) <- peekElemOff p (off'+idx')
1048 if word2Int# w# ==# word2Int# x#
1049 then return (PS fp' off' idx' :
1050 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1051 else splitLoop p (idx'+1) off' len' fp'
1052 -}
1053
-- | Like 'splitWith', except that the empty pieces are discarded, so a
-- run of adjacent separators acts as a single separator. eg.
--
-- > tokens (=='a') "aabbaca" == ["bb","c"]
--
tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
tokens f ps = P.filter nonEmpty (splitWith f ps)
  where
    nonEmpty = not . null
1061
-- | The 'group' function cuts a ByteString into runs of equal bytes.
-- The concatenation of the result equals the argument, and every
-- ByteString in the result contains only equal elements. For example,
--
-- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
--
-- It is a special case of 'groupBy', which allows the programmer to
-- supply their own equality test. It is about 40% faster than
-- /groupBy (==)/
group :: ByteString -> [ByteString]
group xs =
    if null xs
        then []
        else let (run, rest) = spanByte (unsafeHead xs) xs
             in  run : group rest
1078
-- | The 'groupBy' function is the non-overloaded version of 'group':
-- each run is extended for as long as the supplied test relates the
-- first byte of the run to the next byte.
groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
groupBy k xs =
    if null xs
        then []
        else take runLen xs : groupBy k (drop runLen xs)
  where
    -- the leading byte, plus every following byte related to it by k
    sameAsHead = k (unsafeHead xs)
    runLen     = 1 + findIndexOrEnd (not . sameAsHead) (unsafeTail xs)
1086
-- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
-- 'ByteString's, places the first argument between each pair of
-- neighbouring list elements, and concatenates the result.
join :: ByteString -> [ByteString] -> ByteString
join filler pss = concat (interleave pss)
  where
    -- only lists of two or more elements receive a filler
    interleave (x : rest@(_ : _)) = x : filler : interleave rest
    interleave short              = short
1096
--
-- | /O(n)/ joinWithByte. An efficient way to join two ByteStrings with
-- a single byte between them. Around 4 times faster than the
-- generalised join.
--
joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create total $ \dst ->
    withForeignPtr ffp $ \src1 ->
    withForeignPtr fgp $ \src2 -> do
        let sepAt  = dst `plusPtr` l        -- slot for the separator
            tailAt = dst `plusPtr` (l + 1)  -- start of the second string
        memcpy dst (src1 `plusPtr` s) l
        poke sepAt c
        memcpy tailAt (src2 `plusPtr` t) m
  where
    total = length f + length g + 1
{-# INLINE joinWithByte #-}
1111
1112 -- ---------------------------------------------------------------------
1113 -- Indexing ByteStrings
1114
-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
-- Calls 'error' when the index is negative or not less than the length.
index :: ByteString -> Int -> Word8
index ps n
    | n < 0     = error ("ByteString.indexWord8: negative index: " ++ show n)
    | n >= len  = error ("ByteString.indexWord8: index too large: " ++ show n
                         ++ ", length = " ++ show len)
    | otherwise = unsafeIndex ps n
  where
    len = length ps
{-# INLINE index #-}
1123
-- | /O(n)/ The 'elemIndex' function returns the index of the first
-- element in the given 'ByteString' which is equal to the query
-- element, or 'Nothing' if there is no such element.
-- This implementation uses memchr(3).
elemIndex :: Word8 -> ByteString -> Maybe Int
elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
    let p' = p `plusPtr` s
        q  = memchr p' c (fromIntegral l)
    -- Force the answer before leaving withForeignPtr. memchr is bound
    -- here as a pure call, so a lazy return would hand back a thunk
    -- that scans the raw pointer only when demanded, possibly after
    -- the buffer is no longer being kept alive.
    return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
{-# INLINE elemIndex #-}
1134
-- | /O(n)/ The 'elemIndexLast' function returns the index of the last
-- element in the given 'ByteString' which is equal to the query
-- element, or 'Nothing' if there is no such element. The following
-- holds:
--
-- > elemIndexLast c xs ==
-- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
--
elemIndexLast :: Word8 -> ByteString -> Maybe Int
elemIndexLast ch (PS fp off len) =
    inlinePerformIO $ withForeignPtr fp $ \base ->
        back (base `plusPtr` off) (len - 1)
  where
    -- walk backwards from the final byte until a match or the start
    STRICT2(back)
    back start i
        | i >= 0    = do b <- peekByteOff start i
                         if b == ch
                             then return (Just i)
                             else back start (i - 1)
        | otherwise = return Nothing
{-# INLINE elemIndexLast #-}
1154
-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
-- the indices of all elements equal to the query element, in ascending
-- order. This implementation uses memchr(3).
elemIndices :: Word8 -> ByteString -> [Int]
elemIndices w (PS fp off len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
    let base = p `plusPtr` off

        -- go from: indices of every match at or after index from
        STRICT1(go)
        go from = do
            let hit = memchr (base `plusPtr` from) w (fromIntegral (len - from))
            if hit /= nullPtr
                then do let at = hit `minusPtr` base
                        rest <- go (at + 1)
                        return $! at : rest
                else return []
    go 0
1171
1172 {-
1173 -- much slower
1174 elemIndices :: Word8 -> ByteString -> [Int]
1175 elemIndices c ps = loop 0 ps
1176 where STRICT2(loop)
1177 loop _ ps' | null ps' = []
1178 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1179 | otherwise = loop (n+1) (unsafeTail ps')
1180 -}
1181
-- | count returns the number of times its argument appears in the ByteString
--
-- > count = length . elemIndices
--
-- But more efficiently than using length on the intermediate list.
count :: Word8 -> ByteString -> Int
count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
    -- c_count is called as a pure function on the raw pointer, so the
    -- result is forced here, while withForeignPtr still keeps the
    -- buffer alive, instead of being returned as an unevaluated thunk.
    return $! c_count (p `plusPtr` s) (fromIntegral m) w
{-# INLINE count #-}
1191
1192 {-
1193 --
1194 -- around 30% slower
1195 --
1196 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1197 go (p `plusPtr` s) (fromIntegral m) 0
1198 where
1199 go :: Ptr Word8 -> CSize -> Int -> IO Int
1200 STRICT3(go)
1201 go p l i = do
1202 let q = memchr p w l
1203 if q == nullPtr
1204 then return i
1205 else do let k = fromIntegral $ q `minusPtr` p
1206 go (q `plusPtr` 1) (l-k-1) (i+1)
1207 -}
1208
-- | The 'findIndex' function takes a predicate and a 'ByteString' and
-- returns the index of the first element in the ByteString satisfying
-- the predicate, or 'Nothing' when no element does.
findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
findIndex p ps = listToMaybe (findIndices p ps)
1214
-- | The 'findIndices' function extends 'findIndex', by returning the
-- indices of all elements satisfying the predicate, in ascending order.
findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
findIndices p ps = walk 0 ps
  where
    -- walk i bs: bs is the remainder of the input, beginning at index i
    STRICT2(walk)
    walk i bs
        | null bs           = []
        | p (unsafeHead bs) = i : more
        | otherwise         = more
      where
        more = walk (i + 1) (unsafeTail bs)
1224
1225 -- ---------------------------------------------------------------------
1226 -- Searching ByteStrings
1227
-- | /O(n)/ 'elem' is the 'ByteString' membership predicate: it is True
-- exactly when 'elemIndex' finds the byte.
elem :: Word8 -> ByteString -> Bool
elem c ps = case elemIndex c ps of
    Just _  -> True
    Nothing -> False
{-# INLINE elem #-}
1232
-- | /O(n)/ 'notElem' is the inverse of 'elem': it is True exactly when
-- 'elemIndex' does not find the byte.
notElem :: Word8 -> ByteString -> Bool
notElem c ps = case elemIndex c ps of
    Just _  -> False
    Nothing -> True
{-# INLINE notElem #-}
1237
--
-- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
-- case of filtering a single byte. The result is built by counting the
-- occurrences of the byte and replicating it that many times.
--
-- > filterByte == filter . (==)
--
-- filterByte is around 10x faster, and uses much less space, than its
-- filter equivalent
filterByte :: Word8 -> ByteString -> ByteString
filterByte w ps = replicate occurrences w
  where
    occurrences = count w ps
1249
1250 {-
1251 -- slower than the replicate version
1252
1253 filterByte ch ps@(PS x s l)
1254 | null ps = ps
1255 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1256 t <- go (f `plusPtr` s) p l
1257 return (t `minusPtr` p) -- actual length
1258 where
1259 STRICT3(go)
1260 go _ t 0 = return t
1261 go f t e = do w <- peek f
1262 if w == ch
1263 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1264 else go (f `plusPtr` 1) t (e-1)
1265 -}
1266
--
-- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
-- case of removing a single byte from a ByteString. It is more
-- efficient to use /filterNotByte/ in this case.
--
-- > filterNotByte == filter . (/=)
--
-- filterNotByte is around 3x faster, and uses much less space, than its
-- filter equivalent
filterNotByte :: Word8 -> ByteString -> ByteString
filterNotByte ch ps@(PS fp off len)
    | null ps   = ps
    | otherwise = inlinePerformIO $ generate len $ \dst ->
        withForeignPtr fp $ \src -> do
            end <- sift (src `plusPtr` off) dst len
            return (end `minusPtr` dst)   -- number of bytes actually kept
  where
    -- sift from to left: copy the wanted bytes out of the left bytes
    -- still unread at from, and return the final write position
    STRICT3(sift)
    sift _ to 0 = return to
    sift from to left = do
        b <- peek from
        let from' = from `plusPtr` 1
            left' = left - 1
        if b == ch
            then sift from' to left'
            else poke to b >> sift from' (to `plusPtr` 1) left'
1289
-- | /O(n)/ 'filter', applied to a predicate and a ByteString, returns a
-- ByteString containing those bytes that satisfy the predicate, in
-- their original order. An empty input is returned unchanged.
filter :: (Word8 -> Bool) -> ByteString -> ByteString
filter k ps@(PS fp off len)
    | null ps   = ps
    | otherwise = inlinePerformIO $ generate len $ \dst ->
        withForeignPtr fp $ \src -> do
            end <- sift (src `plusPtr` off) dst len
            return (end `minusPtr` dst)   -- number of bytes actually kept
  where
    -- sift from to left: copy the accepted bytes out of the left bytes
    -- still unread at from, and return the final write position
    STRICT3(sift)
    sift _ to 0 = return to
    sift from to left = do
        b <- peek from
        let from' = from `plusPtr` 1
            left' = left - 1
        if k b
            then poke to b >> sift from' (to `plusPtr` 1) left'
            else sift from' to left'
1306
1307 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1308
-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
-- and returns the first element matching the predicate, or 'Nothing'
-- if there is no such element.
--
-- The search goes through 'findIndex', so it stops at the first match
-- and does not build a filtered copy of the input.
find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
find p ps = case findIndex p ps of
    Just n  -> Just (ps `unsafeIndex` n)  -- n comes from findIndex, so it is in bounds
    Nothing -> Nothing
1316
1317 -- ---------------------------------------------------------------------
1318 -- Searching for substrings
1319
-- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns
-- 'True' iff the first is a prefix of the second. The empty ByteString
-- is a prefix of everything; otherwise the leading bytes of the second
-- argument are compared against the first with memcmp.
isPrefixOf :: ByteString -> ByteString -> Bool
isPrefixOf (PS fp1 off1 len1) (PS fp2 off2 len2)
    | len1 == 0   = True
    | len2 < len1 = False
    | otherwise   = inlinePerformIO $
        withForeignPtr fp1 $ \a ->
        withForeignPtr fp2 $ \b ->
            (== 0) `fmap` memcmp (a `plusPtr` off1) (b `plusPtr` off2) len1
1330
-- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns
-- 'True' iff the first is a suffix of the second.
--
-- The following holds:
--
-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
--
-- However, the real implementation uses memcmp to compare the end of
-- the second string only, with no reverse required.
isSuffixOf :: ByteString -> ByteString -> Bool
isSuffixOf (PS fp1 off1 len1) (PS fp2 off2 len2)
    | len1 == 0   = True
    | len2 < len1 = False
    | otherwise   = inlinePerformIO $
        withForeignPtr fp1 $ \a ->
        withForeignPtr fp2 $ \b ->
            -- the last len1 bytes of the second string start here
            let tailStart = b `plusPtr` off2 `plusPtr` (len2 - len1)
            in  (== 0) `fmap` memcmp (a `plusPtr` off1) tailStart len1
1348
-- | Check whether one string is a substring of another. @isSubstringOf
-- p s@ is equivalent to @not (null (findSubstrings p s))@.
isSubstringOf :: ByteString -- ^ String to search for.
              -> ByteString -- ^ String to search in.
              -> Bool
isSubstringOf p s = case findSubstrings p s of
    [] -> False
    _  -> True
1355
-- | Get the first index of a substring in another string,
-- or 'Nothing' if the string is not found.
-- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
findSubstring :: ByteString -- ^ String to search for.
              -> ByteString -- ^ String to search in.
              -> Maybe Int
findSubstring pat str = listToMaybe (findSubstrings pat str)
1363
-- | Find the indexes of all (possibly overlapping) occurrences of a
-- substring in a string. This function uses the Knuth-Morris-Pratt
-- string matching algorithm.
findSubstrings :: ByteString -- ^ String to search for.
               -> ByteString -- ^ String to search in.
               -> [Int]

findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
  where
    patc k = unsafeIndex pat k
    strc k = unsafeIndex str k

    -- fallback table, indexed by position in the pattern
    -- maybe we should make kmpNext a UArray before using it in search?
    kmpNext = listArray (0,m) (-1 : kmpNextL pat (-1))

    -- table entries for the pattern suffix p, given the state j
    -- reached before the head of p
    kmpNextL p j
        | null p    = []
        | otherwise = entry : kmpNextL rest j'
      where
        j'   = next (unsafeHead p) j + 1
        rest = unsafeTail p
        entry
            | not (null rest) && unsafeHead rest == patc j' = kmpNext Array.! j'
            | otherwise                                     = j'

    -- i: position in string, j: position in pattern
    search i j
        | j == m    = (i - j) : onward
        | otherwise = onward
      where
        onward
            | i == n    = []
            | otherwise = search (i + 1) (next (strc i) j + 1)

    -- follow the fallback table until byte c can extend the match
    next c j
        | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
        | otherwise                         = j
1389
1390 -- ---------------------------------------------------------------------
1391 -- Zipping
1392
-- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
-- corresponding pairs of bytes. If one input ByteString is short,
-- excess elements of the longer ByteString are discarded. This is
-- equivalent to a pair of 'unpack' operations.
zip :: ByteString -> ByteString -> [(Word8,Word8)]
zip ps qs
    | null ps   = []
    | null qs   = []
    | otherwise = pair : zip (unsafeTail ps) (unsafeTail qs)
  where
    pair = (unsafeHead ps, unsafeHead qs)
1401
-- | 'zipWith' generalises 'zip' by combining corresponding bytes with
-- the function given as the first argument, instead of pairing them.
-- For example, @'zipWith' (+)@ is applied to two ByteStrings to produce
-- the list of corresponding sums.
zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
zipWith f ps qs
    | null ps   = []
    | null qs   = []
    | otherwise = combined : zipWith f (unsafeTail ps) (unsafeTail qs)
  where
    combined = f (unsafeHead ps) (unsafeHead qs)
1410
-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
-- ByteStrings. Note that this performs two 'pack' operations.
unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
unzip ls = (pack firsts, pack seconds)
  where
    firsts  = P.map fst ls
    seconds = P.map snd ls
{-# INLINE unzip #-}
1416
1417 -- ---------------------------------------------------------------------
1418 -- Special lists
1419
-- | /O(n)/ Return all initial segments of the given 'ByteString',
-- shortest first. Every segment is a slice over the original buffer.
inits :: ByteString -> [ByteString]
inits (PS fp off len) = P.map prefix [0..len]
  where
    prefix n = PS fp off n
1423
-- | /O(n)/ Return all final segments of the given 'ByteString', longest
-- first. The last element of the result is always 'empty'.
tails :: ByteString -> [ByteString]
tails p =
    if null p
        then [empty]
        else p : tails (unsafeTail p)
1428
-- less efficient spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1430
-- | /O(n)/ breaks a ByteString into a list of ByteStrings, one byte
-- each. Every element is a one-byte slice over the original buffer.
elems :: ByteString -> [ByteString]
elems (PS fp off len)
    | len == 0  = []
    | otherwise = PS fp off 1 : elems (PS fp (off + 1) (len - 1))
{-# INLINE elems #-}
1436
1437 -- ---------------------------------------------------------------------
1438 -- ** Ordered 'ByteString's
1439
-- | /O(n)/ Sort a ByteString efficiently, using counting sort: the
-- occurrences of each of the 256 byte values are tallied, and the
-- output is then written as one run per byte value, in ascending order.
sort :: ByteString -> ByteString
sort (PS input off len) = create len $ \out -> allocaArray 256 $ \counts -> do

    -- clear the 256 counters, then tally every input byte
    memset (castPtr counts) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
    withForeignPtr input (\src -> countEach counts (src `plusPtr` off) len)

    -- emit b dst: write the runs for byte values b..255, starting at dst
    let STRICT2(emit)
        emit b dst
            | b == 256  = return ()
            | otherwise = do
                n <- peekElemOff counts b
                when (n /= 0) (memset dst (fromIntegral b) n >> return ())
                emit (b + 1) (dst `plusPtr` fromIntegral n)
    emit 0 out
1453
-- "countEach counts str l" walks the first l bytes of str and, for each
-- byte, increments the slot of counts indexed by that byte value. The
-- counters are not cleared here; the caller supplies their initial state.
countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
STRICT3(countEach)
countEach counts str l = tally 0
  where
    STRICT1(tally)
    tally i
        | i == l    = return ()
        | otherwise = do
            byte <- peekElemOff str i
            let slot = fromIntegral byte
            seen <- peekElemOff counts slot
            pokeElemOff counts slot (seen + 1)
            tally (i + 1)
1466
1467 {-
1468 sort :: ByteString -> ByteString
1469 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1470 memcpy p (f `plusPtr` s) l
1471 c_qsort p l -- inplace
1472 -}
1473
1474 {-
1475 sort = pack . List.sort . unpack
1476 -}
1477
1478 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1479 --
1480 -- Try some linear sorts: radix, counting
1481 -- Or mergesort.
1482 --
1483 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1484 -- sortBy f ps = undefined
1485
1486 -- ---------------------------------------------------------------------
1487 --
1488 -- Extensions to the basic interface
1489 --
1490
-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' reads
-- the byte at the start of the slice without checking the length, so
-- the caller is obliged to guarantee that the ByteString is non-empty.
unsafeHead :: ByteString -> Word8
unsafeHead (PS fp off _) =
    inlinePerformIO (withForeignPtr fp (\base -> peekByteOff base off))
{-# INLINE unsafeHead #-}
1497
-- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' moves
-- the start of the slice forward by one byte without checking the
-- length. As with 'unsafeHead', the caller must guarantee separately
-- that the ByteString is non-empty.
unsafeTail :: ByteString -> ByteString
unsafeTail (PS fp off len) = PS fp (off + 1) (len - 1)
{-# INLINE unsafeTail #-}
1504
-- | Unsafe 'ByteString' index (subscript) operator, starting from 0,
-- returning a 'Word8'. No bounds check is performed, so the caller is
-- obliged to ensure by some other means that the index is in range.
unsafeIndex :: ByteString -> Int -> Word8
unsafeIndex (PS fp off _) i =
    inlinePerformIO (withForeignPtr fp (\base -> peekByteOff base (off + i)))
{-# INLINE unsafeIndex #-}
1512
1513 -- ---------------------------------------------------------------------
1514 -- Low level constructors
1515
1516 #if defined(__GLASGOW_HASKELL__)
-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
-- Addr\# (an arbitrary machine address assumed to point outside the
-- garbage-collected heap) into a @ByteString@. Creating an Addr\# from
-- an unboxed string literal is much faster than packing a boxed
-- string; GHC compiles such a literal to a static @char []@. The
-- length is established with a call to @strlen(3)@, so the Addr\# must
-- point to a null-terminated buffer (as is the case with "string"\#
-- literals in GHC). Use 'unsafePackAddress' if you know the length of
-- the string statically.
--
-- An example:
--
-- > literalFS = packAddress "literal"#
--
packAddress :: Addr# -> ByteString
packAddress addr# = inlinePerformIO $
    newForeignPtr_ cstr >>= \fp ->
        return (PS fp 0 (fromIntegral (c_strlen cstr)))
  where
    cstr = Ptr addr#
{-# INLINE packAddress #-}
1538
-- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
-- 'ByteStrings' -- which is ideal for string literals. It wraps a raw
-- 'Addr\#' together with a caller-supplied length, without inspecting
-- the bytes. Make sure the length is correct, otherwise use the safer
-- 'packAddress' (where the length will be calculated once at runtime).
unsafePackAddress :: Int -> Addr# -> ByteString
unsafePackAddress len addr# = inlinePerformIO $
    newForeignPtr_ cstr >>= \fp -> return (PS fp 0 len)
  where
    cstr = Ptr addr#
1550
1551 #endif
1552
-- | /O(1)/ Build a ByteString from a ForeignPtr and a length. The
-- ByteString begins at offset 0 of the pointer.
fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
fromForeignPtr buffer len = PS buffer 0 len
1556
-- | /O(1)/ Deconstruct a ByteString into its ForeignPtr, the offset of
-- its first byte within that pointer, and its length.
toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
toForeignPtr (PS buffer off len) = (buffer, off, len)
1560
-- | /O(1)/ 'skipIndex' returns the internal offset of the current
-- 'ByteString' within the buffer it was sliced from, as an 'Int'.
skipIndex :: ByteString -> Int
skipIndex (PS _ off _) = off
{-# INLINE skipIndex #-}
1567
-- | /O(n)/ Build a @ByteString@ from a @CString@. The bytes are not
-- copied and the resulting value has /no/ finalizer associated to it.
-- The ByteString length is calculated using /strlen(3)/, and thus the
-- complexity is /O(n)/.
packCString :: CString -> ByteString
packCString cstr = inlinePerformIO $
    newForeignPtr_ (castPtr cstr) >>= \fp ->
        return (PS fp 0 (fromIntegral (c_strlen cstr)))
1575
-- | /O(1)/ Build a @ByteString@ from a @CStringLen@. The bytes are not
-- copied and the resulting value has /no/ finalizer associated with it.
-- This operation has /O(1)/ complexity as the final size is already
-- known, so no /strlen(3)/ is required.
packCStringLen :: CStringLen -> ByteString
packCStringLen (ptr,len) = inlinePerformIO $
    newForeignPtr_ (castPtr ptr) >>= \fp ->
        return (PS fp 0 (fromIntegral len))
1584
-- | /O(n)/ Build a @ByteString@ from a malloced @CString@. The bytes
-- are not copied; the value gets a @free(3)@ finalizer associated to
-- it. The length is calculated using /strlen(3)/.
packMallocCString :: CString -> ByteString
packMallocCString cstr = inlinePerformIO $
    newForeignFreePtr (castPtr cstr) >>= \fp ->
        return (PS fp 0 (fromIntegral (c_strlen cstr)))
1591
1592 #if defined(__GLASGOW_HASKELL__)
-- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
-- length, and an IO action representing a finalizer. The buffer is
-- used in place, not copied. This function is not available on Hugs.
--
packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
packCStringFinalizer p l f =
    FC.newForeignPtr p f >>= \fp -> return (PS fp 0 l)
1601
-- | Explicitly run the finaliser associated with a 'ByteString'.
-- Further references to this value may generate invalid memory
-- references. This operation is unsafe, as there may be other
-- 'ByteStrings' referring to the same underlying pages. If you use
-- this, you need to have a proof of some kind that all 'ByteString's
-- ever generated from the underlying byte array are no longer live.
unsafeFinalize :: ByteString -> IO ()
unsafeFinalize (PS buffer _ _) = finalizeForeignPtr buffer
1610
1611 #endif
1612
-- | /O(n) construction/ Use a @ByteString@ with a function requiring a
-- null-terminated @CString@. The bytes are copied (a memcpy(3)) into a
-- freshly malloced buffer with a trailing null byte, and that buffer
-- is freed once the function finishes, so the @CString@ should neither
-- be freed by the function nor used after it returns.
useAsCString :: ByteString -> (CString -> IO a) -> IO a
useAsCString (PS fp off len) = bracket acquire release
  where
    release = c_free . castPtr

    -- malloc len+1 bytes, copy the slice in, and terminate it
    acquire = withForeignPtr fp $ \src -> do
        buf <- c_malloc (fromIntegral len + 1)
        memcpy (castPtr buf) (castPtr src `plusPtr` off) (fromIntegral len)
        poke (buf `plusPtr` len) (0 :: Word8)
        return (castPtr buf)
1623
-- | /O(1) construction/ Use a @ByteString@ with a function requiring a
-- @CString@. No copy is made: the function receives a pointer into the
-- buffer of the ByteString itself.
-- Warning: modifying the @CString@ will affect the @ByteString@.
-- Why is this function unsafe? It relies on the null byte at the end of
-- the ByteString to be there. This is /not/ the case if your ByteString
-- has been spliced from a larger string (i.e. with take or drop).
-- Unless you can guarantee the null byte, you should use the safe
-- version, which will copy the string first.
--
unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
unsafeUseAsCString (PS fp off _) action =
    withForeignPtr fp (\base -> action (castPtr base `plusPtr` off))
1634
-- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
-- This is mainly useful to allow the rest of the data pointed
-- to by the 'ByteString' to be garbage collected, for example
-- if a large string has been read in, and only a small part of it
-- is needed in the rest of the program.
copy :: ByteString -> ByteString
copy (PS fp off len) = create len $ \dst ->
    withForeignPtr fp $ \src ->
        memcpy dst (src `plusPtr` off) len
1642
-- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
-- CString is going to be deallocated from C land. The length is
-- obtained with /strlen(3)/ and the copy is made by 'copyCStringLen'.
copyCString :: CString -> ByteString
copyCString cstr = copyCStringLen (cstr, len)
  where
    len = fromIntegral (c_strlen cstr)
1647
-- | /O(n)/ Same as copyCString, but saves a strlen call when the length
-- is known. The copy is stored with one extra trailing null byte that
-- is not counted in the length of the result.
copyCStringLen :: CStringLen -> ByteString
copyCStringLen (cstr, len) = inlinePerformIO $ do
    buffer <- mallocForeignPtrArray (len + 1)
    withForeignPtr buffer $ \dst -> do
        memcpy dst (castPtr cstr) len
        poke (dst `plusPtr` len) (0 :: Word8)   -- terminating null
    return $! PS buffer 0 len
1656
-- | /O(1) construction/ Use a @ByteString@ with a function requiring a
-- @CStringLen@. No copy is made: the function receives a pointer into
-- the buffer of the ByteString itself, paired with its length.
-- Warning: modifying the @CStringLen@ will affect the @ByteString@.
-- This is analogous to unsafeUseAsCString, and comes with the same
-- safety requirements.
--
unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
unsafeUseAsCStringLen (PS fp off len) action =
    withForeignPtr fp (\base -> action (castPtr base `plusPtr` off, len))
1664
-- | Given the maximum size needed and a function to make the contents
-- of a ByteString, generate makes the 'ByteString'. The generating
-- function is required to return the actual final size (<= the maximum
-- size), and the resulting byte array is realloced to this size. The
-- string is padded at the end with a null byte.
--
-- generate is the main mechanism for creating custom, efficient
-- ByteString functions, using Haskell or C functions to fill the space.
--
-- NOTE(review): nothing here frees the initial buffer if the filling
-- function throws; confirm whether callers can raise exceptions.
--
generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
generate i f = do
    scratch <- mallocArray i
    used    <- f scratch
    -- resize to the bytes actually written, plus one for the terminator
    final   <- reallocArray scratch (used + 1)
    poke (final `plusPtr` used) (0 :: Word8)   -- XXX so CStrings work
    fp      <- newForeignFreePtr final
    return (PS fp 0 used)
1682
1683 -- ---------------------------------------------------------------------
1684 -- line IO
1685
1686 #if defined(__GLASGOW_HASKELL__)
1687
-- | Read one line from standard input; this is 'hGetLine' applied to
-- 'stdin'.
getLine :: IO ByteString
getLine = hGetLine stdin
1691
-- | Read one line from a handle as a 'ByteString'. The terminating
-- @\'\\n\'@ is consumed but not included in the result. If end of file
-- is reached after some bytes were read, those bytes are returned as the
-- final line; if nothing was read, an EOF error is raised. The handle
-- must be buffered: a handle in 'NoBuffering' mode is rejected with
-- 'error'.
hGetLine :: Handle -> IO ByteString
hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \hdl ->
    case haBufferMode hdl of
        NoBuffering -> error "no buffering"
        _           -> readBuffered hdl
  where
    -- Start scanning with whatever currently sits in the handle's buffer.
    readBuffered hdl = do
        let bufRef = haBuffer hdl
        initial <- readIORef bufRef
        scanLoop hdl bufRef initial 0 []

    -- @soFar@ is the number of line bytes gathered so far and @chunks@
    -- holds the pieces of the line, most recent first.
    scanLoop hdl bufRef
             cur@Buffer{ bufRPtr = rd, bufWPtr = wr, bufBuf = bytes }
             soFar chunks =
        soFar `seq` do
            eol <- findEOL rd wr bytes
            let total   = soFar + eol - rd
                drained = cur{ bufRPtr = 0, bufWPtr = 0 }
            chunk <- mkPS bytes rd eol
            let chunks' = chunk : chunks
            if eol /= wr
                -- A '\n' sits at index eol: step over it, resetting the
                -- buffer if that newline was its last byte.
                then do
                    writeIORef bufRef $
                        if wr == eol + 1
                            then drained
                            else cur{ bufRPtr = eol + 1 }
                    mkBigPS total chunks'
                -- No newline in this buffer: it is used up, so refill.
                else do
                    refill <- maybeFillReadBuffer (haFD hdl) True
                                  (haIsStream hdl) drained
                    case refill of
                        -- EOF: hand back a partial line if there is one.
                        Nothing -> do
                            writeIORef bufRef drained
                            if total > 0
                                then mkBigPS total chunks'
                                else ioe_EOF
                        Just fresh ->
                            scanLoop hdl bufRef fresh total chunks'

    -- Index of the first '\n' in [pos, end), or end if there is none.
    findEOL pos end bytes
        | pos == end = return end
        | otherwise  = readCharFromBuffer bytes pos >>= \(ch, next) ->
            case ch of
                '\n' -> return pos   -- NB. not next: exclude the '\n'
                _    -> findEOL next end bytes

    -- Refill the buffer, turning an EOF error into Nothing; any other
    -- IOError is re-raised.
    maybeFillReadBuffer fd isLine isStream buf =
        fmap Just (fillReadBuffer fd isLine isStream buf) `catch` onIOError
      where
        onIOError err
            | isEOFError err = return Nothing
            | otherwise      = ioError err
1746
-- Copy the bytes in the index range [start, end) of a raw handle buffer
-- into a freshly allocated ByteString.
-- TODO, rewrite to use normal memcpy
mkPS :: RawBuffer -> Int -> Int -> IO ByteString
mkPS buf start end = do
    fp <- mallocByteString size
    withForeignPtr fp $ \dst ->
        memcpy_ptr_baoff dst buf start (fromIntegral size)
            >> return (PS fp 0 size)
  where
    size = end - start
1755
-- Assemble a line from its chunks, which arrive most-recent-first. A
-- single chunk is returned untouched; otherwise the chunks are reversed
-- and concatenated. The length argument is currently ignored.
mkBigPS :: Int -> [ByteString] -> IO ByteString
mkBigPS _ chunks = case chunks of
    [single] -> return single
    _        -> return $! concat (P.reverse chunks)
1759
1760 #endif
1761
1762 -- ---------------------------------------------------------------------
1763 -- Block IO
1764
-- | Write the bytes of a 'ByteString' to the given 'Handle'. An empty
-- 'ByteString' produces no call to the handle at all.
hPut :: Handle -> ByteString -> IO ()
hPut h (PS fp off len)
    | len == 0  = return ()
    | otherwise = withForeignPtr fp $ \base ->
                      hPutBuf h (base `plusPtr` off) len
1770
-- | Write a ByteString to stdout; this is 'hPut' applied to 'stdout'.
putStr :: ByteString -> IO ()
putStr ps = hPut stdout ps
1774
-- | Write a ByteString to stdout, followed by a single newline byte
-- (@0x0a@).
putStrLn :: ByteString -> IO ()
putStrLn ps = do
    hPut stdout ps
    hPut stdout (packByte 0x0a)
1779
-- | Read a 'ByteString' directly from the specified 'Handle'. This
-- is far more efficient than reading the characters into a 'String'
-- and then using 'pack'.
--
-- The second argument is the number of bytes to request. The result's
-- length is whatever 'hGetBuf' reports, which may be less than was
-- requested. A request for 0 bytes returns 'empty' without touching the
-- handle. A negative request raises an 'IOError' before any memory is
-- allocated.
hGet :: Handle -> Int -> IO ByteString
hGet h i
    | i == 0    = return empty
    -- Reject negative sizes up front: mallocByteString would otherwise
    -- write its null byte at a negative offset before hGetBuf had the
    -- chance to refuse the count.
    | i <  0    = ioError (userError
                      ("Data.ByteString.hGet: illegal ByteString size " ++ show i))
    | otherwise = do
        fp <- mallocByteString i
        l  <- withForeignPtr fp $ \p -> do
                  n <- hGetBuf h p i
                  -- On a short read, move the null pad so that it again
                  -- sits directly after the last valid byte.
                  poke (p `plusPtr` n) (0 :: Word8)
                  return n
        return $ PS fp 0 l
1788
1789 #if defined(__GLASGOW_HASKELL__)
-- | hGetNonBlocking is identical to 'hGet', except that it will never block
-- waiting for data to become available, instead it returns only whatever data
-- is available.
--
-- The result's length is whatever 'hGetBufNonBlocking' reports. A request
-- for 0 bytes returns 'empty' without touching the handle. A negative
-- request raises an 'IOError' before any memory is allocated.
hGetNonBlocking :: Handle -> Int -> IO ByteString
hGetNonBlocking h i
    | i == 0    = return empty
    -- Reject negative sizes up front: mallocByteString would otherwise
    -- write its null byte at a negative offset.
    | i <  0    = ioError (userError
                      ("Data.ByteString.hGetNonBlocking: illegal ByteString size " ++ show i))
    | otherwise = do
        fp <- mallocByteString i
        l  <- withForeignPtr fp $ \p -> do
                  n <- hGetBufNonBlocking h p i
                  -- Keep the null pad directly after the last valid byte
                  -- even when fewer than i bytes were available.
                  poke (p `plusPtr` n) (0 :: Word8)
                  return n
        return $ PS fp 0 l
1799 #endif
1800
-- | Read entire handle contents into a 'ByteString'.
--
-- As with 'hGet', the string representation in the file is assumed to
-- be ISO-8859-1.
--
-- The contents are read into a malloc'd buffer that starts at 1024 bytes
-- and doubles each time it fills up. When a read comes back short, the
-- buffer is shrunk to the number of bytes read plus one, and that extra
-- byte is set to null, matching what 'generate' does.
--
hGetContents :: Handle -> IO ByteString
hGetContents h = do
    let start_size = 1024
    p <- mallocArray start_size
    i <- hGetBuf h p start_size
    if i < start_size
        then finish p i
        else grow p start_size
  where
    -- Shrink the buffer to n bytes plus the trailing null byte and wrap
    -- it up. Asking for n+1 also avoids a zero-sized realloc when the
    -- handle turned out to be empty.
    finish p n = do
        p' <- reallocArray p (n + 1)
        poke (p' `plusPtr` n) (0 :: Word8)
        fp <- newForeignFreePtr p'
        return $ PS fp 0 n

    -- The first s bytes of p are full: double the buffer and try to
    -- fill the new half.
    grow p s = do
        let s' = 2 * s
        p' <- reallocArray p s'
        i  <- hGetBuf h (p' `plusPtr` s) s
        if i < s
            then finish p' (s + i)
            else grow p' s'
1827
-- | Read everything from standard input; this is 'hGetContents' applied
-- to 'stdin'.
getContents :: IO ByteString
getContents = hGetContents stdin
1831
-- | Read an entire file directly into a 'ByteString'. This is far more
-- efficient than reading the characters into a 'String' and then using
-- 'pack'. It also may be more efficient than opening the file and
-- reading it using hGet.
--
-- The file is opened in binary mode, its size is taken with 'hFileSize'
-- and that many bytes are requested with 'hGet'. The handle is closed
-- afterwards, including when one of those steps raises an 'IOError'.
readFile :: FilePath -> IO ByteString
readFile f = do
    h <- openBinaryFile f ReadMode
    -- Close the handle before re-raising, so that a failed read does not
    -- leak it.
    s <- (hFileSize h >>= hGet h . fromIntegral)
             `catch` \e -> hClose h >> ioError e
    hClose h
    return s
1843
-- | Write a 'ByteString' to a file. The file is opened in binary mode
-- with 'WriteMode'. The handle is closed afterwards, including when the
-- write raises an 'IOError'.
writeFile :: FilePath -> ByteString -> IO ()
writeFile f ps = do
    h <- openBinaryFile f WriteMode
    -- Close the handle before re-raising, so that a failed write does
    -- not leak it.
    hPut h ps `catch` \e -> hClose h >> ioError e
    hClose h
1850
1851 {-
1852 --
1853 -- Disable until we can move it into a portable .hsc file
1854 --
1855
1856 -- | Like readFile, this reads an entire file directly into a
1857 -- 'ByteString', but it is even more efficient. It involves directly
1858 -- mapping the file to memory. This has the advantage that the contents
1859 -- of the file never need to be copied. Also, under memory pressure the
1860 -- page may simply be discarded, while in the case of readFile it would
1861 -- need to be written to swap. If you read many small files, mmapFile
1862 -- will be less memory-efficient than readFile, since each mmapFile
1863 -- takes up a separate page of memory. Also, you can run into bus
1864 -- errors if the file is modified. As with 'readFile', the string
1865 -- representation in the file is assumed to be ISO-8859-1.
1866 --
1867 -- On systems without mmap, this is the same as a readFile.
1868 --
1869 mmapFile :: FilePath -> IO ByteString
1870 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1871
1872 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1873 mmap f = do
1874 h <- openBinaryFile f ReadMode
1875 l <- fromIntegral `fmap` hFileSize h
1876 -- Don't bother mmaping small files because each mmapped file takes up
1877 -- at least one full VM block.
1878 if l < mmap_limit
1879 then do thefp <- mallocByteString l
1880 withForeignPtr thefp $ \p-> hGetBuf h p l
1881 hClose h
1882 return (thefp, l)
1883 else do
1884 -- unix only :(
1885 fd <- fromIntegral `fmap` handleToFd h
1886 p <- my_mmap l fd
1887 fp <- if p == nullPtr
1888 then do thefp <- mallocByteString l
1889 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1890 return thefp
1891 else do
1892 -- The munmap leads to crashes on OpenBSD.
1893 -- maybe there's a use after unmap in there somewhere?
1894 #if !defined(__OpenBSD__)
1895 let unmap = c_munmap p l >> return ()
1896 #else
1897 let unmap = return ()
1898 #endif
1899 fp <- FC.newForeignPtr p unmap
1900 return fp
1901 c_close fd
1902 hClose h
1903 return (fp, l)
1904 where mmap_limit = 16*1024
1905 -}
1906
1907 #if defined(__GLASGOW_HASKELL__)
--
-- | A ByteString equivalent for getArgs. More efficient for large argument lists.
-- The argument vector is fetched with @getProgArgv@; its first entry is
-- skipped and every remaining C string is packed with 'packCString'.
--
getArgs :: IO [ByteString]
getArgs =
    alloca $ \argcPtr ->
    alloca $ \argvPtr -> do
        getProgArgv argcPtr argvPtr
        argc  <- peek argcPtr
        argv  <- peek argvPtr
        -- drop element 0 of argv, hence one fewer entry to read
        cstrs <- peekArray (fromIntegral argc - 1) (argv `advancePtr` 1)
        return (P.map packCString cstrs)
1919 #endif
1920
1921 -- ---------------------------------------------------------------------
1922 -- Internal utilities
1923
-- Unsafe conversions between 'Word8' and 'Char', provided as a
-- convenience for ByteString construction. 'w2c' goes from 'Word8' to
-- 'Char' (through unsafeChr under GHC, chr elsewhere); its counterpart
-- 'c2w' goes the other way and silently truncates Chars > '\255' to
-- 8 bits.
{-# INLINE w2c #-}
w2c :: Word8 -> Char
#if defined(__GLASGOW_HASKELL__)
w2c = unsafeChr . fromIntegral
#else
w2c = chr . fromIntegral
#endif
1934
-- Convert a 'Char' to a 'Word8' by taking its code point with 'ord' and
-- narrowing it with 'fromIntegral'; code points above 255 are silently
-- truncated to their low 8 bits.
{-# INLINE c2w #-}
c2w :: Char -> Word8
c2w = fromIntegral . ord
1938
-- Wrapper around mallocForeignPtrArray: allocates room for the requested
-- number of bytes plus one, and sets that extra, final byte to null. Any
-- ByteString allocated this way is therefore padded with a null byte.
mallocByteString :: Int -> IO (ForeignPtr Word8)
mallocByteString len = do
    fp <- mallocForeignPtrArray (len + 1)
    withForeignPtr fp (\base -> poke (base `plusPtr` len) terminator)
    return fp
  where
    terminator = 0 :: Word8
1946
-- | A way of creating ByteStrings outside the IO monad. The @Int@
-- argument gives the final size of the ByteString, and the function
-- argument is run on the new buffer to fill it in. Unlike 'generate'
-- the ByteString is not reallocated if the final size is less than the
-- estimated size. Also, unlike 'generate' ByteString's created this way
-- are managed on the Haskell heap.
--
-- The buffer comes from 'mallocByteString', so it holds @l@ bytes plus a
-- null byte at offset @l@, directly after the contents.
create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
create l write_ptr = inlinePerformIO $ do
    -- mallocByteString already adds the byte for the null pad. Asking
    -- for l+1 here would put the pad at offset l+1 and leave the byte
    -- right after the contents uninitialised.
    fp <- mallocByteString l
    withForeignPtr fp $ \p -> write_ptr p
    return $ PS fp 0 l
{-# INLINE create #-}
1958
-- | Run an IO action on the raw pointer inside a 'ForeignPtr' and return
-- its result as a pure value, by way of 'inlinePerformIO'.
{-# INLINE withPtr #-}
withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
withPtr fp io = inlinePerformIO $ withForeignPtr fp io
1963
-- Shared helper for the near-identical calls to `error' on an empty
-- ByteString, to reduce the number of constant strings created when
-- compiled. The argument is the unqualified name of the failing function.
{-# INLINE errorEmptyList #-}
errorEmptyList :: String -> a
errorEmptyList fun = error message
  where
    message = "Data.ByteString." ++ fun ++ ": empty ByteString"
1969
-- 'findIndexOrEnd' is a variant of findIndex, that returns the length
-- of the string if no element is found, rather than Nothing.
--
-- The search runs in a local worker that carries the current index as a
-- strict accumulator, so it uses constant stack however long the string
-- is. It also leaves this wrapper non-recursive, which lets the INLINE
-- pragma take effect.
findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
STRICT2(findIndexOrEnd)
findIndexOrEnd f ps = go 0 ps
  where
    -- n is the number of bytes already skipped; force it at each step so
    -- that no chain of (+1) thunks builds up.
    go n rest
        | n `seq` null rest   = n
        | f (unsafeHead rest) = n
        | otherwise           = go (n + 1) (unsafeTail rest)
{-# INLINE findIndexOrEnd #-}
1979
-- Scan backwards from the end of the string with a predicate. The result
-- is the length of the shortest prefix whose last byte satisfies the
-- predicate, or 0 if no byte does (or the string is empty).
findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
STRICT2(findFromEndUntil)
findFromEndUntil f ps@(PS x s l)
    | null ps     = 0
    | f (last ps) = l
    | otherwise   = findFromEndUntil f (PS x s (l - 1))
1987
-- Just like unsafePerformIO, but we inline it. Big performance gains as
-- it exposes lots of things to further inlining. Under GHC the IO action
-- is unwrapped and applied to realWorld# directly; other compilers fall
-- back to plain unsafePerformIO.
--
{-# INLINE inlinePerformIO #-}
inlinePerformIO :: IO a -> a
#if !defined(__GLASGOW_HASKELL__)
inlinePerformIO = unsafePerformIO
#else
inlinePerformIO (IO act) =
    case act realWorld# of
        (# _, result #) -> result
#endif
1998
-- Wrap a pointer in a 'ForeignPtr' whose finalizer is C @free@. Under
-- GHC the finalizer is the Haskell action @c_free@ applied to the
-- pointer; elsewhere it is the address of @free@ itself.
{-# INLINE newForeignFreePtr #-}
newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
#if !defined(__GLASGOW_HASKELL__)
newForeignFreePtr buf = newForeignPtr c_free_finalizer buf
#else
newForeignFreePtr buf = FC.newForeignPtr buf (c_free buf)
#endif
2006
2007 -- ---------------------------------------------------------------------
2008 --
2009 -- Standard C functions
2010 --
2011
-- C @strlen@, imported as a pure function.
-- NOTE(review): the C prototype returns @size_t@, not @int@ -- confirm
-- that 'CInt' is acceptable for every caller.
foreign import ccall unsafe "string.h strlen"
    c_strlen :: CString -> CInt

-- C @malloc@.
foreign import ccall unsafe "stdlib.h malloc"
    c_malloc :: CInt -> IO (Ptr Word8)

-- C @free@, as an IO action.
foreign import ccall unsafe "static stdlib.h free"
    c_free :: Ptr Word8 -> IO ()

#if !defined(__GLASGOW_HASKELL__)
-- Address of C @free@, usable as a 'ForeignPtr' finalizer on
-- implementations other than GHC.
foreign import ccall unsafe "static stdlib.h &free"
    c_free_finalizer :: FunPtr (Ptr Word8 -> IO ())
#endif

-- C @memset@.
foreign import ccall unsafe "string.h memset"
    memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)

-- C @memchr@, imported as a pure function.
foreign import ccall unsafe "string.h memchr"
    memchr :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8

-- C @memcmp@.
-- NOTE(review): the C prototype takes a @size_t@ and returns @int@,
-- while both are 'Int' here -- confirm this is safe on 64-bit targets.
foreign import ccall unsafe "string.h memcmp"
    memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int

-- C @memcpy@, with the returned pointer discarded.
foreign import ccall unsafe "string.h memcpy"
    memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2037
2038 -- ---------------------------------------------------------------------
2039 --
2040 -- Uses our C code
2041 --
2042
-- Helpers implemented in the package's own C code (fpstring.h). The
-- notes below follow from the names and types only.
-- NOTE(review): argument order and exact semantics should be confirmed
-- against fpstring.h.

-- Presumably writes the reversed source bytes to the destination.
foreign import ccall unsafe "static fpstring.h reverse"
    c_reverse :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()

-- Presumably writes the source bytes to the destination with the given
-- byte placed between each pair.
foreign import ccall unsafe "static fpstring.h intersperse"
    c_intersperse :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()

-- Presumably the largest byte in a buffer; imported as a pure function.
foreign import ccall unsafe "static fpstring.h maximum"
    c_maximum :: Ptr Word8 -> Int -> Word8

-- Presumably the smallest byte in a buffer; imported as a pure function.
foreign import ccall unsafe "static fpstring.h minimum"
    c_minimum :: Ptr Word8 -> Int -> Word8

-- Presumably the number of occurrences of a byte in a buffer; imported
-- as a pure function.
foreign import ccall unsafe "static fpstring.h count"
    c_count :: Ptr Word8 -> Int -> Word8 -> Int
2057
2058 -- ---------------------------------------------------------------------
2059 -- MMap
2060
2061 {-
2062 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2063 :: Int -> Int -> IO (Ptr Word8)
2064
2065 foreign import ccall unsafe "static unistd.h close" c_close
2066 :: Int -> IO Int
2067
2068 # if !defined(__OpenBSD__)
2069 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2070 :: Ptr Word8 -> Int -> IO Int
2071 # endif
2072 -}
2073
2074 -- ---------------------------------------------------------------------
2075 -- Internal GHC Haskell magic
2076
2077 #if defined(__GLASGOW_HASKELL__)
-- RTS call used by 'getArgs': it is handed pointers to an argument count
-- and an argument vector, which the caller reads back afterwards.
foreign import ccall unsafe "RtsAPI.h getProgArgv" getProgArgv
    :: Ptr CInt -> Ptr (Ptr CString) -> IO ()

-- Copy out of a GHC 'RawBuffer' into plain memory. As used by mkPS the
-- arguments are: destination pointer, source buffer, offset into the
-- source, and number of bytes.
foreign import ccall unsafe "__hscore_memcpy_src_off" memcpy_ptr_baoff
    :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2083 #endif