6eb861a881733ce4729103e3cb16833ee0d37a55
[packages/old-time.git] / Data / ByteString.hs
1 {-# OPTIONS_GHC -cpp -fffi #-}
2 --
3 -- Module : ByteString
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
9 -- License : BSD-style
10 --
11 -- Maintainer : dons@cse.unsw.edu.au
12 -- Stability : experimental
13 -- Portability : portable, requires ffi and cpp
14 -- Tested with : GHC 6.4.1 and Hugs March 2005
15 --
16
17 --
18 -- | A time and space-efficient implementation of byte vectors using
19 -- packed Word8 arrays, suitable for high performance use, both in terms
20 -- of large data quantities, or high speed requirements. Byte vectors
21 -- are encoded as Word8 arrays of bytes, held in a ForeignPtr, and can
22 -- be passed between C and Haskell with little effort.
23 --
24 -- This module is intended to be imported @qualified@, to avoid name
25 -- clashes with Prelude functions. eg.
26 --
27 -- > import qualified Data.ByteString as B
28 --
29 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
30 -- UArray by Simon Marlow. Rewritten to support slices and use
31 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
32 --
33
34 module Data.ByteString (
35
36 -- * The @ByteString@ type
37 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
38
39 -- * Introducing and eliminating 'ByteString's
40 empty, -- :: ByteString
41 packByte, -- :: Word8 -> ByteString
42 pack, -- :: [Word8] -> ByteString
43 unpack, -- :: ByteString -> [Word8]
44 packWith, -- :: (a -> Word8) -> [a] -> ByteString
45 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
46
47 -- * Basic interface
48 cons, -- :: Word8 -> ByteString -> ByteString
49 snoc, -- :: Word8 -> ByteString -> ByteString
50 null, -- :: ByteString -> Bool
51 length, -- :: ByteString -> Int
52 head, -- :: ByteString -> Word8
53 tail, -- :: ByteString -> ByteString
54 last, -- :: ByteString -> Word8
55 init, -- :: ByteString -> ByteString
56 append, -- :: ByteString -> ByteString -> ByteString
57
58 -- * Special ByteStrings
59 inits, -- :: ByteString -> [ByteString]
60 tails, -- :: ByteString -> [ByteString]
61 elems, -- :: ByteString -> [ByteString]
62
63 -- * Transformating ByteStrings
64 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
65 reverse, -- :: ByteString -> ByteString
66 intersperse, -- :: Word8 -> ByteString -> ByteString
67 transpose, -- :: [ByteString] -> [ByteString]
68
69 -- * Reducing 'ByteString's
70 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
71 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
72 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
74
75 -- ** Special folds
76 concat, -- :: [ByteString] -> ByteString
77 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
78 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
79 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
80 maximum, -- :: ByteString -> Word8
81 minimum, -- :: ByteString -> Word8
82 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
83
84 -- * Generating and unfolding ByteStrings
85 replicate, -- :: Int -> Word8 -> ByteString
86 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
87
88 -- * Substrings
89
90 -- ** Breaking strings
91 take, -- :: Int -> ByteString -> ByteString
92 drop, -- :: Int -> ByteString -> ByteString
93 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
94 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
95 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
96 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
97 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
98 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
99
100 -- ** Breaking and dropping on specific bytes
101 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
102 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
103 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
104
105 -- ** Breaking into many substrings
106 split, -- :: Word8 -> ByteString -> [ByteString]
107 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
108 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
109
110 -- ** Joining strings
111 join, -- :: ByteString -> [ByteString] -> ByteString
112 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
113
114 -- * Indexing ByteStrings
115 index, -- :: ByteString -> Int -> Word8
116 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
117 elemIndices, -- :: Word8 -> ByteString -> [Int]
118 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
119 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
120 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
121 count, -- :: Word8 -> ByteString -> Int
122
123 -- * Ordered ByteStrings
124 sort, -- :: ByteString -> ByteString
125
126 -- * Searching ByteStrings
127
128 -- ** Searching by equality
129 -- | These functions use memchr(3) to efficiently search the ByteString
130
131 elem, -- :: Word8 -> ByteString -> Bool
132 notElem, -- :: Word8 -> ByteString -> Bool
133 filterByte, -- :: Word8 -> ByteString -> ByteString
134 filterNotByte, -- :: Word8 -> ByteString -> ByteString
135
136 -- ** Searching with a predicate
137 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
138 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
139
140 -- ** Prefixes and suffixes
141 -- | These functions use memcmp(3) to efficiently compare substrings
142 isPrefixOf, -- :: ByteString -> ByteString -> Bool
143 isSuffixOf, -- :: ByteString -> ByteString -> Bool
144
145 -- ** Search for arbitrary substrings
146 isSubstringOf, -- :: ByteString -> ByteString -> Bool
147 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
148 findSubstrings, -- :: ByteString -> ByteString -> [Int]
149
150 -- * Zipping and unzipping ByteStrings
151 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
152 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
153 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
154
155 -- * Unchecked access
156 unsafeHead, -- :: ByteString -> Word8
157 unsafeTail, -- :: ByteString -> ByteString
158 unsafeIndex, -- :: ByteString -> Int -> Word8
159
160 -- * Low level introduction and elimination
161 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
162 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
163 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
164 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
165 skipIndex, -- :: ByteString -> Int
166
167 -- ** Packing CStrings and pointers
168 packCString, -- :: CString -> ByteString
169 packCStringLen, -- :: CString -> ByteString
170 packMallocCString, -- :: CString -> ByteString
171
172 #if defined(__GLASGOW_HASKELL__)
173 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
174 packAddress, -- :: Addr# -> ByteString
175 unsafePackAddress, -- :: Int -> Addr# -> ByteString
176 unsafeFinalize, -- :: ByteString -> IO ()
177 #endif
178
179 -- ** Using ByteStrings as CStrings
180 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
181 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
182 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
183
184 -- ** Copying ByteStrings
185 -- | These functions perform memcpy(3) operations
186 copy, -- :: ByteString -> ByteString
187 copyCString, -- :: CString -> ByteString
188 copyCStringLen, -- :: CStringLen -> ByteString
189
190 -- * I\/O with @ByteString@s
191
192 -- ** Standard input and output
193
194 #if defined(__GLASGOW_HASKELL__)
195 getLine, -- :: IO ByteString
196 #endif
197 getContents, -- :: IO ByteString
198 putStr, -- :: ByteString -> IO ()
199 putStrLn, -- :: ByteString -> IO ()
200
201 -- ** Files
202 readFile, -- :: FilePath -> IO ByteString
203 writeFile, -- :: FilePath -> ByteString -> IO ()
204 -- mmapFile, -- :: FilePath -> IO ByteString
205
206 -- ** I\/O with Handles
207 #if defined(__GLASGOW_HASKELL__)
208 getArgs, -- :: IO [ByteString]
209 hGetLine, -- :: Handle -> IO ByteString
210 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
211 #endif
212 hGetContents, -- :: Handle -> IO ByteString
213 hGet, -- :: Handle -> Int -> IO ByteString
214 hPut, -- :: Handle -> ByteString -> IO ()
215
216 #if defined(__GLASGOW_HASKELL__)
217 -- * Miscellaneous
218 unpackList, -- eek, otherwise it gets thrown away by the simplifier
219 #endif
220
221 ) where
222
223 import qualified Prelude as P
224 import Prelude hiding (reverse,head,tail,last,init,null
225 ,length,map,lines,foldl,foldr,unlines
226 ,concat,any,take,drop,splitAt,takeWhile
227 ,dropWhile,span,break,elem,filter,maximum
228 ,minimum,all,concatMap,foldl1,foldr1
229 ,readFile,writeFile,replicate
230 ,getContents,getLine,putStr,putStrLn
231 ,zip,zipWith,unzip,notElem)
232
233 import qualified Data.List as List
234
235 import Data.Char
236 import Data.Word (Word8)
237 import Data.Maybe (listToMaybe)
238 import Data.Array (listArray)
239 import qualified Data.Array as Array ((!))
240
241 -- Control.Exception.bracket not available in yhc or nhc
242 import Control.Exception (bracket)
243
244 import Foreign.C.String (CString, CStringLen)
245 import Foreign.C.Types (CSize, CInt)
246 import Foreign.ForeignPtr
247 import Foreign.Marshal.Array
248 import Foreign.Ptr
249 import Foreign.Storable (Storable(..))
250
251 -- hGetBuf and hPutBuf not available in yhc or nhc
252 import System.IO (stdin,stdout,hClose,hFileSize
253 ,hGetBuf,hPutBuf,openBinaryFile
254 ,Handle,IOMode(..))
255
256 #if !defined(__GLASGOW_HASKELL__)
257 import System.IO.Unsafe
258 #endif
259
260 #if defined(__GLASGOW_HASKELL__)
261
262 import Data.Generics (Data(..), Typeable(..))
263
264 import System.IO (hGetBufNonBlocking)
265 import System.IO.Error (isEOFError)
266
267 import Foreign.Marshal (alloca)
268 import qualified Foreign.Concurrent as FC (newForeignPtr)
269
270 import GHC.Handle
271 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
272 import GHC.Base (build, unsafeChr)
273 import GHC.Word hiding (Word8)
274 import GHC.Ptr (Ptr(..))
275 import GHC.ST (ST(..))
276 import GHC.IOBase
277
278 #endif
279
280 -- CFILES stuff is Hugs only
281 {-# CFILES cbits/fpstring.c #-}
282
283 -- -----------------------------------------------------------------------------
284 --
285 -- Useful macros, until we have bang patterns
286 --
287
288 #define STRICT1(f) f a | a `seq` False = undefined
289 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
290 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
291 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
292 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
293
294 -- -----------------------------------------------------------------------------
295
296 -- | A space-efficient representation of a Word8 vector, supporting many
297 -- efficient operations. A 'ByteString' contains 8-bit characters only.
298 --
299 -- Instances of Eq, Ord, Read, Show, Data, Typeable
300 --
301 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
302 {-# UNPACK #-} !Int
303 {-# UNPACK #-} !Int
304
305 #if defined(__GLASGOW_HASKELL__)
306 deriving (Data, Typeable)
307 #endif
308
309 instance Eq ByteString
310 where (==) = eq
311
312 instance Ord ByteString
313 where compare = compareBytes
314
315 instance Show ByteString where
316 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
317
318 instance Read ByteString where
319 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
320
321 {-
322 instance Arbitrary PackedString where
323 arbitrary = P.pack `fmap` arbitrary
324 coarbitrary s = coarbitrary (P.unpack s)
325 -}
326
327 -- | /O(n)/ Equality on the 'ByteString' type.
328 eq :: ByteString -> ByteString -> Bool
329 eq a b = (compareBytes a b) == EQ
330 {-# INLINE eq #-}
331
332 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
333 compareBytes :: ByteString -> ByteString -> Ordering
334 compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings
335 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) = inlinePerformIO $
336 withForeignPtr x1 $ \p1 ->
337 withForeignPtr x2 $ \p2 -> do
338 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
339 return $ case i `compare` 0 of
340 EQ -> l1 `compare` l2
341 x -> x
342 {-# INLINE compareBytes #-}
343
344 {-
345 --
346 -- About 4x slower over 32M
347 --
348 compareBytes :: ByteString -> ByteString -> Ordering
349 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
350 withForeignPtr fp1 $ \p1 ->
351 withForeignPtr fp2 $ \p2 ->
352 cmp (p1 `plusPtr` off1)
353 (p2 `plusPtr` off2) 0 len1 len2
354
355 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
356 STRICT5(cmp)
357 cmp p1 p2 n len1 len2
358 | n == len1 = if n == len2 then return EQ else return LT
359 | n == len2 = return GT
360 | otherwise = do
361 (a :: Word8) <- peekByteOff p1 n
362 (b :: Word8) <- peekByteOff p2 n
363 case a `compare` b of
364 EQ -> cmp p1 p2 (n+1) len1 len2
365 LT -> return LT
366 GT -> return GT
367 {-# INLINE compareBytes #-}
368 -}
369
370 -- -----------------------------------------------------------------------------
371 -- Introducing and eliminating 'ByteString's
372
373 -- | /O(1)/ The empty 'ByteString'
374 empty :: ByteString
375 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
376 {-# NOINLINE empty #-}
377
378 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
379 packByte :: Word8 -> ByteString
380 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
381 withForeignPtr fp $ \p -> poke p c
382 return $ PS fp 0 1
383 {-# NOINLINE packByte #-}
384
385 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
386 --
387 -- For applications with large numbers of string literals, pack can be a
388 -- bottleneck. In such cases, consider using packAddress (GHC only).
389 pack :: [Word8] -> ByteString
390
391 #if !defined(__GLASGOW_HASKELL__)
392
393 pack str = create (P.length str) $ \p -> go p str
394 where
395 go _ [] = return ()
396 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
397
398 #else /* hack away */
399
400 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
401 where
402 go _ _ [] = return ()
403 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
404
405 writeByte p i c = ST $ \s# ->
406 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
407
408 #endif
409
410 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
411 unpack :: ByteString -> [Word8]
412
413 #if !defined(__GLASGOW_HASKELL__)
414
415 unpack (PS _ _ 0) = []
416 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
417 go (p `plusPtr` s) (l - 1) []
418 where
419 STRICT3(go)
420 go p 0 acc = peek p >>= \e -> return (e : acc)
421 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
422 {-# INLINE unpack #-}
423
424 #else
425
426 unpack ps = build (unpackFoldr ps)
427 {-# INLINE unpack #-}
428
429 unpackList :: ByteString -> [Word8]
430 unpackList (PS fp off len) = withPtr fp $ \p -> do
431 let STRICT3(loop)
432 loop _ (-1) acc = return acc
433 loop q n acc = do
434 a <- peekByteOff q n
435 loop q (n-1) (a : acc)
436 loop (p `plusPtr` off) (len-1) []
437
438 {-# RULES
439 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
440 #-}
441
442 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
443 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
444 let STRICT3(loop)
445 loop _ (-1) acc = return acc
446 loop q n acc = do
447 a <- peekByteOff q n
448 loop q (n-1) (a `f` acc)
449 loop (p `plusPtr` off) (len-1) ch
450 {-# INLINE [0] unpackFoldr #-}
451
452 #endif
453
454 ------------------------------------------------------------------------
455
456 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
457 -- conversion function
458 packWith :: (a -> Word8) -> [a] -> ByteString
459 packWith k str = create (P.length str) $ \p -> go p str
460 where
461 STRICT2(go)
462 go _ [] = return ()
463 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
464 {-# INLINE packWith #-}
465 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
466
467 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
468 unpackWith :: (Word8 -> a) -> ByteString -> [a]
469 unpackWith _ (PS _ _ 0) = []
470 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
471 go (p `plusPtr` s) (l - 1) []
472 where
473 STRICT3(go)
474 go p 0 acc = peek p >>= \e -> return (k e : acc)
475 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
476 {-# INLINE unpackWith #-}
477 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
478
479 -- ---------------------------------------------------------------------
480 -- Basic interface
481
482 -- | /O(1)/ Test whether a ByteString is empty.
483 null :: ByteString -> Bool
484 null (PS _ _ l) = l == 0
485 {-# INLINE null #-}
486
487 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
488 length :: ByteString -> Int
489 length (PS _ _ l) = l
490 {-# INLINE length #-}
491
492 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
493 -- complexity, as it requires a memcpy.
494 cons :: Word8 -> ByteString -> ByteString
495 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
496 memcpy (p `plusPtr` 1) (f `plusPtr` s) l
497 poke p c
498 {-# INLINE cons #-}
499
500 -- | /O(n)/ Append a byte to the end of a 'ByteString'
501 snoc :: ByteString -> Word8 -> ByteString
502 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
503 memcpy p (f `plusPtr` s) l
504 poke (p `plusPtr` l) c
505 {-# INLINE snoc #-}
506
507 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
508 head :: ByteString -> Word8
509 head ps@(PS x s _)
510 | null ps = errorEmptyList "head"
511 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
512 {-# INLINE head #-}
513
514 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
515 tail :: ByteString -> ByteString
516 tail (PS p s l)
517 | l <= 0 = errorEmptyList "tail"
518 | otherwise = PS p (s+1) (l-1)
519 {-# INLINE tail #-}
520
521 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
522 last :: ByteString -> Word8
523 last ps@(PS x s l)
524 | null ps = errorEmptyList "last"
525 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
526 {-# INLINE last #-}
527
528 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
529 init :: ByteString -> ByteString
530 init (PS p s l)
531 | l <= 0 = errorEmptyList "init"
532 | otherwise = PS p s (l-1)
533 {-# INLINE init #-}
534
535 -- | /O(n)/ Append two ByteStrings
536 append :: ByteString -> ByteString -> ByteString
537 append xs ys | null xs = ys
538 | null ys = xs
539 | otherwise = concat [xs,ys]
540 {-# INLINE append #-}
541
542 {-
543 --
544 -- About 30% faster, but allocating in a big chunk isn't good for memory use
545 --
546 append :: ByteString -> ByteString -> ByteString
547 append xs@(PS ffp s l) ys@(PS fgp t m)
548 | null xs = ys
549 | null ys = xs
550 | otherwise = create len $ \ptr ->
551 withForeignPtr ffp $ \fp ->
552 withForeignPtr fgp $ \gp -> do
553 memcpy ptr (fp `plusPtr` s) l
554 memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
555 where len = length xs + length ys
556 -}
557
558 -- ---------------------------------------------------------------------
559 -- Transformations
560
561 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
562 -- element of @xs@
563 --
564 map :: (Word8 -> Word8) -> ByteString -> ByteString
565 map f (PS fp start len) = inlinePerformIO $ withForeignPtr fp $ \p -> do
566 new_fp <- mallocByteString len
567 withForeignPtr new_fp $ \new_p -> do
568 map_ f (len-1) (p `plusPtr` start) new_p
569 return (PS new_fp 0 len)
570 {-# INLINE map #-}
571
572 map_ :: (Word8 -> Word8) -> Int -> Ptr Word8 -> Ptr Word8 -> IO ()
573 STRICT4(map_)
574 map_ f n p1 p2
575 | n < 0 = return ()
576 | otherwise = do
577 x <- peekByteOff p1 n
578 pokeByteOff p2 n (f x)
579 map_ f (n-1) p1 p2
580 {-# INLINE map_ #-}
581
582 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
583 reverse :: ByteString -> ByteString
584 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
585 c_reverse p (f `plusPtr` s) l
586
587 {-
588 reverse = pack . P.reverse . unpack
589 -}
590
591 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
592 -- 'ByteString' and \`intersperses\' that byte between the elements of
593 -- the 'ByteString'. It is analogous to the intersperse function on
594 -- Lists.
595 intersperse :: Word8 -> ByteString -> ByteString
596 intersperse c ps@(PS x s l)
597 | length ps < 2 = ps
598 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
599 c_intersperse p (f `plusPtr` s) l c
600
601 {-
602 intersperse c = pack . List.intersperse c . unpack
603 -}
604
605 -- | The 'transpose' function transposes the rows and columns of its
606 -- 'ByteString' argument.
607 transpose :: [ByteString] -> [ByteString]
608 transpose ps = P.map pack (List.transpose (P.map unpack ps))
609
610 -- ---------------------------------------------------------------------
611 -- Reducing 'ByteString's
612
613 -- | 'foldl', applied to a binary operator, a starting value (typically
614 -- the left-identity of the operator), and a ByteString, reduces the
615 -- ByteString using the binary operator, from left to right.
616 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
617 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
618 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
619 where
620 STRICT3(lgo)
621 lgo z p q | p == q = return z
622 | otherwise = do c <- peek p
623 lgo (f z c) (p `plusPtr` 1) q
624
625 -- | 'foldr', applied to a binary operator, a starting value
626 -- (typically the right-identity of the operator), and a ByteString,
627 -- reduces the ByteString using the binary operator, from right to left.
628 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
629 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
630 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
631 where
632 STRICT2(go)
633 go p q | p == q = return z
634 | otherwise = do c <- peek p
635 ws <- go (p `plusPtr` 1) q
636 return $ c `k` ws
637
638 -- | 'foldl1' is a variant of 'foldl' that has no starting value
639 -- argument, and thus must be applied to non-empty 'ByteStrings'.
640 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
641 foldl1 f ps
642 | null ps = errorEmptyList "foldl1"
643 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
644
645 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
646 -- and thus must be applied to non-empty 'ByteString's
647 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
648 foldr1 f ps
649 | null ps = errorEmptyList "foldr1"
650 | otherwise = f (unsafeHead ps) (foldr1 f (unsafeTail ps))
651
652 -- ---------------------------------------------------------------------
653 -- Special folds
654
655 -- | /O(n)/ Concatenate a list of ByteStrings.
656 concat :: [ByteString] -> ByteString
657 concat [] = empty
658 concat [ps] = ps
659 concat xs = inlinePerformIO $ do
660 let start_size = 1024
661 p <- mallocArray start_size
662 f p 0 1024 xs
663
664 where f ptr len _ [] = do
665 ptr' <- reallocArray ptr (len+1)
666 poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
667 fp <- newForeignFreePtr ptr'
668 return $ PS fp 0 len
669
670 f ptr len to_go pss@(PS p s l:pss')
671 | l <= to_go = do withForeignPtr p $ \pf ->
672 memcpy (ptr `plusPtr` len)
673 (pf `plusPtr` s) l
674 f ptr (len + l) (to_go - l) pss'
675
676 | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
677 ptr' <- reallocArray ptr new_total
678 f ptr' len (new_total - len) pss
679
680 -- | Map a function over a 'ByteString' and concatenate the results
681 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
682 concatMap f = foldr (append . f) empty
683
684 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
685 -- any element of the 'ByteString' satisfies the predicate.
686 any :: (Word8 -> Bool) -> ByteString -> Bool
687 any _ (PS _ _ 0) = False
688 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
689 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
690 where
691 STRICT2(go)
692 go p q | p == q = return False
693 | otherwise = do c <- peek p
694 if f c then return True
695 else go (p `plusPtr` 1) q
696
697 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
698 -- if all elements of the 'ByteString' satisfy the predicate.
699 all :: (Word8 -> Bool) -> ByteString -> Bool
700 all _ (PS _ _ 0) = True
701 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
702 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
703 where
704 STRICT2(go)
705 go p q | p == q = return True -- end of list
706 | otherwise = do c <- peek p
707 if f c
708 then go (p `plusPtr` 1) q
709 else return False
710
711 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
712 maximum :: ByteString -> Word8
713 maximum xs@(PS x s l)
714 | null xs = errorEmptyList "maximum"
715 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
716 return $ c_maximum (p `plusPtr` s) l
717 {-# INLINE maximum #-}
718
719 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
720 minimum :: ByteString -> Word8
721 minimum xs@(PS x s l)
722 | null xs = errorEmptyList "minimum"
723 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
724 return $ c_minimum (p `plusPtr` s) l
725 {-# INLINE minimum #-}
726
727 {-
728 maximum xs@(PS x s l)
729 | null xs = errorEmptyList "maximum"
730 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
731 w <- peek p
732 maximum_ (p `plusPtr` s) 0 l w
733
734 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
735 STRICT4(maximum_)
736 maximum_ ptr n m c
737 | n >= m = return c
738 | otherwise = do w <- peekByteOff ptr n
739 maximum_ ptr (n+1) m (if w > c then w else c)
740
741 minimum xs@(PS x s l)
742 | null xs = errorEmptyList "minimum"
743 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
744 w <- peek p
745 minimum_ (p `plusPtr` s) 0 l w
746
747 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
748 STRICT4(minimum_)
749 minimum_ ptr n m c
750 | n >= m = return c
751 | otherwise = do w <- peekByteOff ptr n
752 minimum_ ptr (n+1) m (if w < c then w else c)
753 -}
754
755 -- | /O(n)/ map Word8 functions, provided with the index at each position
756 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
757 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
758 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
759 where
760 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
761 STRICT4(go)
762 go n f t p | f == p = return ()
763 | otherwise = do w <- peek f
764 ((poke t) . k n) w
765 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
766
767 -- ---------------------------------------------------------------------
768 -- Unfolds and replicates
769
770 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
771 -- the value of every element. The following holds:
772 --
773 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
774 --
775 -- This implemenation uses @memset(3)@
776 replicate :: Int -> Word8 -> ByteString
777 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
778
779 {-
780 -- About 5x slower
781 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
782 where
783 STRICT2(go)
784 go _ 0 = return w
785 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
786 -}
787
788 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
789 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
790 -- the element and returns 'Nothing' if it is done producing the
791 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
792 -- prepending to the ByteString and @b@ is used as the next element in a
793 -- recursive call.
794 --
795 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
796 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
797 -- final size of the ByteString as an argument. 'cons' can then be
798 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
799 -- linear complexity. The depth of the recursion is limited to this
800 -- size, but may be less. For lazy, infinite unfoldr, use
801 -- 'Data.List.unfoldr' (from 'Data.List').
802 --
803 -- Examples:
804 --
805 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
806 --
807 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
808 --
809 -- > unfoldrN n == take n $ List.unfoldr
810 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
811 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
812 where
813 STRICT3(go)
814 go q c n | n == i = return n -- stop if we reach `i'
815 | otherwise = case f c of
816 Nothing -> return n
817 Just (a,new_c) -> do
818 poke q a
819 go (q `plusPtr` 1) new_c (n+1)
820
821 -- ---------------------------------------------------------------------
822 -- Substrings
823
824 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
825 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
826 take :: Int -> ByteString -> ByteString
827 take n ps@(PS x s l)
828 | n < 0 = empty
829 | n >= l = ps
830 | otherwise = PS x s n
831 {-# INLINE take #-}
832
833 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
834 -- elements, or @[]@ if @n > 'length' xs@.
835 drop :: Int -> ByteString -> ByteString
836 drop n ps@(PS x s l)
837 | n <= 0 = ps
838 | n > l = empty
839 | otherwise = PS x (s+n) (l-n)
840 {-# INLINE drop #-}
841
842 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
843 splitAt :: Int -> ByteString -> (ByteString, ByteString)
844 splitAt n ps = (take n ps, drop n ps)
845 {-# INLINE splitAt #-}
846
847 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
848 -- returns the longest prefix (possibly empty) of @xs@ of elements that
849 -- satisfy @p@.
850 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
851 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
852 {-# INLINE takeWhile #-}
853
854 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
855 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
856 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
857 {-# INLINE dropWhile #-}
858
859 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
860 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
861 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
862 {-# INLINE break #-}
863
864 -- | 'breakByte' breaks its ByteString argument at the first occurence
865 -- of the specified byte. It is more efficient than 'break' as it is
866 -- implemented with @memchr(3)@. I.e.
867 --
868 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
869 --
870 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
871 breakByte c p = case elemIndex c p of
872 Nothing -> (p,empty)
873 Just n -> (take n p, drop n p)
874 {-# INLINE breakByte #-}
875
876 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
877 -- occurence of @w@. It behaves like 'break', except the delimiter is
878 -- not returned, and @Nothing@ is returned if the delimiter is not in
879 -- the ByteString. I.e.
880 --
881 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
882 --
883 -- > breakFirst c xs ==
884 -- > let (x,y) = break (== c) xs
885 -- > in if null y then Nothing else Just (x, drop 1 y))
886 --
887 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
888 breakFirst c p = case elemIndex c p of
889 Nothing -> Nothing
890 Just n -> Just (take n p, drop (n+1) p)
891 {-# INLINE breakFirst #-}
892
893 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
894 -- ByteString.
895 --
896 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
897 --
898 -- and the following are equivalent:
899 --
900 -- > breakLast 'c' "abcdef"
901 -- > let (x,y) = break (=='c') (reverse "abcdef")
902 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
903 --
904 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
905 breakLast c p = case elemIndexLast c p of
906 Nothing -> Nothing
907 Just n -> Just (take n p, drop (n+1) p)
908 {-# INLINE breakLast #-}
909
910 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
911 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
912 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
913 span p ps = break (not . p) ps
914 {-# INLINE span #-}
915
916 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
917 -- We have
918 --
919 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
920 --
921 -- and
922 --
923 -- > spanEnd (not . isSpace) ps
924 -- > ==
925 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
926 --
927 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
928 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
929
930 -- | /O(n)/ Splits a 'ByteString' into components delimited by
931 -- separators, where the predicate returns True for a separator element.
932 -- The resulting components do not contain the separators. Two adjacent
933 -- separators result in an empty component in the output. eg.
934 --
935 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
936 -- > splitWith (=='a') [] == []
937 --
938 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
939
940 #if defined(__GLASGOW_HASKELL__)
941 splitWith _pred (PS _ _ 0) = []
942 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
943 where pred# c# = pred_ (W8# c#)
944
945 splitWith' pred' off' len' fp' = withPtr fp $ \p ->
946 splitLoop pred' p 0 off' len' fp'
947
948 splitLoop :: (Word# -> Bool)
949 -> Ptr Word8
950 -> Int -> Int -> Int
951 -> ForeignPtr Word8
952 -> IO [ByteString]
953
954 splitLoop pred' p idx' off' len' fp'
955 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
956 | idx' >= len' = return [PS fp' off' idx']
957 | otherwise = do
958 w <- peekElemOff p (off'+idx')
959 if pred' (case w of W8# w# -> w#)
960 then return (PS fp' off' idx' :
961 splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
962 else splitLoop pred' p (idx'+1) off' len' fp'
963 {-# INLINE splitWith #-}
964
965 #else
966 splitWith _ (PS _ _ 0) = []
967 splitWith p ps = splitWith' p ps
968 where
969 STRICT2(splitWith')
970 splitWith' q qs = if null rest then [chunk]
971 else chunk : splitWith' q (unsafeTail rest)
972 where (chunk,rest) = break q qs
973 #endif
974
975 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
976 -- argument, consuming the delimiter. I.e.
977 --
978 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
979 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
980 -- > split 'x' "x" == ["",""]
981 --
982 -- and
983 --
984 -- > join [c] . split c == id
985 -- > split == splitWith . (==)
986 --
987 -- As for all splitting functions in this library, this function does
988 -- not copy the substrings, it just constructs new 'ByteStrings' that
989 -- are slices of the original.
990 --
991 split :: Word8 -> ByteString -> [ByteString]
992 split _ (PS _ _ 0) = []
993 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
994 let ptr = p `plusPtr` s
995
996 STRICT1(loop)
997 loop n = do
998 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
999 if q == nullPtr
1000 then return [PS x (s+n) (l-n)]
1001 else do let i = q `minusPtr` ptr
1002 ls <- loop (i+1)
1003 return $! PS x (s+n) (i-n) : ls
1004 loop 0
1005 {-# INLINE split #-}
1006
1007 {-
1008 -- slower. but stays inside Haskell.
1009 split _ (PS _ _ 0) = []
1010 split (W8# w#) (PS fp off len) = splitWith' off len fp
1011 where
1012 splitWith' off' len' fp' = withPtr fp $ \p ->
1013 splitLoop p 0 off' len' fp'
1014
1015 splitLoop :: Ptr Word8
1016 -> Int -> Int -> Int
1017 -> ForeignPtr Word8
1018 -> IO [ByteString]
1019
1020 STRICT5(splitLoop)
1021 splitLoop p idx' off' len' fp'
1022 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1023 | idx' >= len' = return [PS fp' off' idx']
1024 | otherwise = do
1025 (W8# x#) <- peekElemOff p (off'+idx')
1026 if word2Int# w# ==# word2Int# x#
1027 then return (PS fp' off' idx' :
1028 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1029 else splitLoop p (idx'+1) off' len' fp'
1030 -}
1031
1032 -- | Like 'splitWith', except that sequences of adjacent separators are
1033 -- treated as a single separator. eg.
1034 --
1035 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1036 --
1037 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1038 tokens f = P.filter (not.null) . splitWith f
1039
1040 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1041 -- 'ByteString's and concatenates the list after interspersing the first
1042 -- argument between each element of the list.
1043 join :: ByteString -> [ByteString] -> ByteString
1044 join filler pss = concat (splice pss)
1045 where
1046 splice [] = []
1047 splice [x] = [x]
1048 splice (x:y:xs) = x:filler:splice (y:xs)
1049
1050 --
1051 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1052 -- with a char. Around 4 times faster than the generalised join.
1053 --
1054 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1055 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1056 withForeignPtr ffp $ \fp ->
1057 withForeignPtr fgp $ \gp -> do
1058 memcpy ptr (fp `plusPtr` s) l
1059 poke (ptr `plusPtr` l) c
1060 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1061 where
1062 len = length f + length g + 1
1063 {-# INLINE joinWithByte #-}
1064
1065 -- ---------------------------------------------------------------------
1066 -- Indexing ByteStrings
1067
1068 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1069 index :: ByteString -> Int -> Word8
1070 index ps n
1071 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1072 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1073 ++ ", length = " ++ show (length ps)
1074 | otherwise = ps `unsafeIndex` n
1075 {-# INLINE index #-}
1076
1077 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1078 -- element in the given 'ByteString' which is equal to the query
1079 -- element, or 'Nothing' if there is no such element.
1080 -- This implementation uses memchr(3).
1081 elemIndex :: Word8 -> ByteString -> Maybe Int
1082 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1083 let p' = p `plusPtr` s
1084 q = memchr p' c (fromIntegral l)
1085 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1086 {-# INLINE elemIndex #-}
1087
1088 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1089 -- element in the given 'ByteString' which is equal to the query
1090 -- element, or 'Nothing' if there is no such element. The following
1091 -- holds:
1092 --
1093 -- > elemIndexLast c xs ==
1094 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1095 --
1096 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1097 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1098 go (p `plusPtr` s) (l-1)
1099 where
1100 STRICT2(go)
1101 go p i | i < 0 = return Nothing
1102 | otherwise = do ch' <- peekByteOff p i
1103 if ch == ch'
1104 then return $ Just i
1105 else go p (i-1)
1106 {-# INLINE elemIndexLast #-}
1107
1108 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1109 -- the indices of all elements equal to the query element, in ascending order.
1110 -- This implementation uses memchr(3).
1111 elemIndices :: Word8 -> ByteString -> [Int]
1112 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1113 let ptr = p `plusPtr` s
1114
1115 STRICT1(loop)
1116 loop n = do
1117 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1118 if q == nullPtr
1119 then return []
1120 else do let i = q `minusPtr` ptr
1121 ls <- loop (i+1)
1122 return $! i:ls
1123 loop 0
1124
1125 {-
1126 -- much slower
1127 elemIndices :: Word8 -> ByteString -> [Int]
1128 elemIndices c ps = loop 0 ps
1129 where STRICT2(loop)
1130 loop _ ps' | null ps' = []
1131 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1132 | otherwise = loop (n+1) (unsafeTail ps')
1133 -}
1134
1135 -- | count returns the number of times its argument appears in the ByteString
1136 --
1137 -- > count = length . elemIndices
1138 --
1139 -- But more efficiently than using length on the intermediate list.
1140 count :: Word8 -> ByteString -> Int
1141 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1142 return $ c_count (p `plusPtr` s) (fromIntegral m) w
1143 {-# INLINE count #-}
1144
1145 {-
1146 --
1147 -- around 30% slower
1148 --
1149 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1150 go (p `plusPtr` s) (fromIntegral m) 0
1151 where
1152 go :: Ptr Word8 -> CSize -> Int -> IO Int
1153 STRICT3(go)
1154 go p l i = do
1155 let q = memchr p w l
1156 if q == nullPtr
1157 then return i
1158 else do let k = fromIntegral $ q `minusPtr` p
1159 go (q `plusPtr` 1) (l-k-1) (i+1)
1160 -}
1161
1162 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1163 -- returns the index of the first element in the ByteString
1164 -- satisfying the predicate.
1165 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1166 findIndex = (listToMaybe .) . findIndices
1167
1168 -- | The 'findIndices' function extends 'findIndex', by returning the
1169 -- indices of all elements satisfying the predicate, in ascending order.
1170 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1171 findIndices p ps = loop 0 ps
1172 where
1173 STRICT2(loop)
1174 loop _ qs | null qs = []
1175 loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1176 | otherwise = loop (n+1) (unsafeTail qs)
1177
1178 -- ---------------------------------------------------------------------
1179 -- Searching ByteStrings
1180
1181 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1182 elem :: Word8 -> ByteString -> Bool
1183 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1184 {-# INLINE elem #-}
1185
1186 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1187 notElem :: Word8 -> ByteString -> Bool
1188 notElem c ps = case elemIndex c ps of Nothing -> True ; _ -> False
1189 {-# INLINE notElem #-}
1190
1191 --
1192 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1193 -- case of filtering a single byte. It is more efficient to use
1194 -- /filterByte/ in this case.
1195 --
1196 -- > filterByte == filter . (==)
1197 --
1198 -- filterByte is around 10x faster, and uses much less space, than its
1199 -- filter equivalent
1200 filterByte :: Word8 -> ByteString -> ByteString
1201 filterByte w ps = replicate (count w ps) w
1202
1203 {-
1204 -- slower than the replicate version
1205
1206 filterByte ch ps@(PS x s l)
1207 | null ps = ps
1208 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1209 t <- go (f `plusPtr` s) p l
1210 return (t `minusPtr` p) -- actual length
1211 where
1212 STRICT3(go)
1213 go _ t 0 = return t
1214 go f t e = do w <- peek f
1215 if w == ch
1216 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1217 else go (f `plusPtr` 1) t (e-1)
1218 -}
1219
1220 --
1221 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1222 -- case of filtering a single byte out of a list. It is more efficient
1223 -- to use /filterNotByte/ in this case.
1224 --
1225 -- > filterNotByte == filter . (/=)
1226 --
1227 -- filterNotByte is around 3x faster, and uses much less space, than its
1228 -- filter equivalent
1229 filterNotByte :: Word8 -> ByteString -> ByteString
1230 filterNotByte ch ps@(PS x s l)
1231 | null ps = ps
1232 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1233 t <- go (f `plusPtr` s) p l
1234 return (t `minusPtr` p) -- actual length
1235 where
1236 STRICT3(go)
1237 go _ t 0 = return t
1238 go f t e = do w <- peek f
1239 if w /= ch
1240 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1241 else go (f `plusPtr` 1) t (e-1)
1242
1243 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1244 -- returns a ByteString containing those characters that satisfy the
1245 -- predicate.
1246 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1247 filter k ps@(PS x s l)
1248 | null ps = ps
1249 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1250 t <- go (f `plusPtr` s) p l
1251 return (t `minusPtr` p) -- actual length
1252 where
1253 STRICT3(go)
1254 go _ t 0 = return t
1255 go f t e = do w <- peek f
1256 if k w
1257 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1258 else go (f `plusPtr` 1) t (e - 1)
1259
1260 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1261
1262 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1263 -- and returns the first element in matching the predicate, or 'Nothing'
1264 -- if there is no such element.
1265 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1266 find p ps = case filter p ps of
1267 q | null q -> Nothing
1268 | otherwise -> Just (unsafeHead q)
1269
1270 -- ---------------------------------------------------------------------
1271 -- Searching for substrings
1272
1273 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1274 -- iff the first is a prefix of the second.
1275 isPrefixOf :: ByteString -> ByteString -> Bool
1276 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1277 | l1 == 0 = True
1278 | l2 < l1 = False
1279 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1280 withForeignPtr x2 $ \p2 -> do
1281 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1282 return (i == 0)
1283
1284 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1285 -- iff the first is a suffix of the second.
1286 --
1287 -- The following holds:
1288 --
1289 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1290 --
1291 -- However, the real implemenation uses memcmp to compare the end of the
1292 -- string only, with no reverse required..
1293 isSuffixOf :: ByteString -> ByteString -> Bool
1294 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1295 | l1 == 0 = True
1296 | l2 < l1 = False
1297 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1298 withForeignPtr x2 $ \p2 -> do
1299 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1300 return (i == 0)
1301
1302 -- | Check whether one string is a substring of another. @isSubstringOf
1303 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1304 isSubstringOf :: ByteString -- ^ String to search for.
1305 -> ByteString -- ^ String to search in.
1306 -> Bool
1307 isSubstringOf p s = not $ P.null $ findSubstrings p s
1308
1309 -- | Get the first index of a substring in another string,
1310 -- or 'Nothing' if the string is not found.
1311 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1312 findSubstring :: ByteString -- ^ String to search for.
1313 -> ByteString -- ^ String to seach in.
1314 -> Maybe Int
1315 findSubstring = (listToMaybe .) . findSubstrings
1316
1317 -- | Find the indexes of all (possibly overlapping) occurances of a
1318 -- substring in a string. This function uses the Knuth-Morris-Pratt
1319 -- string matching algorithm.
1320 findSubstrings :: ByteString -- ^ String to search for.
1321 -> ByteString -- ^ String to seach in.
1322 -> [Int]
1323
1324 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1325 where
1326 patc x = pat `unsafeIndex` x
1327 strc x = str `unsafeIndex` x
1328
1329 -- maybe we should make kmpNext a UArray before using it in search?
1330 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1331 kmpNextL p _ | null p = []
1332 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1333 ps = unsafeTail p
1334 x = if not (null ps) && unsafeHead ps == patc j'
1335 then kmpNext Array.! j' else j'
1336 in x:kmpNextL ps j'
1337 search i j = match ++ rest -- i: position in string, j: position in pattern
1338 where match = if j == m then [(i - j)] else []
1339 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1340 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1341 | otherwise = j
1342
1343 -- ---------------------------------------------------------------------
1344 -- Zipping
1345
1346 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1347 -- corresponding pairs of bytes. If one input ByteString is short,
1348 -- excess elements of the longer ByteString are discarded. This is
1349 -- equivalent to a pair of 'unpack' operations.
1350 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1351 zip ps qs
1352 | null ps || null qs = []
1353 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1354
1355 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1356 -- the first argument, instead of a tupling function. For example,
1357 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1358 -- corresponding sums.
1359 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1360 zipWith f ps qs
1361 | null ps || null qs = []
1362 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1363
1364 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1365 -- ByteStrings. Note that this performs two 'pack' operations.
1366 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1367 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1368 {-# INLINE unzip #-}
1369
1370 -- ---------------------------------------------------------------------
1371 -- Special lists
1372
1373 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1374 inits :: ByteString -> [ByteString]
1375 inits (PS x s l) = [PS x s n | n <- [0..l]]
1376
1377 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1378 tails :: ByteString -> [ByteString]
1379 tails p | null p = [empty]
1380 | otherwise = p : tails (unsafeTail p)
1381
1382 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1383
1384 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1385 elems :: ByteString -> [ByteString]
1386 elems (PS _ _ 0) = []
1387 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1388 {-# INLINE elems #-}
1389
1390 -- ---------------------------------------------------------------------
1391 -- ** Ordered 'ByteString's
1392
1393 -- | /O(n log(n))/ Sort a ByteString efficiently, using qsort(3).
1394 sort :: ByteString -> ByteString
1395 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1396 memcpy p (f `plusPtr` s) l
1397 c_qsort p l -- inplace
1398
1399 {-
1400 sort = pack . List.sort . unpack
1401 -}
1402
1403 -- ---------------------------------------------------------------------
1404 --
1405 -- Extensions to the basic interface
1406 --
1407
1408 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1409 -- check for the empty case, so there is an obligation on the programmer
1410 -- to provide a proof that the ByteString is non-empty.
1411 unsafeHead :: ByteString -> Word8
1412 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1413 {-# INLINE unsafeHead #-}
1414
1415 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1416 -- check for the empty case. As with 'unsafeHead', the programmer must
1417 -- provide a separate proof that the ByteString is non-empty.
1418 unsafeTail :: ByteString -> ByteString
1419 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1420 {-# INLINE unsafeTail #-}
1421
1422 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1423 -- This omits the bounds check, which means there is an accompanying
1424 -- obligation on the programmer to ensure the bounds are checked in some
1425 -- other way.
1426 unsafeIndex :: ByteString -> Int -> Word8
1427 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1428 {-# INLINE unsafeIndex #-}
1429
1430 -- ---------------------------------------------------------------------
1431 -- Low level constructors
1432
1433 #if defined(__GLASGOW_HASKELL__)
1434 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1435 -- Addr\# (an arbitrary machine address assumed to point outside the
1436 -- garbage-collected heap) into a @ByteString@. A much faster way to
1437 -- create an Addr\# is with an unboxed string literal, than to pack a
1438 -- boxed string. A unboxed string literal is compiled to a static @char
1439 -- []@ by GHC. Establishing the length of the string requires a call to
1440 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1441 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1442 -- if you know the length of the string statically.
1443 --
1444 -- An example:
1445 --
1446 -- > literalFS = packAddress "literal"#
1447 --
1448 packAddress :: Addr# -> ByteString
1449 packAddress addr# = inlinePerformIO $ do
1450 p <- newForeignPtr_ cstr
1451 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1452 where
1453 cstr = Ptr addr#
1454 {-# INLINE packAddress #-}
1455
1456 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1457 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1458 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1459 -- 'Addr\#' to the string, and the length of the string. Make sure the
1460 -- length is correct, otherwise use the safer 'packAddress' (where the
1461 -- length will be calculated once at runtime).
1462 unsafePackAddress :: Int -> Addr# -> ByteString
1463 unsafePackAddress len addr# = inlinePerformIO $ do
1464 p <- newForeignPtr_ cstr
1465 return $ PS p 0 len
1466 where cstr = Ptr addr#
1467
1468 #endif
1469
1470 -- | /O(1)/ Build a ByteString from a ForeignPtr
1471 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1472 fromForeignPtr fp l = PS fp 0 l
1473
1474 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1475 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1476 toForeignPtr (PS ps s l) = (ps, s, l)
1477
1478 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1479 -- current 'ByteString' from any larger string it was created from, as
1480 -- an 'Int'.
1481 skipIndex :: ByteString -> Int
1482 skipIndex (PS _ s _) = s
1483 {-# INLINE skipIndex #-}
1484
1485 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1486 -- finalizer associated to it. The ByteString length is calculated using
1487 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1488 packCString :: CString -> ByteString
1489 packCString cstr = inlinePerformIO $ do
1490 fp <- newForeignPtr_ (castPtr cstr)
1491 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1492
1493 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1494 -- have /no/ finalizer associated with it. This operation has /O(1)/
1495 -- complexity as we already know the final size, so no /strlen(3)/ is
1496 -- required.
1497 packCStringLen :: CStringLen -> ByteString
1498 packCStringLen (ptr,len) = inlinePerformIO $ do
1499 fp <- newForeignPtr_ (castPtr ptr)
1500 return $ PS fp 0 (fromIntegral len)
1501
1502 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1503 -- have a @free(3)@ finalizer associated to it.
1504 packMallocCString :: CString -> ByteString
1505 packMallocCString cstr = inlinePerformIO $ do
1506 fp <- newForeignFreePtr (castPtr cstr)
1507 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1508
1509 #if defined(__GLASGOW_HASKELL__)
1510 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1511 -- length, and an IO action representing a finalizer. This function is
1512 -- not available on Hugs.
1513 --
1514 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1515 packCStringFinalizer p l f = do
1516 fp <- FC.newForeignPtr p f
1517 return $ PS fp 0 l
1518
1519 -- | Explicitly run the finaliser associated with a 'ByteString'.
1520 -- Further references to this value may generate invalid memory
1521 -- references. This operation is unsafe, as there may be other
1522 -- 'ByteStrings' referring to the same underlying pages. If you use
1523 -- this, you need to have a proof of some kind that all 'ByteString's
1524 -- ever generated from the underlying byte array are no longer live.
1525 unsafeFinalize :: ByteString -> IO ()
1526 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1527
1528 #endif
1529
1530 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1531 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1532 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1533 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1534 where
1535 alloc = withForeignPtr ps $ \p -> do
1536 buf <- c_malloc (fromIntegral l+1)
1537 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1538 poke (buf `plusPtr` l) (0::Word8)
1539 return $ castPtr buf
1540
1541 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1542 -- Warning: modifying the @CString@ will affect the @ByteString@.
1543 -- Why is this function unsafe? It relies on the null byte at the end of
1544 -- the ByteString to be there. This is /not/ the case if your ByteString
1545 -- has been spliced from a larger string (i.e. with take or drop).
1546 -- Unless you can guarantee the null byte, you should use the safe
1547 -- version, which will copy the string first.
1548 --
1549 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1550 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1551
1552 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1553 -- This is mainly useful to allow the rest of the data pointed
1554 -- to by the 'ByteString' to be garbage collected, for example
1555 -- if a large string has been read in, and only a small part of it
1556 -- is needed in the rest of the program.
1557 copy :: ByteString -> ByteString
1558 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1559
1560 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1561 -- CString is going to be deallocated from C land.
1562 copyCString :: CString -> ByteString
1563 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1564
1565 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1566 copyCStringLen :: CStringLen -> ByteString
1567 copyCStringLen (cstr, len) = inlinePerformIO $ do
1568 fp <- mallocForeignPtrArray (len+1)
1569 withForeignPtr fp $ \p -> do
1570 memcpy p (castPtr cstr) len
1571 poke (p `plusPtr` len) (0 :: Word8)
1572 return $! PS fp 0 len
1573
1574 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1575 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1576 -- This is analogous to unsafeUseAsCString, and comes with the same
1577 -- safety requirements.
1578 --
1579 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1580 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1581
1582 -- | Given the maximum size needed and a function to make the contents
1583 -- of a ByteString, generate makes the 'ByteString'. The generating
1584 -- function is required to return the actual final size (<= the maximum
1585 -- size), and the resulting byte array is realloced to this size. The
1586 -- string is padded at the end with a null byte.
1587 --
1588 -- generate is the main mechanism for creating custom, efficient
1589 -- ByteString functions, using Haskell or C functions to fill the space.
1590 --
1591 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1592 generate i f = do
1593 p <- mallocArray i
1594 i' <- f p
1595 p' <- reallocArray p (i'+1)
1596 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1597 fp <- newForeignFreePtr p'
1598 return $ PS fp 0 i'
1599
1600 -- ---------------------------------------------------------------------
1601 -- line IO
1602
1603 #if defined(__GLASGOW_HASKELL__)
1604
1605 -- | getLine, read a line from stdin.
1606 getLine :: IO ByteString
1607 getLine = hGetLine stdin
1608
1609 -- | hGetLine. read a ByteString from a handle
1610 hGetLine :: Handle -> IO ByteString
1611 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1612 case haBufferMode handle_ of
1613 NoBuffering -> error "no buffering"
1614 _other -> hGetLineBuffered handle_
1615
1616 where
1617 hGetLineBuffered handle_ = do
1618 let ref = haBuffer handle_
1619 buf <- readIORef ref
1620 hGetLineBufferedLoop handle_ ref buf 0 []
1621
1622 hGetLineBufferedLoop handle_ ref
1623 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1624 len `seq` do
1625 off <- findEOL r w raw
1626 let new_len = len + off - r
1627 xs <- mkPS raw r off
1628
1629 -- if eol == True, then off is the offset of the '\n'
1630 -- otherwise off == w and the buffer is now empty.
1631 if off /= w
1632 then do if (w == off + 1)
1633 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1634 else writeIORef ref buf{ bufRPtr = off + 1 }
1635 mkBigPS new_len (xs:xss)
1636 else do
1637 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1638 buf{ bufWPtr=0, bufRPtr=0 }
1639 case maybe_buf of
1640 -- Nothing indicates we caught an EOF, and we may have a
1641 -- partial line to return.
1642 Nothing -> do
1643 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1644 if new_len > 0
1645 then mkBigPS new_len (xs:xss)
1646 else ioe_EOF
1647 Just new_buf ->
1648 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1649
1650 -- find the end-of-line character, if there is one
1651 findEOL r w raw
1652 | r == w = return w
1653 | otherwise = do
1654 (c,r') <- readCharFromBuffer raw r
1655 if c == '\n'
1656 then return r -- NB. not r': don't include the '\n'
1657 else findEOL r' w raw
1658
1659 maybeFillReadBuffer fd is_line is_stream buf = catch
1660 (do buf' <- fillReadBuffer fd is_line is_stream buf
1661 return (Just buf'))
1662 (\e -> if isEOFError e then return Nothing else ioError e)
1663
1664 -- TODO, rewrite to use normal memcpy
1665 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1666 mkPS buf start end = do
1667 let len = end - start
1668 fp <- mallocByteString len
1669 withForeignPtr fp $ \p -> do
1670 memcpy_ptr_baoff p buf start (fromIntegral len)
1671 return (PS fp 0 len)
1672
1673 mkBigPS :: Int -> [ByteString] -> IO ByteString
1674 mkBigPS _ [ps] = return ps
1675 mkBigPS _ pss = return $! concat (P.reverse pss)
1676
1677 #endif
1678
1679 -- ---------------------------------------------------------------------
1680 -- Block IO
1681
1682 -- | Outputs a 'ByteString' to the specified 'Handle'.
1683 hPut :: Handle -> ByteString -> IO ()
1684 hPut _ (PS _ _ 0) = return ()
1685 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1686 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1687
1688 -- | Write a ByteString to stdout
1689 putStr :: ByteString -> IO ()
1690 putStr = hPut stdout
1691
1692 -- | Write a ByteString to stdout, appending a newline byte
1693 putStrLn :: ByteString -> IO ()
1694 putStrLn ps = hPut stdout ps >> hPut stdout nl
1695 where nl = packByte 0x0a
1696
1697 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1698 -- is far more efficient than reading the characters into a 'String'
1699 -- and then using 'pack'.
1700 hGet :: Handle -> Int -> IO ByteString
1701 hGet _ 0 = return empty
1702 hGet h i = do fp <- mallocByteString i
1703 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1704 return $ PS fp 0 l
1705
1706 #if defined(__GLASGOW_HASKELL__)
1707 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1708 -- waiting for data to become available, instead it returns only whatever data
1709 -- is available.
1710 hGetNonBlocking :: Handle -> Int -> IO ByteString
1711 hGetNonBlocking _ 0 = return empty
1712 hGetNonBlocking h i = do
1713 fp <- mallocByteString i
1714 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1715 return $ PS fp 0 l
1716 #endif
1717
1718 -- | Read entire handle contents into a 'ByteString'.
1719 --
1720 -- As with 'hGet', the string representation in the file is assumed to
1721 -- be ISO-8859-1.
1722 --
1723 hGetContents :: Handle -> IO ByteString
1724 hGetContents h = do
1725 let start_size = 1024
1726 p <- mallocArray start_size
1727 i <- hGetBuf h p start_size
1728 if i < start_size
1729 then do p' <- reallocArray p i
1730 fp <- newForeignFreePtr p'
1731 return $ PS fp 0 i
1732 else f p start_size
1733 where
1734 f p s = do
1735 let s' = 2 * s
1736 p' <- reallocArray p s'
1737 i <- hGetBuf h (p' `plusPtr` s) s
1738 if i < s
1739 then do let i' = s + i
1740 p'' <- reallocArray p' i'
1741 fp <- newForeignFreePtr p''
1742 return $ PS fp 0 i'
1743 else f p' s'
1744
1745 -- | getContents. Equivalent to hGetContents stdin
1746 getContents :: IO ByteString
1747 getContents = hGetContents stdin
1748
1749 -- | Read an entire file directly into a 'ByteString'. This is far more
1750 -- efficient than reading the characters into a 'String' and then using
1751 -- 'pack'. It also may be more efficient than opening the file and
1752 -- reading it using hGet.
1753 readFile :: FilePath -> IO ByteString
1754 readFile f = do
1755 h <- openBinaryFile f ReadMode
1756 l <- hFileSize h
1757 s <- hGet h $ fromIntegral l
1758 hClose h
1759 return s
1760
1761 -- | Write a 'ByteString' to a file.
1762 writeFile :: FilePath -> ByteString -> IO ()
1763 writeFile f ps = do
1764 h <- openBinaryFile f WriteMode
1765 hPut h ps
1766 hClose h
1767
1768 {-
1769 --
1770 -- Disable until we can move it into a portable .hsc file
1771 --
1772
1773 -- | Like readFile, this reads an entire file directly into a
1774 -- 'ByteString', but it is even more efficient. It involves directly
1775 -- mapping the file to memory. This has the advantage that the contents
1776 -- of the file never need to be copied. Also, under memory pressure the
1777 -- page may simply be discarded, while in the case of readFile it would
1778 -- need to be written to swap. If you read many small files, mmapFile
1779 -- will be less memory-efficient than readFile, since each mmapFile
1780 -- takes up a separate page of memory. Also, you can run into bus
1781 -- errors if the file is modified. As with 'readFile', the string
1782 -- representation in the file is assumed to be ISO-8859-1.
1783 --
1784 -- On systems without mmap, this is the same as a readFile.
1785 --
1786 mmapFile :: FilePath -> IO ByteString
1787 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1788
1789 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1790 mmap f = do
1791 h <- openBinaryFile f ReadMode
1792 l <- fromIntegral `fmap` hFileSize h
1793 -- Don't bother mmaping small files because each mmapped file takes up
1794 -- at least one full VM block.
1795 if l < mmap_limit
1796 then do thefp <- mallocByteString l
1797 withForeignPtr thefp $ \p-> hGetBuf h p l
1798 hClose h
1799 return (thefp, l)
1800 else do
1801 -- unix only :(
1802 fd <- fromIntegral `fmap` handleToFd h
1803 p <- my_mmap l fd
1804 fp <- if p == nullPtr
1805 then do thefp <- mallocByteString l
1806 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1807 return thefp
1808 else do
1809 -- The munmap leads to crashes on OpenBSD.
1810 -- maybe there's a use after unmap in there somewhere?
1811 #if !defined(__OpenBSD__)
1812 let unmap = c_munmap p l >> return ()
1813 #else
1814 let unmap = return ()
1815 #endif
1816 fp <- FC.newForeignPtr p unmap
1817 return fp
1818 c_close fd
1819 hClose h
1820 return (fp, l)
1821 where mmap_limit = 16*1024
1822 -}
1823
1824 #if defined(__GLASGOW_HASKELL__)
1825 --
1826 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1827 --
1828 getArgs :: IO [ByteString]
1829 getArgs =
1830 alloca $ \ p_argc ->
1831 alloca $ \ p_argv -> do
1832 getProgArgv p_argc p_argv
1833 p <- fromIntegral `fmap` peek p_argc
1834 argv <- peek p_argv
1835 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1836 #endif
1837
1838 -- ---------------------------------------------------------------------
1839 -- Internal utilities
1840
1841 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1842 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1843 -- convenience for ByteString construction.
1844 w2c :: Word8 -> Char
1845 #if !defined(__GLASGOW_HASKELL__)
1846 w2c = chr . fromIntegral
1847 #else
1848 w2c = unsafeChr . fromIntegral
1849 #endif
1850 {-# INLINE w2c #-}
1851
1852 c2w :: Char -> Word8
1853 c2w = fromIntegral . ord
1854 {-# INLINE c2w #-}
1855
1856 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1857 -- is padded with a null byte.
1858 mallocByteString :: Int -> IO (ForeignPtr Word8)
1859 mallocByteString l = do
1860 fp <- mallocForeignPtrArray (l+1)
1861 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1862 return fp
1863
1864 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1865 -- argument gives the final size of the ByteString. Unlike 'generate'
1866 -- the ByteString is no reallocated if the final size is less than the
1867 -- estimated size.
1868 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1869 create l write_ptr = inlinePerformIO $ do
1870 fp <- mallocByteString (l+1)
1871 withForeignPtr fp $ \p -> write_ptr p
1872 return $ PS fp 0 l
1873 {-# INLINE create #-}
1874
1875 -- | Perform an operation with a temporary ByteString
1876 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1877 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1878 {-# INLINE withPtr #-}
1879
1880 -- Common up near identical calls to `error' to reduce the number
1881 -- constant strings created when compiled:
1882 errorEmptyList :: String -> a
1883 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1884 {-# INLINE errorEmptyList #-}
1885
1886 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1887 -- of the string if no element is found, rather than Nothing.
1888 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1889 STRICT2(findIndexOrEnd)
1890 findIndexOrEnd f ps
1891 | null ps = 0
1892 | f (unsafeHead ps) = 0
1893 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
1894 {-# INLINE findIndexOrEnd #-}
1895
1896 -- Find from the end of the string using predicate
1897 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
1898 STRICT2(findFromEndUntil)
1899 findFromEndUntil f ps@(PS x s l) =
1900 if null ps then 0
1901 else if f (last ps) then l
1902 else findFromEndUntil f (PS x s (l-1))
1903
1904 -- Just like inlinePerformIO, but we inline it. Big performance gains as
1905 -- it exposes lots of things to further inlining
1906 --
1907 {-# INLINE inlinePerformIO #-}
1908 inlinePerformIO :: IO a -> a
1909 #if defined(__GLASGOW_HASKELL__)
1910 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
1911 #else
1912 inlinePerformIO = unsafePerformIO
1913 #endif
1914
1915 {-# INLINE newForeignFreePtr #-}
1916 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
1917 #if defined(__GLASGOW_HASKELL__)
1918 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
1919 #else
1920 newForeignFreePtr p = newForeignPtr c_free_finalizer p
1921 #endif
1922
1923 -- ---------------------------------------------------------------------
1924 --
1925 -- Standard C functions
1926 --
1927
1928 foreign import ccall unsafe "string.h strlen" c_strlen
1929 :: CString -> CInt
1930
1931 foreign import ccall unsafe "stdlib.h malloc" c_malloc
1932 :: CInt -> IO (Ptr Word8)
1933
1934 foreign import ccall unsafe "static stdlib.h free" c_free
1935 :: Ptr Word8 -> IO ()
1936
1937 #if !defined(__GLASGOW_HASKELL__)
1938 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
1939 :: FunPtr (Ptr Word8 -> IO ())
1940 #endif
1941
1942 foreign import ccall unsafe "string.h memset" memset
1943 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
1944
1945 foreign import ccall unsafe "string.h memchr" memchr
1946 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
1947
1948 foreign import ccall unsafe "string.h memcmp" memcmp
1949 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
1950
1951 foreign import ccall unsafe "string.h memcpy" memcpy
1952 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1953
1954 -- ---------------------------------------------------------------------
1955 --
1956 -- Uses our C code
1957 --
1958
1959 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
1960 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
1961
1962 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
1963 :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
1964
1965 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
1966 :: Ptr Word8 -> Int -> Word8
1967
1968 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
1969 :: Ptr Word8 -> Int -> Word8
1970
1971 foreign import ccall unsafe "static fpstring.h count" c_count
1972 :: Ptr Word8 -> Int -> Word8 -> Int
1973
1974 foreign import ccall unsafe "static fpstring.h my_qsort" c_qsort
1975 :: Ptr Word8 -> Int -> IO ()
1976
1977 -- ---------------------------------------------------------------------
1978 -- MMap
1979
1980 {-
1981 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
1982 :: Int -> Int -> IO (Ptr Word8)
1983
1984 foreign import ccall unsafe "static unistd.h close" c_close
1985 :: Int -> IO Int
1986
1987 # if !defined(__OpenBSD__)
1988 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
1989 :: Ptr Word8 -> Int -> IO Int
1990 # endif
1991 -}
1992
1993 -- ---------------------------------------------------------------------
1994 -- Internal GHC Haskell magic
1995
1996 #if defined(__GLASGOW_HASKELL__)
1997 foreign import ccall unsafe "RtsAPI.h getProgArgv"
1998 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
1999
2000 foreign import ccall unsafe "__hscore_memcpy_src_off"
2001 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2002 #endif