add install-includes: field
[packages/pretty.git] / Data / ByteString.hs
1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
2 -- |
3 -- Module : Data.ByteString
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
9 -- Array fusion code:
10 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
11 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
12 --
13 -- License : BSD-style
14 --
15 -- Maintainer : dons@cse.unsw.edu.au
16 -- Stability : experimental
17 -- Portability : portable
18 --
19 -- A time and space-efficient implementation of byte vectors using
20 -- packed Word8 arrays, suitable for high performance use, both in terms
21 -- of large data quantities, or high speed requirements. Byte vectors
22 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
23 -- and can be passed between C and Haskell with little effort.
24 --
25 -- This module is intended to be imported @qualified@, to avoid name
26 -- clashes with "Prelude" functions. eg.
27 --
28 -- > import qualified Data.ByteString as B
29 --
30 -- Original GHC implementation by Bryan O\'Sullivan.
31 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
32 -- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
33 -- Polished and extended by Don Stewart.
34 --
35
36 module Data.ByteString (
37
38 -- * The @ByteString@ type
39 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
40
41 -- * Introducing and eliminating 'ByteString's
42 empty, -- :: ByteString
43 singleton, -- :: Word8 -> ByteString
44 pack, -- :: [Word8] -> ByteString
45 unpack, -- :: ByteString -> [Word8]
46
47 -- * Basic interface
48 cons, -- :: Word8 -> ByteString -> ByteString
49 snoc, -- :: ByteString -> Word8 -> ByteString
50 append, -- :: ByteString -> ByteString -> ByteString
51 head, -- :: ByteString -> Word8
52 last, -- :: ByteString -> Word8
53 tail, -- :: ByteString -> ByteString
54 init, -- :: ByteString -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57
58 -- * Transformating ByteStrings
59 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
60 reverse, -- :: ByteString -> ByteString
61 intersperse, -- :: Word8 -> ByteString -> ByteString
62 transpose, -- :: [ByteString] -> [ByteString]
63
64 -- * Reducing 'ByteString's (folds)
65 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
66 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
67 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
68 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
69
70 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
71 foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a
72 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
73 foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
74
75 -- ** Special folds
76 concat, -- :: [ByteString] -> ByteString
77 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
78 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
79 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
80 maximum, -- :: ByteString -> Word8
81 minimum, -- :: ByteString -> Word8
82
83 -- * Building ByteStrings
84 -- ** Scans
85 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
86 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
87 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
88 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
89
90 -- ** Accumulating maps
91 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
92 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
93 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
94
95 -- ** Unfolding ByteStrings
96 replicate, -- :: Int -> Word8 -> ByteString
97 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
98 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
99
100 -- * Substrings
101
102 -- ** Breaking strings
103 take, -- :: Int -> ByteString -> ByteString
104 drop, -- :: Int -> ByteString -> ByteString
105 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
106 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
107 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
108 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
109 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
110 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
111 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 group, -- :: ByteString -> [ByteString]
113 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
114 inits, -- :: ByteString -> [ByteString]
115 tails, -- :: ByteString -> [ByteString]
116
117 -- ** Breaking into many substrings
118 split, -- :: Word8 -> ByteString -> [ByteString]
119 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
120
121 -- ** Joining strings
122 join, -- :: ByteString -> [ByteString] -> ByteString
123
124 -- * Predicates
125 isPrefixOf, -- :: ByteString -> ByteString -> Bool
126 isSuffixOf, -- :: ByteString -> ByteString -> Bool
127
128 -- ** Search for arbitrary substrings
129 isSubstringOf, -- :: ByteString -> ByteString -> Bool
130 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
131 findSubstrings, -- :: ByteString -> ByteString -> [Int]
132
133 -- * Searching ByteStrings
134
135 -- ** Searching by equality
136 -- | These functions use memchr(3) to efficiently search the ByteString
137 elem, -- :: Word8 -> ByteString -> Bool
138 notElem, -- :: Word8 -> ByteString -> Bool
139
140 -- ** Searching with a predicate
141 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
142 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
143 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
144
145 -- * Indexing ByteStrings
146 index, -- :: ByteString -> Int -> Word8
147 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
148 elemIndices, -- :: Word8 -> ByteString -> [Int]
149 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
150 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
151 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
152 count, -- :: Word8 -> ByteString -> Int
153
154 -- * Zipping and unzipping ByteStrings
155 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
156 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
157 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
158
159 -- * Ordered ByteStrings
160 sort, -- :: ByteString -> ByteString
161
162 -- * Low level CString conversions
163
164 -- ** Packing CStrings and pointers
165 packCString, -- :: CString -> ByteString
166 packCStringLen, -- :: CString -> ByteString
167 packMallocCString, -- :: CString -> ByteString
168
169 -- ** Using ByteStrings as CStrings
170 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
171 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
172
173 -- ** Copying ByteStrings
174 -- | These functions perform memcpy(3) operations
175 copy, -- :: ByteString -> ByteString
176 copyCString, -- :: CString -> IO ByteString
177 copyCStringLen, -- :: CStringLen -> IO ByteString
178
179 -- * I\/O with 'ByteString's
180
181 -- ** Standard input and output
182 getLine, -- :: IO ByteString
183 getContents, -- :: IO ByteString
184 putStr, -- :: ByteString -> IO ()
185 putStrLn, -- :: ByteString -> IO ()
186 interact, -- :: (ByteString -> ByteString) -> IO ()
187
188 -- ** Files
189 readFile, -- :: FilePath -> IO ByteString
190 writeFile, -- :: FilePath -> ByteString -> IO ()
191 appendFile, -- :: FilePath -> ByteString -> IO ()
192 -- mmapFile, -- :: FilePath -> IO ByteString
193
194 -- ** I\/O with Handles
195 hGetLine, -- :: Handle -> IO ByteString
196 hGetContents, -- :: Handle -> IO ByteString
197 hGet, -- :: Handle -> Int -> IO ByteString
198 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
199 hPut, -- :: Handle -> ByteString -> IO ()
200 hPutStr, -- :: Handle -> ByteString -> IO ()
201 hPutStrLn, -- :: Handle -> ByteString -> IO ()
202
203 #if defined(__GLASGOW_HASKELL__)
204 -- * Fusion utilities
205 unpackList, -- eek, otherwise it gets thrown away by the simplifier
206 lengthU, maximumU, minimumU
207 #endif
208
209 ) where
210
211 import qualified Prelude as P
212 import Prelude hiding (reverse,head,tail,last,init,null
213 ,length,map,lines,foldl,foldr,unlines
214 ,concat,any,take,drop,splitAt,takeWhile
215 ,dropWhile,span,break,elem,filter,maximum
216 ,minimum,all,concatMap,foldl1,foldr1
217 ,scanl,scanl1,scanr,scanr1
218 ,readFile,writeFile,appendFile,replicate
219 ,getContents,getLine,putStr,putStrLn,interact
220 ,zip,zipWith,unzip,notElem)
221
222 import Data.ByteString.Base
223 import Data.ByteString.Fusion
224
225 import qualified Data.List as List
226
227 import Data.Word (Word8)
228 import Data.Maybe (listToMaybe)
229 import Data.Array (listArray)
230 import qualified Data.Array as Array ((!))
231
232 -- Control.Exception.bracket not available in yhc or nhc
233 import Control.Exception (bracket, assert)
234 import qualified Control.Exception as Exception
235 import Control.Monad (when)
236
237 import Foreign.C.String (CString, CStringLen)
238 import Foreign.C.Types (CSize)
239 import Foreign.ForeignPtr
240 import Foreign.Marshal.Array
241 import Foreign.Ptr
242 import Foreign.Storable (Storable(..))
243
244 -- hGetBuf and hPutBuf not available in yhc or nhc
245 import System.IO (stdin,stdout,hClose,hFileSize
246 ,hGetBuf,hPutBuf,openBinaryFile
247 ,Handle,IOMode(..))
248
249 import Data.Monoid (Monoid, mempty, mappend, mconcat)
250
251 #if !defined(__GLASGOW_HASKELL__)
252 import System.IO.Unsafe
253 import qualified System.Environment
254 import qualified System.IO (hGetLine)
255 #endif
256
257 #if defined(__GLASGOW_HASKELL__)
258
259 import System.IO (hGetBufNonBlocking)
260 import System.IO.Error (isEOFError)
261
262 import GHC.Handle
263 import GHC.Prim (Word#, (+#), writeWord8OffAddr#)
264 import GHC.Base (build)
265 import GHC.Word hiding (Word8)
266 import GHC.Ptr (Ptr(..))
267 import GHC.ST (ST(..))
268 import GHC.IOBase
269
270 #endif
271
272 -- -----------------------------------------------------------------------------
273 --
274 -- Useful macros, until we have bang patterns
275 --
276
277 #define STRICT1(f) f a | a `seq` False = undefined
278 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
279 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
280 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
281 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
282
283 -- -----------------------------------------------------------------------------
284
285 instance Eq ByteString
286 where (==) = eq
287
288 instance Ord ByteString
289 where compare = compareBytes
290
291 instance Monoid ByteString where
292 mempty = empty
293 mappend = append
294 mconcat = concat
295
296 {-
297 instance Arbitrary PackedString where
298 arbitrary = P.pack `fmap` arbitrary
299 coarbitrary s = coarbitrary (P.unpack s)
300 -}
301
302 -- | /O(n)/ Equality on the 'ByteString' type.
303 eq :: ByteString -> ByteString -> Bool
304 eq a@(PS p s l) b@(PS p' s' l')
305 | l /= l' = False -- short cut on length
306 | p == p' && s == s' = True -- short cut for the same string
307 | otherwise = compareBytes a b == EQ
308 {-# INLINE eq #-}
309
310 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
311 compareBytes :: ByteString -> ByteString -> Ordering
312 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
313 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
314 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
315 | otherwise = inlinePerformIO $
316 withForeignPtr x1 $ \p1 ->
317 withForeignPtr x2 $ \p2 -> do
318 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
319 return $! case i `compare` 0 of
320 EQ -> l1 `compare` l2
321 x -> x
322 {-# INLINE compareBytes #-}
323
324 {-
325 --
326 -- About 4x slower over 32M
327 --
328 compareBytes :: ByteString -> ByteString -> Ordering
329 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
330 withForeignPtr fp1 $ \p1 ->
331 withForeignPtr fp2 $ \p2 ->
332 cmp (p1 `plusPtr` off1)
333 (p2 `plusPtr` off2) 0 len1 len2
334
335 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
336 STRICT5(cmp)
337 cmp p1 p2 n len1 len2
338 | n == len1 = if n == len2 then return EQ else return LT
339 | n == len2 = return GT
340 | otherwise = do
341 (a :: Word8) <- peekByteOff p1 n
342 (b :: Word8) <- peekByteOff p2 n
343 case a `compare` b of
344 EQ -> cmp p1 p2 (n+1) len1 len2
345 LT -> return LT
346 GT -> return GT
347 {-# INLINE compareBytes #-}
348 -}
349
350 -- -----------------------------------------------------------------------------
351 -- Introducing and eliminating 'ByteString's
352
353 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
354 singleton :: Word8 -> ByteString
355 singleton c = unsafeCreate 1 $ \p -> poke p c
356 {-# INLINE [1] singleton #-}
357
358 --
359 -- XXX The unsafePerformIO is critical!
360 --
361 -- Otherwise:
362 --
363 -- singleton 255 `compare` singleton 127
364 --
365 -- is compiled to:
366 --
367 -- case mallocByteString 2 of
368 -- ForeignPtr f internals ->
369 -- case writeWord8OffAddr# f 0 255 of _ ->
370 -- case writeWord8OffAddr# f 0 127 of _ ->
371 -- case eqAddr# f f of
372 -- False -> case compare (GHC.Prim.plusAddr# f 0)
373 -- (GHC.Prim.plusAddr# f 0)
374 --
375 --
376
377 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
378 --
379 -- For applications with large numbers of string literals, pack can be a
380 -- bottleneck. In such cases, consider using packAddress (GHC only).
381 pack :: [Word8] -> ByteString
382
383 #if !defined(__GLASGOW_HASKELL__)
384
385 pack str = unsafeCreate (P.length str) $ \p -> go p str
386 where
387 go _ [] = return ()
388 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
389
390 #else /* hack away */
391
392 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
393 where
394 go _ _ [] = return ()
395 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
396
397 writeByte p i c = ST $ \s# ->
398 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
399
400 #endif
401
402 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
403 unpack :: ByteString -> [Word8]
404
405 #if !defined(__GLASGOW_HASKELL__)
406
407 unpack (PS _ _ 0) = []
408 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
409 go (p `plusPtr` s) (l - 1) []
410 where
411 STRICT3(go)
412 go p 0 acc = peek p >>= \e -> return (e : acc)
413 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
414 {-# INLINE unpack #-}
415
416 #else
417
418 unpack ps = build (unpackFoldr ps)
419 {-# INLINE unpack #-}
420
421 --
422 -- critical this isn't strict in the acc
423 -- as it will break in the presence of list fusion. this is a known
424 -- issue with seq and build/foldr rewrite rules, which rely on lazy
425 -- demanding to avoid bottoms in the list.
426 --
427 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
428 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
429 let loop q n _ | q `seq` n `seq` False = undefined -- n.b.
430 loop _ (-1) acc = return acc
431 loop q n acc = do
432 a <- peekByteOff q n
433 loop q (n-1) (a `f` acc)
434 loop (p `plusPtr` off) (len-1) ch
435 {-# INLINE [0] unpackFoldr #-}
436
437 unpackList :: ByteString -> [Word8]
438 unpackList (PS fp off len) = withPtr fp $ \p -> do
439 let STRICT3(loop)
440 loop _ (-1) acc = return acc
441 loop q n acc = do
442 a <- peekByteOff q n
443 loop q (n-1) (a : acc)
444 loop (p `plusPtr` off) (len-1) []
445
446 {-# RULES
447 "FPS unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
448 #-}
449
450 #endif
451
452 -- ---------------------------------------------------------------------
453 -- Basic interface
454
455 -- | /O(1)/ Test whether a ByteString is empty.
456 null :: ByteString -> Bool
457 null (PS _ _ l) = assert (l >= 0) $ l <= 0
458 {-# INLINE null #-}
459
460 -- ---------------------------------------------------------------------
461 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
462 length :: ByteString -> Int
463 length (PS _ _ l) = assert (l >= 0) $ l
464
465 --
466 -- length/loop fusion. When taking the length of any fuseable loop,
467 -- rewrite it as a foldl', and thus avoid allocating the result buffer
468 -- worth around 10% in speed testing.
469 --
470
471 #if defined(__GLASGOW_HASKELL__)
472 {-# INLINE [1] length #-}
473 #endif
474
475 lengthU :: ByteString -> Int
476 lengthU = foldl' (const . (+1)) (0::Int)
477 {-# INLINE lengthU #-}
478
479 {-# RULES
480
481 -- v2 fusion
482 "FPS length/loop" forall loop s .
483 length (loopArr (loopWrapper loop s)) =
484 lengthU (loopArr (loopWrapper loop s))
485
486 #-}
487
488 ------------------------------------------------------------------------
489
490 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
491 -- complexity, as it requires a memcpy.
492 cons :: Word8 -> ByteString -> ByteString
493 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
494 poke p c
495 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
496 {-# INLINE cons #-}
497
498 -- | /O(n)/ Append a byte to the end of a 'ByteString'
499 snoc :: ByteString -> Word8 -> ByteString
500 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
501 memcpy p (f `plusPtr` s) (fromIntegral l)
502 poke (p `plusPtr` l) c
503 {-# INLINE snoc #-}
504
505 -- todo fuse
506
507 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
508 -- An exception will be thrown in the case of an empty ByteString.
509 head :: ByteString -> Word8
510 head (PS x s l)
511 | l <= 0 = errorEmptyList "head"
512 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
513 {-# INLINE head #-}
514
515 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
516 -- An exception will be thrown in the case of an empty ByteString.
517 tail :: ByteString -> ByteString
518 tail (PS p s l)
519 | l <= 0 = errorEmptyList "tail"
520 | otherwise = PS p (s+1) (l-1)
521 {-# INLINE tail #-}
522
523 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
524 -- An exception will be thrown in the case of an empty ByteString.
525 last :: ByteString -> Word8
526 last ps@(PS x s l)
527 | null ps = errorEmptyList "last"
528 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
529 {-# INLINE last #-}
530
531 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
532 -- An exception will be thrown in the case of an empty ByteString.
533 init :: ByteString -> ByteString
534 init ps@(PS p s l)
535 | null ps = errorEmptyList "init"
536 | otherwise = PS p s (l-1)
537 {-# INLINE init #-}
538
539 -- | /O(n)/ Append two ByteStrings
540 append :: ByteString -> ByteString -> ByteString
541 append xs ys | null xs = ys
542 | null ys = xs
543 | otherwise = concat [xs,ys]
544 {-# INLINE append #-}
545
546 -- ---------------------------------------------------------------------
547 -- Transformations
548
549 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
550 -- element of @xs@. This function is subject to array fusion.
551 map :: (Word8 -> Word8) -> ByteString -> ByteString
552 #if defined(LOOPU_FUSION)
553 map f = loopArr . loopU (mapEFL f) NoAcc
554 #elif defined(LOOPUP_FUSION)
555 map f = loopArr . loopUp (mapEFL f) NoAcc
556 #elif defined(LOOPNOACC_FUSION)
557 map f = loopArr . loopNoAcc (mapEFL f)
558 #else
559 map f = loopArr . loopMap f
560 #endif
561 {-# INLINE map #-}
562
563 {-
564 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
565 -- slightly faster for one-shot cases.
566 map' :: (Word8 -> Word8) -> ByteString -> ByteString
567 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
568 create len $ map_ 0 (a `plusPtr` s)
569 where
570 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
571 STRICT3(map_)
572 map_ n p1 p2
573 | n >= len = return ()
574 | otherwise = do
575 x <- peekByteOff p1 n
576 pokeByteOff p2 n (f x)
577 map_ (n+1) p1 p2
578 {-# INLINE map' #-}
579 -}
580
581 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
582 reverse :: ByteString -> ByteString
583 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
584 c_reverse p (f `plusPtr` s) (fromIntegral l)
585
586 -- todo, fuseable version
587
588 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
589 -- 'ByteString' and \`intersperses\' that byte between the elements of
590 -- the 'ByteString'. It is analogous to the intersperse function on
591 -- Lists.
592 intersperse :: Word8 -> ByteString -> ByteString
593 intersperse c ps@(PS x s l)
594 | length ps < 2 = ps
595 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
596 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
597
598 {-
599 intersperse c = pack . List.intersperse c . unpack
600 -}
601
602 -- | The 'transpose' function transposes the rows and columns of its
603 -- 'ByteString' argument.
604 transpose :: [ByteString] -> [ByteString]
605 transpose ps = P.map pack (List.transpose (P.map unpack ps))
606
607 -- ---------------------------------------------------------------------
608 -- Reducing 'ByteString's
609
610 -- | 'foldl', applied to a binary operator, a starting value (typically
611 -- the left-identity of the operator), and a ByteString, reduces the
612 -- ByteString using the binary operator, from left to right.
613 -- This function is subject to array fusion.
614 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
615 #if !defined(LOOPU_FUSION)
616 foldl f z = loopAcc . loopUp (foldEFL f) z
617 #else
618 foldl f z = loopAcc . loopU (foldEFL f) z
619 #endif
620 {-# INLINE foldl #-}
621
622 {-
623 --
624 -- About twice as fast with 6.4.1, but not fuseable
625 -- A simple fold . map is enough to make it worth while.
626 --
627 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
628 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
629 where
630 STRICT3(lgo)
631 lgo z p q | p == q = return z
632 | otherwise = do c <- peek p
633 lgo (f z c) (p `plusPtr` 1) q
634 -}
635
636 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
637 -- Though actually foldl is also strict in the accumulator.
638 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
639 foldl' = foldl
640 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
641 {-# INLINE foldl' #-}
642
643 -- | 'foldr', applied to a binary operator, a starting value
644 -- (typically the right-identity of the operator), and a ByteString,
645 -- reduces the ByteString using the binary operator, from right to left.
646 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
647 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
648 {-# INLINE foldr #-}
649
650 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
651 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
652 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
653 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
654 where
655 STRICT3(go)
656 go z p q | p == q = return z
657 | otherwise = do c <- peek p
658 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
659 {-# INLINE [1] foldr' #-}
660
661 -- | 'foldl1' is a variant of 'foldl' that has no starting value
662 -- argument, and thus must be applied to non-empty 'ByteStrings'.
663 -- This function is subject to array fusion.
664 -- An exception will be thrown in the case of an empty ByteString.
665 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
666 foldl1 f ps
667 | null ps = errorEmptyList "foldl1"
668 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
669 {-# INLINE foldl1 #-}
670
671 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
672 -- An exception will be thrown in the case of an empty ByteString.
673 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
674 foldl1' f ps
675 | null ps = errorEmptyList "foldl1'"
676 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
677 {-# INLINE foldl1' #-}
678
679 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
680 -- and thus must be applied to non-empty 'ByteString's
681 -- An exception will be thrown in the case of an empty ByteString.
682 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
683 foldr1 f ps
684 | null ps = errorEmptyList "foldr1"
685 | otherwise = foldr f (last ps) (init ps)
686 {-# INLINE foldr1 #-}
687
688 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
689 -- accumulator.
690 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
691 foldr1' f ps
692 | null ps = errorEmptyList "foldr1"
693 | otherwise = foldr' f (last ps) (init ps)
694 {-# INLINE [1] foldr1' #-}
695
696 -- ---------------------------------------------------------------------
697 -- Special folds
698
699 -- | /O(n)/ Concatenate a list of ByteStrings.
700 concat :: [ByteString] -> ByteString
701 concat [] = empty
702 concat [ps] = ps
703 concat xs = unsafeCreate len $ \ptr -> go xs ptr
704 where len = P.sum . P.map length $ xs
705 STRICT2(go)
706 go [] _ = return ()
707 go (PS p s l:ps) ptr = do
708 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
709 go ps (ptr `plusPtr` l)
710
711 -- | Map a function over a 'ByteString' and concatenate the results
712 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
713 concatMap f = concat . foldr ((:) . f) []
714
715 -- foldr (append . f) empty
716
717 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
718 -- any element of the 'ByteString' satisfies the predicate.
719 any :: (Word8 -> Bool) -> ByteString -> Bool
720 any _ (PS _ _ 0) = False
721 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
722 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
723 where
724 STRICT2(go)
725 go p q | p == q = return False
726 | otherwise = do c <- peek p
727 if f c then return True
728 else go (p `plusPtr` 1) q
729
730 -- todo fuse
731
732 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
733 -- if all elements of the 'ByteString' satisfy the predicate.
734 all :: (Word8 -> Bool) -> ByteString -> Bool
735 all _ (PS _ _ 0) = True
736 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
737 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
738 where
739 STRICT2(go)
740 go p q | p == q = return True -- end of list
741 | otherwise = do c <- peek p
742 if f c
743 then go (p `plusPtr` 1) q
744 else return False
745
746 ------------------------------------------------------------------------
747
748 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
749 -- This function will fuse.
750 -- An exception will be thrown in the case of an empty ByteString.
751 maximum :: ByteString -> Word8
752 maximum xs@(PS x s l)
753 | null xs = errorEmptyList "maximum"
754 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
755 c_maximum (p `plusPtr` s) (fromIntegral l)
756
757 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
758 -- This function will fuse.
759 -- An exception will be thrown in the case of an empty ByteString.
760 minimum :: ByteString -> Word8
761 minimum xs@(PS x s l)
762 | null xs = errorEmptyList "minimum"
763 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
764 c_minimum (p `plusPtr` s) (fromIntegral l)
765
766 --
767 -- minimum/maximum/loop fusion. As for length (and other folds), when we
768 -- see we're applied after a fuseable op, switch from using the C
769 -- version, to the fuseable version. The result should then avoid
770 -- allocating a buffer.
771 --
772
773 #if defined(__GLASGOW_HASKELL__)
774 {-# INLINE [1] minimum #-}
775 {-# INLINE [1] maximum #-}
776 #endif
777
778 maximumU :: ByteString -> Word8
779 maximumU = foldl1' max
780 {-# INLINE maximumU #-}
781
782 minimumU :: ByteString -> Word8
783 minimumU = foldl1' min
784 {-# INLINE minimumU #-}
785
786 {-# RULES
787
788 "FPS minimum/loop" forall loop s .
789 minimum (loopArr (loopWrapper loop s)) =
790 minimumU (loopArr (loopWrapper loop s))
791
792 "FPS maximum/loop" forall loop s .
793 maximum (loopArr (loopWrapper loop s)) =
794 maximumU (loopArr (loopWrapper loop s))
795
796 #-}
797
798 ------------------------------------------------------------------------
799
800 -- | The 'mapAccumL' function behaves like a combination of 'map' and
801 -- 'foldl'; it applies a function to each element of a ByteString,
802 -- passing an accumulating parameter from left to right, and returning a
803 -- final value of this accumulator together with the new list.
804 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
805 #if !defined(LOOPU_FUSION)
806 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
807 #else
808 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
809 #endif
810 {-# INLINE mapAccumL #-}
811
812 -- | The 'mapAccumR' function behaves like a combination of 'map' and
813 -- 'foldr'; it applies a function to each element of a ByteString,
814 -- passing an accumulating parameter from right to left, and returning a
815 -- final value of this accumulator together with the new ByteString.
816 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
817 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
818 {-# INLINE mapAccumR #-}
819
820 -- | /O(n)/ map Word8 functions, provided with the index at each position
821 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
822 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
823 {-# INLINE mapIndexed #-}
824
825 -- ---------------------------------------------------------------------
826 -- Building ByteStrings
827
828 -- | 'scanl' is similar to 'foldl', but returns a list of successive
829 -- reduced values from the left. This function will fuse.
830 --
831 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
832 --
833 -- Note that
834 --
835 -- > last (scanl f z xs) == foldl f z xs.
836 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
837 #if !defined(LOOPU_FUSION)
838 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
839 #else
840 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
841 #endif
842
843 -- n.b. haskell's List scan returns a list one bigger than the
844 -- input, so we need to snoc here to get some extra space, however,
845 -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
846 {-# INLINE scanl #-}
847
848 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
849 -- This function will fuse.
850 --
851 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
852 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
853 scanl1 f ps
854 | null ps = empty
855 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
856 {-# INLINE scanl1 #-}
857
858 -- | scanr is the right-to-left dual of scanl.
859 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
860 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
861 {-# INLINE scanr #-}
862
863 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
864 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
865 scanr1 f ps
866 | null ps = empty
867 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
868 {-# INLINE scanr1 #-}
869
870 -- ---------------------------------------------------------------------
871 -- Unfolds and replicates
872
873 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
874 -- the value of every element. The following holds:
875 --
876 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
877 --
878 -- This implemenation uses @memset(3)@
879 replicate :: Int -> Word8 -> ByteString
880 replicate w c
881 | w <= 0 = empty
882 | otherwise = unsafeCreate w $ \ptr ->
883 memset ptr c (fromIntegral w) >> return ()
884
885 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
886 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
887 -- ByteString from a seed value. The function takes the element and
888 -- returns 'Nothing' if it is done producing the ByteString or returns
889 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
890 -- and @b@ is the seed value for further production.
891 --
892 -- Examples:
893 --
894 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
895 -- > == pack [0, 1, 2, 3, 4, 5]
896 --
897 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
898 unfoldr f = concat . unfoldChunk 32 64
899 where unfoldChunk n n' x =
900 case unfoldrN n f x of
901 (s, Nothing) -> s : []
902 (s, Just x') -> s : unfoldChunk n' (n+n') x'
903
904 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
905 -- value. However, the length of the result is limited by the first
906 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
907 -- when the maximum length of the result is known.
908 --
909 -- The following equation relates 'unfoldrN' and 'unfoldr':
910 --
911 -- > unfoldrN n f s == take n (unfoldr f s)
912 --
913 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
914 unfoldrN i f x0
915 | i < 0 = (empty, Just x0)
916 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
917 where STRICT3(go)
918 go p x n =
919 case f x of
920 Nothing -> return (0, n, Nothing)
921 Just (w,x')
922 | n == i -> return (0, n, Just x)
923 | otherwise -> do poke p w
924 go (p `plusPtr` 1) x' (n+1)
925
926 -- ---------------------------------------------------------------------
927 -- Substrings
928
929 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
930 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
931 take :: Int -> ByteString -> ByteString
932 take n ps@(PS x s l)
933 | n <= 0 = empty
934 | n >= l = ps
935 | otherwise = PS x s n
936 {-# INLINE take #-}
937
938 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
939 -- elements, or @[]@ if @n > 'length' xs@.
940 drop :: Int -> ByteString -> ByteString
941 drop n ps@(PS x s l)
942 | n <= 0 = ps
943 | n >= l = empty
944 | otherwise = PS x (s+n) (l-n)
945 {-# INLINE drop #-}
946
947 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
948 splitAt :: Int -> ByteString -> (ByteString, ByteString)
949 splitAt n ps@(PS x s l)
950 | n <= 0 = (empty, ps)
951 | n >= l = (ps, empty)
952 | otherwise = (PS x s n, PS x (s+n) (l-n))
953 {-# INLINE splitAt #-}
954
955 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
956 -- returns the longest prefix (possibly empty) of @xs@ of elements that
957 -- satisfy @p@.
958 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
959 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
960 {-# INLINE takeWhile #-}
961
962 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
963 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
964 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
965 {-# INLINE dropWhile #-}
966
967 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
968 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
969 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
970 {-# INLINE [1] break #-}
971
972 {-# RULES
973 "FPS specialise break (x==)" forall x.
974 break ((==) x) = breakByte x
975 #-}
976
977 #if __GLASGOW_HASKELL__ >= 605
978 {-# RULES
979 "FPS specialise break (==x)" forall x.
980 break (==x) = breakByte x
981 #-}
982 #endif
983
984 -- | 'breakByte' breaks its ByteString argument at the first occurence
985 -- of the specified byte. It is more efficient than 'break' as it is
986 -- implemented with @memchr(3)@. I.e.
987 --
988 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
989 --
990 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
991 breakByte c p = case elemIndex c p of
992 Nothing -> (p,empty)
993 Just n -> (unsafeTake n p, unsafeDrop n p)
994 {-# INLINE breakByte #-}
995
996 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
997 --
998 -- breakEnd p == spanEnd (not.p)
999 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1000 breakEnd p ps = splitAt (findFromEndUntil p ps) ps
1001
1002 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1003 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1004 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1005 span p ps = break (not . p) ps
1006 {-# INLINE [1] span #-}
1007
1008 -- | 'spanByte' breaks its ByteString argument at the first
1009 -- occurence of a byte other than its argument. It is more efficient
1010 -- than 'span (==)'
1011 --
1012 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
1013 --
1014 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1015 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1016 go (p `plusPtr` s) 0
1017 where
1018 STRICT2(go)
1019 go p i | i >= l = return (ps, empty)
1020 | otherwise = do c' <- peekByteOff p i
1021 if c /= c'
1022 then return (unsafeTake i ps, unsafeDrop i ps)
1023 else go p (i+1)
1024 {-# INLINE spanByte #-}
1025
1026 {-# RULES
1027 "FPS specialise span (x==)" forall x.
1028 span ((==) x) = spanByte x
1029 #-}
1030
1031 #if __GLASGOW_HASKELL__ >= 605
1032 {-# RULES
1033 "FPS specialise span (==x)" forall x.
1034 span (==x) = spanByte x
1035 #-}
1036 #endif
1037
1038 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1039 -- We have
1040 --
1041 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1042 --
1043 -- and
1044 --
1045 -- > spanEnd (not . isSpace) ps
1046 -- > ==
1047 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1048 --
1049 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1050 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1051
1052 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1053 -- separators, where the predicate returns True for a separator element.
1054 -- The resulting components do not contain the separators. Two adjacent
1055 -- separators result in an empty component in the output. eg.
1056 --
1057 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1058 -- > splitWith (=='a') [] == []
1059 --
1060 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1061
1062 #if defined(__GLASGOW_HASKELL__)
1063 splitWith _pred (PS _ _ 0) = []
1064 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1065 where pred# c# = pred_ (W8# c#)
1066
1067 STRICT4(splitWith0)
1068 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1069 splitLoop pred' p 0 off' len' fp'
1070
1071 splitLoop :: (Word# -> Bool)
1072 -> Ptr Word8
1073 -> Int -> Int -> Int
1074 -> ForeignPtr Word8
1075 -> IO [ByteString]
1076
1077 splitLoop pred' p idx' off' len' fp'
1078 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1079 | idx' >= len' = return [PS fp' off' idx']
1080 | otherwise = do
1081 w <- peekElemOff p (off'+idx')
1082 if pred' (case w of W8# w# -> w#)
1083 then return (PS fp' off' idx' :
1084 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1085 else splitLoop pred' p (idx'+1) off' len' fp'
1086 {-# INLINE splitWith #-}
1087
1088 #else
1089 splitWith _ (PS _ _ 0) = []
1090 splitWith p ps = loop p ps
1091 where
1092 STRICT2(loop)
1093 loop q qs = if null rest then [chunk]
1094 else chunk : loop q (unsafeTail rest)
1095 where (chunk,rest) = break q qs
1096 #endif
1097
1098 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1099 -- argument, consuming the delimiter. I.e.
1100 --
1101 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1102 -- > split 'a' "aXaXaXa" == ["","X","X","X",""]
1103 -- > split 'x' "x" == ["",""]
1104 --
1105 -- and
1106 --
1107 -- > join [c] . split c == id
1108 -- > split == splitWith . (==)
1109 --
1110 -- As for all splitting functions in this library, this function does
1111 -- not copy the substrings, it just constructs new 'ByteStrings' that
1112 -- are slices of the original.
1113 --
1114 split :: Word8 -> ByteString -> [ByteString]
1115 split _ (PS _ _ 0) = []
1116 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1117 let ptr = p `plusPtr` s
1118
1119 STRICT1(loop)
1120 loop n =
1121 let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1122 w (fromIntegral (l-n))
1123 in if q == nullPtr
1124 then [PS x (s+n) (l-n)]
1125 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1126
1127 return (loop 0)
1128 {-# INLINE split #-}
1129
1130 {-
1131 -- slower. but stays inside Haskell.
1132 split _ (PS _ _ 0) = []
1133 split (W8# w#) (PS fp off len) = splitWith' off len fp
1134 where
1135 splitWith' off' len' fp' = withPtr fp $ \p ->
1136 splitLoop p 0 off' len' fp'
1137
1138 splitLoop :: Ptr Word8
1139 -> Int -> Int -> Int
1140 -> ForeignPtr Word8
1141 -> IO [ByteString]
1142
1143 STRICT5(splitLoop)
1144 splitLoop p idx' off' len' fp'
1145 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1146 | idx' >= len' = return [PS fp' off' idx']
1147 | otherwise = do
1148 (W8# x#) <- peekElemOff p (off'+idx')
1149 if word2Int# w# ==# word2Int# x#
1150 then return (PS fp' off' idx' :
1151 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1152 else splitLoop p (idx'+1) off' len' fp'
1153 -}
1154
1155 {-
1156 -- | Like 'splitWith', except that sequences of adjacent separators are
1157 -- treated as a single separator. eg.
1158 --
1159 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1160 --
1161 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1162 tokens f = P.filter (not.null) . splitWith f
1163 {-# INLINE tokens #-}
1164 -}
1165
1166 -- | The 'group' function takes a ByteString and returns a list of
1167 -- ByteStrings such that the concatenation of the result is equal to the
1168 -- argument. Moreover, each sublist in the result contains only equal
1169 -- elements. For example,
1170 --
1171 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1172 --
1173 -- It is a special case of 'groupBy', which allows the programmer to
1174 -- supply their own equality test. It is about 40% faster than
1175 -- /groupBy (==)/
1176 group :: ByteString -> [ByteString]
1177 group xs
1178 | null xs = []
1179 | otherwise = ys : group zs
1180 where
1181 (ys, zs) = spanByte (unsafeHead xs) xs
1182
1183 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1184 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1185 groupBy k xs
1186 | null xs = []
1187 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1188 where
1189 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1190
1191 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1192 -- 'ByteString's and concatenates the list after interspersing the first
1193 -- argument between each element of the list.
1194 join :: ByteString -> [ByteString] -> ByteString
1195 join s = concat . (List.intersperse s)
1196 {-# INLINE [1] join #-}
1197
1198 {-# RULES
1199 "FPS specialise join c -> joinByte" forall c s1 s2 .
1200 join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
1201 #-}
1202
1203 --
1204 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1205 -- with a char. Around 4 times faster than the generalised join.
1206 --
1207 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1208 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1209 withForeignPtr ffp $ \fp ->
1210 withForeignPtr fgp $ \gp -> do
1211 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1212 poke (ptr `plusPtr` l) c
1213 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1214 where
1215 len = length f + length g + 1
1216 {-# INLINE joinWithByte #-}
1217
1218 -- ---------------------------------------------------------------------
1219 -- Indexing ByteStrings
1220
1221 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1222 index :: ByteString -> Int -> Word8
1223 index ps n
1224 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1225 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1226 ++ ", length = " ++ show (length ps))
1227 | otherwise = ps `unsafeIndex` n
1228 {-# INLINE index #-}
1229
1230 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1231 -- element in the given 'ByteString' which is equal to the query
1232 -- element, or 'Nothing' if there is no such element.
1233 -- This implementation uses memchr(3).
1234 elemIndex :: Word8 -> ByteString -> Maybe Int
1235 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1236 let p' = p `plusPtr` s
1237 q <- memchr p' c (fromIntegral l)
1238 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1239 {-# INLINE elemIndex #-}
1240
1241 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1242 -- element in the given 'ByteString' which is equal to the query
1243 -- element, or 'Nothing' if there is no such element. The following
1244 -- holds:
1245 --
1246 -- > elemIndexEnd c xs ==
1247 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1248 --
1249 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1250 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1251 go (p `plusPtr` s) (l-1)
1252 where
1253 STRICT2(go)
1254 go p i | i < 0 = return Nothing
1255 | otherwise = do ch' <- peekByteOff p i
1256 if ch == ch'
1257 then return $ Just i
1258 else go p (i-1)
1259 {-# INLINE elemIndexEnd #-}
1260
1261 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1262 -- the indices of all elements equal to the query element, in ascending order.
1263 -- This implementation uses memchr(3).
1264 elemIndices :: Word8 -> ByteString -> [Int]
1265 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1266 let ptr = p `plusPtr` s
1267
1268 STRICT1(loop)
1269 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1270 w (fromIntegral (l - n))
1271 in if q == nullPtr
1272 then []
1273 else let i = q `minusPtr` ptr
1274 in i : loop (i+1)
1275 return $! loop 0
1276 {-# INLINE elemIndices #-}
1277
1278 {-
1279 -- much slower
1280 elemIndices :: Word8 -> ByteString -> [Int]
1281 elemIndices c ps = loop 0 ps
1282 where STRICT2(loop)
1283 loop _ ps' | null ps' = []
1284 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1285 | otherwise = loop (n+1) (unsafeTail ps')
1286 -}
1287
1288 -- | count returns the number of times its argument appears in the ByteString
1289 --
1290 -- > count = length . elemIndices
1291 --
1292 -- But more efficiently than using length on the intermediate list.
1293 count :: Word8 -> ByteString -> Int
1294 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1295 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1296 {-# INLINE count #-}
1297
1298 {-
1299 --
1300 -- around 30% slower
1301 --
1302 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1303 go (p `plusPtr` s) (fromIntegral m) 0
1304 where
1305 go :: Ptr Word8 -> CSize -> Int -> IO Int
1306 STRICT3(go)
1307 go p l i = do
1308 q <- memchr p w l
1309 if q == nullPtr
1310 then return i
1311 else do let k = fromIntegral $ q `minusPtr` p
1312 go (q `plusPtr` 1) (l-k-1) (i+1)
1313 -}
1314
1315 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1316 -- returns the index of the first element in the ByteString
1317 -- satisfying the predicate.
1318 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1319 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1320 where
1321 STRICT2(go)
1322 go ptr n | n >= l = return Nothing
1323 | otherwise = do w <- peek ptr
1324 if k w
1325 then return (Just n)
1326 else go (ptr `plusPtr` 1) (n+1)
1327 {-# INLINE findIndex #-}
1328
1329 -- | The 'findIndices' function extends 'findIndex', by returning the
1330 -- indices of all elements satisfying the predicate, in ascending order.
1331 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1332 findIndices p ps = loop 0 ps
1333 where
1334 STRICT2(loop)
1335 loop n qs | null qs = []
1336 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1337 | otherwise = loop (n+1) (unsafeTail qs)
1338
1339 -- ---------------------------------------------------------------------
1340 -- Searching ByteStrings
1341
1342 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1343 elem :: Word8 -> ByteString -> Bool
1344 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1345 {-# INLINE elem #-}
1346
1347 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1348 notElem :: Word8 -> ByteString -> Bool
1349 notElem c ps = not (elem c ps)
1350 {-# INLINE notElem #-}
1351
1352 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1353 -- returns a ByteString containing those characters that satisfy the
1354 -- predicate. This function is subject to array fusion.
1355 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1356 #if defined(LOOPU_FUSION)
1357 filter p = loopArr . loopU (filterEFL p) NoAcc
1358 #elif defined(LOOPUP_FUSION)
1359 filter p = loopArr . loopUp (filterEFL p) NoAcc
1360 #elif defined(LOOPNOACC_FUSION)
1361 filter p = loopArr . loopNoAcc (filterEFL p)
1362 #else
1363 filter f = loopArr . loopFilter f
1364 #endif
1365 {-# INLINE filter #-}
1366
1367 {-
1368 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1369 -- around 2x faster for some one-shot applications.
1370 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1371 filter' k ps@(PS x s l)
1372 | null ps = ps
1373 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1374 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1375 return $! t `minusPtr` p -- actual length
1376 where
1377 STRICT3(go)
1378 go f t end | f == end = return t
1379 | otherwise = do
1380 w <- peek f
1381 if k w
1382 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1383 else go (f `plusPtr` 1) t end
1384 {-# INLINE filter' #-}
1385 -}
1386
1387 --
1388 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1389 -- case of filtering a single byte. It is more efficient to use
1390 -- /filterByte/ in this case.
1391 --
1392 -- > filterByte == filter . (==)
1393 --
1394 -- filterByte is around 10x faster, and uses much less space, than its
1395 -- filter equivalent
1396 filterByte :: Word8 -> ByteString -> ByteString
1397 filterByte w ps = replicate (count w ps) w
1398 {-# INLINE filterByte #-}
1399
1400 {-# RULES
1401 "FPS specialise filter (== x)" forall x.
1402 filter ((==) x) = filterByte x
1403 #-}
1404
1405 #if __GLASGOW_HASKELL__ >= 605
1406 {-# RULES
1407 "FPS specialise filter (== x)" forall x.
1408 filter (== x) = filterByte x
1409 #-}
1410 #endif
1411
1412 --
1413 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1414 -- case of filtering a single byte out of a list. It is more efficient
1415 -- to use /filterNotByte/ in this case.
1416 --
1417 -- > filterNotByte == filter . (/=)
1418 --
1419 -- filterNotByte is around 2x faster than its filter equivalent.
1420 filterNotByte :: Word8 -> ByteString -> ByteString
1421 filterNotByte w = filter (/= w)
1422 {-# INLINE filterNotByte #-}
1423
1424 {-# RULES
1425 "FPS specialise filter (x /=)" forall x.
1426 filter ((/=) x) = filterNotByte x
1427 #-}
1428
1429 #if __GLASGOW_HASKELL__ >= 605
1430 {-# RULES
1431 "FPS specialise filter (/= x)" forall x.
1432 filter (/= x) = filterNotByte x
1433 #-}
1434 #endif
1435
1436 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1437 -- and returns the first element in matching the predicate, or 'Nothing'
1438 -- if there is no such element.
1439 --
1440 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1441 --
1442 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1443 find f p = case findIndex f p of
1444 Just n -> Just (p `unsafeIndex` n)
1445 _ -> Nothing
1446 {-# INLINE find #-}
1447
1448 {-
1449 --
1450 -- fuseable, but we don't want to walk the whole array.
1451 --
1452 find k = foldl findEFL Nothing
1453 where findEFL a@(Just _) _ = a
1454 findEFL _ c | k c = Just c
1455 | otherwise = Nothing
1456 -}
1457
1458 -- ---------------------------------------------------------------------
1459 -- Searching for substrings
1460
1461 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1462 -- iff the first is a prefix of the second.
1463 isPrefixOf :: ByteString -> ByteString -> Bool
1464 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1465 | l1 == 0 = True
1466 | l2 < l1 = False
1467 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1468 withForeignPtr x2 $ \p2 -> do
1469 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1470 return $! i == 0
1471
1472 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1473 -- iff the first is a suffix of the second.
1474 --
1475 -- The following holds:
1476 --
1477 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1478 --
1479 -- However, the real implemenation uses memcmp to compare the end of the
1480 -- string only, with no reverse required..
1481 isSuffixOf :: ByteString -> ByteString -> Bool
1482 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1483 | l1 == 0 = True
1484 | l2 < l1 = False
1485 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1486 withForeignPtr x2 $ \p2 -> do
1487 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1488 return $! i == 0
1489
1490 -- | Check whether one string is a substring of another. @isSubstringOf
1491 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1492 isSubstringOf :: ByteString -- ^ String to search for.
1493 -> ByteString -- ^ String to search in.
1494 -> Bool
1495 isSubstringOf p s = not $ P.null $ findSubstrings p s
1496
1497 -- | Get the first index of a substring in another string,
1498 -- or 'Nothing' if the string is not found.
1499 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1500 findSubstring :: ByteString -- ^ String to search for.
1501 -> ByteString -- ^ String to seach in.
1502 -> Maybe Int
1503 findSubstring = (listToMaybe .) . findSubstrings
1504
1505 -- | Find the indexes of all (possibly overlapping) occurances of a
1506 -- substring in a string. This function uses the Knuth-Morris-Pratt
1507 -- string matching algorithm.
1508 findSubstrings :: ByteString -- ^ String to search for.
1509 -> ByteString -- ^ String to seach in.
1510 -> [Int]
1511
1512 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1513 where
1514 patc x = pat `unsafeIndex` x
1515 strc x = str `unsafeIndex` x
1516
1517 -- maybe we should make kmpNext a UArray before using it in search?
1518 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1519 kmpNextL p _ | null p = []
1520 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1521 ps = unsafeTail p
1522 x = if not (null ps) && unsafeHead ps == patc j'
1523 then kmpNext Array.! j' else j'
1524 in x:kmpNextL ps j'
1525 search i j = match ++ rest -- i: position in string, j: position in pattern
1526 where match = if j == m then [(i - j)] else []
1527 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1528 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1529 | otherwise = j
1530
1531 -- ---------------------------------------------------------------------
1532 -- Zipping
1533
1534 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1535 -- corresponding pairs of bytes. If one input ByteString is short,
1536 -- excess elements of the longer ByteString are discarded. This is
1537 -- equivalent to a pair of 'unpack' operations.
1538 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1539 zip ps qs
1540 | null ps || null qs = []
1541 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1542
1543 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1544 -- the first argument, instead of a tupling function. For example,
1545 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1546 -- corresponding sums.
1547 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1548 zipWith f ps qs
1549 | null ps || null qs = []
1550 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1551 #if defined(__GLASGOW_HASKELL__)
1552 {-# INLINE [1] zipWith #-}
1553 #endif
1554
1555 --
1556 -- | A specialised version of zipWith for the common case of a
1557 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1558 -- are used to automatically covert zipWith into zipWith' when a pack is
1559 -- performed on the result of zipWith, but we also export it for
1560 -- convenience.
1561 --
1562 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1563 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1564 withForeignPtr fp $ \a ->
1565 withForeignPtr fq $ \b ->
1566 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1567 where
1568 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1569 STRICT4(zipWith_)
1570 zipWith_ n p1 p2 r
1571 | n >= len = return ()
1572 | otherwise = do
1573 x <- peekByteOff p1 n
1574 y <- peekByteOff p2 n
1575 pokeByteOff r n (f x y)
1576 zipWith_ (n+1) p1 p2 r
1577
1578 len = min l m
1579 {-# INLINE zipWith' #-}
1580
1581 {-# RULES
1582
1583 "FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1584 zipWith f p q = unpack (zipWith' f p q)
1585
1586 #-}
1587
1588 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1589 -- ByteStrings. Note that this performs two 'pack' operations.
1590 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1591 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1592 {-# INLINE unzip #-}
1593
1594 -- ---------------------------------------------------------------------
1595 -- Special lists
1596
1597 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1598 inits :: ByteString -> [ByteString]
1599 inits (PS x s l) = [PS x s n | n <- [0..l]]
1600
1601 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1602 tails :: ByteString -> [ByteString]
1603 tails p | null p = [empty]
1604 | otherwise = p : tails (unsafeTail p)
1605
1606 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1607
1608 -- ---------------------------------------------------------------------
1609 -- ** Ordered 'ByteString's
1610
1611 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1612 sort :: ByteString -> ByteString
1613 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1614
1615 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1616 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1617
1618 let STRICT2(go)
1619 go 256 _ = return ()
1620 go i ptr = do n <- peekElemOff arr i
1621 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1622 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1623 go 0 p
1624
1625 {-
1626 sort :: ByteString -> ByteString
1627 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1628 memcpy p (f `plusPtr` s) l
1629 c_qsort p l -- inplace
1630 -}
1631
1632 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1633 --
1634 -- Try some linear sorts: radix, counting
1635 -- Or mergesort.
1636 --
1637 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1638 -- sortBy f ps = undefined
1639
1640 -- ---------------------------------------------------------------------
1641 -- Low level constructors
1642
1643 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1644 -- finalizer associated to it. The ByteString length is calculated using
1645 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1646 packCString :: CString -> ByteString
1647 packCString cstr = unsafePerformIO $ do
1648 fp <- newForeignPtr_ (castPtr cstr)
1649 l <- c_strlen cstr
1650 return $! PS fp 0 (fromIntegral l)
1651
1652 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1653 -- have /no/ finalizer associated with it. This operation has /O(1)/
1654 -- complexity as we already know the final size, so no /strlen(3)/ is
1655 -- required.
1656 packCStringLen :: CStringLen -> ByteString
1657 packCStringLen (ptr,len) = unsafePerformIO $ do
1658 fp <- newForeignPtr_ (castPtr ptr)
1659 return $! PS fp 0 (fromIntegral len)
1660
1661 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1662 -- have a @free(3)@ finalizer associated to it.
1663 packMallocCString :: CString -> ByteString
1664 packMallocCString cstr = unsafePerformIO $ do
1665 fp <- newForeignFreePtr (castPtr cstr)
1666 len <- c_strlen cstr
1667 return $! PS fp 0 (fromIntegral len)
1668
1669 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1670 -- null-terminated @CString@. The @CString@ will be freed
1671 -- automatically. This is a memcpy(3).
1672 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1673 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1674 where alloc = withForeignPtr ps $ \p -> do
1675 buf <- c_malloc (fromIntegral l+1)
1676 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1677 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1678 return (castPtr buf)
1679
1680 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1681 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1682 useAsCStringLen = unsafeUseAsCStringLen
1683
1684 --
1685 -- why were we doing this?
1686 --
1687 -- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1688 -- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1689 -- where
1690 -- alloc = withForeignPtr ps $ \p -> do
1691 -- buf <- c_malloc (fromIntegral l+1)
1692 -- memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1693 -- poke (buf `plusPtr` l) (0::Word8) -- n.b.
1694 -- return $! (castPtr buf, l)
1695 --
1696
1697 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1698 -- This is mainly useful to allow the rest of the data pointed
1699 -- to by the 'ByteString' to be garbage collected, for example
1700 -- if a large string has been read in, and only a small part of it
1701 -- is needed in the rest of the program.
1702 copy :: ByteString -> ByteString
1703 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1704 memcpy p (f `plusPtr` s) (fromIntegral l)
1705
1706 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1707 -- CString is going to be deallocated from C land.
1708 copyCString :: CString -> IO ByteString
1709 copyCString cstr = do
1710 len <- c_strlen cstr
1711 copyCStringLen (cstr, fromIntegral len)
1712
1713 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1714 copyCStringLen :: CStringLen -> IO ByteString
1715 copyCStringLen (cstr, len) = create len $ \p ->
1716 memcpy p (castPtr cstr) (fromIntegral len)
1717
1718 -- ---------------------------------------------------------------------
1719 -- line IO
1720
1721 -- | Read a line from stdin.
1722 getLine :: IO ByteString
1723 getLine = hGetLine stdin
1724
1725 {-
1726 -- | Lazily construct a list of lines of ByteStrings. This will be much
1727 -- better on memory consumption than using 'hGetContents >>= lines'
1728 -- If you're considering this, a better choice might be to use
1729 -- Data.ByteString.Lazy
1730 hGetLines :: Handle -> IO [ByteString]
1731 hGetLines h = go
1732 where
1733 go = unsafeInterleaveIO $ do
1734 e <- hIsEOF h
1735 if e
1736 then return []
1737 else do
1738 x <- hGetLine h
1739 xs <- go
1740 return (x:xs)
1741 -}
1742
1743 -- | Read a line from a handle
1744
1745 hGetLine :: Handle -> IO ByteString
1746 #if !defined(__GLASGOW_HASKELL__)
1747 hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w
1748 #else
1749 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1750 case haBufferMode handle_ of
1751 NoBuffering -> error "no buffering"
1752 _other -> hGetLineBuffered handle_
1753
1754 where
1755 hGetLineBuffered handle_ = do
1756 let ref = haBuffer handle_
1757 buf <- readIORef ref
1758 hGetLineBufferedLoop handle_ ref buf 0 []
1759
1760 hGetLineBufferedLoop handle_ ref
1761 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1762 len `seq` do
1763 off <- findEOL r w raw
1764 let new_len = len + off - r
1765 xs <- mkPS raw r off
1766
1767 -- if eol == True, then off is the offset of the '\n'
1768 -- otherwise off == w and the buffer is now empty.
1769 if off /= w
1770 then do if (w == off + 1)
1771 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1772 else writeIORef ref buf{ bufRPtr = off + 1 }
1773 mkBigPS new_len (xs:xss)
1774 else do
1775 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1776 buf{ bufWPtr=0, bufRPtr=0 }
1777 case maybe_buf of
1778 -- Nothing indicates we caught an EOF, and we may have a
1779 -- partial line to return.
1780 Nothing -> do
1781 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1782 if new_len > 0
1783 then mkBigPS new_len (xs:xss)
1784 else ioe_EOF
1785 Just new_buf ->
1786 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1787
1788 -- find the end-of-line character, if there is one
1789 findEOL r w raw
1790 | r == w = return w
1791 | otherwise = do
1792 (c,r') <- readCharFromBuffer raw r
1793 if c == '\n'
1794 then return r -- NB. not r': don't include the '\n'
1795 else findEOL r' w raw
1796
1797 maybeFillReadBuffer fd is_line is_stream buf = catch
1798 (do buf' <- fillReadBuffer fd is_line is_stream buf
1799 return (Just buf'))
1800 (\e -> if isEOFError e then return Nothing else ioError e)
1801
1802 -- TODO, rewrite to use normal memcpy
1803 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1804 mkPS buf start end =
1805 let len = end - start
1806 in create len $ \p -> do
1807 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1808 return ()
1809
1810 mkBigPS :: Int -> [ByteString] -> IO ByteString
1811 mkBigPS _ [ps] = return ps
1812 mkBigPS _ pss = return $! concat (P.reverse pss)
1813
1814 #endif
1815
1816 -- ---------------------------------------------------------------------
1817 -- Block IO
1818
1819 -- | Outputs a 'ByteString' to the specified 'Handle'.
1820 hPut :: Handle -> ByteString -> IO ()
1821 hPut _ (PS _ _ 0) = return ()
1822 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1823
1824 -- | A synonym for @hPut@, for compatibility
1825 hPutStr :: Handle -> ByteString -> IO ()
1826 hPutStr = hPut
1827
1828 -- | Write a ByteString to a handle, appending a newline byte
1829 hPutStrLn :: Handle -> ByteString -> IO ()
1830 hPutStrLn h ps
1831 | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1832 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1833
1834 -- | Write a ByteString to stdout
1835 putStr :: ByteString -> IO ()
1836 putStr = hPut stdout
1837
1838 -- | Write a ByteString to stdout, appending a newline byte
1839 putStrLn :: ByteString -> IO ()
1840 putStrLn = hPutStrLn stdout
1841
1842 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1843 -- is far more efficient than reading the characters into a 'String'
1844 -- and then using 'pack'.
1845 hGet :: Handle -> Int -> IO ByteString
1846 hGet _ 0 = return empty
1847 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1848
1849 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1850 -- waiting for data to become available, instead it returns only whatever data
1851 -- is available.
1852 hGetNonBlocking :: Handle -> Int -> IO ByteString
1853 #if defined(__GLASGOW_HASKELL__)
1854 hGetNonBlocking _ 0 = return empty
1855 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1856 #else
1857 hGetNonBlocking = hGet
1858 #endif
1859
1860 -- | Read entire handle contents into a 'ByteString'.
1861 -- This function reads chunks at a time, doubling the chunksize on each
1862 -- read. The final buffer is then realloced to the appropriate size. For
1863 -- files > half of available memory, this may lead to memory exhaustion.
1864 -- Consider using 'readFile' in this case.
1865 --
1866 -- As with 'hGet', the string representation in the file is assumed to
1867 -- be ISO-8859-1.
1868 --
1869 hGetContents :: Handle -> IO ByteString
1870 hGetContents h = do
1871 let start_size = 1024
1872 p <- mallocArray start_size
1873 i <- hGetBuf h p start_size
1874 if i < start_size
1875 then do p' <- reallocArray p i
1876 fp <- newForeignFreePtr p'
1877 return $! PS fp 0 i
1878 else f p start_size
1879 where
1880 f p s = do
1881 let s' = 2 * s
1882 p' <- reallocArray p s'
1883 i <- hGetBuf h (p' `plusPtr` s) s
1884 if i < s
1885 then do let i' = s + i
1886 p'' <- reallocArray p' i'
1887 fp <- newForeignFreePtr p''
1888 return $! PS fp 0 i'
1889 else f p' s'
1890
1891 -- | getContents. Equivalent to hGetContents stdin
1892 getContents :: IO ByteString
1893 getContents = hGetContents stdin
1894
1895 -- | The interact function takes a function of type @ByteString -> ByteString@
1896 -- as its argument. The entire input from the standard input device is passed
1897 -- to this function as its argument, and the resulting string is output on the
1898 -- standard output device. It's great for writing one line programs!
1899 interact :: (ByteString -> ByteString) -> IO ()
1900 interact transformer = putStr . transformer =<< getContents
1901
1902 -- | Read an entire file strictly into a 'ByteString'. This is far more
1903 -- efficient than reading the characters into a 'String' and then using
1904 -- 'pack'. It also may be more efficient than opening the file and
1905 -- reading it using hGet. Files are read using 'binary mode' on Windows,
1906 -- for 'text mode' use the Char8 version of this function.
1907 readFile :: FilePath -> IO ByteString
1908 readFile f = bracket (openBinaryFile f ReadMode) hClose
1909 (\h -> hFileSize h >>= hGet h . fromIntegral)
1910
1911 -- | Write a 'ByteString' to a file.
1912 writeFile :: FilePath -> ByteString -> IO ()
1913 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1914 (\h -> hPut h txt)
1915
1916 -- | Append a 'ByteString' to a file.
1917 appendFile :: FilePath -> ByteString -> IO ()
1918 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1919 (\h -> hPut h txt)
1920
1921 {-
1922 --
1923 -- Disable until we can move it into a portable .hsc file
1924 --
1925
1926 -- | Like readFile, this reads an entire file directly into a
1927 -- 'ByteString', but it is even more efficient. It involves directly
1928 -- mapping the file to memory. This has the advantage that the contents
1929 -- of the file never need to be copied. Also, under memory pressure the
1930 -- page may simply be discarded, while in the case of readFile it would
1931 -- need to be written to swap. If you read many small files, mmapFile
1932 -- will be less memory-efficient than readFile, since each mmapFile
1933 -- takes up a separate page of memory. Also, you can run into bus
1934 -- errors if the file is modified. As with 'readFile', the string
1935 -- representation in the file is assumed to be ISO-8859-1.
1936 --
1937 -- On systems without mmap, this is the same as a readFile.
1938 --
1939 mmapFile :: FilePath -> IO ByteString
1940 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1941
1942 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1943 mmap f = do
1944 h <- openBinaryFile f ReadMode
1945 l <- fromIntegral `fmap` hFileSize h
1946 -- Don't bother mmaping small files because each mmapped file takes up
1947 -- at least one full VM block.
1948 if l < mmap_limit
1949 then do thefp <- mallocByteString l
1950 withForeignPtr thefp $ \p-> hGetBuf h p l
1951 hClose h
1952 return (thefp, l)
1953 else do
1954 -- unix only :(
1955 fd <- fromIntegral `fmap` handleToFd h
1956 p <- my_mmap l fd
1957 fp <- if p == nullPtr
1958 then do thefp <- mallocByteString l
1959 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1960 return thefp
1961 else do
1962 -- The munmap leads to crashes on OpenBSD.
1963 -- maybe there's a use after unmap in there somewhere?
1964 -- Bulat suggests adding the hClose to the
1965 -- finalizer, excellent idea.
1966 #if !defined(__OpenBSD__)
1967 let unmap = c_munmap p l >> return ()
1968 #else
1969 let unmap = return ()
1970 #endif
1971 fp <- newForeignPtr p unmap
1972 return fp
1973 c_close fd
1974 hClose h
1975 return (fp, l)
1976 where mmap_limit = 16*1024
1977 -}
1978
1979 -- ---------------------------------------------------------------------
1980 -- Internal utilities
1981
1982 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1983 -- of the string if no element is found, rather than Nothing.
1984 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1985 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1986 where
1987 STRICT2(go)
1988 go ptr n | n >= l = return l
1989 | otherwise = do w <- peek ptr
1990 if k w
1991 then return n
1992 else go (ptr `plusPtr` 1) (n+1)
1993 {-# INLINE findIndexOrEnd #-}
1994
1995 -- | Perform an operation with a temporary ByteString
1996 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1997 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1998 {-# INLINE withPtr #-}
1999
2000 -- Common up near identical calls to `error' to reduce the number
2001 -- constant strings created when compiled:
2002 errorEmptyList :: String -> a
2003 errorEmptyList fun = moduleError fun "empty ByteString"
2004 {-# NOINLINE errorEmptyList #-}
2005
2006 moduleError :: String -> String -> a
2007 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2008 {-# NOINLINE moduleError #-}
2009
2010 -- Find from the end of the string using predicate
2011 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2012 STRICT2(findFromEndUntil)
2013 findFromEndUntil f ps@(PS x s l) =
2014 if null ps then 0
2015 else if f (last ps) then l
2016 else findFromEndUntil f (PS x s (l-1))
2017
2018 {-# INLINE newForeignFreePtr #-}
2019 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2020 newForeignFreePtr p = newForeignPtr c_free_finalizer p