Sync Data.ByteString with current stable branch, 0.7
[packages/random.git] / Data / ByteString / Lazy.hs
1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
2 --
3 -- Module : ByteString.Lazy
4 -- Copyright : (c) Don Stewart 2006
5 -- (c) Duncan Coutts 2006
6 -- License : BSD-style
7 --
8 -- Maintainer : dons@cse.unsw.edu.au
9 -- Stability : experimental
10 -- Portability : portable, requires ffi and cpp
11 -- Tested with : GHC 6.4.1 and Hugs March 2005
12 --
13
14 --
15 -- | A time and space-efficient implementation of lazy byte vectors
16 -- using lists of packed 'Word8' arrays, suitable for high performance
17 -- use, both in terms of large data quantities, or high speed
18 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
19 -- arrays of bytes. They provide a means to manipulate large byte vectors
20 -- without requiring the entire vector be resident in memory.
21 --
22 -- Some operations, such as concat, append, reverse and cons, have
23 -- better complexity than their "Data.ByteString" equivalents, as due to
24 -- optimisations resulting from the list spine structure. And for other
25 -- operations Lazy ByteStrings are usually within a few percent of
26 -- strict ones, but with better heap usage. For data larger than the
27 -- available memory, or if you have tight memory constraints, this
28 -- module will be the only option. The default chunk size is 64k, which
29 -- should be good in most circumstances. For people with large L2
30 -- caches, you may want to increase this to fit your cache.
31 --
32 -- This module is intended to be imported @qualified@, to avoid name
33 -- clashes with "Prelude" functions. eg.
34 --
35 -- > import qualified Data.ByteString.Lazy as B
36 --
37 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
38 -- UArray by Simon Marlow. Rewritten to support slices and use
39 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
40 -- Lazy variant by Duncan Coutts and Don Stewart.
41 --
42
43 module Data.ByteString.Lazy (
44
45 -- * The @ByteString@ type
46 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
47
48 -- * Introducing and eliminating 'ByteString's
49 empty, -- :: ByteString
50 singleton, -- :: Word8 -> ByteString
51 pack, -- :: [Word8] -> ByteString
52 unpack, -- :: ByteString -> [Word8]
53
54 -- * Basic interface
55 cons, -- :: Word8 -> ByteString -> ByteString
56 snoc, -- :: ByteString -> Word8 -> ByteString
57 append, -- :: ByteString -> ByteString -> ByteString
58 head, -- :: ByteString -> Word8
59 last, -- :: ByteString -> Word8
60 tail, -- :: ByteString -> ByteString
61 init, -- :: ByteString -> ByteString
62 null, -- :: ByteString -> Bool
63 length, -- :: ByteString -> Int64
64
65 -- * Transformating ByteStrings
66 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
67 reverse, -- :: ByteString -> ByteString
68 -- intersperse, -- :: Word8 -> ByteString -> ByteString
69 transpose, -- :: [ByteString] -> [ByteString]
70
71 -- * Reducing 'ByteString's (folds)
72 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
73 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
74 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
75 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
77 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
78
79 -- ** Special folds
80 concat, -- :: [ByteString] -> ByteString
81 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
82 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
83 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
84 maximum, -- :: ByteString -> Word8
85 minimum, -- :: ByteString -> Word8
86
87 -- * Building ByteStrings
88 -- ** Scans
89 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
90 -- scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
91 -- scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
92 -- scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
93
94 -- ** Accumulating maps
95 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
96 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
97 mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
98
99 -- ** Infinite ByteStrings
100 repeat, -- :: Word8 -> ByteString
101 replicate, -- :: Int64 -> Word8 -> ByteString
102 cycle, -- :: ByteString -> ByteString
103 iterate, -- :: (Word8 -> Word8) -> Word8 -> ByteString
104
105 -- ** Unfolding
106 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
107
108 -- * Substrings
109
110 -- ** Breaking strings
111 take, -- :: Int64 -> ByteString -> ByteString
112 drop, -- :: Int64 -> ByteString -> ByteString
113 splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString)
114 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
115 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
116 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
117 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
118 group, -- :: ByteString -> [ByteString]
119 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
120 inits, -- :: ByteString -> [ByteString]
121 tails, -- :: ByteString -> [ByteString]
122
123 -- ** Breaking into many substrings
124 split, -- :: Word8 -> ByteString -> [ByteString]
125 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
126 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
127
128 -- ** Joining strings
129 join, -- :: ByteString -> [ByteString] -> ByteString
130
131 -- * Predicates
132 isPrefixOf, -- :: ByteString -> ByteString -> Bool
133 -- isSuffixOf, -- :: ByteString -> ByteString -> Bool
134
135 -- * Searching ByteStrings
136
137 -- ** Searching by equality
138 elem, -- :: Word8 -> ByteString -> Bool
139 notElem, -- :: Word8 -> ByteString -> Bool
140
141 -- ** Searching with a predicate
142 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
143 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
144 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
145
146 -- * Indexing ByteStrings
147 index, -- :: ByteString -> Int64 -> Word8
148 elemIndex, -- :: Word8 -> ByteString -> Maybe Int64
149 elemIndices, -- :: Word8 -> ByteString -> [Int64]
150 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
151 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int64]
152 count, -- :: Word8 -> ByteString -> Int64
153
154 -- * Zipping and unzipping ByteStrings
155 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
156 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
157 -- unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
158
159 -- * Ordered ByteStrings
160 -- sort, -- :: ByteString -> ByteString
161
162 copy, -- :: ByteString -> ByteString
163
164 -- * I\/O with 'ByteString's
165
166 -- ** Standard input and output
167 getContents, -- :: IO ByteString
168 putStr, -- :: ByteString -> IO ()
169 putStrLn, -- :: ByteString -> IO ()
170 interact, -- :: (ByteString -> ByteString) -> IO ()
171
172 -- ** Files
173 readFile, -- :: FilePath -> IO ByteString
174 writeFile, -- :: FilePath -> ByteString -> IO ()
175 appendFile, -- :: FilePath -> ByteString -> IO ()
176
177 -- ** I\/O with Handles
178 hGetContents, -- :: Handle -> IO ByteString
179 hGet, -- :: Handle -> Int -> IO ByteString
180 hPut, -- :: Handle -> ByteString -> IO ()
181 hGetNonBlocking, -- :: Handle -> IO ByteString
182 -- hGetN, -- :: Int -> Handle -> Int -> IO ByteString
183 -- hGetContentsN, -- :: Int -> Handle -> IO ByteString
184 -- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
185
186 ) where
187
188 import qualified Prelude
189 import Prelude hiding
190 (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
191 ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
192 ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
193 ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
194 ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
195
196 import qualified Data.List as L -- L for list/lazy
197 import qualified Data.ByteString as P -- P for packed
198 import qualified Data.ByteString.Base as P
199 import qualified Data.ByteString.Fusion as P
200 import Data.ByteString.Fusion (PairS(..),loopL)
201
202 import Data.Monoid (Monoid(..))
203
204 import Data.Word (Word8)
205 import Data.Int (Int64)
206 import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..)
207 ,hClose,hWaitForInput,hIsEOF)
208 import System.IO.Unsafe
209 import Control.Exception (bracket)
210
211 import Foreign.ForeignPtr (withForeignPtr)
212 import Foreign.Ptr
213 import Foreign.Storable
214
215 #if defined(__GLASGOW_HASKELL__)
216 import Data.Generics (Data(..), Typeable(..))
217 #endif
218
219 -- -----------------------------------------------------------------------------
220 --
221 -- Useful macros, until we have bang patterns
222 --
223
224 #define STRICT1(f) f a | a `seq` False = undefined
225 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
226 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
227 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
228 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
229
230 -- -----------------------------------------------------------------------------
231
232 -- | A space-efficient representation of a Word8 vector, supporting many
233 -- efficient operations. A 'ByteString' contains 8-bit characters only.
234 --
235 -- Instances of Eq, Ord, Read, Show, Data, Typeable
236 --
237 newtype ByteString = LPS [P.ByteString] -- LPS for lazy packed string
238 deriving (Show,Read
239 #if defined(__GLASGOW_HASKELL__)
240 ,Data, Typeable
241 #endif
242 )
243
244 --
245 -- hmm, what about getting the PS constructor unpacked into the cons cell?
246 --
247 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
248 --
249 -- Would avoid one indirection per chunk.
250 --
251
252 unLPS :: ByteString -> [P.ByteString]
253 unLPS (LPS xs) = xs
254 {-# INLINE unLPS #-}
255
256 instance Eq ByteString
257 where (==) = eq
258
259 instance Ord ByteString
260 where compare = compareBytes
261
262 instance Monoid ByteString where
263 mempty = empty
264 mappend = append
265 mconcat = concat
266
267 ------------------------------------------------------------------------
268
269 -- XXX
270 -- The data type invariant:
271 -- Every ByteString is either empty or consists of non-null ByteStrings.
272 -- All functions must preserve this, and the QC properties must check this.
273 --
274 _invariant :: ByteString -> Bool
275 _invariant (LPS []) = True
276 _invariant (LPS xs) = L.all (not . P.null) xs
277
278 -- In a form useful for QC testing
279 _checkInvariant :: ByteString -> ByteString
280 _checkInvariant lps
281 | _invariant lps = lps
282 | otherwise = moduleError "invariant" ("violation: " ++ show lps)
283
284 -- The Data abstraction function
285 --
286 _abstr :: ByteString -> P.ByteString
287 _abstr (LPS []) = P.empty
288 _abstr (LPS xs) = P.concat xs
289
290 -- The representation uses lists of packed chunks. When we have to convert from
291 -- a lazy list to the chunked representation, then by default we'll use this
292 -- chunk size. Some functions give you more control over the chunk size.
293 --
294 -- Measurements here:
295 -- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
296 --
297 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
298 -- The following value assumes people have something greater than 128k,
299 -- and need to share the cache with other programs.
300 --
301 defaultChunkSize :: Int
302 defaultChunkSize = 32 * k - overhead
303 where k = 1024
304 overhead = 2 * sizeOf (undefined :: Int)
305
306 smallChunkSize :: Int
307 smallChunkSize = 4 * k - overhead
308 where k = 1024
309 overhead = 2 * sizeOf (undefined :: Int)
310
311 -- defaultChunkSize = 1
312
313 ------------------------------------------------------------------------
314
315 eq :: ByteString -> ByteString -> Bool
316 eq (LPS xs) (LPS ys) = eq' xs ys
317 where eq' [] [] = True
318 eq' [] _ = False
319 eq' _ [] = False
320 eq' (a:as) (b:bs) =
321 case compare (P.length a) (P.length b) of
322 LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
323 EQ -> a == b && eq' as bs
324 GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
325
326 compareBytes :: ByteString -> ByteString -> Ordering
327 compareBytes (LPS xs) (LPS ys) = cmp xs ys
328 where cmp [] [] = EQ
329 cmp [] _ = LT
330 cmp _ [] = GT
331 cmp (a:as) (b:bs) =
332 case compare (P.length a) (P.length b) of
333 LT -> case compare a (P.take (P.length a) b) of
334 EQ -> cmp as (P.drop (P.length a) b : bs)
335 result -> result
336 EQ -> case compare a b of
337 EQ -> cmp as bs
338 result -> result
339 GT -> case compare (P.take (P.length b) a) b of
340 EQ -> cmp (P.drop (P.length b) a : as) bs
341 result -> result
342
343 -- -----------------------------------------------------------------------------
344 -- Introducing and eliminating 'ByteString's
345
346 -- | /O(1)/ The empty 'ByteString'
347 empty :: ByteString
348 empty = LPS []
349 {-# NOINLINE empty #-}
350
351 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
352 singleton :: Word8 -> ByteString
353 singleton c = LPS [P.singleton c]
354 {-# NOINLINE singleton #-}
355
356 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
357 pack :: [Word8] -> ByteString
358 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
359
360 -- ?
361 chunk :: Int -> [a] -> [[a]]
362 chunk _ [] = []
363 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
364
365 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
366 unpack :: ByteString -> [Word8]
367 unpack (LPS ss) = L.concatMap P.unpack ss
368 {-# INLINE unpack #-}
369
370 ------------------------------------------------------------------------
371
372 {-
373 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
374 -- conversion function
375 packWith :: (a -> Word8) -> [a] -> ByteString
376 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
377 {-# INLINE packWith #-}
378 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
379
380 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
381 unpackWith :: (Word8 -> a) -> ByteString -> [a]
382 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
383 {-# INLINE unpackWith #-}
384 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
385 -}
386
387 -- ---------------------------------------------------------------------
388 -- Basic interface
389
390 -- | /O(1)/ Test whether a ByteString is empty.
391 null :: ByteString -> Bool
392 null (LPS []) = True
393 null (_) = False
394 {-# INLINE null #-}
395
396 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
397 length :: ByteString -> Int64
398 length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
399
400 -- avoid the intermediate list?
401 -- length (LPS ss) = L.foldl lengthF 0 ss
402 -- where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
403 {-# INLINE length #-}
404
405 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
406 -- strict in the ByteString that we are consing onto. More precisely, it forces
407 -- the head and the first chunk. It does this because, for space efficiency, it
408 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
409 -- new \'chunk\'.
410 --
411 -- So that means you can't use a lazy recursive contruction like this:
412 --
413 -- > let xs = cons c xs in xs
414 --
415 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
416 --
417 cons :: Word8 -> ByteString -> ByteString
418 cons c (LPS (s:ss)) | P.length s <= 16 = LPS (P.cons c s : ss)
419 cons c (LPS ss) = LPS (P.singleton c : ss)
420 {-# INLINE cons #-}
421
422 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
423 snoc :: ByteString -> Word8 -> ByteString
424 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
425 {-# INLINE snoc #-}
426
427 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
428 head :: ByteString -> Word8
429 head (LPS []) = errorEmptyList "head"
430 head (LPS (x:_)) = P.unsafeHead x
431 {-# INLINE head #-}
432
433 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
434 tail :: ByteString -> ByteString
435 tail (LPS []) = errorEmptyList "tail"
436 tail (LPS (x:xs))
437 | P.length x == 1 = LPS xs
438 | otherwise = LPS (P.unsafeTail x : xs)
439 {-# INLINE tail #-}
440
441 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
442 last :: ByteString -> Word8
443 last (LPS []) = errorEmptyList "last"
444 last (LPS xs) = P.last (L.last xs)
445 {-# INLINE last #-}
446
447 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
448 init :: ByteString -> ByteString
449 init (LPS []) = errorEmptyList "init"
450 init (LPS xs)
451 | P.length y == 1 = LPS ys
452 | otherwise = LPS (ys ++ [P.init y])
453 where (y,ys) = (L.last xs, L.init xs)
454 {-# INLINE init #-}
455
456 -- | /O(n)/ Append two ByteStrings
457 append :: ByteString -> ByteString -> ByteString
458 append (LPS []) (LPS ys) = LPS ys
459 append (LPS xs) (LPS []) = LPS xs
460 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
461 {-# INLINE append #-}
462
463 -- ---------------------------------------------------------------------
464 -- Transformations
465
466 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
467 -- element of @xs@.
468 map :: (Word8 -> Word8) -> ByteString -> ByteString
469 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
470 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
471 {-# INLINE map #-}
472
473 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
474 reverse :: ByteString -> ByteString
475 reverse (LPS ps) = LPS (rev [] ps)
476 where rev a [] = a
477 rev a (x:xs) = rev (P.reverse x:a) xs
478 -- note, here is one example where the extra element lazyness is an advantage.
479 -- we can reerse the list of chunks strictly but reverse each chunk lazily
480 -- so while we may force the whole lot into memory we do not need to copy
481 -- each chunk until it is used.
482 {-# INLINE reverse #-}
483
484 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
485 -- \`intersperses\' that byte between the elements of the 'ByteString'.
486 -- It is analogous to the intersperse function on Lists.
487 -- intersperse :: Word8 -> ByteString -> ByteString
488 -- intersperse = error "FIXME: not yet implemented"
489
490 {-
491 intersperse c (LPS []) = LPS []
492 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
493 where intersperse' c ps@(PS x s l) =
494 P.create (2*l) $ \p -> withForeignPtr x $ \f ->
495 poke p c
496 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
497 -}
498
499 -- | The 'transpose' function transposes the rows and columns of its
500 -- 'ByteString' argument.
501 transpose :: [ByteString] -> [ByteString]
502 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
503
504 -- ---------------------------------------------------------------------
505 -- Reducing 'ByteString's
506
507 -- | 'foldl', applied to a binary operator, a starting value (typically
508 -- the left-identity of the operator), and a ByteString, reduces the
509 -- ByteString using the binary operator, from left to right.
510 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
511 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
512 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
513 {-# INLINE foldl #-}
514
515 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
516 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
517 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
518 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
519 {-# INLINE foldl' #-}
520
521 -- | 'foldr', applied to a binary operator, a starting value
522 -- (typically the right-identity of the operator), and a ByteString,
523 -- reduces the ByteString using the binary operator, from right to left.
524 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
525 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
526 {-# INLINE foldr #-}
527
528 -- | 'foldl1' is a variant of 'foldl' that has no starting value
529 -- argument, and thus must be applied to non-empty 'ByteStrings'.
530 -- This function is subject to array fusion.
531 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
532 foldl1 _ (LPS []) = errorEmptyList "foldl1"
533 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
534
535 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
536 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
537 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
538 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
539
540 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
541 -- and thus must be applied to non-empty 'ByteString's
542 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
543 foldr1 _ (LPS []) = errorEmptyList "foldr1"
544 foldr1 f (LPS ps) = foldr1' ps
545 where foldr1' (x:[]) = P.foldr1 f x
546 foldr1' (x:xs) = P.foldr f (foldr1' xs) x
547
548 -- ---------------------------------------------------------------------
549 -- Special folds
550
551 -- | /O(n)/ Concatenate a list of ByteStrings.
552 concat :: [ByteString] -> ByteString
553 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
554
555 -- | Map a function over a 'ByteString' and concatenate the results
556 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
557 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
558 where
559 k w = case f w of LPS xs -> P.concat xs
560
561 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
562 -- any element of the 'ByteString' satisfies the predicate.
563 any :: (Word8 -> Bool) -> ByteString -> Bool
564 any f (LPS xs) = L.or (L.map (P.any f) xs)
565 -- todo fuse
566
567 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
568 -- if all elements of the 'ByteString' satisfy the predicate.
569 all :: (Word8 -> Bool) -> ByteString -> Bool
570 all f (LPS xs) = L.and (L.map (P.all f) xs)
571 -- todo fuse
572
573 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
574 maximum :: ByteString -> Word8
575 maximum (LPS []) = errorEmptyList "maximum"
576 maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
577 {-# INLINE maximum #-}
578
579 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
580 minimum :: ByteString -> Word8
581 minimum (LPS []) = errorEmptyList "minimum"
582 minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
583 {-# INLINE minimum #-}
584
585 -- | The 'mapAccumL' function behaves like a combination of 'map' and
586 -- 'foldl'; it applies a function to each element of a ByteString,
587 -- passing an accumulating parameter from left to right, and returning a
588 -- final value of this accumulator together with the new ByteString.
589 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
590 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
591
592 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
593 mapAccumR = error "mapAccumR unimplemented"
594
595 -- | /O(n)/ map Word8 functions, provided with the index at each position
596 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
597 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
598
599 -- ---------------------------------------------------------------------
600 -- Building ByteStrings
601
602 -- | 'scanl' is similar to 'foldl', but returns a list of successive
603 -- reduced values from the left. This function will fuse.
604 --
605 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
606 --
607 -- Note that
608 --
609 -- > last (scanl f z xs) == foldl f z xs.
610 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
611 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
612 {-# INLINE scanl #-}
613
614 -- ---------------------------------------------------------------------
615 -- Unfolds and replicates
616
617 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
618 -- of @f@ to @x@:
619 --
620 -- > iterate f x == [x, f x, f (f x), ...]
621 --
622 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
623 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
624
625 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
626 -- element.
627 --
628 repeat :: Word8 -> ByteString
629 repeat c = LPS (L.repeat block)
630 where block = P.replicate smallChunkSize c
631
632 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
633 -- the value of every element.
634 --
635 replicate :: Int64 -> Word8 -> ByteString
636 replicate w c
637 | w <= 0 = empty
638 | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
639 | r == 0 = LPS (L.genericReplicate q s) -- preserve invariant
640 | otherwise = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
641 where
642 s = P.replicate smallChunkSize c
643 (q, r) = quotRem w (fromIntegral smallChunkSize)
644
645 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
646 -- the infinite repetition of the original ByteString.
647 --
648 cycle :: ByteString -> ByteString
649 cycle (LPS []) = errorEmptyList "cycle"
650 cycle (LPS xs) = LPS (L.cycle xs)
651
652 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
653 -- 'unfoldr' builds a ByteString from a seed value. The function takes
654 -- the element and returns 'Nothing' if it is done producing the
655 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
656 -- prepending to the ByteString and @b@ is used as the next element in a
657 -- recursive call.
658 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
659 unfoldr f = LPS . unfoldChunk 32
660 where unfoldChunk n x =
661 case P.unfoldrN n f x of
662 (s, Nothing)
663 | P.null s -> []
664 | otherwise -> s : []
665 (s, Just x') -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
666
667 -- ---------------------------------------------------------------------
668 -- Substrings
669
670 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
671 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
672 take :: Int64 -> ByteString -> ByteString
673 take n _ | n < 0 = empty
674 take i (LPS ps) = LPS (take' i ps)
675 where take' _ [] = []
676 take' 0 _ = []
677 take' n (x:xs) =
678 if n < fromIntegral (P.length x)
679 then P.take (fromIntegral n) x : []
680 else x : take' (n - fromIntegral (P.length x)) xs
681
682 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
683 -- elements, or @[]@ if @n > 'length' xs@.
684 drop :: Int64 -> ByteString -> ByteString
685 drop i p | i <= 0 = p
686 drop i (LPS ps) = LPS (drop' i ps)
687 where drop' _ [] = []
688 drop' 0 xs = xs
689 drop' n (x:xs) =
690 if n < fromIntegral (P.length x)
691 then P.drop (fromIntegral n) x : xs
692 else drop' (n - fromIntegral (P.length x)) xs
693
694 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
695 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
696 splitAt i p | i <= 0 = (empty, p)
697 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
698 where splitAt' _ [] = ([], [])
699 splitAt' 0 xs = ([], xs)
700 splitAt' n (x:xs) =
701 if n < fromIntegral (P.length x)
702 then (P.take (fromIntegral n) x : [],
703 P.drop (fromIntegral n) x : xs)
704 else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
705 in (x:xs', xs'')
706
707
708 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
709 -- returns the longest prefix (possibly empty) of @xs@ of elements that
710 -- satisfy @p@.
711 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
712 takeWhile f (LPS ps) = LPS (takeWhile' ps)
713 where takeWhile' [] = []
714 takeWhile' (x:xs) =
715 case findIndexOrEnd (not . f) x of
716 0 -> []
717 n | n < P.length x -> P.take n x : []
718 | otherwise -> x : takeWhile' xs
719
720 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
721 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
722 dropWhile f (LPS ps) = LPS (dropWhile' ps)
723 where dropWhile' [] = []
724 dropWhile' (x:xs) =
725 case findIndexOrEnd (not . f) x of
726 n | n < P.length x -> P.drop n x : xs
727 | otherwise -> dropWhile' xs
728
729 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
730 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
731 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
732 where break' [] = ([], [])
733 break' (x:xs) =
734 case findIndexOrEnd f x of
735 0 -> ([], x : xs)
736 n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
737 | otherwise -> let (xs', xs'') = break' xs
738 in (x : xs', xs'')
739
740 --
741 -- TODO
742 --
743 -- Add rules
744 --
745
746 {-
747 -- | 'breakByte' breaks its ByteString argument at the first occurence
748 -- of the specified byte. It is more efficient than 'break' as it is
749 -- implemented with @memchr(3)@. I.e.
750 --
751 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
752 --
753 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
754 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
755 where breakByte' [] = ([], [])
756 breakByte' (x:xs) =
757 case P.elemIndex c x of
758 Just 0 -> ([], x : xs)
759 Just n -> (P.take n x : [], P.drop n x : xs)
760 Nothing -> let (xs', xs'') = breakByte' xs
761 in (x : xs', xs'')
762
763 -- | 'spanByte' breaks its ByteString argument at the first
764 -- occurence of a byte other than its argument. It is more efficient
765 -- than 'span (==)'
766 --
767 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
768 --
769 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
770 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
771 where spanByte' [] = ([], [])
772 spanByte' (x:xs) =
773 case P.spanByte c x of
774 (x', x'') | P.null x' -> ([], x : xs)
775 | P.null x'' -> let (xs', xs'') = spanByte' xs
776 in (x : xs', xs'')
777 | otherwise -> (x' : [], x'' : xs)
778 -}
779
780 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
781 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
782 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
783 span p = break (not . p)
784
785 -- | /O(n)/ Splits a 'ByteString' into components delimited by
786 -- separators, where the predicate returns True for a separator element.
787 -- The resulting components do not contain the separators. Two adjacent
788 -- separators result in an empty component in the output. eg.
789 --
790 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
791 -- > splitWith (=='a') [] == []
792 --
793 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
794 splitWith _ (LPS []) = []
795 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
796
797 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
798 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
799 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
800 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
801
802 cons' x xs | P.null x = xs
803 | otherwise = x:xs
804 {-# INLINE cons' #-}
805 {-# INLINE splitWith #-}
806
807 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
808 -- argument, consuming the delimiter. I.e.
809 --
810 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
811 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
812 -- > split 'x' "x" == ["",""]
813 --
814 -- and
815 --
816 -- > join [c] . split c == id
817 -- > split == splitWith . (==)
818 --
819 -- As for all splitting functions in this library, this function does
820 -- not copy the substrings, it just constructs new 'ByteStrings' that
821 -- are slices of the original.
822 --
823 split :: Word8 -> ByteString -> [ByteString]
824 split _ (LPS []) = []
825 split c (LPS (a:as)) = comb [] (P.split c a) as
826
827 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
828 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
829 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
830 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
831
832 cons' x xs | P.null x = xs
833 | otherwise = x:xs
834 {-# INLINE cons' #-}
835 {-# INLINE split #-}
836
837 -- | Like 'splitWith', except that sequences of adjacent separators are
838 -- treated as a single separator. eg.
839 --
840 -- > tokens (=='a') "aabbaca" == ["bb","c"]
841 --
842 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
843 tokens f = L.filter (not.null) . splitWith f
844
845 -- | The 'group' function takes a ByteString and returns a list of
846 -- ByteStrings such that the concatenation of the result is equal to the
847 -- argument. Moreover, each sublist in the result contains only equal
848 -- elements. For example,
849 --
850 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
851 --
852 -- It is a special case of 'groupBy', which allows the programmer to
853 -- supply their own equality test.
854 group :: ByteString -> [ByteString]
855 group (LPS []) = []
856 group (LPS (a:as)) = group' [] (P.group a) as
857 where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
858 group' acc@(s':_) ss@(s:_) xs
859 | P.unsafeHead s'
860 /= P.unsafeHead s = LPS (L.reverse acc) : group' [] ss xs
861 group' acc (s:[]) [] = LPS (L.reverse (s : acc)) : []
862 group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
863 group' acc (s:ss) xs = LPS (L.reverse (s : acc)) : group' [] ss xs
864
865 {-
866 TODO: check if something like this might be faster
867
868 group :: ByteString -> [ByteString]
869 group xs
870 | null xs = []
871 | otherwise = ys : group zs
872 where
873 (ys, zs) = spanByte (unsafeHead xs) xs
874 -}
875
876 -- | The 'groupBy' function is the non-overloaded version of 'group'.
877 --
878 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
879 groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
880 {-
881 groupBy _ (LPS []) = []
882 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
883 where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
884 groupBy' acc@(_:_) c ss@(s:_) xs
885 | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
886 groupBy' acc _ (s:[]) [] = LPS (L.reverse (s : acc)) : []
887 groupBy' [] _ (s:[]) (x:xs) = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
888 groupBy' acc c (s:[]) (x:xs) = groupBy' (s:acc) c (P.groupBy k x) xs
889 groupBy' acc _ (s:ss) xs = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
890 -}
891
892 {-
893 TODO: check if something like this might be faster
894
895 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
896 groupBy k xs
897 | null xs = []
898 | otherwise = take n xs : groupBy k (drop n xs)
899 where
900 n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
901 -}
902
903 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
904 -- 'ByteString's and concatenates the list after interspersing the first
905 -- argument between each element of the list.
906 join :: ByteString -> [ByteString] -> ByteString
907 join s = concat . (L.intersperse s)
908
909 -- ---------------------------------------------------------------------
910 -- Indexing ByteStrings
911
912 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
913 index :: ByteString -> Int64 -> Word8
914 index _ i | i < 0 = moduleError "index" ("negative index: " ++ show i)
915 index (LPS ps) i = index' ps i
916 where index' [] n = moduleError "index" ("index too large: " ++ show n)
917 index' (x:xs) n
918 | n >= fromIntegral (P.length x) =
919 index' xs (n - fromIntegral (P.length x))
920 | otherwise = P.unsafeIndex x (fromIntegral n)
921
922 -- | /O(n)/ The 'elemIndex' function returns the index of the first
923 -- element in the given 'ByteString' which is equal to the query
924 -- element, or 'Nothing' if there is no such element.
925 -- This implementation uses memchr(3).
926 elemIndex :: Word8 -> ByteString -> Maybe Int64
927 elemIndex c (LPS ps) = elemIndex' 0 ps
928 where elemIndex' _ [] = Nothing
929 elemIndex' n (x:xs) =
930 case P.elemIndex c x of
931 Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
932 Just i -> Just (n + fromIntegral i)
933
934 {-
935 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
936 -- element in the given 'ByteString' which is equal to the query
937 -- element, or 'Nothing' if there is no such element. The following
938 -- holds:
939 --
940 -- > elemIndexEnd c xs ==
941 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
942 --
943 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
944 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
945 go (p `plusPtr` s) (l-1)
946 where
947 STRICT2(go)
948 go p i | i < 0 = return Nothing
949 | otherwise = do ch' <- peekByteOff p i
950 if ch == ch'
951 then return $ Just i
952 else go p (i-1)
953 -}
954 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
955 -- the indices of all elements equal to the query element, in ascending order.
956 -- This implementation uses memchr(3).
957 elemIndices :: Word8 -> ByteString -> [Int64]
958 elemIndices c (LPS ps) = elemIndices' 0 ps
959 where elemIndices' _ [] = []
960 elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
961 ++ elemIndices' (n + fromIntegral (P.length x)) xs
962
963 -- | count returns the number of times its argument appears in the ByteString
964 --
965 -- > count = length . elemIndices
966 --
967 -- But more efficiently than using length on the intermediate list.
968 count :: Word8 -> ByteString -> Int64
969 count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
970
971 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
972 -- returns the index of the first element in the ByteString
973 -- satisfying the predicate.
974 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
975 findIndex k (LPS ps) = findIndex' 0 ps
976 where findIndex' _ [] = Nothing
977 findIndex' n (x:xs) =
978 case P.findIndex k x of
979 Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
980 Just i -> Just (n + fromIntegral i)
981 {-# INLINE findIndex #-}
982
983 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
984 -- and returns the first element in matching the predicate, or 'Nothing'
985 -- if there is no such element.
986 --
987 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
988 --
989 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
990 find f (LPS ps) = find' ps
991 where find' [] = Nothing
992 find' (x:xs) = case P.find f x of
993 Nothing -> find' xs
994 Just w -> Just w
995 {-# INLINE find #-}
996
997 -- | The 'findIndices' function extends 'findIndex', by returning the
998 -- indices of all elements satisfying the predicate, in ascending order.
999 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
1000 findIndices k (LPS ps) = findIndices' 0 ps
1001 where findIndices' _ [] = []
1002 findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
1003 ++ findIndices' (n + fromIntegral (P.length x)) xs
1004
1005 -- ---------------------------------------------------------------------
1006 -- Searching ByteStrings
1007
1008 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1009 elem :: Word8 -> ByteString -> Bool
1010 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1011
1012 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1013 notElem :: Word8 -> ByteString -> Bool
1014 notElem c ps = not (elem c ps)
1015
1016 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1017 -- returns a ByteString containing those characters that satisfy the
1018 -- predicate.
1019 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1020 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1021 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1022 {-# INLINE filter #-}
1023
1024 {-
1025 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1026 -- (==)/, for the common case of filtering a single byte. It is more
1027 -- efficient to use /filterByte/ in this case.
1028 --
1029 -- > filterByte == filter . (==)
1030 --
1031 -- filterByte is around 10x faster, and uses much less space, than its
1032 -- filter equivalent
1033 filterByte :: Word8 -> ByteString -> ByteString
1034 filterByte w ps = replicate (count w ps) w
1035 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1036
1037 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1038 -- case of filtering a single byte out of a list. It is more efficient
1039 -- to use /filterNotByte/ in this case.
1040 --
1041 -- > filterNotByte == filter . (/=)
1042 --
1043 -- filterNotByte is around 2x faster than its filter equivalent.
1044 filterNotByte :: Word8 -> ByteString -> ByteString
1045 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1046 -}
1047
1048 -- ---------------------------------------------------------------------
1049 -- Searching for substrings
1050
1051 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1052 -- iff the first is a prefix of the second.
1053 isPrefixOf :: ByteString -> ByteString -> Bool
1054 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1055 where isPrefixL [] _ = True
1056 isPrefixL _ [] = False
1057 isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y && isPrefixL xs ys
1058 | P.length x < P.length y = x == yh && isPrefixL xs (yt:ys)
1059 | otherwise = xh == y && isPrefixL (xt:xs) ys
1060 where (xh,xt) = P.splitAt (P.length y) x
1061 (yh,yt) = P.splitAt (P.length x) y
1062
1063 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1064 -- iff the first is a suffix of the second.
1065 --
1066 -- The following holds:
1067 --
1068 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1069 --
1070 -- However, the real implemenation uses memcmp to compare the end of the
1071 -- string only, with no reverse required..
1072 --
1073 --isSuffixOf :: ByteString -> ByteString -> Bool
1074 --isSuffixOf = error "not yet implemented"
1075
1076 -- ---------------------------------------------------------------------
1077 -- Zipping
1078
1079 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1080 -- corresponding pairs of bytes. If one input ByteString is short,
1081 -- excess elements of the longer ByteString are discarded. This is
1082 -- equivalent to a pair of 'unpack' operations.
1083 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1084 zip = zipWith (,)
1085
1086 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1087 -- the first argument, instead of a tupling function. For example,
1088 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1089 -- corresponding sums.
1090 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1091 zipWith _ (LPS []) (LPS _) = []
1092 zipWith _ (LPS _) (LPS []) = []
1093 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1094 where zipWith' x xs y ys =
1095 (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1096
1097 zipWith'' x [] _ _ | P.null x = []
1098 zipWith'' _ _ y [] | P.null y = []
1099 zipWith'' x xs y ys | not (P.null x)
1100 && not (P.null y) = zipWith' x xs y ys
1101 zipWith'' x xs _ (y':ys) | not (P.null x) = zipWith' x xs y' ys
1102 zipWith'' _ (x':xs) y ys | not (P.null y) = zipWith' x' xs y ys
1103 zipWith'' _ (x':xs) _ (y':ys) = zipWith' x' xs y' ys
1104
1105 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1106 -- ByteStrings. Note that this performs two 'pack' operations.
1107 {-
1108 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1109 unzip _ls = error "not yet implemented"
1110 {-# INLINE unzip #-}
1111 -}
1112
1113 -- ---------------------------------------------------------------------
1114 -- Special lists
1115
1116 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1117 inits :: ByteString -> [ByteString]
1118 inits = (LPS [] :) . inits' . unLPS
1119 where inits' [] = []
1120 inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1121 ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1122
1123 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1124 tails :: ByteString -> [ByteString]
1125 tails = tails' . unLPS
1126 where tails' [] = LPS [] : []
1127 tails' xs@(x:xs')
1128 | P.length x == 1 = LPS xs : tails' xs'
1129 | otherwise = LPS xs : tails' (P.unsafeTail x : xs')
1130
1131 -- ---------------------------------------------------------------------
1132 -- Low level constructors
1133
1134 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1135 -- This is mainly useful to allow the rest of the data pointed
1136 -- to by the 'ByteString' to be garbage collected, for example
1137 -- if a large string has been read in, and only a small part of it
1138 -- is needed in the rest of the program.
1139 copy :: ByteString -> ByteString
1140 copy (LPS lps) = LPS (L.map P.copy lps)
1141 --TODO, we could coalese small blocks here
1142 --FIXME: probably not strict enough, if we're doing this to avoid retaining
1143 -- the parent blocks then we'd better copy strictly.
1144
1145 -- ---------------------------------------------------------------------
1146
1147 -- TODO defrag func that concatenates block together that are below a threshold
1148 -- defrag :: Int -> ByteString -> ByteString
1149
1150 -- ---------------------------------------------------------------------
1151 -- Lazy ByteString IO
1152
1153 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1154 -- are read on demand, in at most @k@-sized chunks. It does not block
1155 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
1156 -- available then they will be returned immediately as a smaller chunk.
1157 hGetContentsN :: Int -> Handle -> IO ByteString
1158 hGetContentsN k h = lazyRead >>= return . LPS
1159 where
1160 lazyRead = unsafeInterleaveIO loop
1161
1162 loop = do
1163 ps <- P.hGetNonBlocking h k
1164 --TODO: I think this should distinguish EOF from no data available
1165 -- the otherlying POSIX call makes this distincion, returning either
1166 -- 0 or EAGAIN
1167 if P.null ps
1168 then do eof <- hIsEOF h
1169 if eof then return []
1170 else hWaitForInput h (-1)
1171 >> loop
1172 else do pss <- lazyRead
1173 return (ps : pss)
1174
1175 -- | Read @n@ bytes into a 'ByteString', directly from the
1176 -- specified 'Handle', in chunks of size @k@.
1177 hGetN :: Int -> Handle -> Int -> IO ByteString
1178 hGetN _ _ 0 = return empty
1179 hGetN k h n = readChunks n >>= return . LPS
1180 where
1181 STRICT1(readChunks)
1182 readChunks i = do
1183 ps <- P.hGet h (min k i)
1184 case P.length ps of
1185 0 -> return []
1186 m -> do pss <- readChunks (i - m)
1187 return (ps : pss)
1188
1189 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1190 -- waiting for data to become available, instead it returns only whatever data
1191 -- is available. Chunks are read on demand, in @k@-sized chunks.
1192 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1193 #if defined(__GLASGOW_HASKELL__)
1194 hGetNonBlockingN _ _ 0 = return empty
1195 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1196 where
1197 STRICT1(readChunks)
1198 readChunks i = do
1199 ps <- P.hGetNonBlocking h (min k i)
1200 case P.length ps of
1201 0 -> return []
1202 m -> do pss <- readChunks (i - m)
1203 return (ps : pss)
1204 #else
1205 hGetNonBlockingN = hGetN
1206 #endif
1207
1208 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1209 -- are read on demand, using the default chunk size.
1210 hGetContents :: Handle -> IO ByteString
1211 hGetContents = hGetContentsN defaultChunkSize
1212
1213 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1214 hGet :: Handle -> Int -> IO ByteString
1215 hGet = hGetN defaultChunkSize
1216
1217 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1218 -- waiting for data to become available, instead it returns only whatever data
1219 -- is available.
1220 #if defined(__GLASGOW_HASKELL__)
1221 hGetNonBlocking :: Handle -> Int -> IO ByteString
1222 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1223 #else
1224 hGetNonBlocking = hGet
1225 #endif
1226
1227 -- | Read an entire file /lazily/ into a 'ByteString'.
1228 readFile :: FilePath -> IO ByteString
1229 readFile f = openBinaryFile f ReadMode >>= hGetContents
1230
1231 -- | Write a 'ByteString' to a file.
1232 writeFile :: FilePath -> ByteString -> IO ()
1233 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1234 (\hdl -> hPut hdl txt)
1235
1236 -- | Append a 'ByteString' to a file.
1237 appendFile :: FilePath -> ByteString -> IO ()
1238 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1239 (\hdl -> hPut hdl txt)
1240
1241 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1242 getContents :: IO ByteString
1243 getContents = hGetContents stdin
1244
1245 -- | Outputs a 'ByteString' to the specified 'Handle'.
1246 hPut :: Handle -> ByteString -> IO ()
1247 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1248
1249 -- | Write a ByteString to stdout
1250 putStr :: ByteString -> IO ()
1251 putStr = hPut stdout
1252
1253 -- | Write a ByteString to stdout, appending a newline byte
1254 putStrLn :: ByteString -> IO ()
1255 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1256
1257 -- | The interact function takes a function of type @ByteString -> ByteString@
1258 -- as its argument. The entire input from the standard input device is passed
1259 -- to this function as its argument, and the resulting string is output on the
1260 -- standard output device. It's great for writing one line programs!
1261 interact :: (ByteString -> ByteString) -> IO ()
1262 interact transformer = putStr . transformer =<< getContents
1263
1264 -- ---------------------------------------------------------------------
1265 -- Internal utilities
1266
1267 -- Common up near identical calls to `error' to reduce the number
1268 -- constant strings created when compiled:
1269 errorEmptyList :: String -> a
1270 errorEmptyList fun = moduleError fun "empty ByteString"
1271
1272 moduleError :: String -> String -> a
1273 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1274
1275 -- A manually fused version of "filter (not.null) . map f", since they
1276 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1277 --
1278 -- TODO fuse.
1279 --
1280 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1281 filterMap _ [] = []
1282 filterMap f (x:xs) = case f x of
1283 y | P.null y -> filterMap f xs -- manually fuse the invariant filter
1284 | otherwise -> y : filterMap f xs
1285 {-# INLINE filterMap #-}
1286
1287
1288 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1289 -- of the string if no element is found, rather than Nothing.
1290 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
1291 findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1292 where
1293 STRICT2(go)
1294 go ptr n | n >= l = return l
1295 | otherwise = do w <- peek ptr
1296 if k w
1297 then return n
1298 else go (ptr `plusPtr` 1) (n+1)
1299 {-# INLINE findIndexOrEnd #-}