LPS chunk sizes should be 16 bytes, not 17.
[packages/random.git] / Data / ByteString / Lazy.hs
1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans -fno-warn-incomplete-patterns #-}
2 -- |
3 -- Module : Data.ByteString.Lazy
4 -- Copyright : (c) Don Stewart 2006
5 -- (c) Duncan Coutts 2006
6 -- License : BSD-style
7 --
8 -- Maintainer : dons@cse.unsw.edu.au
9 -- Stability : experimental
10 -- Portability : non-portable (instance of type synonym)
11 --
12 -- A time and space-efficient implementation of lazy byte vectors
13 -- using lists of packed 'Word8' arrays, suitable for high performance
14 -- use, both in terms of large data quantities, or high speed
15 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
16 -- arrays of bytes. They provide a means to manipulate large byte vectors
17 -- without requiring the entire vector be resident in memory.
18 --
19 -- Some operations, such as concat, append, reverse and cons, have
20 -- better complexity than their "Data.ByteString" equivalents, due to
21 -- optimisations resulting from the list spine structure. And for other
22 -- operations lazy ByteStrings are usually within a few percent of
23 -- strict ones, but with better heap usage. For data larger than the
24 -- available memory, or if you have tight memory constraints, this
25 -- module will be the only option. The default chunk size is 64k, which
26 -- should be good in most circumstances. For people with large L2
27 -- caches, you may want to increase this to fit your cache.
28 --
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with "Prelude" functions. eg.
31 --
32 -- > import qualified Data.ByteString.Lazy as B
33 --
34 -- Original GHC implementation by Bryan O\'Sullivan.
35 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
36 -- Rewritten to support slices and use 'Foreign.ForeignPtr.ForeignPtr'
37 -- by David Roundy.
38 -- Polished and extended by Don Stewart.
39 -- Lazy variant by Duncan Coutts and Don Stewart.
40 --
41
42 module Data.ByteString.Lazy (
43
44 -- * The @ByteString@ type
45 ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable
46
47 -- * Introducing and eliminating 'ByteString's
48 empty, -- :: ByteString
49 singleton, -- :: Word8 -> ByteString
50 pack, -- :: [Word8] -> ByteString
51 unpack, -- :: ByteString -> [Word8]
52 fromChunks, -- :: [Strict.ByteString] -> ByteString
53 toChunks, -- :: ByteString -> [Strict.ByteString]
54
55 -- * Basic interface
56 cons, -- :: Word8 -> ByteString -> ByteString
57 snoc, -- :: ByteString -> Word8 -> ByteString
58 append, -- :: ByteString -> ByteString -> ByteString
59 head, -- :: ByteString -> Word8
60 last, -- :: ByteString -> Word8
61 tail, -- :: ByteString -> ByteString
62 init, -- :: ByteString -> ByteString
63 null, -- :: ByteString -> Bool
64 length, -- :: ByteString -> Int64
65
66 -- * Transformating ByteStrings
67 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
68 reverse, -- :: ByteString -> ByteString
69 -- intersperse, -- :: Word8 -> ByteString -> ByteString
70 transpose, -- :: [ByteString] -> [ByteString]
71
72 -- * Reducing 'ByteString's (folds)
73 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
74 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
75 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
77 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
78 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79
80 -- ** Special folds
81 concat, -- :: [ByteString] -> ByteString
82 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
83 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
84 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
85 maximum, -- :: ByteString -> Word8
86 minimum, -- :: ByteString -> Word8
87
88 -- * Building ByteStrings
89 -- ** Scans
90 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
91 -- scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
92 -- scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
93 -- scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
94
95 -- ** Accumulating maps
96 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
97 mapIndexed, -- :: (Int64 -> Word8 -> Word8) -> ByteString -> ByteString
98
99 -- ** Infinite ByteStrings
100 repeat, -- :: Word8 -> ByteString
101 replicate, -- :: Int64 -> Word8 -> ByteString
102 cycle, -- :: ByteString -> ByteString
103 iterate, -- :: (Word8 -> Word8) -> Word8 -> ByteString
104
105 -- ** Unfolding
106 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
107
108 -- * Substrings
109
110 -- ** Breaking strings
111 take, -- :: Int64 -> ByteString -> ByteString
112 drop, -- :: Int64 -> ByteString -> ByteString
113 splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString)
114 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
115 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
116 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
117 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
118 group, -- :: ByteString -> [ByteString]
119 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
120 inits, -- :: ByteString -> [ByteString]
121 tails, -- :: ByteString -> [ByteString]
122
123 -- ** Breaking into many substrings
124 split, -- :: Word8 -> ByteString -> [ByteString]
125 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
126
127 -- ** Joining strings
128 join, -- :: ByteString -> [ByteString] -> ByteString
129
130 -- * Predicates
131 isPrefixOf, -- :: ByteString -> ByteString -> Bool
132 -- isSuffixOf, -- :: ByteString -> ByteString -> Bool
133
134 -- * Searching ByteStrings
135
136 -- ** Searching by equality
137 elem, -- :: Word8 -> ByteString -> Bool
138 notElem, -- :: Word8 -> ByteString -> Bool
139
140 -- ** Searching with a predicate
141 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
142 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
143 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
144
145 -- * Indexing ByteStrings
146 index, -- :: ByteString -> Int64 -> Word8
147 elemIndex, -- :: Word8 -> ByteString -> Maybe Int64
148 elemIndices, -- :: Word8 -> ByteString -> [Int64]
149 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
150 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int64]
151 count, -- :: Word8 -> ByteString -> Int64
152
153 -- * Zipping and unzipping ByteStrings
154 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
155 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
156 -- unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
157
158 -- * Ordered ByteStrings
159 -- sort, -- :: ByteString -> ByteString
160
161 copy, -- :: ByteString -> ByteString
162
163 -- * I\/O with 'ByteString's
164
165 -- ** Standard input and output
166 getContents, -- :: IO ByteString
167 putStr, -- :: ByteString -> IO ()
168 putStrLn, -- :: ByteString -> IO ()
169 interact, -- :: (ByteString -> ByteString) -> IO ()
170
171 -- ** Files
172 readFile, -- :: FilePath -> IO ByteString
173 writeFile, -- :: FilePath -> ByteString -> IO ()
174 appendFile, -- :: FilePath -> ByteString -> IO ()
175
176 -- ** I\/O with Handles
177 hGetContents, -- :: Handle -> IO ByteString
178 hGet, -- :: Handle -> Int -> IO ByteString
179 hPut, -- :: Handle -> ByteString -> IO ()
180 hGetNonBlocking, -- :: Handle -> IO ByteString
181
182 -- hGetN, -- :: Int -> Handle -> Int -> IO ByteString
183 -- hGetContentsN, -- :: Int -> Handle -> IO ByteString
184 -- hGetNonBlockingN, -- :: Int -> Handle -> IO ByteString
185
186 ) where
187
188 import qualified Prelude
189 import Prelude hiding
190 (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
191 ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
192 ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
193 ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
194 ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
195
196 import qualified Data.List as L -- L for list/lazy
197 import qualified Data.ByteString as P -- P for packed
198 import qualified Data.ByteString.Base as P
199 import Data.ByteString.Base (LazyByteString(..))
200 import qualified Data.ByteString.Fusion as P
201 import Data.ByteString.Fusion (PairS(..),loopL)
202
203 import Data.Monoid (Monoid(..))
204
205 import Data.Word (Word8)
206 import Data.Int (Int64)
207 import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..)
208 ,hClose,hWaitForInput,hIsEOF)
209 import System.IO.Unsafe
210 import Control.Exception (bracket)
211
212 import Foreign.ForeignPtr (withForeignPtr)
213 import Foreign.Ptr
214 import Foreign.Storable
215
216 -- -----------------------------------------------------------------------------
217 --
218 -- Useful macros, until we have bang patterns
219 --
220
221 #define STRICT1(f) f a | a `seq` False = undefined
222 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
223 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
224 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
225 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
226
227 -- -----------------------------------------------------------------------------
228
229 type ByteString = LazyByteString
230
231 --
232 -- hmm, what about getting the PS constructor unpacked into the cons cell?
233 --
234 -- data List = Nil | Cons {-# UNPACK #-} !P.ByteString List
235 --
236 -- Would avoid one indirection per chunk.
237 --
238
239 unLPS :: ByteString -> [P.ByteString]
240 unLPS (LPS xs) = xs
241 {-# INLINE unLPS #-}
242
243 instance Eq ByteString
244 where (==) = eq
245
246 instance Ord ByteString
247 where compare = compareBytes
248
249 instance Monoid ByteString where
250 mempty = empty
251 mappend = append
252 mconcat = concat
253
254 ------------------------------------------------------------------------
255
256 -- XXX
257 -- The data type invariant:
258 -- Every ByteString is either empty or consists of non-null ByteStrings.
259 -- All functions must preserve this, and the QC properties must check this.
260 --
261 _invariant :: ByteString -> Bool
262 _invariant (LPS []) = True
263 _invariant (LPS xs) = L.all (not . P.null) xs
264
265 -- In a form useful for QC testing
266 _checkInvariant :: ByteString -> ByteString
267 _checkInvariant lps
268 | _invariant lps = lps
269 | otherwise = moduleError "invariant" ("violation: " ++ show lps)
270
271 -- The Data abstraction function
272 --
273 _abstr :: ByteString -> P.ByteString
274 _abstr (LPS []) = P.empty
275 _abstr (LPS xs) = P.concat xs
276
277 -- The representation uses lists of packed chunks. When we have to convert from
278 -- a lazy list to the chunked representation, then by default we'll use this
279 -- chunk size. Some functions give you more control over the chunk size.
280 --
281 -- Measurements here:
282 -- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
283 --
284 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
285 -- The following value assumes people have something greater than 128k,
286 -- and need to share the cache with other programs.
287 --
288 defaultChunkSize :: Int
289 defaultChunkSize = 32 * k - overhead
290 where k = 1024
291 overhead = 2 * sizeOf (undefined :: Int)
292
293 smallChunkSize :: Int
294 smallChunkSize = 4 * k - overhead
295 where k = 1024
296 overhead = 2 * sizeOf (undefined :: Int)
297
298 -- defaultChunkSize = 1
299
300 ------------------------------------------------------------------------
301
302 eq :: ByteString -> ByteString -> Bool
303 eq (LPS xs) (LPS ys) = eq' xs ys
304 where eq' [] [] = True
305 eq' [] _ = False
306 eq' _ [] = False
307 eq' (a:as) (b:bs) =
308 case compare (P.length a) (P.length b) of
309 LT -> a == (P.take (P.length a) b) && eq' as (P.drop (P.length a) b : bs)
310 EQ -> a == b && eq' as bs
311 GT -> (P.take (P.length b) a) == b && eq' (P.drop (P.length b) a : as) bs
312
313 compareBytes :: ByteString -> ByteString -> Ordering
314 compareBytes (LPS xs) (LPS ys) = cmp xs ys
315 where cmp [] [] = EQ
316 cmp [] _ = LT
317 cmp _ [] = GT
318 cmp (a:as) (b:bs) =
319 case compare (P.length a) (P.length b) of
320 LT -> case compare a (P.take (P.length a) b) of
321 EQ -> cmp as (P.drop (P.length a) b : bs)
322 result -> result
323 EQ -> case compare a b of
324 EQ -> cmp as bs
325 result -> result
326 GT -> case compare (P.take (P.length b) a) b of
327 EQ -> cmp (P.drop (P.length b) a : as) bs
328 result -> result
329
330 -- -----------------------------------------------------------------------------
331 -- Introducing and eliminating 'ByteString's
332
333 -- | /O(1)/ The empty 'ByteString'
334 empty :: ByteString
335 empty = LPS []
336 {-# NOINLINE empty #-}
337
338 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
339 singleton :: Word8 -> ByteString
340 singleton c = LPS [P.singleton c]
341 {-# NOINLINE singleton #-}
342
343 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
344 pack :: [Word8] -> ByteString
345 pack str = LPS $ L.map P.pack (chunk defaultChunkSize str)
346
347 -- ?
348 chunk :: Int -> [a] -> [[a]]
349 chunk _ [] = []
350 chunk size xs = case L.splitAt size xs of (xs', xs'') -> xs' : chunk size xs''
351
352 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
353 unpack :: ByteString -> [Word8]
354 unpack (LPS ss) = L.concatMap P.unpack ss
355 {-# INLINE unpack #-}
356
357 -- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString'
358 fromChunks :: [P.ByteString] -> ByteString
359 fromChunks ls = LPS $ L.filter (not . P.null) ls
360
361 -- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
362 toChunks :: ByteString -> [P.ByteString]
363 toChunks (LPS s) = s
364
365 ------------------------------------------------------------------------
366
367 {-
368 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
369 -- conversion function
370 packWith :: (a -> Word8) -> [a] -> ByteString
371 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
372 {-# INLINE packWith #-}
373 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
374
375 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
376 unpackWith :: (Word8 -> a) -> ByteString -> [a]
377 unpackWith k (LPS ss) = L.concatMap (P.unpackWith k) ss
378 {-# INLINE unpackWith #-}
379 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
380 -}
381
382 -- ---------------------------------------------------------------------
383 -- Basic interface
384
385 -- | /O(1)/ Test whether a ByteString is empty.
386 null :: ByteString -> Bool
387 null (LPS []) = True
388 null (_) = False
389 {-# INLINE null #-}
390
391 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
392 length :: ByteString -> Int64
393 length (LPS ss) = L.foldl' (\n ps -> n + fromIntegral (P.length ps)) 0 ss
394
395 -- avoid the intermediate list?
396 -- length (LPS ss) = L.foldl lengthF 0 ss
397 -- where lengthF n s = let m = n + fromIntegral (P.length s) in m `seq` m
398 {-# INLINE length #-}
399
400 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. Unlike '(:)' however it is
401 -- strict in the ByteString that we are consing onto. More precisely, it forces
402 -- the head and the first chunk. It does this because, for space efficiency, it
403 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
404 -- new \'chunk\'.
405 --
406 -- So that means you can't use a lazy recursive contruction like this:
407 --
408 -- > let xs = cons c xs in xs
409 --
410 -- You can however use 'repeat' and 'cycle' to build infinite lazy ByteStrings.
411 --
412 cons :: Word8 -> ByteString -> ByteString
413 cons c (LPS (s:ss)) | P.length s < 16 = LPS (P.cons c s : ss)
414 cons c (LPS ss) = LPS (P.singleton c : ss)
415 {-# INLINE cons #-}
416
417 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
418 snoc :: ByteString -> Word8 -> ByteString
419 snoc (LPS ss) c = LPS (ss ++ [P.singleton c])
420 {-# INLINE snoc #-}
421
422 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
423 head :: ByteString -> Word8
424 head (LPS []) = errorEmptyList "head"
425 head (LPS (x:_)) = P.unsafeHead x
426 {-# INLINE head #-}
427
428 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
429 tail :: ByteString -> ByteString
430 tail (LPS []) = errorEmptyList "tail"
431 tail (LPS (x:xs))
432 | P.length x == 1 = LPS xs
433 | otherwise = LPS (P.unsafeTail x : xs)
434 {-# INLINE tail #-}
435
436 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite and non-empty.
437 last :: ByteString -> Word8
438 last (LPS []) = errorEmptyList "last"
439 last (LPS xs) = P.last (L.last xs)
440 {-# INLINE last #-}
441
442 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
443 init :: ByteString -> ByteString
444 init (LPS []) = errorEmptyList "init"
445 init (LPS xs)
446 | P.length y == 1 = LPS ys
447 | otherwise = LPS (ys ++ [P.init y])
448 where (y,ys) = (L.last xs, L.init xs)
449 {-# INLINE init #-}
450
451 -- | /O(n)/ Append two ByteStrings
452 append :: ByteString -> ByteString -> ByteString
453 append (LPS []) (LPS ys) = LPS ys
454 append (LPS xs) (LPS []) = LPS xs
455 append (LPS xs) (LPS ys) = LPS (xs ++ ys)
456 {-# INLINE append #-}
457
458 -- ---------------------------------------------------------------------
459 -- Transformations
460
461 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
462 -- element of @xs@.
463 map :: (Word8 -> Word8) -> ByteString -> ByteString
464 --map f (LPS xs) = LPS (L.map (P.map' f) xs)
465 map f = LPS . P.loopArr . loopL (P.mapEFL f) P.NoAcc . unLPS
466 {-# INLINE map #-}
467
468 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
469 reverse :: ByteString -> ByteString
470 reverse (LPS ps) = LPS (rev [] ps)
471 where rev a [] = a
472 rev a (x:xs) = rev (P.reverse x:a) xs
473 -- note, here is one example where the extra element lazyness is an advantage.
474 -- we can reerse the list of chunks strictly but reverse each chunk lazily
475 -- so while we may force the whole lot into memory we do not need to copy
476 -- each chunk until it is used.
477 {-# INLINE reverse #-}
478
479 -- The 'intersperse' function takes a 'Word8' and a 'ByteString' and
480 -- \`intersperses\' that byte between the elements of the 'ByteString'.
481 -- It is analogous to the intersperse function on Lists.
482 -- intersperse :: Word8 -> ByteString -> ByteString
483 -- intersperse = error "FIXME: not yet implemented"
484
485 {-
486 intersperse c (LPS []) = LPS []
487 intersperse c (LPS (x:xs)) = LPS (P.intersperse c x : L.map intersperse')
488 where intersperse' c ps@(PS x s l) =
489 P.create (2*l) $ \p -> withForeignPtr x $ \f ->
490 poke p c
491 c_intersperse (p `plusPtr` 1) (f `plusPtr` s) l c
492 -}
493
494 -- | The 'transpose' function transposes the rows and columns of its
495 -- 'ByteString' argument.
496 transpose :: [ByteString] -> [ByteString]
497 transpose s = L.map (\ss -> LPS [P.pack ss]) (L.transpose (L.map unpack s))
498
499 -- ---------------------------------------------------------------------
500 -- Reducing 'ByteString's
501
502 -- | 'foldl', applied to a binary operator, a starting value (typically
503 -- the left-identity of the operator), and a ByteString, reduces the
504 -- ByteString using the binary operator, from left to right.
505 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
506 --foldl f z (LPS xs) = L.foldl (P.foldl f) z xs
507 foldl f z = P.loopAcc . loopL (P.foldEFL f) z . unLPS
508 {-# INLINE foldl #-}
509
510 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
511 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
512 --foldl' f z (LPS xs) = L.foldl' (P.foldl' f) z xs
513 foldl' f z = P.loopAcc . loopL (P.foldEFL' f) z . unLPS
514 {-# INLINE foldl' #-}
515
516 -- | 'foldr', applied to a binary operator, a starting value
517 -- (typically the right-identity of the operator), and a ByteString,
518 -- reduces the ByteString using the binary operator, from right to left.
519 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
520 foldr k z (LPS xs) = L.foldr (flip (P.foldr k)) z xs
521 {-# INLINE foldr #-}
522
523 -- | 'foldl1' is a variant of 'foldl' that has no starting value
524 -- argument, and thus must be applied to non-empty 'ByteStrings'.
525 -- This function is subject to array fusion.
526 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
527 foldl1 _ (LPS []) = errorEmptyList "foldl1"
528 foldl1 f (LPS (x:xs)) = foldl f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
529
530 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
531 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
532 foldl1' _ (LPS []) = errorEmptyList "foldl1'"
533 foldl1' f (LPS (x:xs)) = foldl' f (P.unsafeHead x) (LPS (P.unsafeTail x : xs))
534
535 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
536 -- and thus must be applied to non-empty 'ByteString's
537 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
538 foldr1 _ (LPS []) = errorEmptyList "foldr1"
539 foldr1 f (LPS ps) = foldr1' ps
540 where foldr1' (x:[]) = P.foldr1 f x
541 foldr1' (x:xs) = P.foldr f (foldr1' xs) x
542
543 -- ---------------------------------------------------------------------
544 -- Special folds
545
546 -- | /O(n)/ Concatenate a list of ByteStrings.
547 concat :: [ByteString] -> ByteString
548 concat lpss = LPS (L.concatMap (\(LPS xs) -> xs) lpss)
549
550 -- | Map a function over a 'ByteString' and concatenate the results
551 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
552 concatMap f (LPS lps) = LPS (filterMap (P.concatMap k) lps)
553 where
554 k w = case f w of LPS xs -> P.concat xs
555
556 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
557 -- any element of the 'ByteString' satisfies the predicate.
558 any :: (Word8 -> Bool) -> ByteString -> Bool
559 any f (LPS xs) = L.or (L.map (P.any f) xs)
560 -- todo fuse
561
562 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
563 -- if all elements of the 'ByteString' satisfy the predicate.
564 all :: (Word8 -> Bool) -> ByteString -> Bool
565 all f (LPS xs) = L.and (L.map (P.all f) xs)
566 -- todo fuse
567
568 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
569 maximum :: ByteString -> Word8
570 maximum (LPS []) = errorEmptyList "maximum"
571 maximum (LPS (x:xs)) = L.foldl' (\n ps -> n `max` P.maximum ps) (P.maximum x) xs
572 {-# INLINE maximum #-}
573
574 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
575 minimum :: ByteString -> Word8
576 minimum (LPS []) = errorEmptyList "minimum"
577 minimum (LPS (x:xs)) = L.foldl' (\n ps -> n `min` P.minimum ps) (P.minimum x) xs
578 {-# INLINE minimum #-}
579
580 -- | The 'mapAccumL' function behaves like a combination of 'map' and
581 -- 'foldl'; it applies a function to each element of a ByteString,
582 -- passing an accumulating parameter from left to right, and returning a
583 -- final value of this accumulator together with the new ByteString.
584 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
585 mapAccumL f z = (\(a :*: ps) -> (a, LPS ps)) . loopL (P.mapAccumEFL f) z . unLPS
586
587 -- | /O(n)/ map Word8 functions, provided with the index at each position
588 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
589 mapIndexed f = LPS . P.loopArr . loopL (P.mapIndexEFL f) 0 . unLPS
590
591 -- ---------------------------------------------------------------------
592 -- Building ByteStrings
593
594 -- | 'scanl' is similar to 'foldl', but returns a list of successive
595 -- reduced values from the left. This function will fuse.
596 --
597 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
598 --
599 -- Note that
600 --
601 -- > last (scanl f z xs) == foldl f z xs.
602 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
603 scanl f z ps = LPS . P.loopArr . loopL (P.scanEFL f) z . unLPS $ (ps `snoc` 0)
604 {-# INLINE scanl #-}
605
606 -- ---------------------------------------------------------------------
607 -- Unfolds and replicates
608
609 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
610 -- of @f@ to @x@:
611 --
612 -- > iterate f x == [x, f x, f (f x), ...]
613 --
614 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
615 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
616
617 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
618 -- element.
619 --
620 repeat :: Word8 -> ByteString
621 repeat c = LPS (L.repeat block)
622 where block = P.replicate smallChunkSize c
623
624 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
625 -- the value of every element.
626 --
627 replicate :: Int64 -> Word8 -> ByteString
628 replicate w c
629 | w <= 0 = empty
630 | w < fromIntegral smallChunkSize = LPS [P.replicate (fromIntegral w) c]
631 | r == 0 = LPS (L.genericReplicate q s) -- preserve invariant
632 | otherwise = LPS (P.unsafeTake (fromIntegral r) s : L.genericReplicate q s)
633 where
634 s = P.replicate smallChunkSize c
635 (q, r) = quotRem w (fromIntegral smallChunkSize)
636
637 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
638 -- the infinite repetition of the original ByteString.
639 --
640 cycle :: ByteString -> ByteString
641 cycle (LPS []) = errorEmptyList "cycle"
642 cycle (LPS xs) = LPS (L.cycle xs)
643
644 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
645 -- 'unfoldr' builds a ByteString from a seed value. The function takes
646 -- the element and returns 'Nothing' if it is done producing the
647 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
648 -- prepending to the ByteString and @b@ is used as the next element in a
649 -- recursive call.
650 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
651 unfoldr f = LPS . unfoldChunk 32
652 where unfoldChunk n x =
653 case P.unfoldrN n f x of
654 (s, Nothing)
655 | P.null s -> []
656 | otherwise -> s : []
657 (s, Just x') -> s : unfoldChunk ((n*2) `min` smallChunkSize) x'
658
659 -- ---------------------------------------------------------------------
660 -- Substrings
661
662 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
663 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
664 take :: Int64 -> ByteString -> ByteString
665 take i _ | i <= 0 = empty
666 take i (LPS ps) = LPS (take' i ps)
667 where take' 0 _ = []
668 take' _ [] = []
669 take' n (x:xs) =
670 if n < fromIntegral (P.length x)
671 then P.take (fromIntegral n) x : []
672 else x : take' (n - fromIntegral (P.length x)) xs
673
674 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
675 -- elements, or @[]@ if @n > 'length' xs@.
676 drop :: Int64 -> ByteString -> ByteString
677 drop i p | i <= 0 = p
678 drop i (LPS ps) = LPS (drop' i ps)
679 where drop' 0 xs = xs
680 drop' _ [] = []
681 drop' n (x:xs) =
682 if n < fromIntegral (P.length x)
683 then P.drop (fromIntegral n) x : xs
684 else drop' (n - fromIntegral (P.length x)) xs
685
686 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
687 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
688 splitAt i p | i <= 0 = (empty, p)
689 splitAt i (LPS ps) = case splitAt' i ps of (a,b) -> (LPS a, LPS b)
690 where splitAt' 0 xs = ([], xs)
691 splitAt' _ [] = ([], [])
692 splitAt' n (x:xs) =
693 if n < fromIntegral (P.length x)
694 then (P.take (fromIntegral n) x : [],
695 P.drop (fromIntegral n) x : xs)
696 else let (xs', xs'') = splitAt' (n - fromIntegral (P.length x)) xs
697 in (x:xs', xs'')
698
699
700 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
701 -- returns the longest prefix (possibly empty) of @xs@ of elements that
702 -- satisfy @p@.
703 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
704 takeWhile f (LPS ps) = LPS (takeWhile' ps)
705 where takeWhile' [] = []
706 takeWhile' (x:xs) =
707 case findIndexOrEnd (not . f) x of
708 0 -> []
709 n | n < P.length x -> P.take n x : []
710 | otherwise -> x : takeWhile' xs
711
712 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
713 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
714 dropWhile f (LPS ps) = LPS (dropWhile' ps)
715 where dropWhile' [] = []
716 dropWhile' (x:xs) =
717 case findIndexOrEnd (not . f) x of
718 n | n < P.length x -> P.drop n x : xs
719 | otherwise -> dropWhile' xs
720
721 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
722 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
723 break f (LPS ps) = case (break' ps) of (a,b) -> (LPS a, LPS b)
724 where break' [] = ([], [])
725 break' (x:xs) =
726 case findIndexOrEnd f x of
727 0 -> ([], x : xs)
728 n | n < P.length x -> (P.take n x : [], P.drop n x : xs)
729 | otherwise -> let (xs', xs'') = break' xs
730 in (x : xs', xs'')
731
732 --
733 -- TODO
734 --
735 -- Add rules
736 --
737
738 {-
739 -- | 'breakByte' breaks its ByteString argument at the first occurence
740 -- of the specified byte. It is more efficient than 'break' as it is
741 -- implemented with @memchr(3)@. I.e.
742 --
743 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
744 --
745 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
746 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
747 where breakByte' [] = ([], [])
748 breakByte' (x:xs) =
749 case P.elemIndex c x of
750 Just 0 -> ([], x : xs)
751 Just n -> (P.take n x : [], P.drop n x : xs)
752 Nothing -> let (xs', xs'') = breakByte' xs
753 in (x : xs', xs'')
754
755 -- | 'spanByte' breaks its ByteString argument at the first
756 -- occurence of a byte other than its argument. It is more efficient
757 -- than 'span (==)'
758 --
759 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
760 --
761 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
762 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
763 where spanByte' [] = ([], [])
764 spanByte' (x:xs) =
765 case P.spanByte c x of
766 (x', x'') | P.null x' -> ([], x : xs)
767 | P.null x'' -> let (xs', xs'') = spanByte' xs
768 in (x : xs', xs'')
769 | otherwise -> (x' : [], x'' : xs)
770 -}
771
772 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
773 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
774 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
775 span p = break (not . p)
776
777 -- | /O(n)/ Splits a 'ByteString' into components delimited by
778 -- separators, where the predicate returns True for a separator element.
779 -- The resulting components do not contain the separators. Two adjacent
780 -- separators result in an empty component in the output. eg.
781 --
782 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
783 -- > splitWith (=='a') [] == []
784 --
785 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
786 splitWith _ (LPS []) = []
787 splitWith p (LPS (a:as)) = comb [] (P.splitWith p a) as
788
789 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
790 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
791 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.splitWith p x) xs
792 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
793
794 cons' x xs | P.null x = xs
795 | otherwise = x:xs
796 {-# INLINE cons' #-}
797 {-# INLINE splitWith #-}
798
799 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
800 -- argument, consuming the delimiter. I.e.
801 --
802 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
803 -- > split 'a' "aXaXaXa" == ["","X","X","X",""]
804 -- > split 'x' "x" == ["",""]
805 --
806 -- and
807 --
808 -- > join [c] . split c == id
809 -- > split == splitWith . (==)
810 --
811 -- As for all splitting functions in this library, this function does
812 -- not copy the substrings, it just constructs new 'ByteStrings' that
813 -- are slices of the original.
814 --
815 split :: Word8 -> ByteString -> [ByteString]
816 split _ (LPS []) = []
817 split c (LPS (a:as)) = comb [] (P.split c a) as
818
819 where comb :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
820 comb acc (s:[]) [] = LPS (L.reverse (cons' s acc)) : []
821 comb acc (s:[]) (x:xs) = comb (cons' s acc) (P.split c x) xs
822 comb acc (s:ss) xs = LPS (L.reverse (cons' s acc)) : comb [] ss xs
823
824 cons' x xs | P.null x = xs
825 | otherwise = x:xs
826 {-# INLINE cons' #-}
827 {-# INLINE split #-}
828
829 {-
830 -- | Like 'splitWith', except that sequences of adjacent separators are
831 -- treated as a single separator. eg.
832 --
833 -- > tokens (=='a') "aabbaca" == ["bb","c"]
834 --
835 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
836 tokens f = L.filter (not.null) . splitWith f
837 -}
838
839 -- | The 'group' function takes a ByteString and returns a list of
840 -- ByteStrings such that the concatenation of the result is equal to the
841 -- argument. Moreover, each sublist in the result contains only equal
842 -- elements. For example,
843 --
844 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
845 --
846 -- It is a special case of 'groupBy', which allows the programmer to
847 -- supply their own equality test.
848 group :: ByteString -> [ByteString]
849 group (LPS []) = []
850 group (LPS (a:as)) = group' [] (P.group a) as
851 where group' :: [P.ByteString] -> [P.ByteString] -> [P.ByteString] -> [ByteString]
852 group' acc@(s':_) ss@(s:_) xs
853 | P.unsafeHead s'
854 /= P.unsafeHead s = LPS (L.reverse acc) : group' [] ss xs
855 group' acc (s:[]) [] = LPS (L.reverse (s : acc)) : []
856 group' acc (s:[]) (x:xs) = group' (s:acc) (P.group x) xs
857 group' acc (s:ss) xs = LPS (L.reverse (s : acc)) : group' [] ss xs
858
859 {-
860 TODO: check if something like this might be faster
861
862 group :: ByteString -> [ByteString]
863 group xs
864 | null xs = []
865 | otherwise = ys : group zs
866 where
867 (ys, zs) = spanByte (unsafeHead xs) xs
868 -}
869
870 -- | The 'groupBy' function is the non-overloaded version of 'group'.
871 --
872 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
873 groupBy = error "Data.ByteString.Lazy.groupBy: unimplemented"
874 {-
875 groupBy _ (LPS []) = []
876 groupBy k (LPS (a:as)) = groupBy' [] 0 (P.groupBy k a) as
877 where groupBy' :: [P.ByteString] -> Word8 -> [P.ByteString] -> [P.ByteString] -> [ByteString]
878 groupBy' acc@(_:_) c ss@(s:_) xs
879 | not (c `k` P.unsafeHead s) = LPS (L.reverse acc) : groupBy' [] 0 ss xs
880 groupBy' acc _ (s:[]) [] = LPS (L.reverse (s : acc)) : []
881 groupBy' [] _ (s:[]) (x:xs) = groupBy' (s:[]) (P.unsafeHead s) (P.groupBy k x) xs
882 groupBy' acc c (s:[]) (x:xs) = groupBy' (s:acc) c (P.groupBy k x) xs
883 groupBy' acc _ (s:ss) xs = LPS (L.reverse (s : acc)) : groupBy' [] 0 ss xs
884 -}
885
886 {-
887 TODO: check if something like this might be faster
888
889 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
890 groupBy k xs
891 | null xs = []
892 | otherwise = take n xs : groupBy k (drop n xs)
893 where
894 n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
895 -}
896
897 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
898 -- 'ByteString's and concatenates the list after interspersing the first
899 -- argument between each element of the list.
900 join :: ByteString -> [ByteString] -> ByteString
901 join s = concat . (L.intersperse s)
902
903 -- ---------------------------------------------------------------------
904 -- Indexing ByteStrings
905
906 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
907 index :: ByteString -> Int64 -> Word8
908 index _ i | i < 0 = moduleError "index" ("negative index: " ++ show i)
909 index (LPS ps) i = index' ps i
910 where index' [] n = moduleError "index" ("index too large: " ++ show n)
911 index' (x:xs) n
912 | n >= fromIntegral (P.length x) =
913 index' xs (n - fromIntegral (P.length x))
914 | otherwise = P.unsafeIndex x (fromIntegral n)
915
916 -- | /O(n)/ The 'elemIndex' function returns the index of the first
917 -- element in the given 'ByteString' which is equal to the query
918 -- element, or 'Nothing' if there is no such element.
919 -- This implementation uses memchr(3).
920 elemIndex :: Word8 -> ByteString -> Maybe Int64
921 elemIndex c (LPS ps) = elemIndex' 0 ps
922 where elemIndex' _ [] = Nothing
923 elemIndex' n (x:xs) =
924 case P.elemIndex c x of
925 Nothing -> elemIndex' (n + fromIntegral (P.length x)) xs
926 Just i -> Just (n + fromIntegral i)
927
928 {-
929 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
930 -- element in the given 'ByteString' which is equal to the query
931 -- element, or 'Nothing' if there is no such element. The following
932 -- holds:
933 --
934 -- > elemIndexEnd c xs ==
935 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
936 --
937 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
938 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
939 go (p `plusPtr` s) (l-1)
940 where
941 STRICT2(go)
942 go p i | i < 0 = return Nothing
943 | otherwise = do ch' <- peekByteOff p i
944 if ch == ch'
945 then return $ Just i
946 else go p (i-1)
947 -}
948 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
949 -- the indices of all elements equal to the query element, in ascending order.
950 -- This implementation uses memchr(3).
951 elemIndices :: Word8 -> ByteString -> [Int64]
952 elemIndices c (LPS ps) = elemIndices' 0 ps
953 where elemIndices' _ [] = []
954 elemIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.elemIndices c x)
955 ++ elemIndices' (n + fromIntegral (P.length x)) xs
956
957 -- | count returns the number of times its argument appears in the ByteString
958 --
959 -- > count = length . elemIndices
960 --
961 -- But more efficiently than using length on the intermediate list.
962 count :: Word8 -> ByteString -> Int64
963 count w (LPS xs) = L.foldl' (\n ps -> n + fromIntegral (P.count w ps)) 0 xs
964
965 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
966 -- returns the index of the first element in the ByteString
967 -- satisfying the predicate.
968 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
969 findIndex k (LPS ps) = findIndex' 0 ps
970 where findIndex' _ [] = Nothing
971 findIndex' n (x:xs) =
972 case P.findIndex k x of
973 Nothing -> findIndex' (n + fromIntegral (P.length x)) xs
974 Just i -> Just (n + fromIntegral i)
975 {-# INLINE findIndex #-}
976
977 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
978 -- and returns the first element in matching the predicate, or 'Nothing'
979 -- if there is no such element.
980 --
981 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
982 --
983 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
984 find f (LPS ps) = find' ps
985 where find' [] = Nothing
986 find' (x:xs) = case P.find f x of
987 Nothing -> find' xs
988 Just w -> Just w
989 {-# INLINE find #-}
990
991 -- | The 'findIndices' function extends 'findIndex', by returning the
992 -- indices of all elements satisfying the predicate, in ascending order.
993 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
994 findIndices k (LPS ps) = findIndices' 0 ps
995 where findIndices' _ [] = []
996 findIndices' n (x:xs) = L.map ((+n).fromIntegral) (P.findIndices k x)
997 ++ findIndices' (n + fromIntegral (P.length x)) xs
998
999 -- ---------------------------------------------------------------------
1000 -- Searching ByteStrings
1001
1002 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1003 elem :: Word8 -> ByteString -> Bool
1004 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1005
1006 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1007 notElem :: Word8 -> ByteString -> Bool
1008 notElem c ps = not (elem c ps)
1009
1010 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1011 -- returns a ByteString containing those characters that satisfy the
1012 -- predicate.
1013 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1014 --filter f (LPS xs) = LPS (filterMap (P.filter' f) xs)
1015 filter p = LPS . P.loopArr . loopL (P.filterEFL p) P.NoAcc . unLPS
1016 {-# INLINE filter #-}
1017
1018 {-
1019 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
1020 -- (==)/, for the common case of filtering a single byte. It is more
1021 -- efficient to use /filterByte/ in this case.
1022 --
1023 -- > filterByte == filter . (==)
1024 --
1025 -- filterByte is around 10x faster, and uses much less space, than its
1026 -- filter equivalent
1027 filterByte :: Word8 -> ByteString -> ByteString
1028 filterByte w ps = replicate (count w ps) w
1029 -- filterByte w (LPS xs) = LPS (filterMap (P.filterByte w) xs)
1030
1031 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1032 -- case of filtering a single byte out of a list. It is more efficient
1033 -- to use /filterNotByte/ in this case.
1034 --
1035 -- > filterNotByte == filter . (/=)
1036 --
1037 -- filterNotByte is around 2x faster than its filter equivalent.
1038 filterNotByte :: Word8 -> ByteString -> ByteString
1039 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
1040 -}
1041
1042 -- ---------------------------------------------------------------------
1043 -- Searching for substrings
1044
1045 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1046 -- iff the first is a prefix of the second.
1047 isPrefixOf :: ByteString -> ByteString -> Bool
1048 isPrefixOf (LPS as) (LPS bs) = isPrefixL as bs
1049 where isPrefixL [] _ = True
1050 isPrefixL _ [] = False
1051 isPrefixL (x:xs) (y:ys) | P.length x == P.length y = x == y && isPrefixL xs ys
1052 | P.length x < P.length y = x == yh && isPrefixL xs (yt:ys)
1053 | otherwise = xh == y && isPrefixL (xt:xs) ys
1054 where (xh,xt) = P.splitAt (P.length y) x
1055 (yh,yt) = P.splitAt (P.length x) y
1056
1057 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1058 -- iff the first is a suffix of the second.
1059 --
1060 -- The following holds:
1061 --
1062 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1063 --
1064 -- However, the real implemenation uses memcmp to compare the end of the
1065 -- string only, with no reverse required..
1066 --
1067 --isSuffixOf :: ByteString -> ByteString -> Bool
1068 --isSuffixOf = error "not yet implemented"
1069
1070 -- ---------------------------------------------------------------------
1071 -- Zipping
1072
1073 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1074 -- corresponding pairs of bytes. If one input ByteString is short,
1075 -- excess elements of the longer ByteString are discarded. This is
1076 -- equivalent to a pair of 'unpack' operations.
1077 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1078 zip = zipWith (,)
1079
1080 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1081 -- the first argument, instead of a tupling function. For example,
1082 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1083 -- corresponding sums.
1084 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1085 zipWith _ (LPS []) (LPS _) = []
1086 zipWith _ (LPS _) (LPS []) = []
1087 zipWith f (LPS (a:as)) (LPS (b:bs)) = zipWith' a as b bs
1088 where zipWith' x xs y ys =
1089 (f (P.unsafeHead x) (P.unsafeHead y) : zipWith'' (P.unsafeTail x) xs (P.unsafeTail y) ys)
1090
1091 zipWith'' x [] _ _ | P.null x = []
1092 zipWith'' _ _ y [] | P.null y = []
1093 zipWith'' x xs y ys | not (P.null x)
1094 && not (P.null y) = zipWith' x xs y ys
1095 zipWith'' x xs _ (y':ys) | not (P.null x) = zipWith' x xs y' ys
1096 zipWith'' _ (x':xs) y ys | not (P.null y) = zipWith' x' xs y ys
1097 zipWith'' _ (x':xs) _ (y':ys) = zipWith' x' xs y' ys
1098
1099 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1100 -- ByteStrings. Note that this performs two 'pack' operations.
1101 {-
1102 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1103 unzip _ls = error "not yet implemented"
1104 {-# INLINE unzip #-}
1105 -}
1106
1107 -- ---------------------------------------------------------------------
1108 -- Special lists
1109
1110 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1111 inits :: ByteString -> [ByteString]
1112 inits = (LPS [] :) . inits' . unLPS
1113 where inits' [] = []
1114 inits' (x:xs) = L.map (\x' -> LPS [x']) (L.tail (P.inits x))
1115 ++ L.map (\(LPS xs') -> LPS (x:xs')) (inits' xs)
1116
1117 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1118 tails :: ByteString -> [ByteString]
1119 tails = tails' . unLPS
1120 where tails' [] = LPS [] : []
1121 tails' xs@(x:xs')
1122 | P.length x == 1 = LPS xs : tails' xs'
1123 | otherwise = LPS xs : tails' (P.unsafeTail x : xs')
1124
1125 -- ---------------------------------------------------------------------
1126 -- Low level constructors
1127
1128 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1129 -- This is mainly useful to allow the rest of the data pointed
1130 -- to by the 'ByteString' to be garbage collected, for example
1131 -- if a large string has been read in, and only a small part of it
1132 -- is needed in the rest of the program.
1133 copy :: ByteString -> ByteString
1134 copy (LPS lps) = LPS (L.map P.copy lps)
1135 --TODO, we could coalese small blocks here
1136 --FIXME: probably not strict enough, if we're doing this to avoid retaining
1137 -- the parent blocks then we'd better copy strictly.
1138
1139 -- ---------------------------------------------------------------------
1140
1141 -- TODO defrag func that concatenates block together that are below a threshold
1142 -- defrag :: Int -> ByteString -> ByteString
1143
1144 -- ---------------------------------------------------------------------
1145 -- Lazy ByteString IO
1146
1147 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1148 -- are read on demand, in at most @k@-sized chunks. It does not block
1149 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
1150 -- available then they will be returned immediately as a smaller chunk.
1151 hGetContentsN :: Int -> Handle -> IO ByteString
1152 hGetContentsN k h = lazyRead >>= return . LPS
1153 where
1154 lazyRead = unsafeInterleaveIO loop
1155
1156 loop = do
1157 ps <- P.hGetNonBlocking h k
1158 --TODO: I think this should distinguish EOF from no data available
1159 -- the otherlying POSIX call makes this distincion, returning either
1160 -- 0 or EAGAIN
1161 if P.null ps
1162 then do eof <- hIsEOF h
1163 if eof then return []
1164 else hWaitForInput h (-1)
1165 >> loop
1166 else do pss <- lazyRead
1167 return (ps : pss)
1168
1169 -- | Read @n@ bytes into a 'ByteString', directly from the
1170 -- specified 'Handle', in chunks of size @k@.
1171 hGetN :: Int -> Handle -> Int -> IO ByteString
1172 hGetN _ _ 0 = return empty
1173 hGetN k h n = readChunks n >>= return . LPS
1174 where
1175 STRICT1(readChunks)
1176 readChunks i = do
1177 ps <- P.hGet h (min k i)
1178 case P.length ps of
1179 0 -> return []
1180 m -> do pss <- readChunks (i - m)
1181 return (ps : pss)
1182
1183 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
1184 -- waiting for data to become available, instead it returns only whatever data
1185 -- is available. Chunks are read on demand, in @k@-sized chunks.
1186 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
1187 #if defined(__GLASGOW_HASKELL__)
1188 hGetNonBlockingN _ _ 0 = return empty
1189 hGetNonBlockingN k h n = readChunks n >>= return . LPS
1190 where
1191 STRICT1(readChunks)
1192 readChunks i = do
1193 ps <- P.hGetNonBlocking h (min k i)
1194 case P.length ps of
1195 0 -> return []
1196 m -> do pss <- readChunks (i - m)
1197 return (ps : pss)
1198 #else
1199 hGetNonBlockingN = hGetN
1200 #endif
1201
1202 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
1203 -- are read on demand, using the default chunk size.
1204 hGetContents :: Handle -> IO ByteString
1205 hGetContents = hGetContentsN defaultChunkSize
1206
1207 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
1208 hGet :: Handle -> Int -> IO ByteString
1209 hGet = hGetN defaultChunkSize
1210
1211 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
1212 -- waiting for data to become available, instead it returns only whatever data
1213 -- is available.
1214 #if defined(__GLASGOW_HASKELL__)
1215 hGetNonBlocking :: Handle -> Int -> IO ByteString
1216 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
1217 #else
1218 hGetNonBlocking = hGet
1219 #endif
1220
1221 -- | Read an entire file /lazily/ into a 'ByteString'.
1222 readFile :: FilePath -> IO ByteString
1223 readFile f = openBinaryFile f ReadMode >>= hGetContents
1224
1225 -- | Write a 'ByteString' to a file.
1226 writeFile :: FilePath -> ByteString -> IO ()
1227 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1228 (\hdl -> hPut hdl txt)
1229
1230 -- | Append a 'ByteString' to a file.
1231 appendFile :: FilePath -> ByteString -> IO ()
1232 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1233 (\hdl -> hPut hdl txt)
1234
1235 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
1236 getContents :: IO ByteString
1237 getContents = hGetContents stdin
1238
1239 -- | Outputs a 'ByteString' to the specified 'Handle'.
1240 hPut :: Handle -> ByteString -> IO ()
1241 hPut h (LPS xs) = mapM_ (P.hPut h) xs
1242
1243 -- | Write a ByteString to stdout
1244 putStr :: ByteString -> IO ()
1245 putStr = hPut stdout
1246
1247 -- | Write a ByteString to stdout, appending a newline byte
1248 putStrLn :: ByteString -> IO ()
1249 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
1250
1251 -- | The interact function takes a function of type @ByteString -> ByteString@
1252 -- as its argument. The entire input from the standard input device is passed
1253 -- to this function as its argument, and the resulting string is output on the
1254 -- standard output device. It's great for writing one line programs!
1255 interact :: (ByteString -> ByteString) -> IO ()
1256 interact transformer = putStr . transformer =<< getContents
1257
1258 -- ---------------------------------------------------------------------
1259 -- Internal utilities
1260
1261 -- Common up near identical calls to `error' to reduce the number
1262 -- constant strings created when compiled:
1263 errorEmptyList :: String -> a
1264 errorEmptyList fun = moduleError fun "empty ByteString"
1265
1266 moduleError :: String -> String -> a
1267 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
1268
1269 -- A manually fused version of "filter (not.null) . map f", since they
1270 -- don't seem to fuse themselves. Really helps out filter*, concatMap.
1271 --
1272 -- TODO fuse.
1273 --
1274 filterMap :: (P.ByteString -> P.ByteString) -> [P.ByteString] -> [P.ByteString]
1275 filterMap _ [] = []
1276 filterMap f (x:xs) = case f x of
1277 y | P.null y -> filterMap f xs -- manually fuse the invariant filter
1278 | otherwise -> y : filterMap f xs
1279 {-# INLINE filterMap #-}
1280
1281
1282 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1283 -- of the string if no element is found, rather than Nothing.
1284 findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
1285 findIndexOrEnd k (P.PS x s l) = P.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1286 where
1287 STRICT2(go)
1288 go ptr n | n >= l = return l
1289 | otherwise = do w <- peek ptr
1290 if k w
1291 then return n
1292 else go (ptr `plusPtr` 1) (n+1)
1293 {-# INLINE findIndexOrEnd #-}