expand advice on importing these modules
[packages/old-time.git] / Data / ByteString.hs
1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
2 --
3 -- Module : ByteString
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
9 --
10 -- Array fusion code:
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
13 --
14 -- License : BSD-style
15 --
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
20 --
21
22 --
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
27 -- and can be passed between C and Haskell with little effort.
28 --
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with "Prelude" functions. eg.
31 --
32 -- > import qualified Data.ByteString as B
33 --
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
37 --
38
39 module Data.ByteString (
40
41 -- * The @ByteString@ type
42 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
43
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 singleton, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49
50 -- * Basic interface
51 cons, -- :: Word8 -> ByteString -> ByteString
52 snoc, -- :: ByteString -> Word8 -> ByteString
53 append, -- :: ByteString -> ByteString -> ByteString
54 head, -- :: ByteString -> Word8
55 last, -- :: ByteString -> Word8
56 tail, -- :: ByteString -> ByteString
57 init, -- :: ByteString -> ByteString
58 null, -- :: ByteString -> Bool
59 length, -- :: ByteString -> Int
60
61 -- * Transformating ByteStrings
62 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
63 reverse, -- :: ByteString -> ByteString
64 intersperse, -- :: Word8 -> ByteString -> ByteString
65 transpose, -- :: [ByteString] -> [ByteString]
66
67 -- * Reducing 'ByteString's (folds)
68 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
69 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
70 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
71 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
72
73 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
74 foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a
75 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
76 foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
77
78 -- ** Special folds
79 concat, -- :: [ByteString] -> ByteString
80 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
81 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
82 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
83 maximum, -- :: ByteString -> Word8
84 minimum, -- :: ByteString -> Word8
85
86 -- * Building ByteStrings
87 -- ** Scans
88 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
89 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
90 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
91 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
92
93 -- ** Accumulating maps
94 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
95 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
96 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
97
98 -- ** Unfolding ByteStrings
99 replicate, -- :: Int -> Word8 -> ByteString
100 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
101 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
102
103 -- * Substrings
104
105 -- ** Breaking strings
106 take, -- :: Int -> ByteString -> ByteString
107 drop, -- :: Int -> ByteString -> ByteString
108 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
109 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
110 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
111 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
113 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
114 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
117 inits, -- :: ByteString -> [ByteString]
118 tails, -- :: ByteString -> [ByteString]
119
120 -- ** Breaking into many substrings
121 split, -- :: Word8 -> ByteString -> [ByteString]
122 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
123
124 -- ** Joining strings
125 join, -- :: ByteString -> [ByteString] -> ByteString
126
127 -- * Predicates
128 isPrefixOf, -- :: ByteString -> ByteString -> Bool
129 isSuffixOf, -- :: ByteString -> ByteString -> Bool
130
131 -- ** Search for arbitrary substrings
132 isSubstringOf, -- :: ByteString -> ByteString -> Bool
133 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
134 findSubstrings, -- :: ByteString -> ByteString -> [Int]
135
136 -- * Searching ByteStrings
137
138 -- ** Searching by equality
139 -- | These functions use memchr(3) to efficiently search the ByteString
140 elem, -- :: Word8 -> ByteString -> Bool
141 notElem, -- :: Word8 -> ByteString -> Bool
142
143 -- ** Searching with a predicate
144 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
146 -- partition -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
147
148 -- * Indexing ByteStrings
149 index, -- :: ByteString -> Int -> Word8
150 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
151 elemIndices, -- :: Word8 -> ByteString -> [Int]
152 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
153 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
154 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
155 count, -- :: Word8 -> ByteString -> Int
156
157 -- * Zipping and unzipping ByteStrings
158 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
159 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
160 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
161
162 -- * Ordered ByteStrings
163 sort, -- :: ByteString -> ByteString
164
165 -- * Low level CString conversions
166
167 -- ** Packing CStrings and pointers
168 packCString, -- :: CString -> ByteString
169 packCStringLen, -- :: CString -> ByteString
170 packMallocCString, -- :: CString -> ByteString
171
172 -- ** Using ByteStrings as CStrings
173 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
174 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
175
176 -- ** Copying ByteStrings
177 -- | These functions perform memcpy(3) operations
178 copy, -- :: ByteString -> ByteString
179 copyCString, -- :: CString -> IO ByteString
180 copyCStringLen, -- :: CStringLen -> IO ByteString
181
182 -- * I\/O with 'ByteString's
183
184 -- ** Standard input and output
185 getLine, -- :: IO ByteString
186 getContents, -- :: IO ByteString
187 putStr, -- :: ByteString -> IO ()
188 putStrLn, -- :: ByteString -> IO ()
189 interact, -- :: (ByteString -> ByteString) -> IO ()
190
191 -- ** Files
192 readFile, -- :: FilePath -> IO ByteString
193 writeFile, -- :: FilePath -> ByteString -> IO ()
194 appendFile, -- :: FilePath -> ByteString -> IO ()
195 -- mmapFile, -- :: FilePath -> IO ByteString
196
197 -- ** I\/O with Handles
198 hGetLine, -- :: Handle -> IO ByteString
199 hGetContents, -- :: Handle -> IO ByteString
200 hGet, -- :: Handle -> Int -> IO ByteString
201 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
202 hPut, -- :: Handle -> ByteString -> IO ()
203 hPutStr, -- :: Handle -> ByteString -> IO ()
204 hPutStrLn, -- :: Handle -> ByteString -> IO ()
205
206 #if defined(__GLASGOW_HASKELL__)
207 -- * Fusion utilities
208 unpackList, -- eek, otherwise it gets thrown away by the simplifier
209 lengthU, maximumU, minimumU
210 #endif
211
212 ) where
213
214 import qualified Prelude as P
215 import Prelude hiding (reverse,head,tail,last,init,null
216 ,length,map,lines,foldl,foldr,unlines
217 ,concat,any,take,drop,splitAt,takeWhile
218 ,dropWhile,span,break,elem,filter,maximum
219 ,minimum,all,concatMap,foldl1,foldr1
220 ,scanl,scanl1,scanr,scanr1
221 ,readFile,writeFile,appendFile,replicate
222 ,getContents,getLine,putStr,putStrLn,interact
223 ,zip,zipWith,unzip,notElem)
224
225 import Data.ByteString.Base
226 import Data.ByteString.Fusion
227
228 import qualified Data.List as List
229
230 import Data.Word (Word8)
231 import Data.Maybe (listToMaybe)
232 import Data.Array (listArray)
233 import qualified Data.Array as Array ((!))
234
235 -- Control.Exception.bracket not available in yhc or nhc
236 import Control.Exception (bracket, assert)
237 import qualified Control.Exception as Exception
238 import Control.Monad (when)
239
240 import Foreign.C.String (CString, CStringLen)
241 import Foreign.C.Types (CSize)
242 import Foreign.ForeignPtr
243 import Foreign.Marshal.Array
244 import Foreign.Ptr
245 import Foreign.Storable (Storable(..))
246
247 -- hGetBuf and hPutBuf not available in yhc or nhc
248 import System.IO (stdin,stdout,hClose,hFileSize
249 ,hGetBuf,hPutBuf,openBinaryFile
250 ,Handle,IOMode(..))
251
252 import Data.Monoid (Monoid, mempty, mappend, mconcat)
253
254 #if !defined(__GLASGOW_HASKELL__)
255 import System.IO.Unsafe
256 import qualified System.Environment
257 import qualified System.IO (hGetLine)
258 #endif
259
260 #if defined(__GLASGOW_HASKELL__)
261
262 import System.IO (hGetBufNonBlocking)
263 import System.IO.Error (isEOFError)
264
265 import GHC.Handle
266 import GHC.Prim (Word#, (+#), writeWord8OffAddr#)
267 import GHC.Base (build)
268 import GHC.Word hiding (Word8)
269 import GHC.Ptr (Ptr(..))
270 import GHC.ST (ST(..))
271 import GHC.IOBase
272
273 #endif
274
275 -- -----------------------------------------------------------------------------
276 --
277 -- Useful macros, until we have bang patterns
278 --
279
280 #define STRICT1(f) f a | a `seq` False = undefined
281 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
282 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
283 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
284 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
285
286 -- -----------------------------------------------------------------------------
287
288 instance Eq ByteString
289 where (==) = eq
290
291 instance Ord ByteString
292 where compare = compareBytes
293
294 instance Show ByteString where
295 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
296
297 instance Read ByteString where
298 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
299
300 instance Monoid ByteString where
301 mempty = empty
302 mappend = append
303 mconcat = concat
304
305 {-
306 instance Arbitrary PackedString where
307 arbitrary = P.pack `fmap` arbitrary
308 coarbitrary s = coarbitrary (P.unpack s)
309 -}
310
311 -- | /O(n)/ Equality on the 'ByteString' type.
312 eq :: ByteString -> ByteString -> Bool
313 eq a@(PS p s l) b@(PS p' s' l')
314 | l /= l' = False -- short cut on length
315 | p == p' && s == s' = True -- short cut for the same string
316 | otherwise = compareBytes a b == EQ
317 {-# INLINE eq #-}
318
319 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
320 compareBytes :: ByteString -> ByteString -> Ordering
321 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
322 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
323 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
324 | otherwise = inlinePerformIO $
325 withForeignPtr x1 $ \p1 ->
326 withForeignPtr x2 $ \p2 -> do
327 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
328 return $! case i `compare` 0 of
329 EQ -> l1 `compare` l2
330 x -> x
331 {-# INLINE compareBytes #-}
332
333 {-
334 --
335 -- About 4x slower over 32M
336 --
337 compareBytes :: ByteString -> ByteString -> Ordering
338 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
339 withForeignPtr fp1 $ \p1 ->
340 withForeignPtr fp2 $ \p2 ->
341 cmp (p1 `plusPtr` off1)
342 (p2 `plusPtr` off2) 0 len1 len2
343
344 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
345 STRICT5(cmp)
346 cmp p1 p2 n len1 len2
347 | n == len1 = if n == len2 then return EQ else return LT
348 | n == len2 = return GT
349 | otherwise = do
350 (a :: Word8) <- peekByteOff p1 n
351 (b :: Word8) <- peekByteOff p2 n
352 case a `compare` b of
353 EQ -> cmp p1 p2 (n+1) len1 len2
354 LT -> return LT
355 GT -> return GT
356 {-# INLINE compareBytes #-}
357 -}
358
359 -- -----------------------------------------------------------------------------
360 -- Introducing and eliminating 'ByteString's
361
362 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
363 singleton :: Word8 -> ByteString
364 singleton c = unsafeCreate 1 $ \p -> poke p c
365 {-# INLINE [1] singleton #-}
366
367 --
368 -- XXX The unsafePerformIO is critical!
369 --
370 -- Otherwise:
371 --
372 -- singleton 255 `compare` singleton 127
373 --
374 -- is compiled to:
375 --
376 -- case mallocByteString 2 of
377 -- ForeignPtr f internals ->
378 -- case writeWord8OffAddr# f 0 255 of _ ->
379 -- case writeWord8OffAddr# f 0 127 of _ ->
380 -- case eqAddr# f f of
381 -- False -> case compare (GHC.Prim.plusAddr# f 0)
382 -- (GHC.Prim.plusAddr# f 0)
383 --
384 --
385
386 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
387 --
388 -- For applications with large numbers of string literals, pack can be a
389 -- bottleneck. In such cases, consider using packAddress (GHC only).
390 pack :: [Word8] -> ByteString
391
392 #if !defined(__GLASGOW_HASKELL__)
393
394 pack str = unsafeCreate (P.length str) $ \p -> go p str
395 where
396 go _ [] = return ()
397 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
398
399 #else /* hack away */
400
401 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
402 where
403 go _ _ [] = return ()
404 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
405
406 writeByte p i c = ST $ \s# ->
407 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
408
409 #endif
410
411 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
412 unpack :: ByteString -> [Word8]
413
414 #if !defined(__GLASGOW_HASKELL__)
415
416 unpack (PS _ _ 0) = []
417 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
418 go (p `plusPtr` s) (l - 1) []
419 where
420 STRICT3(go)
421 go p 0 acc = peek p >>= \e -> return (e : acc)
422 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
423 {-# INLINE unpack #-}
424
425 #else
426
427 unpack ps = build (unpackFoldr ps)
428 {-# INLINE unpack #-}
429
430 --
431 -- critical this isn't strict in the acc
432 -- as it will break in the presence of list fusion. this is a known
433 -- issue with seq and build/foldr rewrite rules, which rely on lazy
434 -- demanding to avoid bottoms in the list.
435 --
436 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
437 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
438 let loop q n _ | q `seq` n `seq` False = undefined -- n.b.
439 loop _ (-1) acc = return acc
440 loop q n acc = do
441 a <- peekByteOff q n
442 loop q (n-1) (a `f` acc)
443 loop (p `plusPtr` off) (len-1) ch
444 {-# INLINE [0] unpackFoldr #-}
445
446 unpackList :: ByteString -> [Word8]
447 unpackList (PS fp off len) = withPtr fp $ \p -> do
448 let STRICT3(loop)
449 loop _ (-1) acc = return acc
450 loop q n acc = do
451 a <- peekByteOff q n
452 loop q (n-1) (a : acc)
453 loop (p `plusPtr` off) (len-1) []
454
455 {-# RULES
456 "FPS unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
457 #-}
458
459 #endif
460
461 ------------------------------------------------------------------------
462
463 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
464 -- conversion function
465 packWith :: (a -> Word8) -> [a] -> ByteString
466 packWith k str = unsafeCreate (P.length str) $ \p -> go p str
467 where
468 STRICT2(go)
469 go _ [] = return ()
470 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
471 {-# INLINE packWith #-}
472 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
473
474 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
475 unpackWith :: (Word8 -> a) -> ByteString -> [a]
476 unpackWith _ (PS _ _ 0) = []
477 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
478 go (p `plusPtr` s) (l - 1) []
479 where
480 STRICT3(go)
481 go p 0 acc = peek p >>= \e -> return (k e : acc)
482 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
483 {-# INLINE unpackWith #-}
484 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
485
486 -- ---------------------------------------------------------------------
487 -- Basic interface
488
489 -- | /O(1)/ Test whether a ByteString is empty.
490 null :: ByteString -> Bool
491 null (PS _ _ l) = assert (l >= 0) $ l <= 0
492 {-# INLINE null #-}
493
494 -- ---------------------------------------------------------------------
495 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
496 length :: ByteString -> Int
497 length (PS _ _ l) = assert (l >= 0) $ l
498
499 --
500 -- length/loop fusion. When taking the length of any fuseable loop,
501 -- rewrite it as a foldl', and thus avoid allocating the result buffer
502 -- worth around 10% in speed testing.
503 --
504
505 #if defined(__GLASGOW_HASKELL__)
506 {-# INLINE [1] length #-}
507 #endif
508
509 lengthU :: ByteString -> Int
510 lengthU = foldl' (const . (+1)) (0::Int)
511 {-# INLINE lengthU #-}
512
513 {-# RULES
514
515 -- v2 fusion
516 "FPS length/loop" forall loop s .
517 length (loopArr (loopWrapper loop s)) =
518 lengthU (loopArr (loopWrapper loop s))
519
520 #-}
521
522 ------------------------------------------------------------------------
523
524 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
525 -- complexity, as it requires a memcpy.
526 cons :: Word8 -> ByteString -> ByteString
527 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
528 poke p c
529 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
530 {-# INLINE cons #-}
531
532 -- | /O(n)/ Append a byte to the end of a 'ByteString'
533 snoc :: ByteString -> Word8 -> ByteString
534 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
535 memcpy p (f `plusPtr` s) (fromIntegral l)
536 poke (p `plusPtr` l) c
537 {-# INLINE snoc #-}
538
539 -- todo fuse
540
541 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
542 -- An exception will be thrown in the case of an empty ByteString.
543 head :: ByteString -> Word8
544 head (PS x s l)
545 | l <= 0 = errorEmptyList "head"
546 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
547 {-# INLINE head #-}
548
549 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
550 -- An exception will be thrown in the case of an empty ByteString.
551 tail :: ByteString -> ByteString
552 tail (PS p s l)
553 | l <= 0 = errorEmptyList "tail"
554 | otherwise = PS p (s+1) (l-1)
555 {-# INLINE tail #-}
556
557 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
558 -- An exception will be thrown in the case of an empty ByteString.
559 last :: ByteString -> Word8
560 last ps@(PS x s l)
561 | null ps = errorEmptyList "last"
562 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
563 {-# INLINE last #-}
564
565 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
566 -- An exception will be thrown in the case of an empty ByteString.
567 init :: ByteString -> ByteString
568 init ps@(PS p s l)
569 | null ps = errorEmptyList "init"
570 | otherwise = PS p s (l-1)
571 {-# INLINE init #-}
572
573 -- | /O(n)/ Append two ByteStrings
574 append :: ByteString -> ByteString -> ByteString
575 append xs ys | null xs = ys
576 | null ys = xs
577 | otherwise = concat [xs,ys]
578 {-# INLINE append #-}
579
580 -- ---------------------------------------------------------------------
581 -- Transformations
582
583 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
584 -- element of @xs@. This function is subject to array fusion.
585 map :: (Word8 -> Word8) -> ByteString -> ByteString
586 #if defined(LOOPU_FUSION)
587 map f = loopArr . loopU (mapEFL f) NoAcc
588 #elif defined(LOOPUP_FUSION)
589 map f = loopArr . loopUp (mapEFL f) NoAcc
590 #elif defined(LOOPNOACC_FUSION)
591 map f = loopArr . loopNoAcc (mapEFL f)
592 #else
593 map f = loopArr . loopMap f
594 #endif
595 {-# INLINE map #-}
596
597 {-
598 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
599 -- slightly faster for one-shot cases.
600 map' :: (Word8 -> Word8) -> ByteString -> ByteString
601 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
602 create len $ map_ 0 (a `plusPtr` s)
603 where
604 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
605 STRICT3(map_)
606 map_ n p1 p2
607 | n >= len = return ()
608 | otherwise = do
609 x <- peekByteOff p1 n
610 pokeByteOff p2 n (f x)
611 map_ (n+1) p1 p2
612 {-# INLINE map' #-}
613 -}
614
615 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
616 reverse :: ByteString -> ByteString
617 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
618 c_reverse p (f `plusPtr` s) (fromIntegral l)
619
620 -- todo, fuseable version
621
622 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
623 -- 'ByteString' and \`intersperses\' that byte between the elements of
624 -- the 'ByteString'. It is analogous to the intersperse function on
625 -- Lists.
626 intersperse :: Word8 -> ByteString -> ByteString
627 intersperse c ps@(PS x s l)
628 | length ps < 2 = ps
629 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
630 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
631
632 {-
633 intersperse c = pack . List.intersperse c . unpack
634 -}
635
636 -- | The 'transpose' function transposes the rows and columns of its
637 -- 'ByteString' argument.
638 transpose :: [ByteString] -> [ByteString]
639 transpose ps = P.map pack (List.transpose (P.map unpack ps))
640
641 -- ---------------------------------------------------------------------
642 -- Reducing 'ByteString's
643
644 -- | 'foldl', applied to a binary operator, a starting value (typically
645 -- the left-identity of the operator), and a ByteString, reduces the
646 -- ByteString using the binary operator, from left to right.
647 -- This function is subject to array fusion.
648 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
649 #if !defined(LOOPU_FUSION)
650 foldl f z = loopAcc . loopUp (foldEFL f) z
651 #else
652 foldl f z = loopAcc . loopU (foldEFL f) z
653 #endif
654 {-# INLINE foldl #-}
655
656 {-
657 --
658 -- About twice as fast with 6.4.1, but not fuseable
659 -- A simple fold . map is enough to make it worth while.
660 --
661 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
662 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
663 where
664 STRICT3(lgo)
665 lgo z p q | p == q = return z
666 | otherwise = do c <- peek p
667 lgo (f z c) (p `plusPtr` 1) q
668 -}
669
670 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
671 -- Though actually foldl is also strict in the accumulator.
672 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
673 foldl' = foldl
674 -- foldl' f z = loopAcc . loopU (foldEFL' f) z
675 {-# INLINE foldl' #-}
676
677 -- | 'foldr', applied to a binary operator, a starting value
678 -- (typically the right-identity of the operator), and a ByteString,
679 -- reduces the ByteString using the binary operator, from right to left.
680 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
681 foldr k z = loopAcc . loopDown (foldEFL (flip k)) z
682 {-# INLINE foldr #-}
683
684 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
685 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
686 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
687 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
688 where
689 STRICT3(go)
690 go z p q | p == q = return z
691 | otherwise = do c <- peek p
692 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
693 {-# INLINE [1] foldr' #-}
694
695 -- | 'foldl1' is a variant of 'foldl' that has no starting value
696 -- argument, and thus must be applied to non-empty 'ByteStrings'.
697 -- This function is subject to array fusion.
698 -- An exception will be thrown in the case of an empty ByteString.
699 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
700 foldl1 f ps
701 | null ps = errorEmptyList "foldl1"
702 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
703 {-# INLINE foldl1 #-}
704
705 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
706 -- An exception will be thrown in the case of an empty ByteString.
707 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
708 foldl1' f ps
709 | null ps = errorEmptyList "foldl1'"
710 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
711 {-# INLINE foldl1' #-}
712
713 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
714 -- and thus must be applied to non-empty 'ByteString's
715 -- An exception will be thrown in the case of an empty ByteString.
716 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
717 foldr1 f ps
718 | null ps = errorEmptyList "foldr1"
719 | otherwise = foldr f (last ps) (init ps)
720 {-# INLINE foldr1 #-}
721
722 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
723 -- accumulator.
724 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
725 foldr1' f ps
726 | null ps = errorEmptyList "foldr1"
727 | otherwise = foldr' f (last ps) (init ps)
728 {-# INLINE [1] foldr1' #-}
729
730 -- ---------------------------------------------------------------------
731 -- Special folds
732
733 -- | /O(n)/ Concatenate a list of ByteStrings.
734 concat :: [ByteString] -> ByteString
735 concat [] = empty
736 concat [ps] = ps
737 concat xs = unsafeCreate len $ \ptr -> go xs ptr
738 where len = P.sum . P.map length $ xs
739 STRICT2(go)
740 go [] _ = return ()
741 go (PS p s l:ps) ptr = do
742 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
743 go ps (ptr `plusPtr` l)
744
745 -- | Map a function over a 'ByteString' and concatenate the results
746 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
747 concatMap f = concat . foldr ((:) . f) []
748
749 -- foldr (append . f) empty
750
751 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
752 -- any element of the 'ByteString' satisfies the predicate.
753 any :: (Word8 -> Bool) -> ByteString -> Bool
754 any _ (PS _ _ 0) = False
755 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
756 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
757 where
758 STRICT2(go)
759 go p q | p == q = return False
760 | otherwise = do c <- peek p
761 if f c then return True
762 else go (p `plusPtr` 1) q
763
764 -- todo fuse
765
766 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
767 -- if all elements of the 'ByteString' satisfy the predicate.
768 all :: (Word8 -> Bool) -> ByteString -> Bool
769 all _ (PS _ _ 0) = True
770 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
771 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
772 where
773 STRICT2(go)
774 go p q | p == q = return True -- end of list
775 | otherwise = do c <- peek p
776 if f c
777 then go (p `plusPtr` 1) q
778 else return False
779
780 ------------------------------------------------------------------------
781
782 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
783 -- This function will fuse.
784 -- An exception will be thrown in the case of an empty ByteString.
785 maximum :: ByteString -> Word8
786 maximum xs@(PS x s l)
787 | null xs = errorEmptyList "maximum"
788 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
789 c_maximum (p `plusPtr` s) (fromIntegral l)
790
791 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
792 -- This function will fuse.
793 -- An exception will be thrown in the case of an empty ByteString.
794 minimum :: ByteString -> Word8
795 minimum xs@(PS x s l)
796 | null xs = errorEmptyList "minimum"
797 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
798 c_minimum (p `plusPtr` s) (fromIntegral l)
799
800 --
801 -- minimum/maximum/loop fusion. As for length (and other folds), when we
802 -- see we're applied after a fuseable op, switch from using the C
803 -- version, to the fuseable version. The result should then avoid
804 -- allocating a buffer.
805 --
806
807 #if defined(__GLASGOW_HASKELL__)
808 {-# INLINE [1] minimum #-}
809 {-# INLINE [1] maximum #-}
810 #endif
811
812 maximumU :: ByteString -> Word8
813 maximumU = foldl1' max
814 {-# INLINE maximumU #-}
815
816 minimumU :: ByteString -> Word8
817 minimumU = foldl1' min
818 {-# INLINE minimumU #-}
819
820 {-# RULES
821
822 "FPS minimum/loop" forall loop s .
823 minimum (loopArr (loopWrapper loop s)) =
824 minimumU (loopArr (loopWrapper loop s))
825
826 "FPS maximum/loop" forall loop s .
827 maximum (loopArr (loopWrapper loop s)) =
828 maximumU (loopArr (loopWrapper loop s))
829
830 #-}
831
832 ------------------------------------------------------------------------
833
834 -- | The 'mapAccumL' function behaves like a combination of 'map' and
835 -- 'foldl'; it applies a function to each element of a ByteString,
836 -- passing an accumulating parameter from left to right, and returning a
837 -- final value of this accumulator together with the new list.
838 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
839 #if !defined(LOOPU_FUSION)
840 mapAccumL f z = unSP . loopUp (mapAccumEFL f) z
841 #else
842 mapAccumL f z = unSP . loopU (mapAccumEFL f) z
843 #endif
844 {-# INLINE mapAccumL #-}
845
846 -- | The 'mapAccumR' function behaves like a combination of 'map' and
847 -- 'foldr'; it applies a function to each element of a ByteString,
848 -- passing an accumulating parameter from right to left, and returning a
849 -- final value of this accumulator together with the new ByteString.
850 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
851 mapAccumR f z = unSP . loopDown (mapAccumEFL f) z
852 {-# INLINE mapAccumR #-}
853
854 -- | /O(n)/ map Word8 functions, provided with the index at each position
855 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
856 mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0
857 {-# INLINE mapIndexed #-}
858
859 -- ---------------------------------------------------------------------
860 -- Building ByteStrings
861
862 -- | 'scanl' is similar to 'foldl', but returns a list of successive
863 -- reduced values from the left. This function will fuse.
864 --
865 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
866 --
867 -- Note that
868 --
869 -- > last (scanl f z xs) == foldl f z xs.
870 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
871 #if !defined(LOOPU_FUSION)
872 scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0)
873 #else
874 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0)
875 #endif
876
877 -- n.b. haskell's List scan returns a list one bigger than the
878 -- input, so we need to snoc here to get some extra space, however,
879 -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
880 {-# INLINE scanl #-}
881
882 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
883 -- This function will fuse.
884 --
885 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
886 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
887 scanl1 f ps
888 | null ps = empty
889 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
890 {-# INLINE scanl1 #-}
891
892 -- | scanr is the right-to-left dual of scanl.
893 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
894 scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space
895 {-# INLINE scanr #-}
896
897 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
898 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
899 scanr1 f ps
900 | null ps = empty
901 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
902 {-# INLINE scanr1 #-}
903
904 -- ---------------------------------------------------------------------
905 -- Unfolds and replicates
906
907 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
908 -- the value of every element. The following holds:
909 --
910 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
911 --
912 -- This implemenation uses @memset(3)@
913 replicate :: Int -> Word8 -> ByteString
914 replicate w c
915 | w <= 0 = empty
916 | otherwise = unsafeCreate w $ \ptr ->
917 memset ptr c (fromIntegral w) >> return ()
918
919 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
920 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
921 -- ByteString from a seed value. The function takes the element and
922 -- returns 'Nothing' if it is done producing the ByteString or returns
923 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
924 -- and @b@ is the seed value for further production.
925 --
926 -- Examples:
927 --
928 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
929 -- > == pack [0, 1, 2, 3, 4, 5]
930 --
931 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
932 unfoldr f = concat . unfoldChunk 32 64
933 where unfoldChunk n n' x =
934 case unfoldrN n f x of
935 (s, Nothing) -> s : []
936 (s, Just x') -> s : unfoldChunk n' (n+n') x'
937
938 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
939 -- value. However, the length of the result is limited by the first
940 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
941 -- when the maximum length of the result is known.
942 --
943 -- The following equation relates 'unfoldrN' and 'unfoldr':
944 --
945 -- > unfoldrN n f s == take n (unfoldr f s)
946 --
947 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
948 unfoldrN i f x0
949 | i < 0 = (empty, Just x0)
950 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
951 where STRICT3(go)
952 go p x n =
953 case f x of
954 Nothing -> return (0, n, Nothing)
955 Just (w,x')
956 | n == i -> return (0, n, Just x)
957 | otherwise -> do poke p w
958 go (p `plusPtr` 1) x' (n+1)
959
960 -- ---------------------------------------------------------------------
961 -- Substrings
962
963 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
964 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
965 take :: Int -> ByteString -> ByteString
966 take n ps@(PS x s l)
967 | n <= 0 = empty
968 | n >= l = ps
969 | otherwise = PS x s n
970 {-# INLINE take #-}
971
972 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
973 -- elements, or @[]@ if @n > 'length' xs@.
974 drop :: Int -> ByteString -> ByteString
975 drop n ps@(PS x s l)
976 | n <= 0 = ps
977 | n >= l = empty
978 | otherwise = PS x (s+n) (l-n)
979 {-# INLINE drop #-}
980
981 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
982 splitAt :: Int -> ByteString -> (ByteString, ByteString)
983 splitAt n ps@(PS x s l)
984 | n <= 0 = (empty, ps)
985 | n >= l = (ps, empty)
986 | otherwise = (PS x s n, PS x (s+n) (l-n))
987 {-# INLINE splitAt #-}
988
989 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
990 -- returns the longest prefix (possibly empty) of @xs@ of elements that
991 -- satisfy @p@.
992 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
993 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
994 {-# INLINE takeWhile #-}
995
996 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
997 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
998 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
999 {-# INLINE dropWhile #-}
1000
1001 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
1002 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1003 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
1004 {-# INLINE [1] break #-}
1005
1006 {-# RULES
1007 "FPS specialise break (x==)" forall x.
1008 break ((==) x) = breakByte x
1009 #-}
1010
1011 #if __GLASGOW_HASKELL__ >= 605
1012 {-# RULES
1013 "FPS specialise break (==x)" forall x.
1014 break (==x) = breakByte x
1015 #-}
1016 #endif
1017
1018 -- | 'breakByte' breaks its ByteString argument at the first occurence
1019 -- of the specified byte. It is more efficient than 'break' as it is
1020 -- implemented with @memchr(3)@. I.e.
1021 --
1022 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
1023 --
1024 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
1025 breakByte c p = case elemIndex c p of
1026 Nothing -> (p,empty)
1027 Just n -> (unsafeTake n p, unsafeDrop n p)
1028 {-# INLINE breakByte #-}
1029
1030 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
1031 --
1032 -- breakEnd p == spanEnd (not.p)
1033 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1034 breakEnd p ps = splitAt (findFromEndUntil p ps) ps
1035
1036 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1037 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1038 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1039 span p ps = break (not . p) ps
1040 {-# INLINE [1] span #-}
1041
1042 -- | 'spanByte' breaks its ByteString argument at the first
1043 -- occurence of a byte other than its argument. It is more efficient
1044 -- than 'span (==)'
1045 --
1046 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
1047 --
1048 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
1049 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1050 go (p `plusPtr` s) 0
1051 where
1052 STRICT2(go)
1053 go p i | i >= l = return (ps, empty)
1054 | otherwise = do c' <- peekByteOff p i
1055 if c /= c'
1056 then return (unsafeTake i ps, unsafeDrop i ps)
1057 else go p (i+1)
1058 {-# INLINE spanByte #-}
1059
1060 {-# RULES
1061 "FPS specialise span (x==)" forall x.
1062 span ((==) x) = spanByte x
1063 #-}
1064
1065 #if __GLASGOW_HASKELL__ >= 605
1066 {-# RULES
1067 "FPS specialise span (==x)" forall x.
1068 span (==x) = spanByte x
1069 #-}
1070 #endif
1071
1072 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1073 -- We have
1074 --
1075 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1076 --
1077 -- and
1078 --
1079 -- > spanEnd (not . isSpace) ps
1080 -- > ==
1081 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1082 --
1083 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1084 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1085
1086 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1087 -- separators, where the predicate returns True for a separator element.
1088 -- The resulting components do not contain the separators. Two adjacent
1089 -- separators result in an empty component in the output. eg.
1090 --
1091 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1092 -- > splitWith (=='a') [] == []
1093 --
1094 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1095
1096 #if defined(__GLASGOW_HASKELL__)
1097 splitWith _pred (PS _ _ 0) = []
1098 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1099 where pred# c# = pred_ (W8# c#)
1100
1101 STRICT4(splitWith0)
1102 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1103 splitLoop pred' p 0 off' len' fp'
1104
1105 splitLoop :: (Word# -> Bool)
1106 -> Ptr Word8
1107 -> Int -> Int -> Int
1108 -> ForeignPtr Word8
1109 -> IO [ByteString]
1110
1111 splitLoop pred' p idx' off' len' fp'
1112 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1113 | idx' >= len' = return [PS fp' off' idx']
1114 | otherwise = do
1115 w <- peekElemOff p (off'+idx')
1116 if pred' (case w of W8# w# -> w#)
1117 then return (PS fp' off' idx' :
1118 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1119 else splitLoop pred' p (idx'+1) off' len' fp'
1120 {-# INLINE splitWith #-}
1121
1122 #else
1123 splitWith _ (PS _ _ 0) = []
1124 splitWith p ps = loop p ps
1125 where
1126 STRICT2(loop)
1127 loop q qs = if null rest then [chunk]
1128 else chunk : loop q (unsafeTail rest)
1129 where (chunk,rest) = break q qs
1130 #endif
1131
1132 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1133 -- argument, consuming the delimiter. I.e.
1134 --
1135 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1136 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1137 -- > split 'x' "x" == ["",""]
1138 --
1139 -- and
1140 --
1141 -- > join [c] . split c == id
1142 -- > split == splitWith . (==)
1143 --
1144 -- As for all splitting functions in this library, this function does
1145 -- not copy the substrings, it just constructs new 'ByteStrings' that
1146 -- are slices of the original.
1147 --
1148 split :: Word8 -> ByteString -> [ByteString]
1149 split _ (PS _ _ 0) = []
1150 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1151 let ptr = p `plusPtr` s
1152
1153 STRICT1(loop)
1154 loop n =
1155 let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1156 w (fromIntegral (l-n))
1157 in if q == nullPtr
1158 then [PS x (s+n) (l-n)]
1159 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1160
1161 return (loop 0)
1162 {-# INLINE split #-}
1163
1164 {-
1165 -- slower. but stays inside Haskell.
1166 split _ (PS _ _ 0) = []
1167 split (W8# w#) (PS fp off len) = splitWith' off len fp
1168 where
1169 splitWith' off' len' fp' = withPtr fp $ \p ->
1170 splitLoop p 0 off' len' fp'
1171
1172 splitLoop :: Ptr Word8
1173 -> Int -> Int -> Int
1174 -> ForeignPtr Word8
1175 -> IO [ByteString]
1176
1177 STRICT5(splitLoop)
1178 splitLoop p idx' off' len' fp'
1179 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1180 | idx' >= len' = return [PS fp' off' idx']
1181 | otherwise = do
1182 (W8# x#) <- peekElemOff p (off'+idx')
1183 if word2Int# w# ==# word2Int# x#
1184 then return (PS fp' off' idx' :
1185 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1186 else splitLoop p (idx'+1) off' len' fp'
1187 -}
1188
1189 {-
1190 -- | Like 'splitWith', except that sequences of adjacent separators are
1191 -- treated as a single separator. eg.
1192 --
1193 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1194 --
1195 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1196 tokens f = P.filter (not.null) . splitWith f
1197 {-# INLINE tokens #-}
1198 -}
1199
1200 -- | The 'group' function takes a ByteString and returns a list of
1201 -- ByteStrings such that the concatenation of the result is equal to the
1202 -- argument. Moreover, each sublist in the result contains only equal
1203 -- elements. For example,
1204 --
1205 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1206 --
1207 -- It is a special case of 'groupBy', which allows the programmer to
1208 -- supply their own equality test. It is about 40% faster than
1209 -- /groupBy (==)/
1210 group :: ByteString -> [ByteString]
1211 group xs
1212 | null xs = []
1213 | otherwise = ys : group zs
1214 where
1215 (ys, zs) = spanByte (unsafeHead xs) xs
1216
1217 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1218 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1219 groupBy k xs
1220 | null xs = []
1221 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1222 where
1223 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1224
1225 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1226 -- 'ByteString's and concatenates the list after interspersing the first
1227 -- argument between each element of the list.
1228 join :: ByteString -> [ByteString] -> ByteString
1229 join s = concat . (List.intersperse s)
1230 {-# INLINE [1] join #-}
1231
1232 {-# RULES
1233 "FPS specialise join c -> joinByte" forall c s1 s2 .
1234 join (singleton c) (s1 : s2 : []) = joinWithByte c s1 s2
1235 #-}
1236
1237 --
1238 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1239 -- with a char. Around 4 times faster than the generalised join.
1240 --
1241 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1242 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
1243 withForeignPtr ffp $ \fp ->
1244 withForeignPtr fgp $ \gp -> do
1245 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1246 poke (ptr `plusPtr` l) c
1247 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1248 where
1249 len = length f + length g + 1
1250 {-# INLINE joinWithByte #-}
1251
1252 -- ---------------------------------------------------------------------
1253 -- Indexing ByteStrings
1254
1255 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1256 index :: ByteString -> Int -> Word8
1257 index ps n
1258 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1259 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1260 ++ ", length = " ++ show (length ps))
1261 | otherwise = ps `unsafeIndex` n
1262 {-# INLINE index #-}
1263
1264 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1265 -- element in the given 'ByteString' which is equal to the query
1266 -- element, or 'Nothing' if there is no such element.
1267 -- This implementation uses memchr(3).
1268 elemIndex :: Word8 -> ByteString -> Maybe Int
1269 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1270 let p' = p `plusPtr` s
1271 q <- memchr p' c (fromIntegral l)
1272 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1273 {-# INLINE elemIndex #-}
1274
1275 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
1276 -- element in the given 'ByteString' which is equal to the query
1277 -- element, or 'Nothing' if there is no such element. The following
1278 -- holds:
1279 --
1280 -- > elemIndexEnd c xs ==
1281 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1282 --
1283 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
1284 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1285 go (p `plusPtr` s) (l-1)
1286 where
1287 STRICT2(go)
1288 go p i | i < 0 = return Nothing
1289 | otherwise = do ch' <- peekByteOff p i
1290 if ch == ch'
1291 then return $ Just i
1292 else go p (i-1)
1293 {-# INLINE elemIndexEnd #-}
1294
1295 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1296 -- the indices of all elements equal to the query element, in ascending order.
1297 -- This implementation uses memchr(3).
1298 elemIndices :: Word8 -> ByteString -> [Int]
1299 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1300 let ptr = p `plusPtr` s
1301
1302 STRICT1(loop)
1303 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
1304 w (fromIntegral (l - n))
1305 in if q == nullPtr
1306 then []
1307 else let i = q `minusPtr` ptr
1308 in i : loop (i+1)
1309 return $! loop 0
1310 {-# INLINE elemIndices #-}
1311
1312 {-
1313 -- much slower
1314 elemIndices :: Word8 -> ByteString -> [Int]
1315 elemIndices c ps = loop 0 ps
1316 where STRICT2(loop)
1317 loop _ ps' | null ps' = []
1318 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1319 | otherwise = loop (n+1) (unsafeTail ps')
1320 -}
1321
1322 -- | count returns the number of times its argument appears in the ByteString
1323 --
1324 -- > count = length . elemIndices
1325 --
1326 -- But more efficiently than using length on the intermediate list.
1327 count :: Word8 -> ByteString -> Int
1328 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1329 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1330 {-# INLINE count #-}
1331
1332 {-
1333 --
1334 -- around 30% slower
1335 --
1336 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1337 go (p `plusPtr` s) (fromIntegral m) 0
1338 where
1339 go :: Ptr Word8 -> CSize -> Int -> IO Int
1340 STRICT3(go)
1341 go p l i = do
1342 q <- memchr p w l
1343 if q == nullPtr
1344 then return i
1345 else do let k = fromIntegral $ q `minusPtr` p
1346 go (q `plusPtr` 1) (l-k-1) (i+1)
1347 -}
1348
1349 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1350 -- returns the index of the first element in the ByteString
1351 -- satisfying the predicate.
1352 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1353 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1354 where
1355 STRICT2(go)
1356 go ptr n | n >= l = return Nothing
1357 | otherwise = do w <- peek ptr
1358 if k w
1359 then return (Just n)
1360 else go (ptr `plusPtr` 1) (n+1)
1361 {-# INLINE findIndex #-}
1362
1363 -- | The 'findIndices' function extends 'findIndex', by returning the
1364 -- indices of all elements satisfying the predicate, in ascending order.
1365 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1366 findIndices p ps = loop 0 ps
1367 where
1368 STRICT2(loop)
1369 loop n qs | null qs = []
1370 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1371 | otherwise = loop (n+1) (unsafeTail qs)
1372
1373 -- ---------------------------------------------------------------------
1374 -- Searching ByteStrings
1375
1376 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1377 elem :: Word8 -> ByteString -> Bool
1378 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1379 {-# INLINE elem #-}
1380
1381 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1382 notElem :: Word8 -> ByteString -> Bool
1383 notElem c ps = not (elem c ps)
1384 {-# INLINE notElem #-}
1385
1386 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1387 -- returns a ByteString containing those characters that satisfy the
1388 -- predicate. This function is subject to array fusion.
1389 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1390 #if defined(LOOPU_FUSION)
1391 filter p = loopArr . loopU (filterEFL p) NoAcc
1392 #elif defined(LOOPUP_FUSION)
1393 filter p = loopArr . loopUp (filterEFL p) NoAcc
1394 #elif defined(LOOPNOACC_FUSION)
1395 filter p = loopArr . loopNoAcc (filterEFL p)
1396 #else
1397 filter f = loopArr . loopFilter f
1398 #endif
1399 {-# INLINE filter #-}
1400
1401 {-
1402 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1403 -- around 2x faster for some one-shot applications.
1404 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1405 filter' k ps@(PS x s l)
1406 | null ps = ps
1407 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
1408 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1409 return $! t `minusPtr` p -- actual length
1410 where
1411 STRICT3(go)
1412 go f t end | f == end = return t
1413 | otherwise = do
1414 w <- peek f
1415 if k w
1416 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1417 else go (f `plusPtr` 1) t end
1418 {-# INLINE filter' #-}
1419 -}
1420
1421 --
1422 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1423 -- case of filtering a single byte. It is more efficient to use
1424 -- /filterByte/ in this case.
1425 --
1426 -- > filterByte == filter . (==)
1427 --
1428 -- filterByte is around 10x faster, and uses much less space, than its
1429 -- filter equivalent
1430 filterByte :: Word8 -> ByteString -> ByteString
1431 filterByte w ps = replicate (count w ps) w
1432 {-# INLINE filterByte #-}
1433
1434 {-# RULES
1435 "FPS specialise filter (== x)" forall x.
1436 filter ((==) x) = filterByte x
1437 #-}
1438
1439 #if __GLASGOW_HASKELL__ >= 605
1440 {-# RULES
1441 "FPS specialise filter (== x)" forall x.
1442 filter (== x) = filterByte x
1443 #-}
1444 #endif
1445
1446 --
1447 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1448 -- case of filtering a single byte out of a list. It is more efficient
1449 -- to use /filterNotByte/ in this case.
1450 --
1451 -- > filterNotByte == filter . (/=)
1452 --
1453 -- filterNotByte is around 2x faster than its filter equivalent.
1454 filterNotByte :: Word8 -> ByteString -> ByteString
1455 filterNotByte w = filter (/= w)
1456 {-# INLINE filterNotByte #-}
1457
1458 {-# RULES
1459 "FPS specialise filter (x /=)" forall x.
1460 filter ((/=) x) = filterNotByte x
1461 #-}
1462
1463 #if __GLASGOW_HASKELL__ >= 605
1464 {-# RULES
1465 "FPS specialise filter (/= x)" forall x.
1466 filter (/= x) = filterNotByte x
1467 #-}
1468 #endif
1469
1470 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1471 -- and returns the first element in matching the predicate, or 'Nothing'
1472 -- if there is no such element.
1473 --
1474 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1475 --
1476 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1477 find f p = case findIndex f p of
1478 Just n -> Just (p `unsafeIndex` n)
1479 _ -> Nothing
1480 {-# INLINE find #-}
1481
1482 {-
1483 --
1484 -- fuseable, but we don't want to walk the whole array.
1485 --
1486 find k = foldl findEFL Nothing
1487 where findEFL a@(Just _) _ = a
1488 findEFL _ c | k c = Just c
1489 | otherwise = Nothing
1490 -}
1491
1492 -- ---------------------------------------------------------------------
1493 -- Searching for substrings
1494
1495 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1496 -- iff the first is a prefix of the second.
1497 isPrefixOf :: ByteString -> ByteString -> Bool
1498 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1499 | l1 == 0 = True
1500 | l2 < l1 = False
1501 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1502 withForeignPtr x2 $ \p2 -> do
1503 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1504 return $! i == 0
1505
1506 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1507 -- iff the first is a suffix of the second.
1508 --
1509 -- The following holds:
1510 --
1511 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1512 --
1513 -- However, the real implemenation uses memcmp to compare the end of the
1514 -- string only, with no reverse required..
1515 isSuffixOf :: ByteString -> ByteString -> Bool
1516 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1517 | l1 == 0 = True
1518 | l2 < l1 = False
1519 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1520 withForeignPtr x2 $ \p2 -> do
1521 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1522 return $! i == 0
1523
1524 -- | Check whether one string is a substring of another. @isSubstringOf
1525 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1526 isSubstringOf :: ByteString -- ^ String to search for.
1527 -> ByteString -- ^ String to search in.
1528 -> Bool
1529 isSubstringOf p s = not $ P.null $ findSubstrings p s
1530
1531 -- | Get the first index of a substring in another string,
1532 -- or 'Nothing' if the string is not found.
1533 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1534 findSubstring :: ByteString -- ^ String to search for.
1535 -> ByteString -- ^ String to seach in.
1536 -> Maybe Int
1537 findSubstring = (listToMaybe .) . findSubstrings
1538
1539 -- | Find the indexes of all (possibly overlapping) occurances of a
1540 -- substring in a string. This function uses the Knuth-Morris-Pratt
1541 -- string matching algorithm.
1542 findSubstrings :: ByteString -- ^ String to search for.
1543 -> ByteString -- ^ String to seach in.
1544 -> [Int]
1545
1546 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1547 where
1548 patc x = pat `unsafeIndex` x
1549 strc x = str `unsafeIndex` x
1550
1551 -- maybe we should make kmpNext a UArray before using it in search?
1552 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1553 kmpNextL p _ | null p = []
1554 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1555 ps = unsafeTail p
1556 x = if not (null ps) && unsafeHead ps == patc j'
1557 then kmpNext Array.! j' else j'
1558 in x:kmpNextL ps j'
1559 search i j = match ++ rest -- i: position in string, j: position in pattern
1560 where match = if j == m then [(i - j)] else []
1561 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1562 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1563 | otherwise = j
1564
1565 -- ---------------------------------------------------------------------
1566 -- Zipping
1567
1568 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1569 -- corresponding pairs of bytes. If one input ByteString is short,
1570 -- excess elements of the longer ByteString are discarded. This is
1571 -- equivalent to a pair of 'unpack' operations.
1572 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1573 zip ps qs
1574 | null ps || null qs = []
1575 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1576
1577 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1578 -- the first argument, instead of a tupling function. For example,
1579 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1580 -- corresponding sums.
1581 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1582 zipWith f ps qs
1583 | null ps || null qs = []
1584 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1585 #if defined(__GLASGOW_HASKELL__)
1586 {-# INLINE [1] zipWith #-}
1587 #endif
1588
1589 --
1590 -- | A specialised version of zipWith for the common case of a
1591 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
1592 -- are used to automatically covert zipWith into zipWith' when a pack is
1593 -- performed on the result of zipWith, but we also export it for
1594 -- convenience.
1595 --
1596 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
1597 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
1598 withForeignPtr fp $ \a ->
1599 withForeignPtr fq $ \b ->
1600 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
1601 where
1602 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
1603 STRICT4(zipWith_)
1604 zipWith_ n p1 p2 r
1605 | n >= len = return ()
1606 | otherwise = do
1607 x <- peekByteOff p1 n
1608 y <- peekByteOff p2 n
1609 pokeByteOff r n (f x y)
1610 zipWith_ (n+1) p1 p2 r
1611
1612 len = min l m
1613 {-# INLINE zipWith' #-}
1614
1615 {-# RULES
1616
1617 "FPS specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
1618 zipWith f p q = unpack (zipWith' f p q)
1619
1620 #-}
1621
1622 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1623 -- ByteStrings. Note that this performs two 'pack' operations.
1624 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1625 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1626 {-# INLINE unzip #-}
1627
1628 -- ---------------------------------------------------------------------
1629 -- Special lists
1630
1631 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1632 inits :: ByteString -> [ByteString]
1633 inits (PS x s l) = [PS x s n | n <- [0..l]]
1634
1635 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1636 tails :: ByteString -> [ByteString]
1637 tails p | null p = [empty]
1638 | otherwise = p : tails (unsafeTail p)
1639
1640 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1641
1642 -- ---------------------------------------------------------------------
1643 -- ** Ordered 'ByteString's
1644
1645 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1646 sort :: ByteString -> ByteString
1647 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
1648
1649 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1650 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
1651
1652 let STRICT2(go)
1653 go 256 _ = return ()
1654 go i ptr = do n <- peekElemOff arr i
1655 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1656 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1657 go 0 p
1658
1659 {-
1660 sort :: ByteString -> ByteString
1661 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
1662 memcpy p (f `plusPtr` s) l
1663 c_qsort p l -- inplace
1664 -}
1665
1666 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1667 --
1668 -- Try some linear sorts: radix, counting
1669 -- Or mergesort.
1670 --
1671 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1672 -- sortBy f ps = undefined
1673
1674 -- ---------------------------------------------------------------------
1675 -- Low level constructors
1676
1677 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1678 -- finalizer associated to it. The ByteString length is calculated using
1679 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1680 packCString :: CString -> ByteString
1681 packCString cstr = unsafePerformIO $ do
1682 fp <- newForeignPtr_ (castPtr cstr)
1683 l <- c_strlen cstr
1684 return $! PS fp 0 (fromIntegral l)
1685
1686 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1687 -- have /no/ finalizer associated with it. This operation has /O(1)/
1688 -- complexity as we already know the final size, so no /strlen(3)/ is
1689 -- required.
1690 packCStringLen :: CStringLen -> ByteString
1691 packCStringLen (ptr,len) = unsafePerformIO $ do
1692 fp <- newForeignPtr_ (castPtr ptr)
1693 return $! PS fp 0 (fromIntegral len)
1694
1695 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1696 -- have a @free(3)@ finalizer associated to it.
1697 packMallocCString :: CString -> ByteString
1698 packMallocCString cstr = unsafePerformIO $ do
1699 fp <- newForeignFreePtr (castPtr cstr)
1700 len <- c_strlen cstr
1701 return $! PS fp 0 (fromIntegral len)
1702
1703 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
1704 -- null-terminated @CString@. The @CString@ will be freed
1705 -- automatically. This is a memcpy(3).
1706 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1707 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1708 where alloc = withForeignPtr ps $ \p -> do
1709 buf <- c_malloc (fromIntegral l+1)
1710 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1711 poke (buf `plusPtr` l) (0::Word8) -- n.b.
1712 return (castPtr buf)
1713
1714 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1715 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1716 useAsCStringLen = unsafeUseAsCStringLen
1717
1718 --
1719 -- why were we doing this?
1720 --
1721 -- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1722 -- useAsCStringLen (PS ps s l) = bracket alloc (c_free.castPtr.fst)
1723 -- where
1724 -- alloc = withForeignPtr ps $ \p -> do
1725 -- buf <- c_malloc (fromIntegral l+1)
1726 -- memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1727 -- poke (buf `plusPtr` l) (0::Word8) -- n.b.
1728 -- return $! (castPtr buf, l)
1729 --
1730
1731 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1732 -- This is mainly useful to allow the rest of the data pointed
1733 -- to by the 'ByteString' to be garbage collected, for example
1734 -- if a large string has been read in, and only a small part of it
1735 -- is needed in the rest of the program.
1736 copy :: ByteString -> ByteString
1737 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
1738 memcpy p (f `plusPtr` s) (fromIntegral l)
1739
1740 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1741 -- CString is going to be deallocated from C land.
1742 copyCString :: CString -> IO ByteString
1743 copyCString cstr = do
1744 len <- c_strlen cstr
1745 copyCStringLen (cstr, fromIntegral len)
1746
1747 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1748 copyCStringLen :: CStringLen -> IO ByteString
1749 copyCStringLen (cstr, len) = create len $ \p ->
1750 memcpy p (castPtr cstr) (fromIntegral len)
1751
1752 -- ---------------------------------------------------------------------
1753 -- line IO
1754
1755 -- | Read a line from stdin.
1756 getLine :: IO ByteString
1757 getLine = hGetLine stdin
1758
1759 {-
1760 -- | Lazily construct a list of lines of ByteStrings. This will be much
1761 -- better on memory consumption than using 'hGetContents >>= lines'
1762 -- If you're considering this, a better choice might be to use
1763 -- Data.ByteString.Lazy
1764 hGetLines :: Handle -> IO [ByteString]
1765 hGetLines h = go
1766 where
1767 go = unsafeInterleaveIO $ do
1768 e <- hIsEOF h
1769 if e
1770 then return []
1771 else do
1772 x <- hGetLine h
1773 xs <- go
1774 return (x:xs)
1775 -}
1776
1777 -- | Read a line from a handle
1778
1779 hGetLine :: Handle -> IO ByteString
1780 #if !defined(__GLASGOW_HASKELL__)
1781 hGetLine h = do
1782 string <- System.IO.hGetLine h
1783 return $ packWith c2w string
1784 #else
1785 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1786 case haBufferMode handle_ of
1787 NoBuffering -> error "no buffering"
1788 _other -> hGetLineBuffered handle_
1789
1790 where
1791 hGetLineBuffered handle_ = do
1792 let ref = haBuffer handle_
1793 buf <- readIORef ref
1794 hGetLineBufferedLoop handle_ ref buf 0 []
1795
1796 hGetLineBufferedLoop handle_ ref
1797 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1798 len `seq` do
1799 off <- findEOL r w raw
1800 let new_len = len + off - r
1801 xs <- mkPS raw r off
1802
1803 -- if eol == True, then off is the offset of the '\n'
1804 -- otherwise off == w and the buffer is now empty.
1805 if off /= w
1806 then do if (w == off + 1)
1807 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1808 else writeIORef ref buf{ bufRPtr = off + 1 }
1809 mkBigPS new_len (xs:xss)
1810 else do
1811 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1812 buf{ bufWPtr=0, bufRPtr=0 }
1813 case maybe_buf of
1814 -- Nothing indicates we caught an EOF, and we may have a
1815 -- partial line to return.
1816 Nothing -> do
1817 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1818 if new_len > 0
1819 then mkBigPS new_len (xs:xss)
1820 else ioe_EOF
1821 Just new_buf ->
1822 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1823
1824 -- find the end-of-line character, if there is one
1825 findEOL r w raw
1826 | r == w = return w
1827 | otherwise = do
1828 (c,r') <- readCharFromBuffer raw r
1829 if c == '\n'
1830 then return r -- NB. not r': don't include the '\n'
1831 else findEOL r' w raw
1832
1833 maybeFillReadBuffer fd is_line is_stream buf = catch
1834 (do buf' <- fillReadBuffer fd is_line is_stream buf
1835 return (Just buf'))
1836 (\e -> if isEOFError e then return Nothing else ioError e)
1837
1838 -- TODO, rewrite to use normal memcpy
1839 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1840 mkPS buf start end =
1841 let len = end - start
1842 in create len $ \p -> do
1843 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1844 return ()
1845
1846 mkBigPS :: Int -> [ByteString] -> IO ByteString
1847 mkBigPS _ [ps] = return ps
1848 mkBigPS _ pss = return $! concat (P.reverse pss)
1849
1850 #endif
1851
1852 -- ---------------------------------------------------------------------
1853 -- Block IO
1854
1855 -- | Outputs a 'ByteString' to the specified 'Handle'.
1856 hPut :: Handle -> ByteString -> IO ()
1857 hPut _ (PS _ _ 0) = return ()
1858 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1859
1860 -- | A synonym for @hPut@, for compatibility
1861 hPutStr :: Handle -> ByteString -> IO ()
1862 hPutStr = hPut
1863
1864 -- | Write a ByteString to a handle, appending a newline byte
1865 hPutStrLn :: Handle -> ByteString -> IO ()
1866 hPutStrLn h ps
1867 | length ps < 1024 = hPut h (ps `snoc` 0x0a)
1868 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
1869
1870 -- | Write a ByteString to stdout
1871 putStr :: ByteString -> IO ()
1872 putStr = hPut stdout
1873
1874 -- | Write a ByteString to stdout, appending a newline byte
1875 putStrLn :: ByteString -> IO ()
1876 putStrLn = hPutStrLn stdout
1877
1878 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1879 -- is far more efficient than reading the characters into a 'String'
1880 -- and then using 'pack'.
1881 hGet :: Handle -> Int -> IO ByteString
1882 hGet _ 0 = return empty
1883 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
1884
1885 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1886 -- waiting for data to become available, instead it returns only whatever data
1887 -- is available.
1888 hGetNonBlocking :: Handle -> Int -> IO ByteString
1889 #if defined(__GLASGOW_HASKELL__)
1890 hGetNonBlocking _ 0 = return empty
1891 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
1892 #else
1893 hGetNonBlocking = hGet
1894 #endif
1895
1896 -- | Read entire handle contents into a 'ByteString'.
1897 -- This function reads chunks at a time, doubling the chunksize on each
1898 -- read. The final buffer is then realloced to the appropriate size. For
1899 -- files > half of available memory, this may lead to memory exhaustion.
1900 -- Consider using 'readFile' in this case.
1901 --
1902 -- As with 'hGet', the string representation in the file is assumed to
1903 -- be ISO-8859-1.
1904 --
1905 hGetContents :: Handle -> IO ByteString
1906 hGetContents h = do
1907 let start_size = 1024
1908 p <- mallocArray start_size
1909 i <- hGetBuf h p start_size
1910 if i < start_size
1911 then do p' <- reallocArray p i
1912 fp <- newForeignFreePtr p'
1913 return $! PS fp 0 i
1914 else f p start_size
1915 where
1916 f p s = do
1917 let s' = 2 * s
1918 p' <- reallocArray p s'
1919 i <- hGetBuf h (p' `plusPtr` s) s
1920 if i < s
1921 then do let i' = s + i
1922 p'' <- reallocArray p' i'
1923 fp <- newForeignFreePtr p''
1924 return $! PS fp 0 i'
1925 else f p' s'
1926
1927 -- | getContents. Equivalent to hGetContents stdin
1928 getContents :: IO ByteString
1929 getContents = hGetContents stdin
1930
1931 -- | The interact function takes a function of type @ByteString -> ByteString@
1932 -- as its argument. The entire input from the standard input device is passed
1933 -- to this function as its argument, and the resulting string is output on the
1934 -- standard output device. It's great for writing one line programs!
1935 interact :: (ByteString -> ByteString) -> IO ()
1936 interact transformer = putStr . transformer =<< getContents
1937
1938 -- | Read an entire file strictly into a 'ByteString'. This is far more
1939 -- efficient than reading the characters into a 'String' and then using
1940 -- 'pack'. It also may be more efficient than opening the file and
1941 -- reading it using hGet. Files are read using 'binary mode' on Windows,
1942 -- for 'text mode' use the Char8 version of this function.
1943 readFile :: FilePath -> IO ByteString
1944 readFile f = bracket (openBinaryFile f ReadMode) hClose
1945 (\h -> hFileSize h >>= hGet h . fromIntegral)
1946
1947 -- | Write a 'ByteString' to a file.
1948 writeFile :: FilePath -> ByteString -> IO ()
1949 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
1950 (\h -> hPut h txt)
1951
1952 -- | Append a 'ByteString' to a file.
1953 appendFile :: FilePath -> ByteString -> IO ()
1954 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
1955 (\h -> hPut h txt)
1956
1957 {-
1958 --
1959 -- Disable until we can move it into a portable .hsc file
1960 --
1961
1962 -- | Like readFile, this reads an entire file directly into a
1963 -- 'ByteString', but it is even more efficient. It involves directly
1964 -- mapping the file to memory. This has the advantage that the contents
1965 -- of the file never need to be copied. Also, under memory pressure the
1966 -- page may simply be discarded, while in the case of readFile it would
1967 -- need to be written to swap. If you read many small files, mmapFile
1968 -- will be less memory-efficient than readFile, since each mmapFile
1969 -- takes up a separate page of memory. Also, you can run into bus
1970 -- errors if the file is modified. As with 'readFile', the string
1971 -- representation in the file is assumed to be ISO-8859-1.
1972 --
1973 -- On systems without mmap, this is the same as a readFile.
1974 --
1975 mmapFile :: FilePath -> IO ByteString
1976 mmapFile f = mmap f >>= \(fp,l) -> return $! PS fp 0 l
1977
1978 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1979 mmap f = do
1980 h <- openBinaryFile f ReadMode
1981 l <- fromIntegral `fmap` hFileSize h
1982 -- Don't bother mmaping small files because each mmapped file takes up
1983 -- at least one full VM block.
1984 if l < mmap_limit
1985 then do thefp <- mallocByteString l
1986 withForeignPtr thefp $ \p-> hGetBuf h p l
1987 hClose h
1988 return (thefp, l)
1989 else do
1990 -- unix only :(
1991 fd <- fromIntegral `fmap` handleToFd h
1992 p <- my_mmap l fd
1993 fp <- if p == nullPtr
1994 then do thefp <- mallocByteString l
1995 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1996 return thefp
1997 else do
1998 -- The munmap leads to crashes on OpenBSD.
1999 -- maybe there's a use after unmap in there somewhere?
2000 -- Bulat suggests adding the hClose to the
2001 -- finalizer, excellent idea.
2002 #if !defined(__OpenBSD__)
2003 let unmap = c_munmap p l >> return ()
2004 #else
2005 let unmap = return ()
2006 #endif
2007 fp <- newForeignPtr p unmap
2008 return fp
2009 c_close fd
2010 hClose h
2011 return (fp, l)
2012 where mmap_limit = 16*1024
2013 -}
2014
2015 -- ---------------------------------------------------------------------
2016 -- Internal utilities
2017
2018 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
2019 -- of the string if no element is found, rather than Nothing.
2020 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2021 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
2022 where
2023 STRICT2(go)
2024 go ptr n | n >= l = return l
2025 | otherwise = do w <- peek ptr
2026 if k w
2027 then return n
2028 else go (ptr `plusPtr` 1) (n+1)
2029 {-# INLINE findIndexOrEnd #-}
2030
2031 -- | Perform an operation with a temporary ByteString
2032 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2033 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2034 {-# INLINE withPtr #-}
2035
2036 -- Common up near identical calls to `error' to reduce the number
2037 -- constant strings created when compiled:
2038 errorEmptyList :: String -> a
2039 errorEmptyList fun = moduleError fun "empty ByteString"
2040 {-# NOINLINE errorEmptyList #-}
2041
2042 moduleError :: String -> String -> a
2043 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2044 {-# NOINLINE moduleError #-}
2045
2046 -- Find from the end of the string using predicate
2047 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2048 STRICT2(findFromEndUntil)
2049 findFromEndUntil f ps@(PS x s l) =
2050 if null ps then 0
2051 else if f (last ps) then l
2052 else findFromEndUntil f (PS x s (l-1))
2053
2054 {-# INLINE newForeignFreePtr #-}
2055 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2056 newForeignFreePtr p = newForeignPtr c_free_finalizer p