Add array fusion versions of map, filter and foldl
[packages/pretty.git] / Data / ByteString.hs
1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
2 --
3 -- Module : ByteString
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
9 --
10 -- Array fusion code:
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
13 --
14 -- License : BSD-style
15 --
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
20 --
21
22 --
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
28 --
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
31 --
32 -- > import qualified Data.ByteString as B
33 --
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
37 --
38
39 module Data.ByteString (
40
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
43
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
51
52 -- * Basic interface
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: Word8 -> ByteString -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
62
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
67
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
73
74 -- * Reducing 'ByteString's
75 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
76 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
77 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
78 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79
80 -- ** Special folds
81 concat, -- :: [ByteString] -> ByteString
82 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
83 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
84 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
85 maximum, -- :: ByteString -> Word8
86 minimum, -- :: ByteString -> Word8
87 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
88
89 -- * Generating and unfolding ByteStrings
90 replicate, -- :: Int -> Word8 -> ByteString
91 unfoldrN, -- :: (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
92
93 -- * Substrings
94
95 -- ** Breaking strings
96 take, -- :: Int -> ByteString -> ByteString
97 drop, -- :: Int -> ByteString -> ByteString
98 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
99 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
100 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
101 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
102 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
103 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
104
105 -- ** Breaking and dropping on specific bytes
106 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
107 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
108 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
109 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
110
111 -- ** Breaking into many substrings
112 split, -- :: Word8 -> ByteString -> [ByteString]
113 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
114 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
115 group, -- :: ByteString -> [ByteString]
116 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
117
118 -- ** Joining strings
119 join, -- :: ByteString -> [ByteString] -> ByteString
120 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
121
122 -- * Indexing ByteStrings
123 index, -- :: ByteString -> Int -> Word8
124 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
125 elemIndices, -- :: Word8 -> ByteString -> [Int]
126 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
127 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
128 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
129 count, -- :: Word8 -> ByteString -> Int
130
131 -- * Ordered ByteStrings
132 sort, -- :: ByteString -> ByteString
133
134 -- * Searching ByteStrings
135
136 -- ** Searching by equality
137 -- | These functions use memchr(3) to efficiently search the ByteString
138
139 elem, -- :: Word8 -> ByteString -> Bool
140 notElem, -- :: Word8 -> ByteString -> Bool
141 filterByte, -- :: Word8 -> ByteString -> ByteString
142 filterNotByte, -- :: Word8 -> ByteString -> ByteString
143
144 -- ** Searching with a predicate
145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
146 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
147
148 -- ** Prefixes and suffixes
149 -- | These functions use memcmp(3) to efficiently compare substrings
150 isPrefixOf, -- :: ByteString -> ByteString -> Bool
151 isSuffixOf, -- :: ByteString -> ByteString -> Bool
152
153 -- ** Search for arbitrary substrings
154 isSubstringOf, -- :: ByteString -> ByteString -> Bool
155 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
156 findSubstrings, -- :: ByteString -> ByteString -> [Int]
157
158 -- * Zipping and unzipping ByteStrings
159 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
160 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
161 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
162
163 -- * Unchecked access
164 unsafeHead, -- :: ByteString -> Word8
165 unsafeTail, -- :: ByteString -> ByteString
166 unsafeIndex, -- :: ByteString -> Int -> Word8
167
168 -- * Low level introduction and elimination
169 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
170 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
171 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
172 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
173 skipIndex, -- :: ByteString -> Int
174
175 -- ** Packing CStrings and pointers
176 packCString, -- :: CString -> ByteString
177 packCStringLen, -- :: CString -> ByteString
178 packMallocCString, -- :: CString -> ByteString
179
180 #if defined(__GLASGOW_HASKELL__)
181 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
182 packAddress, -- :: Addr# -> ByteString
183 unsafePackAddress, -- :: Int -> Addr# -> ByteString
184 unsafeFinalize, -- :: ByteString -> IO ()
185 #endif
186
187 -- ** Using ByteStrings as CStrings
188 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
189 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
190 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
191
192 -- ** Copying ByteStrings
193 -- | These functions perform memcpy(3) operations
194 copy, -- :: ByteString -> ByteString
195 copyCString, -- :: CString -> ByteString
196 copyCStringLen, -- :: CStringLen -> ByteString
197
198 -- * I\/O with @ByteString@s
199
200 -- ** Standard input and output
201
202 #if defined(__GLASGOW_HASKELL__)
203 getLine, -- :: IO ByteString
204 #endif
205 getContents, -- :: IO ByteString
206 putStr, -- :: ByteString -> IO ()
207 putStrLn, -- :: ByteString -> IO ()
208
209 -- ** Files
210 readFile, -- :: FilePath -> IO ByteString
211 writeFile, -- :: FilePath -> ByteString -> IO ()
212 -- mmapFile, -- :: FilePath -> IO ByteString
213
214 -- ** I\/O with Handles
215 #if defined(__GLASGOW_HASKELL__)
216 getArgs, -- :: IO [ByteString]
217 hGetLine, -- :: Handle -> IO ByteString
218 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
219 #endif
220 hGetContents, -- :: Handle -> IO ByteString
221 hGet, -- :: Handle -> Int -> IO ByteString
222 hPut, -- :: Handle -> ByteString -> IO ()
223
224 -- * Fusion utilities
225 #if defined(__GLASGOW_HASKELL__)
226 unpackList, -- eek, otherwise it gets thrown away by the simplifier
227 #endif
228
229 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
230 loopU, mapEFL, filterEFL, foldEFL,
231 filterF, mapF
232
233 ) where
234
235 import qualified Prelude as P
236 import Prelude hiding (reverse,head,tail,last,init,null
237 ,length,map,lines,foldl,foldr,unlines
238 ,concat,any,take,drop,splitAt,takeWhile
239 ,dropWhile,span,break,elem,filter,maximum
240 ,minimum,all,concatMap,foldl1,foldr1
241 ,readFile,writeFile,replicate
242 ,getContents,getLine,putStr,putStrLn
243 ,zip,zipWith,unzip,notElem)
244
245 import qualified Data.List as List
246
247 import Data.Char
248 import Data.Word (Word8)
249 import Data.Maybe (listToMaybe)
250 import Data.Array (listArray)
251 import qualified Data.Array as Array ((!))
252
253 -- Control.Exception.bracket not available in yhc or nhc
254 import Control.Exception (bracket)
255 import Control.Monad (when)
256
257 import Foreign.C.String (CString, CStringLen)
258 import Foreign.C.Types (CSize, CInt)
259 import Foreign.ForeignPtr
260 import Foreign.Marshal.Array
261 import Foreign.Ptr
262 import Foreign.Storable (Storable(..))
263
264 -- hGetBuf and hPutBuf not available in yhc or nhc
265 import System.IO (stdin,stdout,hClose,hFileSize
266 ,hGetBuf,hPutBuf,openBinaryFile
267 ,Handle,IOMode(..))
268
269 #if !defined(__GLASGOW_HASKELL__)
270 import System.IO.Unsafe
271 #endif
272
273 #if defined(__GLASGOW_HASKELL__)
274
275 import Data.Generics (Data(..), Typeable(..))
276
277 import System.IO (hGetBufNonBlocking)
278 import System.IO.Error (isEOFError)
279
280 import Foreign.Marshal (alloca)
281 import qualified Foreign.Concurrent as FC (newForeignPtr)
282
283 import GHC.Handle
284 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
285 import GHC.Base (build, unsafeChr)
286 import GHC.Word hiding (Word8)
287 import GHC.Ptr (Ptr(..))
288 import GHC.ST (ST(..))
289 import GHC.IOBase
290
291 #endif
292
293 -- CFILES stuff is Hugs only
294 {-# CFILES cbits/fpstring.c #-}
295
296 -- -----------------------------------------------------------------------------
297 --
298 -- Useful macros, until we have bang patterns
299 --
300
301 #define STRICT1(f) f a | a `seq` False = undefined
302 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
303 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
304 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
305 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
306
307 -- -----------------------------------------------------------------------------
308
309 -- | A space-efficient representation of a Word8 vector, supporting many
310 -- efficient operations. A 'ByteString' contains 8-bit characters only.
311 --
312 -- Instances of Eq, Ord, Read, Show, Data, Typeable
313 --
314 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
315 {-# UNPACK #-} !Int
316 {-# UNPACK #-} !Int
317
318 #if defined(__GLASGOW_HASKELL__)
319 deriving (Data, Typeable)
320 #endif
321
322 instance Eq ByteString
323 where (==) = eq
324
325 instance Ord ByteString
326 where compare = compareBytes
327
328 instance Show ByteString where
329 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
330
331 instance Read ByteString where
332 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
333
334 {-
335 instance Arbitrary PackedString where
336 arbitrary = P.pack `fmap` arbitrary
337 coarbitrary s = coarbitrary (P.unpack s)
338 -}
339
340 -- | /O(n)/ Equality on the 'ByteString' type.
341 eq :: ByteString -> ByteString -> Bool
342 eq a@(PS p s l) b@(PS p' s' l')
343 | l /= l' = False -- short cut on length
344 | p == p' && s == s' = True -- short cut for the same string
345 | otherwise = compareBytes a b == EQ
346 {-# INLINE eq #-}
347
348 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
349 compareBytes :: ByteString -> ByteString -> Ordering
350 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
351 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
352 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
353 | otherwise = inlinePerformIO $
354 withForeignPtr x1 $ \p1 ->
355 withForeignPtr x2 $ \p2 -> do
356 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (min l1 l2)
357 return $ case i `compare` 0 of
358 EQ -> l1 `compare` l2
359 x -> x
360 {-# INLINE compareBytes #-}
361
362 {-
363 --
364 -- About 4x slower over 32M
365 --
366 compareBytes :: ByteString -> ByteString -> Ordering
367 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
368 withForeignPtr fp1 $ \p1 ->
369 withForeignPtr fp2 $ \p2 ->
370 cmp (p1 `plusPtr` off1)
371 (p2 `plusPtr` off2) 0 len1 len2
372
373 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
374 STRICT5(cmp)
375 cmp p1 p2 n len1 len2
376 | n == len1 = if n == len2 then return EQ else return LT
377 | n == len2 = return GT
378 | otherwise = do
379 (a :: Word8) <- peekByteOff p1 n
380 (b :: Word8) <- peekByteOff p2 n
381 case a `compare` b of
382 EQ -> cmp p1 p2 (n+1) len1 len2
383 LT -> return LT
384 GT -> return GT
385 {-# INLINE compareBytes #-}
386 -}
387
388 -- -----------------------------------------------------------------------------
389 -- Introducing and eliminating 'ByteString's
390
391 -- | /O(1)/ The empty 'ByteString'
392 empty :: ByteString
393 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
394 {-# NOINLINE empty #-}
395
396 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
397 packByte :: Word8 -> ByteString
398 packByte c = inlinePerformIO $ mallocByteString 2 >>= \fp -> do
399 withForeignPtr fp $ \p -> poke p c
400 return $ PS fp 0 1
401 {-# NOINLINE packByte #-}
402
403 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
404 --
405 -- For applications with large numbers of string literals, pack can be a
406 -- bottleneck. In such cases, consider using packAddress (GHC only).
407 pack :: [Word8] -> ByteString
408
409 #if !defined(__GLASGOW_HASKELL__)
410
411 pack str = create (P.length str) $ \p -> go p str
412 where
413 go _ [] = return ()
414 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
415
416 #else /* hack away */
417
418 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
419 where
420 go _ _ [] = return ()
421 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
422
423 writeByte p i c = ST $ \s# ->
424 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
425
426 #endif
427
428 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
429 unpack :: ByteString -> [Word8]
430
431 #if !defined(__GLASGOW_HASKELL__)
432
433 unpack (PS _ _ 0) = []
434 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
435 go (p `plusPtr` s) (l - 1) []
436 where
437 STRICT3(go)
438 go p 0 acc = peek p >>= \e -> return (e : acc)
439 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
440 {-# INLINE unpack #-}
441
442 #else
443
444 unpack ps = build (unpackFoldr ps)
445 {-# INLINE unpack #-}
446
447 unpackList :: ByteString -> [Word8]
448 unpackList (PS fp off len) = withPtr fp $ \p -> do
449 let STRICT3(loop)
450 loop _ (-1) acc = return acc
451 loop q n acc = do
452 a <- peekByteOff q n
453 loop q (n-1) (a : acc)
454 loop (p `plusPtr` off) (len-1) []
455
456 {-# RULES
457 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
458 #-}
459
460 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
461 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
462 let STRICT3(loop)
463 loop _ (-1) acc = return acc
464 loop q n acc = do
465 a <- peekByteOff q n
466 loop q (n-1) (a `f` acc)
467 loop (p `plusPtr` off) (len-1) ch
468 {-# INLINE [0] unpackFoldr #-}
469
470 #endif
471
472 ------------------------------------------------------------------------
473
474 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
475 -- conversion function
476 packWith :: (a -> Word8) -> [a] -> ByteString
477 packWith k str = create (P.length str) $ \p -> go p str
478 where
479 STRICT2(go)
480 go _ [] = return ()
481 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
482 {-# INLINE packWith #-}
483 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
484
485 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
486 unpackWith :: (Word8 -> a) -> ByteString -> [a]
487 unpackWith _ (PS _ _ 0) = []
488 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
489 go (p `plusPtr` s) (l - 1) []
490 where
491 STRICT3(go)
492 go p 0 acc = peek p >>= \e -> return (k e : acc)
493 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
494 {-# INLINE unpackWith #-}
495 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
496
497 -- ---------------------------------------------------------------------
498 -- Basic interface
499
500 -- | /O(1)/ Test whether a ByteString is empty.
501 null :: ByteString -> Bool
502 null (PS _ _ l) = l == 0
503 {-# INLINE null #-}
504
505 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
506 length :: ByteString -> Int
507 length (PS _ _ l) = l
508 {-# INLINE length #-}
509
510 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
511 -- complexity, as it requires a memcpy.
512 cons :: Word8 -> ByteString -> ByteString
513 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
514 memcpy (p `plusPtr` 1) (f `plusPtr` s) l
515 poke p c
516 {-# INLINE cons #-}
517
518 -- todo fuse
519
520 -- | /O(n)/ Append a byte to the end of a 'ByteString'
521 snoc :: ByteString -> Word8 -> ByteString
522 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
523 memcpy p (f `plusPtr` s) l
524 poke (p `plusPtr` l) c
525 {-# INLINE snoc #-}
526
527 -- todo fuse
528
529 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
530 head :: ByteString -> Word8
531 head ps@(PS x s _)
532 | null ps = errorEmptyList "head"
533 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
534 {-# INLINE head #-}
535
536 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
537 tail :: ByteString -> ByteString
538 tail (PS p s l)
539 | l <= 0 = errorEmptyList "tail"
540 | otherwise = PS p (s+1) (l-1)
541 {-# INLINE tail #-}
542
543 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
544 last :: ByteString -> Word8
545 last ps@(PS x s l)
546 | null ps = errorEmptyList "last"
547 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
548 {-# INLINE last #-}
549
550 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
551 init :: ByteString -> ByteString
552 init (PS p s l)
553 | l <= 0 = errorEmptyList "init"
554 | otherwise = PS p s (l-1)
555 {-# INLINE init #-}
556
557 -- | /O(n)/ Append two ByteStrings
558 append :: ByteString -> ByteString -> ByteString
559 append xs ys | null xs = ys
560 | null ys = xs
561 | otherwise = concat [xs,ys]
562 {-# INLINE append #-}
563
564 {-
565 --
566 -- About 30% faster, but allocating in a big chunk isn't good for memory use
567 --
568 append :: ByteString -> ByteString -> ByteString
569 append xs@(PS ffp s l) ys@(PS fgp t m)
570 | null xs = ys
571 | null ys = xs
572 | otherwise = create len $ \ptr ->
573 withForeignPtr ffp $ \fp ->
574 withForeignPtr fgp $ \gp -> do
575 memcpy ptr (fp `plusPtr` s) l
576 memcpy (ptr `plusPtr` l) (gp `plusPtr` t) m
577 where len = length xs + length ys
578 -}
579
580 -- ---------------------------------------------------------------------
581 -- Transformations
582
583 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
584 -- element of @xs@. This function is subject to array fusion.
585 map :: (Word8 -> Word8) -> ByteString -> ByteString
586 map f = loopArr . loopU (mapEFL f) noAL
587 {-# INLINE map #-}
588
589 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
590 -- slightly faster for one-shot cases.
591 mapF :: (Word8 -> Word8) -> ByteString -> ByteString
592 STRICT2(mapF)
593 mapF f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
594 np <- mallocByteString (len+1)
595 withForeignPtr np $ \p -> do
596 map_ 0 (a `plusPtr` s) p
597 return (PS np 0 len)
598 where
599 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
600 STRICT3(map_)
601 map_ n p1 p2
602 | n >= len = return ()
603 | otherwise = do
604 x <- peekByteOff p1 n
605 pokeByteOff p2 n (f x)
606 map_ (n+1) p1 p2
607 {-# INLINE mapF #-}
608
609 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
610 reverse :: ByteString -> ByteString
611 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
612 c_reverse p (f `plusPtr` s) l
613
614 {-
615 reverse = pack . P.reverse . unpack
616 -}
617
618 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
619 -- 'ByteString' and \`intersperses\' that byte between the elements of
620 -- the 'ByteString'. It is analogous to the intersperse function on
621 -- Lists.
622 intersperse :: Word8 -> ByteString -> ByteString
623 intersperse c ps@(PS x s l)
624 | length ps < 2 = ps
625 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
626 c_intersperse p (f `plusPtr` s) l c
627
628 {-
629 intersperse c = pack . List.intersperse c . unpack
630 -}
631
632 -- | The 'transpose' function transposes the rows and columns of its
633 -- 'ByteString' argument.
634 transpose :: [ByteString] -> [ByteString]
635 transpose ps = P.map pack (List.transpose (P.map unpack ps))
636
637 -- ---------------------------------------------------------------------
638 -- Reducing 'ByteString's
639
640 -- | 'foldl', applied to a binary operator, a starting value (typically
641 -- the left-identity of the operator), and a ByteString, reduces the
642 -- ByteString using the binary operator, from left to right.
643 -- This function is subject to array fusion.
644 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
645 foldl f z = loopAcc . loopU (foldEFL f) z
646 {-# INLINE foldl #-}
647
648 {-
649 --
650 -- About twice as fast with 6.4.1, but not fuseable
651 -- A simple fold . map is enough to make it worth while.
652 --
653 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
654 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
655 where
656 STRICT3(lgo)
657 lgo z p q | p == q = return z
658 | otherwise = do c <- peek p
659 lgo (f z c) (p `plusPtr` 1) q
660 -}
661
662 -- | 'foldr', applied to a binary operator, a starting value
663 -- (typically the right-identity of the operator), and a ByteString,
664 -- reduces the ByteString using the binary operator, from right to left.
665 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
666 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
667 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
668 where
669 STRICT2(go)
670 go p q | p == q = return z
671 | otherwise = do c <- peek p
672 ws <- go (p `plusPtr` 1) q
673 return $ c `k` ws
674
675 -- | 'foldl1' is a variant of 'foldl' that has no starting value
676 -- argument, and thus must be applied to non-empty 'ByteStrings'.
677 -- This function is subject to array fusion.
678 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
679 foldl1 f ps
680 | null ps = errorEmptyList "foldl1"
681 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
682
683 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
684 -- and thus must be applied to non-empty 'ByteString's
685 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
686 foldr1 f ps
687 | null ps = errorEmptyList "foldr1"
688 | otherwise = foldr f (last ps) (init ps)
689
690 -- ---------------------------------------------------------------------
691 -- Special folds
692
693 -- | /O(n)/ Concatenate a list of ByteStrings.
694 concat :: [ByteString] -> ByteString
695 concat [] = empty
696 concat [ps] = ps
697 concat xs = inlinePerformIO $ do
698 let start_size = 1024
699 p <- mallocArray start_size
700 f p 0 1024 xs
701
702 where f ptr len _ [] = do
703 ptr' <- reallocArray ptr (len+1)
704 poke (ptr' `plusPtr` len) (0::Word8) -- XXX so CStrings work
705 fp <- newForeignFreePtr ptr'
706 return $ PS fp 0 len
707
708 f ptr len to_go pss@(PS p s l:pss')
709 | l <= to_go = do withForeignPtr p $ \pf ->
710 memcpy (ptr `plusPtr` len)
711 (pf `plusPtr` s) l
712 f ptr (len + l) (to_go - l) pss'
713
714 | otherwise = do let new_total = ((len + to_go) * 2) `max` (len + l)
715 ptr' <- reallocArray ptr new_total
716 f ptr' len (new_total - len) pss
717
718 -- | Map a function over a 'ByteString' and concatenate the results
719 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
720 concatMap f = foldr (append . f) empty
721
722 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
723 -- any element of the 'ByteString' satisfies the predicate.
724 any :: (Word8 -> Bool) -> ByteString -> Bool
725 any _ (PS _ _ 0) = False
726 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
727 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
728 where
729 STRICT2(go)
730 go p q | p == q = return False
731 | otherwise = do c <- peek p
732 if f c then return True
733 else go (p `plusPtr` 1) q
734
735 -- todo fuse
736
737 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
738 -- if all elements of the 'ByteString' satisfy the predicate.
739 all :: (Word8 -> Bool) -> ByteString -> Bool
740 all _ (PS _ _ 0) = True
741 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
742 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
743 where
744 STRICT2(go)
745 go p q | p == q = return True -- end of list
746 | otherwise = do c <- peek p
747 if f c
748 then go (p `plusPtr` 1) q
749 else return False
750 -- todo fuse
751
752 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
753 maximum :: ByteString -> Word8
754 maximum xs@(PS x s l)
755 | null xs = errorEmptyList "maximum"
756 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
757 return $ c_maximum (p `plusPtr` s) l
758 {-# INLINE maximum #-}
759
760 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
761 minimum :: ByteString -> Word8
762 minimum xs@(PS x s l)
763 | null xs = errorEmptyList "minimum"
764 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
765 return $ c_minimum (p `plusPtr` s) l
766 {-# INLINE minimum #-}
767
768 -- fusion is too slow here (10x)
769
770 {-
771 maximum xs@(PS x s l)
772 | null xs = errorEmptyList "maximum"
773 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
774 w <- peek p
775 maximum_ (p `plusPtr` s) 0 l w
776
777 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
778 STRICT4(maximum_)
779 maximum_ ptr n m c
780 | n >= m = return c
781 | otherwise = do w <- peekByteOff ptr n
782 maximum_ ptr (n+1) m (if w > c then w else c)
783
784 minimum xs@(PS x s l)
785 | null xs = errorEmptyList "minimum"
786 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
787 w <- peek p
788 minimum_ (p `plusPtr` s) 0 l w
789
790 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
791 STRICT4(minimum_)
792 minimum_ ptr n m c
793 | n >= m = return c
794 | otherwise = do w <- peekByteOff ptr n
795 minimum_ ptr (n+1) m (if w < c then w else c)
796 -}
797
798 -- | /O(n)/ map Word8 functions, provided with the index at each position
799 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
800 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
801 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
802 where
803 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
804 STRICT4(go)
805 go n f t p | f == p = return ()
806 | otherwise = do w <- peek f
807 ((poke t) . k n) w
808 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
809
810 -- ---------------------------------------------------------------------
811 -- Unfolds and replicates
812
813 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
814 -- the value of every element. The following holds:
815 --
816 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
817 --
818 -- This implemenation uses @memset(3)@
819 replicate :: Int -> Word8 -> ByteString
820 replicate w c = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
821
822 {-
823 -- About 5x slower
824 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
825 where
826 STRICT2(go)
827 go _ 0 = return w
828 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
829 -}
830
831 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
832 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
833 -- the element and returns 'Nothing' if it is done producing the
834 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
835 -- prepending to the ByteString and @b@ is used as the next element in a
836 -- recursive call.
837 --
838 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
839 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
840 -- final size of the ByteString as an argument. 'cons' can then be
841 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
842 -- linear complexity. The depth of the recursion is limited to this
843 -- size, but may be less. For lazy, infinite unfoldr, use
844 -- 'Data.List.unfoldr' (from 'Data.List').
845 --
846 -- Examples:
847 --
848 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
849 --
850 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
851 --
852 -- > unfoldrN n == take n $ List.unfoldr
853 unfoldrN :: Int -> (Word8 -> Maybe (Word8, Word8)) -> Word8 -> ByteString
854 unfoldrN i f w = inlinePerformIO $ generate i $ \p -> go p w 0
855 where
856 STRICT3(go)
857 go q c n | n == i = return n -- stop if we reach `i'
858 | otherwise = case f c of
859 Nothing -> return n
860 Just (a,new_c) -> do
861 poke q a
862 go (q `plusPtr` 1) new_c (n+1)
863
864 -- ---------------------------------------------------------------------
865 -- Substrings
866
867 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
868 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
869 take :: Int -> ByteString -> ByteString
870 take n ps@(PS x s l)
871 | n < 0 = empty
872 | n >= l = ps
873 | otherwise = PS x s n
874 {-# INLINE take #-}
875
876 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
877 -- elements, or @[]@ if @n > 'length' xs@.
878 drop :: Int -> ByteString -> ByteString
879 drop n ps@(PS x s l)
880 | n <= 0 = ps
881 | n > l = empty
882 | otherwise = PS x (s+n) (l-n)
883 {-# INLINE drop #-}
884
885 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
886 splitAt :: Int -> ByteString -> (ByteString, ByteString)
887 splitAt n ps = (take n ps, drop n ps)
888 {-# INLINE splitAt #-}
889
890 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
891 -- returns the longest prefix (possibly empty) of @xs@ of elements that
892 -- satisfy @p@.
893 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
894 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
895 {-# INLINE takeWhile #-}
896
897 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
898 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
899 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
900 {-# INLINE dropWhile #-}
901
902 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
903 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
904 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
905 {-# INLINE break #-}
906
907 -- | 'breakByte' breaks its ByteString argument at the first occurence
908 -- of the specified byte. It is more efficient than 'break' as it is
909 -- implemented with @memchr(3)@. I.e.
910 --
911 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
912 --
913 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
914 breakByte c p = case elemIndex c p of
915 Nothing -> (p,empty)
916 Just n -> (take n p, drop n p)
917 {-# INLINE breakByte #-}
918
919 -- | 'spanByte' breaks its ByteString argument at the first
920 -- occurence of a byte other than its argument. It is more efficient
921 -- than 'span (==)'
922 --
923 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
924 --
925 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
926 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
927 go (p `plusPtr` s) 0
928 where
929 STRICT2(go)
930 go p i | i >= l = return (ps, empty)
931 | otherwise = do c' <- peekByteOff p i
932 if c /= c'
933 then return (take i ps, drop i ps)
934 else go p (i+1)
935 {-# INLINE spanByte #-}
936
937 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
938 -- occurence of @w@. It behaves like 'break', except the delimiter is
939 -- not returned, and @Nothing@ is returned if the delimiter is not in
940 -- the ByteString. I.e.
941 --
942 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
943 --
944 -- > breakFirst c xs ==
945 -- > let (x,y) = break (== c) xs
946 -- > in if null y then Nothing else Just (x, drop 1 y))
947 --
948 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
949 breakFirst c p = case elemIndex c p of
950 Nothing -> Nothing
951 Just n -> Just (take n p, drop (n+1) p)
952 {-# INLINE breakFirst #-}
953
954 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
955 -- ByteString.
956 --
957 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
958 --
959 -- and the following are equivalent:
960 --
961 -- > breakLast 'c' "abcdef"
962 -- > let (x,y) = break (=='c') (reverse "abcdef")
963 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
964 --
965 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
966 breakLast c p = case elemIndexLast c p of
967 Nothing -> Nothing
968 Just n -> Just (take n p, drop (n+1) p)
969 {-# INLINE breakLast #-}
970
971 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
972 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
973 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
974 span p ps = break (not . p) ps
975 {-# INLINE span #-}
976
977 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
978 -- We have
979 --
980 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
981 --
982 -- and
983 --
984 -- > spanEnd (not . isSpace) ps
985 -- > ==
986 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
987 --
988 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
989 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
990
991 -- | /O(n)/ Splits a 'ByteString' into components delimited by
992 -- separators, where the predicate returns True for a separator element.
993 -- The resulting components do not contain the separators. Two adjacent
994 -- separators result in an empty component in the output. eg.
995 --
996 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
997 -- > splitWith (=='a') [] == []
998 --
999 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1000
1001 #if defined(__GLASGOW_HASKELL__)
1002 splitWith _pred (PS _ _ 0) = []
1003 splitWith pred_ (PS fp off len) = splitWith' pred# off len fp
1004 where pred# c# = pred_ (W8# c#)
1005
1006 splitWith' pred' off' len' fp' = withPtr fp $ \p ->
1007 splitLoop pred' p 0 off' len' fp'
1008
1009 splitLoop :: (Word# -> Bool)
1010 -> Ptr Word8
1011 -> Int -> Int -> Int
1012 -> ForeignPtr Word8
1013 -> IO [ByteString]
1014
1015 splitLoop pred' p idx' off' len' fp'
1016 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1017 | idx' >= len' = return [PS fp' off' idx']
1018 | otherwise = do
1019 w <- peekElemOff p (off'+idx')
1020 if pred' (case w of W8# w# -> w#)
1021 then return (PS fp' off' idx' :
1022 splitWith' pred' (off'+idx'+1) (len'-idx'-1) fp')
1023 else splitLoop pred' p (idx'+1) off' len' fp'
1024 {-# INLINE splitWith #-}
1025
1026 #else
1027 splitWith _ (PS _ _ 0) = []
1028 splitWith p ps = splitWith' p ps
1029 where
1030 STRICT2(splitWith')
1031 splitWith' q qs = if null rest then [chunk]
1032 else chunk : splitWith' q (unsafeTail rest)
1033 where (chunk,rest) = break q qs
1034 #endif
1035
1036 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1037 -- argument, consuming the delimiter. I.e.
1038 --
1039 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1040 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1041 -- > split 'x' "x" == ["",""]
1042 --
1043 -- and
1044 --
1045 -- > join [c] . split c == id
1046 -- > split == splitWith . (==)
1047 --
1048 -- As for all splitting functions in this library, this function does
1049 -- not copy the substrings, it just constructs new 'ByteStrings' that
1050 -- are slices of the original.
1051 --
1052 split :: Word8 -> ByteString -> [ByteString]
1053 split _ (PS _ _ 0) = []
1054 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1055 let ptr = p `plusPtr` s
1056
1057 STRICT1(loop)
1058 loop n = do
1059 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1060 if q == nullPtr
1061 then return [PS x (s+n) (l-n)]
1062 else do let i = q `minusPtr` ptr
1063 ls <- loop (i+1)
1064 return $! PS x (s+n) (i-n) : ls
1065 loop 0
1066 {-# INLINE split #-}
1067
1068 {-
1069 -- slower. but stays inside Haskell.
1070 split _ (PS _ _ 0) = []
1071 split (W8# w#) (PS fp off len) = splitWith' off len fp
1072 where
1073 splitWith' off' len' fp' = withPtr fp $ \p ->
1074 splitLoop p 0 off' len' fp'
1075
1076 splitLoop :: Ptr Word8
1077 -> Int -> Int -> Int
1078 -> ForeignPtr Word8
1079 -> IO [ByteString]
1080
1081 STRICT5(splitLoop)
1082 splitLoop p idx' off' len' fp'
1083 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1084 | idx' >= len' = return [PS fp' off' idx']
1085 | otherwise = do
1086 (W8# x#) <- peekElemOff p (off'+idx')
1087 if word2Int# w# ==# word2Int# x#
1088 then return (PS fp' off' idx' :
1089 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1090 else splitLoop p (idx'+1) off' len' fp'
1091 -}
1092
1093 -- | Like 'splitWith', except that sequences of adjacent separators are
1094 -- treated as a single separator. eg.
1095 --
1096 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1097 --
1098 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1099 tokens f = P.filter (not.null) . splitWith f
1100
1101 -- | The 'group' function takes a ByteString and returns a list of
1102 -- ByteStrings such that the concatenation of the result is equal to the
1103 -- argument. Moreover, each sublist in the result contains only equal
1104 -- elements. For example,
1105 --
1106 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1107 --
1108 -- It is a special case of 'groupBy', which allows the programmer to
1109 -- supply their own equality test. It is about 40% faster than
1110 -- /groupBy (==)/
1111 group :: ByteString -> [ByteString]
1112 group xs
1113 | null xs = []
1114 | otherwise = ys : group zs
1115 where
1116 (ys, zs) = spanByte (unsafeHead xs) xs
1117
1118 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1119 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1120 groupBy k xs
1121 | null xs = []
1122 | otherwise = take n xs : groupBy k (drop n xs)
1123 where
1124 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1125
1126 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1127 -- 'ByteString's and concatenates the list after interspersing the first
1128 -- argument between each element of the list.
1129 join :: ByteString -> [ByteString] -> ByteString
1130 join filler pss = concat (splice pss)
1131 where
1132 splice [] = []
1133 splice [x] = [x]
1134 splice (x:y:xs) = x:filler:splice (y:xs)
1135
1136 --
1137 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1138 -- with a char. Around 4 times faster than the generalised join.
1139 --
1140 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1141 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1142 withForeignPtr ffp $ \fp ->
1143 withForeignPtr fgp $ \gp -> do
1144 memcpy ptr (fp `plusPtr` s) l
1145 poke (ptr `plusPtr` l) c
1146 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) m
1147 where
1148 len = length f + length g + 1
1149 {-# INLINE joinWithByte #-}
1150
1151 -- ---------------------------------------------------------------------
1152 -- Indexing ByteStrings
1153
1154 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1155 index :: ByteString -> Int -> Word8
1156 index ps n
1157 | n < 0 = error $ "ByteString.indexWord8: negative index: " ++ show n
1158 | n >= length ps = error $ "ByteString.indexWord8: index too large: " ++ show n
1159 ++ ", length = " ++ show (length ps)
1160 | otherwise = ps `unsafeIndex` n
1161 {-# INLINE index #-}
1162
1163 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1164 -- element in the given 'ByteString' which is equal to the query
1165 -- element, or 'Nothing' if there is no such element.
1166 -- This implementation uses memchr(3).
1167 elemIndex :: Word8 -> ByteString -> Maybe Int
1168 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1169 let p' = p `plusPtr` s
1170 q = memchr p' c (fromIntegral l)
1171 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1172 {-# INLINE elemIndex #-}
1173
1174 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1175 -- element in the given 'ByteString' which is equal to the query
1176 -- element, or 'Nothing' if there is no such element. The following
1177 -- holds:
1178 --
1179 -- > elemIndexLast c xs ==
1180 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1181 --
1182 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1183 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1184 go (p `plusPtr` s) (l-1)
1185 where
1186 STRICT2(go)
1187 go p i | i < 0 = return Nothing
1188 | otherwise = do ch' <- peekByteOff p i
1189 if ch == ch'
1190 then return $ Just i
1191 else go p (i-1)
1192 {-# INLINE elemIndexLast #-}
1193
1194 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1195 -- the indices of all elements equal to the query element, in ascending order.
1196 -- This implementation uses memchr(3).
1197 elemIndices :: Word8 -> ByteString -> [Int]
1198 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1199 let ptr = p `plusPtr` s
1200
1201 STRICT1(loop)
1202 loop n = do
1203 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1204 if q == nullPtr
1205 then return []
1206 else do let i = q `minusPtr` ptr
1207 ls <- loop (i+1)
1208 return $! i:ls
1209 loop 0
1210
1211 {-
1212 -- much slower
1213 elemIndices :: Word8 -> ByteString -> [Int]
1214 elemIndices c ps = loop 0 ps
1215 where STRICT2(loop)
1216 loop _ ps' | null ps' = []
1217 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1218 | otherwise = loop (n+1) (unsafeTail ps')
1219 -}
1220
1221 -- | count returns the number of times its argument appears in the ByteString
1222 --
1223 -- > count = length . elemIndices
1224 --
1225 -- But more efficiently than using length on the intermediate list.
1226 count :: Word8 -> ByteString -> Int
1227 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1228 return $ c_count (p `plusPtr` s) (fromIntegral m) w
1229 {-# INLINE count #-}
1230
1231 {-
1232 --
1233 -- around 30% slower
1234 --
1235 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1236 go (p `plusPtr` s) (fromIntegral m) 0
1237 where
1238 go :: Ptr Word8 -> CSize -> Int -> IO Int
1239 STRICT3(go)
1240 go p l i = do
1241 let q = memchr p w l
1242 if q == nullPtr
1243 then return i
1244 else do let k = fromIntegral $ q `minusPtr` p
1245 go (q `plusPtr` 1) (l-k-1) (i+1)
1246 -}
1247
1248 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1249 -- returns the index of the first element in the ByteString
1250 -- satisfying the predicate.
1251 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1252 findIndex = (listToMaybe .) . findIndices
1253
1254 -- | The 'findIndices' function extends 'findIndex', by returning the
1255 -- indices of all elements satisfying the predicate, in ascending order.
1256 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1257 findIndices p ps = loop 0 ps
1258 where
1259 STRICT2(loop)
1260 loop _ qs | null qs = []
1261 loop n qs | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1262 | otherwise = loop (n+1) (unsafeTail qs)
1263
1264 -- ---------------------------------------------------------------------
1265 -- Searching ByteStrings
1266
1267 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1268 elem :: Word8 -> ByteString -> Bool
1269 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1270 {-# INLINE elem #-}
1271
1272 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1273 notElem :: Word8 -> ByteString -> Bool
1274 notElem c ps = not (elem c ps)
1275 {-# INLINE notElem #-}
1276
1277 --
1278 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1279 -- case of filtering a single byte. It is more efficient to use
1280 -- /filterByte/ in this case.
1281 --
1282 -- > filterByte == filter . (==)
1283 --
1284 -- filterByte is around 10x faster, and uses much less space, than its
1285 -- filter equivalent
1286 filterByte :: Word8 -> ByteString -> ByteString
1287 filterByte w ps = replicate (count w ps) w
1288
1289 --
1290 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1291 -- case of filtering a single byte out of a list. It is more efficient
1292 -- to use /filterNotByte/ in this case.
1293 --
1294 -- > filterNotByte == filter . (/=)
1295 --
1296 -- filterNotByte is around 3x faster, and uses much less space, than its
1297 -- filter equivalent
1298 filterNotByte :: Word8 -> ByteString -> ByteString
1299 filterNotByte ch ps@(PS x s l)
1300 | null ps = ps
1301 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1302 t <- go (f `plusPtr` s) p l
1303 return (t `minusPtr` p) -- actual length
1304 where
1305 STRICT3(go)
1306 go _ t 0 = return t
1307 go f t e = do w <- peek f
1308 if w /= ch
1309 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e-1)
1310 else go (f `plusPtr` 1) t (e-1)
1311
1312 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1313 -- returns a ByteString containing those characters that satisfy the
1314 -- predicate. This function is subject to array fusion.
1315 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1316 filter p = loopArr . loopU (filterEFL p) noAL
1317 {-# INLINE filter #-}
1318
1319 filterF :: (Word8 -> Bool) -> ByteString -> ByteString
1320 filterF k ps@(PS x s l)
1321 | null ps = ps
1322 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1323 t <- go (f `plusPtr` s) p l
1324 return (t `minusPtr` p) -- actual length
1325 where
1326 STRICT3(go)
1327 go _ t 0 = return t
1328 go f t e = do w <- peek f
1329 if k w
1330 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) (e - 1)
1331 else go (f `plusPtr` 1) t (e - 1)
1332 {-# INLINE filterF #-}
1333
1334 -- Almost as good: pack $ foldl (\xs c -> if f c then c : xs else xs) [] ps
1335
1336 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1337 -- and returns the first element in matching the predicate, or 'Nothing'
1338 -- if there is no such element.
1339 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1340 find p ps = case filter p ps of
1341 q | null q -> Nothing
1342 | otherwise -> Just (unsafeHead q)
1343
1344 -- ---------------------------------------------------------------------
1345 -- Searching for substrings
1346
1347 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1348 -- iff the first is a prefix of the second.
1349 isPrefixOf :: ByteString -> ByteString -> Bool
1350 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1351 | l1 == 0 = True
1352 | l2 < l1 = False
1353 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1354 withForeignPtr x2 $ \p2 -> do
1355 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) l1
1356 return (i == 0)
1357
1358 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1359 -- iff the first is a suffix of the second.
1360 --
1361 -- The following holds:
1362 --
1363 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1364 --
1365 -- However, the real implemenation uses memcmp to compare the end of the
1366 -- string only, with no reverse required..
1367 isSuffixOf :: ByteString -> ByteString -> Bool
1368 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1369 | l1 == 0 = True
1370 | l2 < l1 = False
1371 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1372 withForeignPtr x2 $ \p2 -> do
1373 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) l1
1374 return (i == 0)
1375
1376 -- | Check whether one string is a substring of another. @isSubstringOf
1377 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1378 isSubstringOf :: ByteString -- ^ String to search for.
1379 -> ByteString -- ^ String to search in.
1380 -> Bool
1381 isSubstringOf p s = not $ P.null $ findSubstrings p s
1382
1383 -- | Get the first index of a substring in another string,
1384 -- or 'Nothing' if the string is not found.
1385 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1386 findSubstring :: ByteString -- ^ String to search for.
1387 -> ByteString -- ^ String to seach in.
1388 -> Maybe Int
1389 findSubstring = (listToMaybe .) . findSubstrings
1390
1391 -- | Find the indexes of all (possibly overlapping) occurances of a
1392 -- substring in a string. This function uses the Knuth-Morris-Pratt
1393 -- string matching algorithm.
1394 findSubstrings :: ByteString -- ^ String to search for.
1395 -> ByteString -- ^ String to seach in.
1396 -> [Int]
1397
1398 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1399 where
1400 patc x = pat `unsafeIndex` x
1401 strc x = str `unsafeIndex` x
1402
1403 -- maybe we should make kmpNext a UArray before using it in search?
1404 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1405 kmpNextL p _ | null p = []
1406 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1407 ps = unsafeTail p
1408 x = if not (null ps) && unsafeHead ps == patc j'
1409 then kmpNext Array.! j' else j'
1410 in x:kmpNextL ps j'
1411 search i j = match ++ rest -- i: position in string, j: position in pattern
1412 where match = if j == m then [(i - j)] else []
1413 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1414 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1415 | otherwise = j
1416
1417 -- ---------------------------------------------------------------------
1418 -- Zipping
1419
1420 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1421 -- corresponding pairs of bytes. If one input ByteString is short,
1422 -- excess elements of the longer ByteString are discarded. This is
1423 -- equivalent to a pair of 'unpack' operations.
1424 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1425 zip ps qs
1426 | null ps || null qs = []
1427 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1428
1429 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1430 -- the first argument, instead of a tupling function. For example,
1431 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1432 -- corresponding sums.
1433 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1434 zipWith f ps qs
1435 | null ps || null qs = []
1436 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1437
1438 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1439 -- ByteStrings. Note that this performs two 'pack' operations.
1440 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1441 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1442 {-# INLINE unzip #-}
1443
1444 -- ---------------------------------------------------------------------
1445 -- Special lists
1446
1447 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1448 inits :: ByteString -> [ByteString]
1449 inits (PS x s l) = [PS x s n | n <- [0..l]]
1450
1451 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1452 tails :: ByteString -> [ByteString]
1453 tails p | null p = [empty]
1454 | otherwise = p : tails (unsafeTail p)
1455
1456 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1457
1458 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1459 elems :: ByteString -> [ByteString]
1460 elems (PS _ _ 0) = []
1461 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1462 {-# INLINE elems #-}
1463
1464 -- ---------------------------------------------------------------------
1465 -- ** Ordered 'ByteString's
1466
1467 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1468 sort :: ByteString -> ByteString
1469 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1470
1471 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1472 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1473
1474 let STRICT2(go)
1475 go 256 _ = return ()
1476 go i ptr = do n <- peekElemOff arr i
1477 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1478 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1479 go 0 p
1480
1481 -- "countEach counts str l" counts the number of occurences of each Word8 in
1482 -- str, and stores the result in counts.
1483 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1484 STRICT3(countEach)
1485 countEach counts str l = go 0
1486 where
1487 STRICT1(go)
1488 go i | i == l = return ()
1489 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1490 x <- peekElemOff counts k
1491 pokeElemOff counts k (x + 1)
1492 go (i + 1)
1493
1494 {-
1495 sort :: ByteString -> ByteString
1496 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1497 memcpy p (f `plusPtr` s) l
1498 c_qsort p l -- inplace
1499 -}
1500
1501 {-
1502 sort = pack . List.sort . unpack
1503 -}
1504
1505 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1506 --
1507 -- Try some linear sorts: radix, counting
1508 -- Or mergesort.
1509 --
1510 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1511 -- sortBy f ps = undefined
1512
1513 -- ---------------------------------------------------------------------
1514 --
1515 -- Extensions to the basic interface
1516 --
1517
1518 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1519 -- check for the empty case, so there is an obligation on the programmer
1520 -- to provide a proof that the ByteString is non-empty.
1521 unsafeHead :: ByteString -> Word8
1522 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1523 {-# INLINE unsafeHead #-}
1524
1525 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1526 -- check for the empty case. As with 'unsafeHead', the programmer must
1527 -- provide a separate proof that the ByteString is non-empty.
1528 unsafeTail :: ByteString -> ByteString
1529 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1530 {-# INLINE unsafeTail #-}
1531
1532 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1533 -- This omits the bounds check, which means there is an accompanying
1534 -- obligation on the programmer to ensure the bounds are checked in some
1535 -- other way.
1536 unsafeIndex :: ByteString -> Int -> Word8
1537 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1538 {-# INLINE unsafeIndex #-}
1539
1540 -- ---------------------------------------------------------------------
1541 -- Low level constructors
1542
1543 #if defined(__GLASGOW_HASKELL__)
1544 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1545 -- Addr\# (an arbitrary machine address assumed to point outside the
1546 -- garbage-collected heap) into a @ByteString@. A much faster way to
1547 -- create an Addr\# is with an unboxed string literal, than to pack a
1548 -- boxed string. A unboxed string literal is compiled to a static @char
1549 -- []@ by GHC. Establishing the length of the string requires a call to
1550 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1551 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1552 -- if you know the length of the string statically.
1553 --
1554 -- An example:
1555 --
1556 -- > literalFS = packAddress "literal"#
1557 --
1558 packAddress :: Addr# -> ByteString
1559 packAddress addr# = inlinePerformIO $ do
1560 p <- newForeignPtr_ cstr
1561 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1562 where
1563 cstr = Ptr addr#
1564 {-# INLINE packAddress #-}
1565
1566 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1567 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1568 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1569 -- 'Addr\#' to the string, and the length of the string. Make sure the
1570 -- length is correct, otherwise use the safer 'packAddress' (where the
1571 -- length will be calculated once at runtime).
1572 unsafePackAddress :: Int -> Addr# -> ByteString
1573 unsafePackAddress len addr# = inlinePerformIO $ do
1574 p <- newForeignPtr_ cstr
1575 return $ PS p 0 len
1576 where cstr = Ptr addr#
1577
1578 #endif
1579
1580 -- | /O(1)/ Build a ByteString from a ForeignPtr
1581 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1582 fromForeignPtr fp l = PS fp 0 l
1583
1584 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1585 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1586 toForeignPtr (PS ps s l) = (ps, s, l)
1587
1588 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1589 -- current 'ByteString' from any larger string it was created from, as
1590 -- an 'Int'.
1591 skipIndex :: ByteString -> Int
1592 skipIndex (PS _ s _) = s
1593 {-# INLINE skipIndex #-}
1594
1595 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1596 -- finalizer associated to it. The ByteString length is calculated using
1597 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1598 packCString :: CString -> ByteString
1599 packCString cstr = inlinePerformIO $ do
1600 fp <- newForeignPtr_ (castPtr cstr)
1601 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1602
1603 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1604 -- have /no/ finalizer associated with it. This operation has /O(1)/
1605 -- complexity as we already know the final size, so no /strlen(3)/ is
1606 -- required.
1607 packCStringLen :: CStringLen -> ByteString
1608 packCStringLen (ptr,len) = inlinePerformIO $ do
1609 fp <- newForeignPtr_ (castPtr ptr)
1610 return $ PS fp 0 (fromIntegral len)
1611
1612 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1613 -- have a @free(3)@ finalizer associated to it.
1614 packMallocCString :: CString -> ByteString
1615 packMallocCString cstr = inlinePerformIO $ do
1616 fp <- newForeignFreePtr (castPtr cstr)
1617 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1618
1619 #if defined(__GLASGOW_HASKELL__)
1620 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1621 -- length, and an IO action representing a finalizer. This function is
1622 -- not available on Hugs.
1623 --
1624 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1625 packCStringFinalizer p l f = do
1626 fp <- FC.newForeignPtr p f
1627 return $ PS fp 0 l
1628
1629 -- | Explicitly run the finaliser associated with a 'ByteString'.
1630 -- Further references to this value may generate invalid memory
1631 -- references. This operation is unsafe, as there may be other
1632 -- 'ByteStrings' referring to the same underlying pages. If you use
1633 -- this, you need to have a proof of some kind that all 'ByteString's
1634 -- ever generated from the underlying byte array are no longer live.
1635 unsafeFinalize :: ByteString -> IO ()
1636 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1637
1638 #endif
1639
1640 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1641 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1642 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1643 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1644 where
1645 alloc = withForeignPtr ps $ \p -> do
1646 buf <- c_malloc (fromIntegral l+1)
1647 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1648 poke (buf `plusPtr` l) (0::Word8)
1649 return $ castPtr buf
1650
1651 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1652 -- Warning: modifying the @CString@ will affect the @ByteString@.
1653 -- Why is this function unsafe? It relies on the null byte at the end of
1654 -- the ByteString to be there. This is /not/ the case if your ByteString
1655 -- has been spliced from a larger string (i.e. with take or drop).
1656 -- Unless you can guarantee the null byte, you should use the safe
1657 -- version, which will copy the string first.
1658 --
1659 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1660 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1661
1662 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1663 -- This is mainly useful to allow the rest of the data pointed
1664 -- to by the 'ByteString' to be garbage collected, for example
1665 -- if a large string has been read in, and only a small part of it
1666 -- is needed in the rest of the program.
1667 copy :: ByteString -> ByteString
1668 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> memcpy p (f `plusPtr` s) l
1669
1670 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1671 -- CString is going to be deallocated from C land.
1672 copyCString :: CString -> ByteString
1673 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1674
1675 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1676 copyCStringLen :: CStringLen -> ByteString
1677 copyCStringLen (cstr, len) = inlinePerformIO $ do
1678 fp <- mallocForeignPtrArray (len+1)
1679 withForeignPtr fp $ \p -> do
1680 memcpy p (castPtr cstr) len
1681 poke (p `plusPtr` len) (0 :: Word8)
1682 return $! PS fp 0 len
1683
1684 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1685 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1686 -- This is analogous to unsafeUseAsCString, and comes with the same
1687 -- safety requirements.
1688 --
1689 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1690 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1691
1692 -- | Given the maximum size needed and a function to make the contents
1693 -- of a ByteString, generate makes the 'ByteString'. The generating
1694 -- function is required to return the actual final size (<= the maximum
1695 -- size), and the resulting byte array is realloced to this size. The
1696 -- string is padded at the end with a null byte.
1697 --
1698 -- generate is the main mechanism for creating custom, efficient
1699 -- ByteString functions, using Haskell or C functions to fill the space.
1700 --
1701 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1702 generate i f = do
1703 p <- mallocArray (i+1)
1704 i' <- f p
1705 p' <- reallocArray p (i'+1)
1706 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1707 fp <- newForeignFreePtr p'
1708 return $ PS fp 0 i'
1709
1710 -- ---------------------------------------------------------------------
1711 -- line IO
1712
1713 #if defined(__GLASGOW_HASKELL__)
1714
1715 -- | getLine, read a line from stdin.
1716 getLine :: IO ByteString
1717 getLine = hGetLine stdin
1718
1719 -- | hGetLine. read a ByteString from a handle
1720 hGetLine :: Handle -> IO ByteString
1721 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1722 case haBufferMode handle_ of
1723 NoBuffering -> error "no buffering"
1724 _other -> hGetLineBuffered handle_
1725
1726 where
1727 hGetLineBuffered handle_ = do
1728 let ref = haBuffer handle_
1729 buf <- readIORef ref
1730 hGetLineBufferedLoop handle_ ref buf 0 []
1731
1732 hGetLineBufferedLoop handle_ ref
1733 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1734 len `seq` do
1735 off <- findEOL r w raw
1736 let new_len = len + off - r
1737 xs <- mkPS raw r off
1738
1739 -- if eol == True, then off is the offset of the '\n'
1740 -- otherwise off == w and the buffer is now empty.
1741 if off /= w
1742 then do if (w == off + 1)
1743 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1744 else writeIORef ref buf{ bufRPtr = off + 1 }
1745 mkBigPS new_len (xs:xss)
1746 else do
1747 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1748 buf{ bufWPtr=0, bufRPtr=0 }
1749 case maybe_buf of
1750 -- Nothing indicates we caught an EOF, and we may have a
1751 -- partial line to return.
1752 Nothing -> do
1753 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1754 if new_len > 0
1755 then mkBigPS new_len (xs:xss)
1756 else ioe_EOF
1757 Just new_buf ->
1758 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1759
1760 -- find the end-of-line character, if there is one
1761 findEOL r w raw
1762 | r == w = return w
1763 | otherwise = do
1764 (c,r') <- readCharFromBuffer raw r
1765 if c == '\n'
1766 then return r -- NB. not r': don't include the '\n'
1767 else findEOL r' w raw
1768
1769 maybeFillReadBuffer fd is_line is_stream buf = catch
1770 (do buf' <- fillReadBuffer fd is_line is_stream buf
1771 return (Just buf'))
1772 (\e -> if isEOFError e then return Nothing else ioError e)
1773
1774 -- TODO, rewrite to use normal memcpy
1775 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1776 mkPS buf start end = do
1777 let len = end - start
1778 fp <- mallocByteString len
1779 withForeignPtr fp $ \p -> do
1780 memcpy_ptr_baoff p buf start (fromIntegral len)
1781 return (PS fp 0 len)
1782
1783 mkBigPS :: Int -> [ByteString] -> IO ByteString
1784 mkBigPS _ [ps] = return ps
1785 mkBigPS _ pss = return $! concat (P.reverse pss)
1786
1787 #endif
1788
1789 -- ---------------------------------------------------------------------
1790 -- Block IO
1791
1792 -- | Outputs a 'ByteString' to the specified 'Handle'.
1793 hPut :: Handle -> ByteString -> IO ()
1794 hPut _ (PS _ _ 0) = return ()
1795 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1796 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1797
1798 -- | Write a ByteString to stdout
1799 putStr :: ByteString -> IO ()
1800 putStr = hPut stdout
1801
1802 -- | Write a ByteString to stdout, appending a newline byte
1803 putStrLn :: ByteString -> IO ()
1804 putStrLn ps = hPut stdout ps >> hPut stdout nl
1805 where nl = packByte 0x0a
1806
1807 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1808 -- is far more efficient than reading the characters into a 'String'
1809 -- and then using 'pack'.
1810 hGet :: Handle -> Int -> IO ByteString
1811 hGet _ 0 = return empty
1812 hGet h i = do fp <- mallocByteString i
1813 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1814 return $ PS fp 0 l
1815
1816 #if defined(__GLASGOW_HASKELL__)
1817 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1818 -- waiting for data to become available, instead it returns only whatever data
1819 -- is available.
1820 hGetNonBlocking :: Handle -> Int -> IO ByteString
1821 hGetNonBlocking _ 0 = return empty
1822 hGetNonBlocking h i = do
1823 fp <- mallocByteString i
1824 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1825 return $ PS fp 0 l
1826 #endif
1827
1828 -- | Read entire handle contents into a 'ByteString'.
1829 --
1830 -- As with 'hGet', the string representation in the file is assumed to
1831 -- be ISO-8859-1.
1832 --
1833 hGetContents :: Handle -> IO ByteString
1834 hGetContents h = do
1835 let start_size = 1024
1836 p <- mallocArray start_size
1837 i <- hGetBuf h p start_size
1838 if i < start_size
1839 then do p' <- reallocArray p i
1840 fp <- newForeignFreePtr p'
1841 return $ PS fp 0 i
1842 else f p start_size
1843 where
1844 f p s = do
1845 let s' = 2 * s
1846 p' <- reallocArray p s'
1847 i <- hGetBuf h (p' `plusPtr` s) s
1848 if i < s
1849 then do let i' = s + i
1850 p'' <- reallocArray p' i'
1851 fp <- newForeignFreePtr p''
1852 return $ PS fp 0 i'
1853 else f p' s'
1854
1855 -- | getContents. Equivalent to hGetContents stdin
1856 getContents :: IO ByteString
1857 getContents = hGetContents stdin
1858
1859 -- | Read an entire file directly into a 'ByteString'. This is far more
1860 -- efficient than reading the characters into a 'String' and then using
1861 -- 'pack'. It also may be more efficient than opening the file and
1862 -- reading it using hGet.
1863 readFile :: FilePath -> IO ByteString
1864 readFile f = do
1865 h <- openBinaryFile f ReadMode
1866 l <- hFileSize h
1867 s <- hGet h $ fromIntegral l
1868 hClose h
1869 return s
1870
1871 -- | Write a 'ByteString' to a file.
1872 writeFile :: FilePath -> ByteString -> IO ()
1873 writeFile f ps = do
1874 h <- openBinaryFile f WriteMode
1875 hPut h ps
1876 hClose h
1877
1878 {-
1879 --
1880 -- Disable until we can move it into a portable .hsc file
1881 --
1882
1883 -- | Like readFile, this reads an entire file directly into a
1884 -- 'ByteString', but it is even more efficient. It involves directly
1885 -- mapping the file to memory. This has the advantage that the contents
1886 -- of the file never need to be copied. Also, under memory pressure the
1887 -- page may simply be discarded, while in the case of readFile it would
1888 -- need to be written to swap. If you read many small files, mmapFile
1889 -- will be less memory-efficient than readFile, since each mmapFile
1890 -- takes up a separate page of memory. Also, you can run into bus
1891 -- errors if the file is modified. As with 'readFile', the string
1892 -- representation in the file is assumed to be ISO-8859-1.
1893 --
1894 -- On systems without mmap, this is the same as a readFile.
1895 --
1896 mmapFile :: FilePath -> IO ByteString
1897 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1898
1899 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1900 mmap f = do
1901 h <- openBinaryFile f ReadMode
1902 l <- fromIntegral `fmap` hFileSize h
1903 -- Don't bother mmaping small files because each mmapped file takes up
1904 -- at least one full VM block.
1905 if l < mmap_limit
1906 then do thefp <- mallocByteString l
1907 withForeignPtr thefp $ \p-> hGetBuf h p l
1908 hClose h
1909 return (thefp, l)
1910 else do
1911 -- unix only :(
1912 fd <- fromIntegral `fmap` handleToFd h
1913 p <- my_mmap l fd
1914 fp <- if p == nullPtr
1915 then do thefp <- mallocByteString l
1916 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1917 return thefp
1918 else do
1919 -- The munmap leads to crashes on OpenBSD.
1920 -- maybe there's a use after unmap in there somewhere?
1921 #if !defined(__OpenBSD__)
1922 let unmap = c_munmap p l >> return ()
1923 #else
1924 let unmap = return ()
1925 #endif
1926 fp <- FC.newForeignPtr p unmap
1927 return fp
1928 c_close fd
1929 hClose h
1930 return (fp, l)
1931 where mmap_limit = 16*1024
1932 -}
1933
1934 #if defined(__GLASGOW_HASKELL__)
1935 --
1936 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1937 --
1938 getArgs :: IO [ByteString]
1939 getArgs =
1940 alloca $ \ p_argc ->
1941 alloca $ \ p_argv -> do
1942 getProgArgv p_argc p_argv
1943 p <- fromIntegral `fmap` peek p_argc
1944 argv <- peek p_argv
1945 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1946 #endif
1947
1948 -- ---------------------------------------------------------------------
1949 -- Internal utilities
1950
1951 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1952 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1953 -- convenience for ByteString construction.
1954 w2c :: Word8 -> Char
1955 #if !defined(__GLASGOW_HASKELL__)
1956 w2c = chr . fromIntegral
1957 #else
1958 w2c = unsafeChr . fromIntegral
1959 #endif
1960 {-# INLINE w2c #-}
1961
1962 c2w :: Char -> Word8
1963 c2w = fromIntegral . ord
1964 {-# INLINE c2w #-}
1965
1966 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
1967 -- is padded with a null byte.
1968 mallocByteString :: Int -> IO (ForeignPtr Word8)
1969 mallocByteString l = do
1970 fp <- mallocForeignPtrArray (l+1)
1971 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
1972 return fp
1973
1974 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
1975 -- argument gives the final size of the ByteString. Unlike 'generate'
1976 -- the ByteString is not reallocated if the final size is less than the
1977 -- estimated size. Also, unlike 'generate' ByteString's created this way
1978 -- are managed on the Haskell heap.
1979 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
1980 create l write_ptr = inlinePerformIO $ do
1981 fp <- mallocByteString (l+1)
1982 withForeignPtr fp $ \p -> write_ptr p
1983 return $ PS fp 0 l
1984 {-# INLINE create #-}
1985
1986 -- | Perform an operation with a temporary ByteString
1987 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
1988 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
1989 {-# INLINE withPtr #-}
1990
1991 -- Common up near identical calls to `error' to reduce the number
1992 -- constant strings created when compiled:
1993 errorEmptyList :: String -> a
1994 errorEmptyList fun = error ("Data.ByteString." ++ fun ++ ": empty ByteString")
1995 {-# INLINE errorEmptyList #-}
1996
1997 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
1998 -- of the string if no element is found, rather than Nothing.
1999 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2000 STRICT2(findIndexOrEnd)
2001 findIndexOrEnd f ps
2002 | null ps = 0
2003 | f (unsafeHead ps) = 0
2004 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
2005 {-# INLINE findIndexOrEnd #-}
2006
2007 -- Find from the end of the string using predicate
2008 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2009 STRICT2(findFromEndUntil)
2010 findFromEndUntil f ps@(PS x s l) =
2011 if null ps then 0
2012 else if f (last ps) then l
2013 else findFromEndUntil f (PS x s (l-1))
2014
2015 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2016 -- it exposes lots of things to further inlining
2017 --
2018 {-# INLINE inlinePerformIO #-}
2019 inlinePerformIO :: IO a -> a
2020 #if defined(__GLASGOW_HASKELL__)
2021 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2022 #else
2023 inlinePerformIO = unsafePerformIO
2024 #endif
2025
2026 {-# INLINE newForeignFreePtr #-}
2027 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2028 #if defined(__GLASGOW_HASKELL__)
2029 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2030 #else
2031 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2032 #endif
2033
2034 -- ---------------------------------------------------------------------
2035 --
2036 -- Standard C functions
2037 --
2038
2039 foreign import ccall unsafe "string.h strlen" c_strlen
2040 :: CString -> CInt
2041
2042 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2043 :: CInt -> IO (Ptr Word8)
2044
2045 foreign import ccall unsafe "static stdlib.h free" c_free
2046 :: Ptr Word8 -> IO ()
2047
2048 #if !defined(__GLASGOW_HASKELL__)
2049 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2050 :: FunPtr (Ptr Word8 -> IO ())
2051 #endif
2052
2053 foreign import ccall unsafe "string.h memset" memset
2054 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2055
2056 foreign import ccall unsafe "string.h memchr" memchr
2057 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2058
2059 foreign import ccall unsafe "string.h memcmp" memcmp
2060 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
2061
2062 foreign import ccall unsafe "string.h memcpy" memcpy
2063 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2064
2065 -- ---------------------------------------------------------------------
2066 --
2067 -- Uses our C code
2068 --
2069
2070 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2071 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
2072
2073 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2074 :: Ptr Word8 -> Ptr Word8 -> Int -> Word8 -> IO ()
2075
2076 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2077 :: Ptr Word8 -> Int -> Word8
2078
2079 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2080 :: Ptr Word8 -> Int -> Word8
2081
2082 foreign import ccall unsafe "static fpstring.h count" c_count
2083 :: Ptr Word8 -> Int -> Word8 -> Int
2084
2085 -- ---------------------------------------------------------------------
2086 -- MMap
2087
2088 {-
2089 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2090 :: Int -> Int -> IO (Ptr Word8)
2091
2092 foreign import ccall unsafe "static unistd.h close" c_close
2093 :: Int -> IO Int
2094
2095 # if !defined(__OpenBSD__)
2096 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2097 :: Ptr Word8 -> Int -> IO Int
2098 # endif
2099 -}
2100
2101 -- ---------------------------------------------------------------------
2102 -- Internal GHC Haskell magic
2103
2104 #if defined(__GLASGOW_HASKELL__)
2105 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2106 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2107
2108 foreign import ccall unsafe "__hscore_memcpy_src_off"
2109 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> Int -> CSize -> IO (Ptr ())
2110 #endif
2111
2112 -- ---------------------------------------------------------------------
2113 --
2114 -- Functional array fusion for ByteStrings.
2115 --
2116 -- From the Data Parallel Haskell project,
2117 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2118 --
2119
2120 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2121 -- the fact that no bottoms of this type are ever constructed; hence, we can
2122 -- assume @(_ :: NoAL) `seq` x = x@.
2123 --
2124 data NoAL = NoAL
2125
2126 -- | Special forms of loop arguments
2127 --
2128 -- * These are common special cases for the three function arguments of gen
2129 -- and loop; we give them special names to make it easier to trigger RULES
2130 -- applying in the special cases represented by these arguments. The
2131 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2132 -- two simplifier phases.
2133 --
2134 -- * In the case where the accumulator is not needed, it is better to always
2135 -- explicitly return a value `()', rather than just copy the input to the
2136 -- output, as the former gives GHC better local information.
2137 --
2138
2139 -- | Element function expressing a mapping only
2140 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2141 mapEFL f = \_ e -> (noAL, (Just $ f e))
2142 {-# INLINE [1] mapEFL #-}
2143
2144 -- | Element function implementing a filter function only
2145 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2146 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2147 {-# INLINE [1] filterEFL #-}
2148
2149 -- |Element function expressing a reduction only
2150 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2151 foldEFL f = \a e -> (f a e, Nothing)
2152 {-# INLINE [1] foldEFL #-}
2153
2154 -- | No accumulator
2155 noAL :: NoAL
2156 noAL = NoAL
2157 {-# INLINE [1] noAL #-}
2158
2159 -- | Projection functions that are fusion friendly (as in, we determine when
2160 -- they are inlined)
2161 loopArr :: (ByteString, acc) -> ByteString
2162 loopArr (arr, _) = arr
2163 {-# INLINE [1] loopArr #-}
2164
2165 loopAcc :: (ByteString, acc) -> acc
2166 loopAcc (_, acc) = acc
2167 {-# INLINE [1] loopAcc #-}
2168
2169 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2170 loopSndAcc (arr, (_, acc)) = (arr, acc)
2171 {-# INLINE [1] loopSndAcc #-}
2172
2173 ------------------------------------------------------------------------
2174
2175 -- | Iteration over over ByteStrings
2176 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2177 -> acc -- ^ initial acc value
2178 -> ByteString -- ^ input ByteString
2179 -> (ByteString, acc)
2180
2181 loopU f start (PS fp s i) = inlinePerformIO $ withForeignPtr fp $ \a -> do
2182 p <- mallocArray (i+1)
2183 (acc, i') <- go (a `plusPtr` s) p start
2184 p' <- if i == i' then return p else reallocArray p (i'+1) -- avoid realloc for maps
2185 poke (p' `plusPtr` i') (0::Word8)
2186 fp' <- newForeignFreePtr p'
2187 return (PS fp' 0 i', acc)
2188 where
2189 go p ma = trans 0 0
2190 where
2191 STRICT3(trans)
2192 trans a_off ma_off acc
2193 | a_off >= i = return (acc, ma_off)
2194 | otherwise = do
2195 x <- peekByteOff p a_off
2196 let (acc', oe) = f acc x
2197 ma_off' <- case oe of
2198 Nothing -> return ma_off
2199 Just e -> do pokeByteOff ma ma_off e
2200 return $ ma_off + 1
2201 trans (a_off+1) ma_off' acc'
2202
2203 {-# INLINE [1] loopU #-}
2204
2205 {-# RULES
2206
2207 "array fusion!" forall em1 em2 start1 start2 arr.
2208 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2209 let em (acc1, acc2) e =
2210 case em1 acc1 e of
2211 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2212 (acc1', Just e') ->
2213 case em2 acc2 e' of
2214 (acc2', res) -> ((acc1', acc2'), res)
2215 in loopSndAcc (loopU em (start1, start2) arr)
2216
2217 "loopArr/loopSndAcc" forall x.
2218 loopArr (loopSndAcc x) = loopArr x
2219
2220 -- orphan?
2221 -- "seq/NoAL" forall (u::NoAL) e.
2222 -- u `seq` e = e
2223
2224 #-}
2225