Fix negative index handling in splitAt, replicate and unfoldrN. Move mapF, filterF...
[packages/pretty.git] / Data / ByteString.hs
1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
2 --
3 -- Module : ByteString
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
9 --
10 -- Array fusion code:
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
13 --
14 -- License : BSD-style
15 --
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
20 --
21
22 --
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
28 --
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
31 --
32 -- > import qualified Data.ByteString as B
33 --
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
37 --
38
39 module Data.ByteString (
40
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
43
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
51
52 -- * Basic interface
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: ByteString -> Word8 -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
62
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
67
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
73 map', -- :: (Word8 -> Word8) -> ByteString -> ByteString
74
75 -- * Reducing 'ByteString's
76 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
77 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
78 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
80 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
81
82 -- ** Special folds
83 concat, -- :: [ByteString] -> ByteString
84 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
85 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
86 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
87 maximum, -- :: ByteString -> Word8
88 minimum, -- :: ByteString -> Word8
89 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
90
91 -- * Generating and unfolding ByteStrings
92 replicate, -- :: Int -> Word8 -> ByteString
93 unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
94
95 -- * Substrings
96
97 -- ** Breaking strings
98 take, -- :: Int -> ByteString -> ByteString
99 drop, -- :: Int -> ByteString -> ByteString
100 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
101 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
102 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
103 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
104 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
105 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
106
107 -- ** Breaking and dropping on specific bytes
108 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
109 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
110 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
111 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
112
113 -- ** Breaking into many substrings
114 split, -- :: Word8 -> ByteString -> [ByteString]
115 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
116 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
117 group, -- :: ByteString -> [ByteString]
118 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
119
120 -- ** Joining strings
121 join, -- :: ByteString -> [ByteString] -> ByteString
122 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
123
124 -- * Indexing ByteStrings
125 index, -- :: ByteString -> Int -> Word8
126 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
127 elemIndices, -- :: Word8 -> ByteString -> [Int]
128 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
129 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
130 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
131 count, -- :: Word8 -> ByteString -> Int
132
133 -- * Ordered ByteStrings
134 sort, -- :: ByteString -> ByteString
135
136 -- * Searching ByteStrings
137
138 -- ** Searching by equality
139 -- | These functions use memchr(3) to efficiently search the ByteString
140
141 elem, -- :: Word8 -> ByteString -> Bool
142 notElem, -- :: Word8 -> ByteString -> Bool
143 filterByte, -- :: Word8 -> ByteString -> ByteString
144 filterNotByte, -- :: Word8 -> ByteString -> ByteString
145
146 -- ** Searching with a predicate
147 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
148 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
149 filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString
150
151 -- ** Prefixes and suffixes
152 -- | These functions use memcmp(3) to efficiently compare substrings
153 isPrefixOf, -- :: ByteString -> ByteString -> Bool
154 isSuffixOf, -- :: ByteString -> ByteString -> Bool
155
156 -- ** Search for arbitrary substrings
157 isSubstringOf, -- :: ByteString -> ByteString -> Bool
158 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
159 findSubstrings, -- :: ByteString -> ByteString -> [Int]
160
161 -- * Zipping and unzipping ByteStrings
162 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
163 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
164 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
165
166 -- * Unchecked access
167 unsafeHead, -- :: ByteString -> Word8
168 unsafeTail, -- :: ByteString -> ByteString
169 unsafeIndex, -- :: ByteString -> Int -> Word8
170
171 -- * Low level introduction and elimination
172 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
173 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
174 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
175 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
176 skipIndex, -- :: ByteString -> Int
177
178 -- ** Packing CStrings and pointers
179 packCString, -- :: CString -> ByteString
180 packCStringLen, -- :: CString -> ByteString
181 packMallocCString, -- :: CString -> ByteString
182
183 #if defined(__GLASGOW_HASKELL__)
184 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
185 packAddress, -- :: Addr# -> ByteString
186 unsafePackAddress, -- :: Int -> Addr# -> ByteString
187 unsafeFinalize, -- :: ByteString -> IO ()
188 #endif
189
190 -- ** Using ByteStrings as CStrings
191 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
192 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
193 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
194
195 -- ** Copying ByteStrings
196 -- | These functions perform memcpy(3) operations
197 copy, -- :: ByteString -> ByteString
198 copyCString, -- :: CString -> ByteString
199 copyCStringLen, -- :: CStringLen -> ByteString
200
201 -- * I\/O with @ByteString@s
202
203 -- ** Standard input and output
204
205 #if defined(__GLASGOW_HASKELL__)
206 getLine, -- :: IO ByteString
207 #endif
208 getContents, -- :: IO ByteString
209 putStr, -- :: ByteString -> IO ()
210 putStrLn, -- :: ByteString -> IO ()
211
212 -- ** Files
213 readFile, -- :: FilePath -> IO ByteString
214 writeFile, -- :: FilePath -> ByteString -> IO ()
215 -- mmapFile, -- :: FilePath -> IO ByteString
216
217 -- ** I\/O with Handles
218 #if defined(__GLASGOW_HASKELL__)
219 getArgs, -- :: IO [ByteString]
220 hGetLine, -- :: Handle -> IO ByteString
221 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
222 #endif
223 hGetContents, -- :: Handle -> IO ByteString
224 hGet, -- :: Handle -> Int -> IO ByteString
225 hPut, -- :: Handle -> ByteString -> IO ()
226
227 -- * Fusion utilities
228 #if defined(__GLASGOW_HASKELL__)
229 unpackList, -- eek, otherwise it gets thrown away by the simplifier
230 #endif
231
232 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
233 loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL,
234
235 ) where
236
237 import qualified Prelude as P
238 import Prelude hiding (reverse,head,tail,last,init,null
239 ,length,map,lines,foldl,foldr,unlines
240 ,concat,any,take,drop,splitAt,takeWhile
241 ,dropWhile,span,break,elem,filter,maximum
242 ,minimum,all,concatMap,foldl1,foldr1
243 ,readFile,writeFile,replicate
244 ,getContents,getLine,putStr,putStrLn
245 ,zip,zipWith,unzip,notElem)
246
247 import qualified Data.List as List
248
249 import Data.Char
250 import Data.Word (Word8)
251 import Data.Maybe (listToMaybe)
252 import Data.Array (listArray)
253 import qualified Data.Array as Array ((!))
254
255 -- Control.Exception.bracket not available in yhc or nhc
256 import Control.Exception (bracket)
257 import Control.Monad (when)
258
259 import Foreign.C.String (CString, CStringLen)
260 import Foreign.C.Types (CSize, CInt)
261 import Foreign.ForeignPtr
262 import Foreign.Marshal.Array
263 import Foreign.Ptr
264 import Foreign.Storable (Storable(..))
265
266 -- hGetBuf and hPutBuf not available in yhc or nhc
267 import System.IO (stdin,stdout,hClose,hFileSize
268 ,hGetBuf,hPutBuf,openBinaryFile
269 ,Handle,IOMode(..))
270
271 #if !defined(__GLASGOW_HASKELL__)
272 import System.IO.Unsafe
273 #endif
274
275 #if defined(__GLASGOW_HASKELL__)
276
277 import Data.Generics (Data(..), Typeable(..))
278
279 import System.IO (hGetBufNonBlocking)
280 import System.IO.Error (isEOFError)
281
282 import Foreign.Marshal (alloca)
283 import qualified Foreign.Concurrent as FC (newForeignPtr)
284
285 import GHC.Handle
286 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
287 import GHC.Base (build, unsafeChr)
288 import GHC.Word hiding (Word8)
289 import GHC.Ptr (Ptr(..))
290 import GHC.ST (ST(..))
291 import GHC.IOBase
292
293 #endif
294
295 -- CFILES stuff is Hugs only
296 {-# CFILES cbits/fpstring.c #-}
297
298 -- -----------------------------------------------------------------------------
299 --
300 -- Useful macros, until we have bang patterns
301 --
302
303 #define STRICT1(f) f a | a `seq` False = undefined
304 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
305 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
306 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
307 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
308
309 -- -----------------------------------------------------------------------------
310
311 -- | A space-efficient representation of a Word8 vector, supporting many
312 -- efficient operations. A 'ByteString' contains 8-bit characters only.
313 --
314 -- Instances of Eq, Ord, Read, Show, Data, Typeable
315 --
316 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
317 {-# UNPACK #-} !Int
318 {-# UNPACK #-} !Int
319
320 #if defined(__GLASGOW_HASKELL__)
321 deriving (Data, Typeable)
322 #endif
323
324 instance Eq ByteString
325 where (==) = eq
326
327 instance Ord ByteString
328 where compare = compareBytes
329
330 instance Show ByteString where
331 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
332
333 instance Read ByteString where
334 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
335
336 {-
337 instance Arbitrary PackedString where
338 arbitrary = P.pack `fmap` arbitrary
339 coarbitrary s = coarbitrary (P.unpack s)
340 -}
341
342 -- | /O(n)/ Equality on the 'ByteString' type.
343 eq :: ByteString -> ByteString -> Bool
344 eq a@(PS p s l) b@(PS p' s' l')
345 | l /= l' = False -- short cut on length
346 | p == p' && s == s' = True -- short cut for the same string
347 | otherwise = compareBytes a b == EQ
348 {-# INLINE eq #-}
349
350 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
351 compareBytes :: ByteString -> ByteString -> Ordering
352 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
353 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
354 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
355 | otherwise = inlinePerformIO $
356 withForeignPtr x1 $ \p1 ->
357 withForeignPtr x2 $ \p2 -> do
358 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
359 return $ case i `compare` 0 of
360 EQ -> l1 `compare` l2
361 x -> x
362 {-# INLINE compareBytes #-}
363
364 {-
365 --
366 -- About 4x slower over 32M
367 --
368 compareBytes :: ByteString -> ByteString -> Ordering
369 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
370 withForeignPtr fp1 $ \p1 ->
371 withForeignPtr fp2 $ \p2 ->
372 cmp (p1 `plusPtr` off1)
373 (p2 `plusPtr` off2) 0 len1 len2
374
375 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
376 STRICT5(cmp)
377 cmp p1 p2 n len1 len2
378 | n == len1 = if n == len2 then return EQ else return LT
379 | n == len2 = return GT
380 | otherwise = do
381 (a :: Word8) <- peekByteOff p1 n
382 (b :: Word8) <- peekByteOff p2 n
383 case a `compare` b of
384 EQ -> cmp p1 p2 (n+1) len1 len2
385 LT -> return LT
386 GT -> return GT
387 {-# INLINE compareBytes #-}
388 -}
389
390 -- -----------------------------------------------------------------------------
391 -- Introducing and eliminating 'ByteString's
392
393 -- | /O(1)/ The empty 'ByteString'
394 empty :: ByteString
395 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
396 {-# NOINLINE empty #-}
397
398 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
399 packByte :: Word8 -> ByteString
400 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
401 withForeignPtr fp $ \p -> poke p c
402 return $ PS fp 0 1
403 {-# INLINE packByte #-}
404
405 --
406 -- XXX The unsafePerformIO is critical!
407 --
408 -- Otherwise:
409 --
410 -- packByte 255 `compare` packByte 127
411 --
412 -- is compiled to:
413 --
414 -- case mallocByteString 2 of
415 -- ForeignPtr f internals ->
416 -- case writeWord8OffAddr# f 0 255 of _ ->
417 -- case writeWord8OffAddr# f 0 127 of _ ->
418 -- case eqAddr# f f of
419 -- False -> case compare (GHC.Prim.plusAddr# f 0)
420 -- (GHC.Prim.plusAddr# f 0)
421 --
422 --
423
424 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
425 --
426 -- For applications with large numbers of string literals, pack can be a
427 -- bottleneck. In such cases, consider using packAddress (GHC only).
428 pack :: [Word8] -> ByteString
429
430 #if !defined(__GLASGOW_HASKELL__)
431
432 pack str = create (P.length str) $ \p -> go p str
433 where
434 go _ [] = return ()
435 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
436
437 #else /* hack away */
438
439 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
440 where
441 go _ _ [] = return ()
442 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
443
444 writeByte p i c = ST $ \s# ->
445 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
446
447 #endif
448
449 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
450 unpack :: ByteString -> [Word8]
451
452 #if !defined(__GLASGOW_HASKELL__)
453
454 unpack (PS _ _ 0) = []
455 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
456 go (p `plusPtr` s) (l - 1) []
457 where
458 STRICT3(go)
459 go p 0 acc = peek p >>= \e -> return (e : acc)
460 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
461 {-# INLINE unpack #-}
462
463 #else
464
465 unpack ps = build (unpackFoldr ps)
466 {-# INLINE unpack #-}
467
468 unpackList :: ByteString -> [Word8]
469 unpackList (PS fp off len) = withPtr fp $ \p -> do
470 let STRICT3(loop)
471 loop _ (-1) acc = return acc
472 loop q n acc = do
473 a <- peekByteOff q n
474 loop q (n-1) (a : acc)
475 loop (p `plusPtr` off) (len-1) []
476
477 {-# RULES
478 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
479 #-}
480
481 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
482 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
483 let STRICT3(loop)
484 loop _ (-1) acc = return acc
485 loop q n acc = do
486 a <- peekByteOff q n
487 loop q (n-1) (a `f` acc)
488 loop (p `plusPtr` off) (len-1) ch
489 {-# INLINE [0] unpackFoldr #-}
490
491 #endif
492
493 ------------------------------------------------------------------------
494
495 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
496 -- conversion function
497 packWith :: (a -> Word8) -> [a] -> ByteString
498 packWith k str = create (P.length str) $ \p -> go p str
499 where
500 STRICT2(go)
501 go _ [] = return ()
502 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
503 {-# INLINE packWith #-}
504 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
505
506 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
507 unpackWith :: (Word8 -> a) -> ByteString -> [a]
508 unpackWith _ (PS _ _ 0) = []
509 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
510 go (p `plusPtr` s) (l - 1) []
511 where
512 STRICT3(go)
513 go p 0 acc = peek p >>= \e -> return (k e : acc)
514 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
515 {-# INLINE unpackWith #-}
516 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
517
518 -- ---------------------------------------------------------------------
519 -- Basic interface
520
521 -- | /O(1)/ Test whether a ByteString is empty.
522 null :: ByteString -> Bool
523 null (PS _ _ l) = l == 0
524 {-# INLINE null #-}
525
526 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
527 length :: ByteString -> Int
528 length (PS _ _ l) = l
529
530 #if defined(__GLASGOW_HASKELL__)
531 {-# INLINE [1] length #-}
532 #endif
533
534 {-#
535
536 -- Translate length into a loop.
537 -- Performace ok, but allocates too much, so disable for now.
538
539 "length/loop" forall f acc s .
540 length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
541
542 #-}
543
544 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
545 -- complexity, as it requires a memcpy.
546 cons :: Word8 -> ByteString -> ByteString
547 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
548 poke p c
549 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
550 {-# INLINE cons #-}
551
552 -- todo fuse
553
554 -- | /O(n)/ Append a byte to the end of a 'ByteString'
555 snoc :: ByteString -> Word8 -> ByteString
556 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
557 memcpy p (f `plusPtr` s) (fromIntegral l)
558 poke (p `plusPtr` l) c
559 {-# INLINE snoc #-}
560
561 -- todo fuse
562
563 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
564 head :: ByteString -> Word8
565 head ps@(PS x s _)
566 | null ps = errorEmptyList "head"
567 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
568 {-# INLINE head #-}
569
570 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
571 tail :: ByteString -> ByteString
572 tail (PS p s l)
573 | l <= 0 = errorEmptyList "tail"
574 | otherwise = PS p (s+1) (l-1)
575 {-# INLINE tail #-}
576
577 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
578 last :: ByteString -> Word8
579 last ps@(PS x s l)
580 | null ps = errorEmptyList "last"
581 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
582 {-# INLINE last #-}
583
584 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
585 init :: ByteString -> ByteString
586 init (PS p s l)
587 | l <= 0 = errorEmptyList "init"
588 | otherwise = PS p s (l-1)
589 {-# INLINE init #-}
590
591 -- | /O(n)/ Append two ByteStrings
592 append :: ByteString -> ByteString -> ByteString
593 append xs ys | null xs = ys
594 | null ys = xs
595 | otherwise = concat [xs,ys]
596 {-# INLINE append #-}
597
598 -- ---------------------------------------------------------------------
599 -- Transformations
600
601 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
602 -- element of @xs@. This function is subject to array fusion.
603 map :: (Word8 -> Word8) -> ByteString -> ByteString
604 map f = loopArr . loopU (mapEFL f) noAL
605 {-# INLINE map #-}
606
607 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
608 -- slightly faster for one-shot cases.
609 map' :: (Word8 -> Word8) -> ByteString -> ByteString
610 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
611 np <- mallocByteString (len+1)
612 withForeignPtr np $ \p -> do
613 map_ 0 (a `plusPtr` s) p
614 return (PS np 0 len)
615 where
616
617 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
618 STRICT3(map_)
619 map_ n p1 p2
620 | n >= len = return ()
621 | otherwise = do
622 x <- peekByteOff p1 n
623 pokeByteOff p2 n (f x)
624 map_ (n+1) p1 p2
625 {-# INLINE map' #-}
626
627 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
628 reverse :: ByteString -> ByteString
629 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
630 c_reverse p (f `plusPtr` s) (fromIntegral l)
631
632 {-
633 reverse = pack . P.reverse . unpack
634 -}
635
636 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
637 -- 'ByteString' and \`intersperses\' that byte between the elements of
638 -- the 'ByteString'. It is analogous to the intersperse function on
639 -- Lists.
640 intersperse :: Word8 -> ByteString -> ByteString
641 intersperse c ps@(PS x s l)
642 | length ps < 2 = ps
643 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
644 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
645
646 {-
647 intersperse c = pack . List.intersperse c . unpack
648 -}
649
650 -- | The 'transpose' function transposes the rows and columns of its
651 -- 'ByteString' argument.
652 transpose :: [ByteString] -> [ByteString]
653 transpose ps = P.map pack (List.transpose (P.map unpack ps))
654
655 -- ---------------------------------------------------------------------
656 -- Reducing 'ByteString's
657
658 -- | 'foldl', applied to a binary operator, a starting value (typically
659 -- the left-identity of the operator), and a ByteString, reduces the
660 -- ByteString using the binary operator, from left to right.
661 -- This function is subject to array fusion.
662 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
663 foldl f z = loopAcc . loopU (foldEFL f) z
664 {-# INLINE foldl #-}
665
666 {-
667 --
668 -- About twice as fast with 6.4.1, but not fuseable
669 -- A simple fold . map is enough to make it worth while.
670 --
671 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
672 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
673 where
674 STRICT3(lgo)
675 lgo z p q | p == q = return z
676 | otherwise = do c <- peek p
677 lgo (f z c) (p `plusPtr` 1) q
678 -}
679
680 -- | 'foldl\'' is like foldl, but strict in the accumulator.
681 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
682 foldl' f z = loopAcc . loopU (foldEFL' f) z
683 {-# INLINE foldl' #-}
684
685 -- | 'foldr', applied to a binary operator, a starting value
686 -- (typically the right-identity of the operator), and a ByteString,
687 -- reduces the ByteString using the binary operator, from right to left.
688 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
689 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
690 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
691 where
692 STRICT2(go)
693 go p q | p == q = return z
694 | otherwise = do c <- peek p
695 ws <- go (p `plusPtr` 1) q
696 return $ c `k` ws
697
698 -- | 'foldl1' is a variant of 'foldl' that has no starting value
699 -- argument, and thus must be applied to non-empty 'ByteStrings'.
700 -- This function is subject to array fusion.
701 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
702 foldl1 f ps
703 | null ps = errorEmptyList "foldl1"
704 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
705
706 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
707 -- and thus must be applied to non-empty 'ByteString's
708 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
709 foldr1 f ps
710 | null ps = errorEmptyList "foldr1"
711 | otherwise = foldr f (last ps) (init ps)
712
713 -- ---------------------------------------------------------------------
714 -- Special folds
715
716 -- | /O(n)/ Concatenate a list of ByteStrings.
717 concat :: [ByteString] -> ByteString
718 concat [] = empty
719 concat [ps] = ps
720 concat xs = create len $ \ptr -> go xs ptr
721 where len = P.sum . P.map length $ xs
722 STRICT2(go)
723 go [] _ = return ()
724 go (PS p s l:ps) ptr = do
725 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
726 go ps (ptr `plusPtr` l)
727
728 -- | Map a function over a 'ByteString' and concatenate the results
729 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
730 concatMap f = foldr (append . f) empty
731
732 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
733 -- any element of the 'ByteString' satisfies the predicate.
734 any :: (Word8 -> Bool) -> ByteString -> Bool
735 any _ (PS _ _ 0) = False
736 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
737 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
738 where
739 STRICT2(go)
740 go p q | p == q = return False
741 | otherwise = do c <- peek p
742 if f c then return True
743 else go (p `plusPtr` 1) q
744
745 -- todo fuse
746
747 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
748 -- if all elements of the 'ByteString' satisfy the predicate.
749 all :: (Word8 -> Bool) -> ByteString -> Bool
750 all _ (PS _ _ 0) = True
751 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
752 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
753 where
754 STRICT2(go)
755 go p q | p == q = return True -- end of list
756 | otherwise = do c <- peek p
757 if f c
758 then go (p `plusPtr` 1) q
759 else return False
760 -- todo fuse
761
762 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
763 maximum :: ByteString -> Word8
764 maximum xs@(PS x s l)
765 | null xs = errorEmptyList "maximum"
766 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
767 return $ c_maximum (p `plusPtr` s) (fromIntegral l)
768 {-# INLINE maximum #-}
769
770 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
771 minimum :: ByteString -> Word8
772 minimum xs@(PS x s l)
773 | null xs = errorEmptyList "minimum"
774 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
775 return $ c_minimum (p `plusPtr` s) (fromIntegral l)
776 {-# INLINE minimum #-}
777
778 -- fusion is too slow here (10x)
779
780 {-
781 maximum xs@(PS x s l)
782 | null xs = errorEmptyList "maximum"
783 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
784 w <- peek p
785 maximum_ (p `plusPtr` s) 0 l w
786
787 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
788 STRICT4(maximum_)
789 maximum_ ptr n m c
790 | n >= m = return c
791 | otherwise = do w <- peekByteOff ptr n
792 maximum_ ptr (n+1) m (if w > c then w else c)
793
794 minimum xs@(PS x s l)
795 | null xs = errorEmptyList "minimum"
796 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
797 w <- peek p
798 minimum_ (p `plusPtr` s) 0 l w
799
800 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
801 STRICT4(minimum_)
802 minimum_ ptr n m c
803 | n >= m = return c
804 | otherwise = do w <- peekByteOff ptr n
805 minimum_ ptr (n+1) m (if w < c then w else c)
806 -}
807
808 -- | /O(n)/ map Word8 functions, provided with the index at each position
809 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
810 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
811 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
812 where
813 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
814 STRICT4(go)
815 go n f t p | f == p = return ()
816 | otherwise = do w <- peek f
817 ((poke t) . k n) w
818 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
819
820 -- ---------------------------------------------------------------------
821 -- Unfolds and replicates
822
823 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
824 -- the value of every element. The following holds:
825 --
826 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
827 --
828 -- This implemenation uses @memset(3)@
829 replicate :: Int -> Word8 -> ByteString
830 replicate w c | w <= 0 = empty
831 | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
832
833 {-
834 -- About 5x slower
835 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
836 where
837 STRICT2(go)
838 go _ 0 = return w
839 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
840 -}
841
842 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
843 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
844 -- the element and returns 'Nothing' if it is done producing the
845 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
846 -- prepending to the ByteString and @b@ is used as the next element in a
847 -- recursive call.
848 --
849 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
850 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
851 -- final size of the ByteString as an argument. 'cons' can then be
852 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
853 -- linear complexity. The depth of the recursion is limited to this
854 -- size, but may be less. For lazy, infinite unfoldr, use
855 -- 'Data.List.unfoldr' (from 'Data.List').
856 --
857 -- Examples:
858 --
859 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
860 --
861 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
862 --
863 -- > unfoldrN n == take n $ List.unfoldr
864 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
865 unfoldrN i f w
866 | i <= 0 = empty
867 | otherwise = inlinePerformIO $ generate i $ \p -> go p w 0
868 where
869 STRICT3(go)
870 go q c n | n == i = return n -- stop if we reach `i'
871 | otherwise = case f c of
872 Nothing -> return n
873 Just (a,new_c) -> do
874 poke q a
875 go (q `plusPtr` 1) new_c (n+1)
876
877 -- ---------------------------------------------------------------------
878 -- Substrings
879
880 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
881 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
882 take :: Int -> ByteString -> ByteString
883 take n ps@(PS x s l)
884 | n < 0 = empty
885 | n >= l = ps
886 | otherwise = PS x s n
887 {-# INLINE take #-}
888
889 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
890 -- elements, or @[]@ if @n > 'length' xs@.
891 drop :: Int -> ByteString -> ByteString
892 drop n ps@(PS x s l)
893 | n <= 0 = ps
894 | n > l = empty
895 | otherwise = PS x (s+n) (l-n)
896 {-# INLINE drop #-}
897
898 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
899 splitAt :: Int -> ByteString -> (ByteString, ByteString)
900 splitAt n ps = (take n ps, drop n ps)
901 {-# INLINE splitAt #-}
902
903 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
904 -- returns the longest prefix (possibly empty) of @xs@ of elements that
905 -- satisfy @p@.
906 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
907 takeWhile f ps = take (findIndexOrEnd (not . f) ps) ps
908 {-# INLINE takeWhile #-}
909
910 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
911 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
912 dropWhile f ps = drop (findIndexOrEnd (not . f) ps) ps
913 {-# INLINE dropWhile #-}
914
915 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
916 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
917 break p ps = case findIndexOrEnd p ps of n -> (take n ps, drop n ps)
918 {-# INLINE break #-}
919
920 -- | 'breakByte' breaks its ByteString argument at the first occurence
921 -- of the specified byte. It is more efficient than 'break' as it is
922 -- implemented with @memchr(3)@. I.e.
923 --
924 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
925 --
926 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
927 breakByte c p = case elemIndex c p of
928 Nothing -> (p,empty)
929 Just n -> (take n p, drop n p)
930 {-# INLINE breakByte #-}
931
932 -- | 'spanByte' breaks its ByteString argument at the first
933 -- occurence of a byte other than its argument. It is more efficient
934 -- than 'span (==)'
935 --
936 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
937 --
938 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
939 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
940 go (p `plusPtr` s) 0
941 where
942 STRICT2(go)
943 go p i | i >= l = return (ps, empty)
944 | otherwise = do c' <- peekByteOff p i
945 if c /= c'
946 then return (take i ps, drop i ps)
947 else go p (i+1)
948 {-# INLINE spanByte #-}
949
950 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
951 -- occurence of @w@. It behaves like 'break', except the delimiter is
952 -- not returned, and @Nothing@ is returned if the delimiter is not in
953 -- the ByteString. I.e.
954 --
955 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
956 --
957 -- > breakFirst c xs ==
958 -- > let (x,y) = break (== c) xs
959 -- > in if null y then Nothing else Just (x, drop 1 y))
960 --
961 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
962 breakFirst c p = case elemIndex c p of
963 Nothing -> Nothing
964 Just n -> Just (take n p, drop (n+1) p)
965 {-# INLINE breakFirst #-}
966
967 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
968 -- ByteString.
969 --
970 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
971 --
972 -- and the following are equivalent:
973 --
974 -- > breakLast 'c' "abcdef"
975 -- > let (x,y) = break (=='c') (reverse "abcdef")
976 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
977 --
978 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
979 breakLast c p = case elemIndexLast c p of
980 Nothing -> Nothing
981 Just n -> Just (take n p, drop (n+1) p)
982 {-# INLINE breakLast #-}
983
984 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
985 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
986 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
987 span p ps = break (not . p) ps
988 {-# INLINE span #-}
989
990 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
991 -- We have
992 --
993 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
994 --
995 -- and
996 --
997 -- > spanEnd (not . isSpace) ps
998 -- > ==
999 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1000 --
1001 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1002 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1003
1004 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1005 -- separators, where the predicate returns True for a separator element.
1006 -- The resulting components do not contain the separators. Two adjacent
1007 -- separators result in an empty component in the output. eg.
1008 --
1009 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1010 -- > splitWith (=='a') [] == []
1011 --
1012 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1013
1014 #if defined(__GLASGOW_HASKELL__)
1015 splitWith _pred (PS _ _ 0) = []
1016 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1017 where pred# c# = pred_ (W8# c#)
1018
1019 STRICT4(splitWith0)
1020 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1021 splitLoop pred' p 0 off' len' fp'
1022
1023 splitLoop :: (Word# -> Bool)
1024 -> Ptr Word8
1025 -> Int -> Int -> Int
1026 -> ForeignPtr Word8
1027 -> IO [ByteString]
1028
1029 splitLoop pred' p idx' off' len' fp'
1030 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1031 | idx' >= len' = return [PS fp' off' idx']
1032 | otherwise = do
1033 w <- peekElemOff p (off'+idx')
1034 if pred' (case w of W8# w# -> w#)
1035 then return (PS fp' off' idx' :
1036 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1037 else splitLoop pred' p (idx'+1) off' len' fp'
1038 {-# INLINE splitWith #-}
1039
1040 #else
1041 splitWith _ (PS _ _ 0) = []
1042 splitWith p ps = loop p ps
1043 where
1044 STRICT2(loop)
1045 loop q qs = if null rest then [chunk]
1046 else chunk : loop q (unsafeTail rest)
1047 where (chunk,rest) = break q qs
1048 #endif
1049
1050 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1051 -- argument, consuming the delimiter. I.e.
1052 --
1053 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1054 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1055 -- > split 'x' "x" == ["",""]
1056 --
1057 -- and
1058 --
1059 -- > join [c] . split c == id
1060 -- > split == splitWith . (==)
1061 --
1062 -- As for all splitting functions in this library, this function does
1063 -- not copy the substrings, it just constructs new 'ByteStrings' that
1064 -- are slices of the original.
1065 --
1066 split :: Word8 -> ByteString -> [ByteString]
1067 split _ (PS _ _ 0) = []
1068 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1069 let ptr = p `plusPtr` s
1070
1071 STRICT1(loop)
1072 loop n = do
1073 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1074 if q == nullPtr
1075 then [PS x (s+n) (l-n)]
1076 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1077
1078 return (loop 0)
1079 {-# INLINE split #-}
1080
1081 {-
1082 -- slower. but stays inside Haskell.
1083 split _ (PS _ _ 0) = []
1084 split (W8# w#) (PS fp off len) = splitWith' off len fp
1085 where
1086 splitWith' off' len' fp' = withPtr fp $ \p ->
1087 splitLoop p 0 off' len' fp'
1088
1089 splitLoop :: Ptr Word8
1090 -> Int -> Int -> Int
1091 -> ForeignPtr Word8
1092 -> IO [ByteString]
1093
1094 STRICT5(splitLoop)
1095 splitLoop p idx' off' len' fp'
1096 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1097 | idx' >= len' = return [PS fp' off' idx']
1098 | otherwise = do
1099 (W8# x#) <- peekElemOff p (off'+idx')
1100 if word2Int# w# ==# word2Int# x#
1101 then return (PS fp' off' idx' :
1102 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1103 else splitLoop p (idx'+1) off' len' fp'
1104 -}
1105
1106 -- | Like 'splitWith', except that sequences of adjacent separators are
1107 -- treated as a single separator. eg.
1108 --
1109 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1110 --
1111 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1112 tokens f = P.filter (not.null) . splitWith f
1113 {-# INLINE tokens #-}
1114
1115 -- | The 'group' function takes a ByteString and returns a list of
1116 -- ByteStrings such that the concatenation of the result is equal to the
1117 -- argument. Moreover, each sublist in the result contains only equal
1118 -- elements. For example,
1119 --
1120 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1121 --
1122 -- It is a special case of 'groupBy', which allows the programmer to
1123 -- supply their own equality test. It is about 40% faster than
1124 -- /groupBy (==)/
1125 group :: ByteString -> [ByteString]
1126 group xs
1127 | null xs = []
1128 | otherwise = ys : group zs
1129 where
1130 (ys, zs) = spanByte (unsafeHead xs) xs
1131
1132 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1133 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1134 groupBy k xs
1135 | null xs = []
1136 | otherwise = take n xs : groupBy k (drop n xs)
1137 where
1138 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1139
1140 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1141 -- 'ByteString's and concatenates the list after interspersing the first
1142 -- argument between each element of the list.
1143 join :: ByteString -> [ByteString] -> ByteString
1144 join filler pss = concat (splice pss)
1145 where
1146 splice [] = []
1147 splice [x] = [x]
1148 splice (x:y:xs) = x:filler:splice (y:xs)
1149 {-# INLINE join #-}
1150
1151 --
1152 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1153 -- with a char. Around 4 times faster than the generalised join.
1154 --
1155 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1156 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1157 withForeignPtr ffp $ \fp ->
1158 withForeignPtr fgp $ \gp -> do
1159 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1160 poke (ptr `plusPtr` l) c
1161 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1162 where
1163 len = length f + length g + 1
1164 {-# INLINE joinWithByte #-}
1165
1166 -- ---------------------------------------------------------------------
1167 -- Indexing ByteStrings
1168
1169 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1170 index :: ByteString -> Int -> Word8
1171 index ps n
1172 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1173 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1174 ++ ", length = " ++ show (length ps))
1175 | otherwise = ps `unsafeIndex` n
1176 {-# INLINE index #-}
1177
1178 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1179 -- element in the given 'ByteString' which is equal to the query
1180 -- element, or 'Nothing' if there is no such element.
1181 -- This implementation uses memchr(3).
1182 elemIndex :: Word8 -> ByteString -> Maybe Int
1183 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1184 let p' = p `plusPtr` s
1185 q = memchr p' c (fromIntegral l)
1186 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1187 {-# INLINE elemIndex #-}
1188
1189 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1190 -- element in the given 'ByteString' which is equal to the query
1191 -- element, or 'Nothing' if there is no such element. The following
1192 -- holds:
1193 --
1194 -- > elemIndexLast c xs ==
1195 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1196 --
1197 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1198 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1199 go (p `plusPtr` s) (l-1)
1200 where
1201 STRICT2(go)
1202 go p i | i < 0 = return Nothing
1203 | otherwise = do ch' <- peekByteOff p i
1204 if ch == ch'
1205 then return $ Just i
1206 else go p (i-1)
1207 {-# INLINE elemIndexLast #-}
1208
1209 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1210 -- the indices of all elements equal to the query element, in ascending order.
1211 -- This implementation uses memchr(3).
1212 elemIndices :: Word8 -> ByteString -> [Int]
1213 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1214 let ptr = p `plusPtr` s
1215
1216 STRICT1(loop)
1217 loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1218 in if q == nullPtr
1219 then []
1220 else let i = q `minusPtr` ptr
1221 in i : loop (i+1)
1222 return (loop 0)
1223 {-# INLINE elemIndices #-}
1224
1225 {-
1226 -- much slower
1227 elemIndices :: Word8 -> ByteString -> [Int]
1228 elemIndices c ps = loop 0 ps
1229 where STRICT2(loop)
1230 loop _ ps' | null ps' = []
1231 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1232 | otherwise = loop (n+1) (unsafeTail ps')
1233 -}
1234
1235 -- | count returns the number of times its argument appears in the ByteString
1236 --
1237 -- > count = length . elemIndices
1238 --
1239 -- But more efficiently than using length on the intermediate list.
1240 count :: Word8 -> ByteString -> Int
1241 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1242 return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1243 {-# INLINE count #-}
1244
1245 {-
1246 --
1247 -- around 30% slower
1248 --
1249 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1250 go (p `plusPtr` s) (fromIntegral m) 0
1251 where
1252 go :: Ptr Word8 -> CSize -> Int -> IO Int
1253 STRICT3(go)
1254 go p l i = do
1255 let q = memchr p w l
1256 if q == nullPtr
1257 then return i
1258 else do let k = fromIntegral $ q `minusPtr` p
1259 go (q `plusPtr` 1) (l-k-1) (i+1)
1260 -}
1261
1262 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1263 -- returns the index of the first element in the ByteString
1264 -- satisfying the predicate.
1265 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1266 findIndex k ps@(PS x s l)
1267 | null ps = Nothing
1268 | otherwise = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1269 where
1270 STRICT2(go)
1271 go ptr n | n >= l = return Nothing
1272 | otherwise = do w <- peek ptr
1273 if k w
1274 then return (Just n)
1275 else go (ptr `plusPtr` 1) (n+1)
1276 {-# INLINE findIndex #-}
1277
1278 -- | The 'findIndices' function extends 'findIndex', by returning the
1279 -- indices of all elements satisfying the predicate, in ascending order.
1280 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1281 findIndices p ps = loop 0 ps
1282 where
1283 STRICT2(loop)
1284 loop n qs | null qs = []
1285 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1286 | otherwise = loop (n+1) (unsafeTail qs)
1287
1288 -- ---------------------------------------------------------------------
1289 -- Searching ByteStrings
1290
1291 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1292 elem :: Word8 -> ByteString -> Bool
1293 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1294 {-# INLINE elem #-}
1295
1296 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1297 notElem :: Word8 -> ByteString -> Bool
1298 notElem c ps = not (elem c ps)
1299 {-# INLINE notElem #-}
1300
1301 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1302 -- returns a ByteString containing those characters that satisfy the
1303 -- predicate. This function is subject to array fusion.
1304 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1305 filter p = loopArr . loopU (filterEFL p) noAL
1306 {-# INLINE filter #-}
1307
1308 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1309 -- around 2x faster for some one-shot applications.
1310 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1311 filter' k ps@(PS x s l)
1312 | null ps = ps
1313 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1314 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1315 return (t `minusPtr` p) -- actual length
1316 where
1317 STRICT3(go)
1318 go f t end | f == end = return t
1319 | otherwise = do
1320 w <- peek f
1321 if k w
1322 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1323 else go (f `plusPtr` 1) t end
1324 {-# INLINE filter' #-}
1325
1326 --
1327 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1328 -- case of filtering a single byte. It is more efficient to use
1329 -- /filterByte/ in this case.
1330 --
1331 -- > filterByte == filter . (==)
1332 --
1333 -- filterByte is around 10x faster, and uses much less space, than its
1334 -- filter equivalent
1335 filterByte :: Word8 -> ByteString -> ByteString
1336 filterByte w ps = replicate (count w ps) w
1337 {-# INLINE filterByte #-}
1338
1339 --
1340 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1341 -- case of filtering a single byte out of a list. It is more efficient
1342 -- to use /filterNotByte/ in this case.
1343 --
1344 -- > filterNotByte == filter . (/=)
1345 --
1346 -- filterNotByte is around 2x faster than its filter equivalent.
1347 filterNotByte :: Word8 -> ByteString -> ByteString
1348 filterNotByte w = filter' (/= w)
1349 {-# INLINE filterNotByte #-}
1350
1351 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1352 -- and returns the first element in matching the predicate, or 'Nothing'
1353 -- if there is no such element.
1354 --
1355 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1356 --
1357 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1358 find f p = case findIndex f p of
1359 Just n -> Just (p `unsafeIndex` n)
1360 _ -> Nothing
1361 {-# INLINE find #-}
1362
1363 {-
1364 --
1365 -- fuseable, but we don't want to walk the whole array.
1366 --
1367 find k = foldl findEFL Nothing
1368 where findEFL a@(Just _) _ = a
1369 findEFL _ c | k c = Just c
1370 | otherwise = Nothing
1371 -}
1372
1373 -- ---------------------------------------------------------------------
1374 -- Searching for substrings
1375
1376 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1377 -- iff the first is a prefix of the second.
1378 isPrefixOf :: ByteString -> ByteString -> Bool
1379 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1380 | l1 == 0 = True
1381 | l2 < l1 = False
1382 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1383 withForeignPtr x2 $ \p2 -> do
1384 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1385 return (i == 0)
1386
1387 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1388 -- iff the first is a suffix of the second.
1389 --
1390 -- The following holds:
1391 --
1392 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1393 --
1394 -- However, the real implemenation uses memcmp to compare the end of the
1395 -- string only, with no reverse required..
1396 isSuffixOf :: ByteString -> ByteString -> Bool
1397 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1398 | l1 == 0 = True
1399 | l2 < l1 = False
1400 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1401 withForeignPtr x2 $ \p2 -> do
1402 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1403 return (i == 0)
1404
1405 -- | Check whether one string is a substring of another. @isSubstringOf
1406 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1407 isSubstringOf :: ByteString -- ^ String to search for.
1408 -> ByteString -- ^ String to search in.
1409 -> Bool
1410 isSubstringOf p s = not $ P.null $ findSubstrings p s
1411
1412 -- | Get the first index of a substring in another string,
1413 -- or 'Nothing' if the string is not found.
1414 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1415 findSubstring :: ByteString -- ^ String to search for.
1416 -> ByteString -- ^ String to seach in.
1417 -> Maybe Int
1418 findSubstring = (listToMaybe .) . findSubstrings
1419
1420 -- | Find the indexes of all (possibly overlapping) occurances of a
1421 -- substring in a string. This function uses the Knuth-Morris-Pratt
1422 -- string matching algorithm.
1423 findSubstrings :: ByteString -- ^ String to search for.
1424 -> ByteString -- ^ String to seach in.
1425 -> [Int]
1426
1427 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1428 where
1429 patc x = pat `unsafeIndex` x
1430 strc x = str `unsafeIndex` x
1431
1432 -- maybe we should make kmpNext a UArray before using it in search?
1433 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1434 kmpNextL p _ | null p = []
1435 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1436 ps = unsafeTail p
1437 x = if not (null ps) && unsafeHead ps == patc j'
1438 then kmpNext Array.! j' else j'
1439 in x:kmpNextL ps j'
1440 search i j = match ++ rest -- i: position in string, j: position in pattern
1441 where match = if j == m then [(i - j)] else []
1442 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1443 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1444 | otherwise = j
1445
1446 -- ---------------------------------------------------------------------
1447 -- Zipping
1448
1449 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1450 -- corresponding pairs of bytes. If one input ByteString is short,
1451 -- excess elements of the longer ByteString are discarded. This is
1452 -- equivalent to a pair of 'unpack' operations.
1453 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1454 zip ps qs
1455 | null ps || null qs = []
1456 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1457
1458 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1459 -- the first argument, instead of a tupling function. For example,
1460 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1461 -- corresponding sums.
1462 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1463 zipWith f ps qs
1464 | null ps || null qs = []
1465 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1466
1467 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1468 -- ByteStrings. Note that this performs two 'pack' operations.
1469 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1470 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1471 {-# INLINE unzip #-}
1472
1473 -- ---------------------------------------------------------------------
1474 -- Special lists
1475
1476 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1477 inits :: ByteString -> [ByteString]
1478 inits (PS x s l) = [PS x s n | n <- [0..l]]
1479
1480 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1481 tails :: ByteString -> [ByteString]
1482 tails p | null p = [empty]
1483 | otherwise = p : tails (unsafeTail p)
1484
1485 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1486
1487 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1488 elems :: ByteString -> [ByteString]
1489 elems (PS _ _ 0) = []
1490 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1491 {-# INLINE elems #-}
1492
1493 -- ---------------------------------------------------------------------
1494 -- ** Ordered 'ByteString's
1495
1496 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1497 sort :: ByteString -> ByteString
1498 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1499
1500 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1501 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1502
1503 let STRICT2(go)
1504 go 256 _ = return ()
1505 go i ptr = do n <- peekElemOff arr i
1506 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1507 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1508 go 0 p
1509
1510 -- "countEach counts str l" counts the number of occurences of each Word8 in
1511 -- str, and stores the result in counts.
1512 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1513 STRICT3(countEach)
1514 countEach counts str l = go 0
1515 where
1516 STRICT1(go)
1517 go i | i == l = return ()
1518 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1519 x <- peekElemOff counts k
1520 pokeElemOff counts k (x + 1)
1521 go (i + 1)
1522
1523 {-
1524 sort :: ByteString -> ByteString
1525 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1526 memcpy p (f `plusPtr` s) l
1527 c_qsort p l -- inplace
1528 -}
1529
1530 {-
1531 sort = pack . List.sort . unpack
1532 -}
1533
1534 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1535 --
1536 -- Try some linear sorts: radix, counting
1537 -- Or mergesort.
1538 --
1539 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1540 -- sortBy f ps = undefined
1541
1542 -- ---------------------------------------------------------------------
1543 --
1544 -- Extensions to the basic interface
1545 --
1546
1547 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1548 -- check for the empty case, so there is an obligation on the programmer
1549 -- to provide a proof that the ByteString is non-empty.
1550 unsafeHead :: ByteString -> Word8
1551 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1552 {-# INLINE unsafeHead #-}
1553
1554 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1555 -- check for the empty case. As with 'unsafeHead', the programmer must
1556 -- provide a separate proof that the ByteString is non-empty.
1557 unsafeTail :: ByteString -> ByteString
1558 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1559 {-# INLINE unsafeTail #-}
1560
1561 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1562 -- This omits the bounds check, which means there is an accompanying
1563 -- obligation on the programmer to ensure the bounds are checked in some
1564 -- other way.
1565 unsafeIndex :: ByteString -> Int -> Word8
1566 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1567 {-# INLINE unsafeIndex #-}
1568
1569 -- ---------------------------------------------------------------------
1570 -- Low level constructors
1571
1572 #if defined(__GLASGOW_HASKELL__)
1573 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1574 -- Addr\# (an arbitrary machine address assumed to point outside the
1575 -- garbage-collected heap) into a @ByteString@. A much faster way to
1576 -- create an Addr\# is with an unboxed string literal, than to pack a
1577 -- boxed string. A unboxed string literal is compiled to a static @char
1578 -- []@ by GHC. Establishing the length of the string requires a call to
1579 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1580 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1581 -- if you know the length of the string statically.
1582 --
1583 -- An example:
1584 --
1585 -- > literalFS = packAddress "literal"#
1586 --
1587 packAddress :: Addr# -> ByteString
1588 packAddress addr# = inlinePerformIO $ do
1589 p <- newForeignPtr_ cstr
1590 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1591 where
1592 cstr = Ptr addr#
1593 {-# INLINE packAddress #-}
1594
1595 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1596 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1597 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1598 -- 'Addr\#' to the string, and the length of the string. Make sure the
1599 -- length is correct, otherwise use the safer 'packAddress' (where the
1600 -- length will be calculated once at runtime).
1601 unsafePackAddress :: Int -> Addr# -> ByteString
1602 unsafePackAddress len addr# = inlinePerformIO $ do
1603 p <- newForeignPtr_ cstr
1604 return $ PS p 0 len
1605 where cstr = Ptr addr#
1606
1607 #endif
1608
1609 -- | /O(1)/ Build a ByteString from a ForeignPtr
1610 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1611 fromForeignPtr fp l = PS fp 0 l
1612
1613 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1614 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1615 toForeignPtr (PS ps s l) = (ps, s, l)
1616
1617 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1618 -- current 'ByteString' from any larger string it was created from, as
1619 -- an 'Int'.
1620 skipIndex :: ByteString -> Int
1621 skipIndex (PS _ s _) = s
1622 {-# INLINE skipIndex #-}
1623
1624 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1625 -- finalizer associated to it. The ByteString length is calculated using
1626 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1627 packCString :: CString -> ByteString
1628 packCString cstr = inlinePerformIO $ do
1629 fp <- newForeignPtr_ (castPtr cstr)
1630 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1631
1632 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1633 -- have /no/ finalizer associated with it. This operation has /O(1)/
1634 -- complexity as we already know the final size, so no /strlen(3)/ is
1635 -- required.
1636 packCStringLen :: CStringLen -> ByteString
1637 packCStringLen (ptr,len) = inlinePerformIO $ do
1638 fp <- newForeignPtr_ (castPtr ptr)
1639 return $ PS fp 0 (fromIntegral len)
1640
1641 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1642 -- have a @free(3)@ finalizer associated to it.
1643 packMallocCString :: CString -> ByteString
1644 packMallocCString cstr = inlinePerformIO $ do
1645 fp <- newForeignFreePtr (castPtr cstr)
1646 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1647
1648 #if defined(__GLASGOW_HASKELL__)
1649 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1650 -- length, and an IO action representing a finalizer. This function is
1651 -- not available on Hugs.
1652 --
1653 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1654 packCStringFinalizer p l f = do
1655 fp <- FC.newForeignPtr p f
1656 return $ PS fp 0 l
1657
1658 -- | Explicitly run the finaliser associated with a 'ByteString'.
1659 -- Further references to this value may generate invalid memory
1660 -- references. This operation is unsafe, as there may be other
1661 -- 'ByteStrings' referring to the same underlying pages. If you use
1662 -- this, you need to have a proof of some kind that all 'ByteString's
1663 -- ever generated from the underlying byte array are no longer live.
1664 unsafeFinalize :: ByteString -> IO ()
1665 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1666
1667 #endif
1668
1669 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1670 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1671 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1672 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1673 where
1674 alloc = withForeignPtr ps $ \p -> do
1675 buf <- c_malloc (fromIntegral l+1)
1676 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1677 poke (buf `plusPtr` l) (0::Word8)
1678 return $ castPtr buf
1679
1680 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1681 -- Warning: modifying the @CString@ will affect the @ByteString@.
1682 -- Why is this function unsafe? It relies on the null byte at the end of
1683 -- the ByteString to be there. This is /not/ the case if your ByteString
1684 -- has been spliced from a larger string (i.e. with take or drop).
1685 -- Unless you can guarantee the null byte, you should use the safe
1686 -- version, which will copy the string first.
1687 --
1688 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1689 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1690
1691 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1692 -- This is mainly useful to allow the rest of the data pointed
1693 -- to by the 'ByteString' to be garbage collected, for example
1694 -- if a large string has been read in, and only a small part of it
1695 -- is needed in the rest of the program.
1696 copy :: ByteString -> ByteString
1697 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1698 memcpy p (f `plusPtr` s) (fromIntegral l)
1699
1700 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1701 -- CString is going to be deallocated from C land.
1702 copyCString :: CString -> ByteString
1703 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1704
1705 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1706 copyCStringLen :: CStringLen -> ByteString
1707 copyCStringLen (cstr, len) = inlinePerformIO $ do
1708 fp <- mallocForeignPtrArray (len+1)
1709 withForeignPtr fp $ \p -> do
1710 memcpy p (castPtr cstr) (fromIntegral len)
1711 poke (p `plusPtr` len) (0 :: Word8)
1712 return $! PS fp 0 len
1713
1714 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1715 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1716 -- This is analogous to unsafeUseAsCString, and comes with the same
1717 -- safety requirements.
1718 --
1719 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1720 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1721
1722 -- | Given the maximum size needed and a function to make the contents
1723 -- of a ByteString, generate makes the 'ByteString'. The generating
1724 -- function is required to return the actual final size (<= the maximum
1725 -- size), and the resulting byte array is realloced to this size. The
1726 -- string is padded at the end with a null byte.
1727 --
1728 -- generate is the main mechanism for creating custom, efficient
1729 -- ByteString functions, using Haskell or C functions to fill the space.
1730 --
1731 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1732 generate i f = do
1733 fp <- mallocByteString i
1734 (ptr,n) <- withForeignPtr fp $ \p -> do
1735 i' <- f p
1736 if i' == i
1737 then return (fp,i')
1738 else do fp_ <- mallocByteString i' -- realloc
1739 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1740 return (fp_,i')
1741 return (PS ptr 0 n)
1742
1743 {-
1744 --
1745 -- On the C malloc heap. Less fun.
1746 --
1747 generate i f = do
1748 p <- mallocArray (i+1)
1749 i' <- f p
1750 p' <- reallocArray p (i'+1)
1751 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1752 fp <- newForeignFreePtr p'
1753 return $ PS fp 0 i'
1754 -}
1755
1756 -- ---------------------------------------------------------------------
1757 -- line IO
1758
1759 #if defined(__GLASGOW_HASKELL__)
1760
1761 -- | getLine, read a line from stdin.
1762 getLine :: IO ByteString
1763 getLine = hGetLine stdin
1764
1765 -- | hGetLine. read a ByteString from a handle
1766 hGetLine :: Handle -> IO ByteString
1767 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1768 case haBufferMode handle_ of
1769 NoBuffering -> error "no buffering"
1770 _other -> hGetLineBuffered handle_
1771
1772 where
1773 hGetLineBuffered handle_ = do
1774 let ref = haBuffer handle_
1775 buf <- readIORef ref
1776 hGetLineBufferedLoop handle_ ref buf 0 []
1777
1778 hGetLineBufferedLoop handle_ ref
1779 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1780 len `seq` do
1781 off <- findEOL r w raw
1782 let new_len = len + off - r
1783 xs <- mkPS raw r off
1784
1785 -- if eol == True, then off is the offset of the '\n'
1786 -- otherwise off == w and the buffer is now empty.
1787 if off /= w
1788 then do if (w == off + 1)
1789 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1790 else writeIORef ref buf{ bufRPtr = off + 1 }
1791 mkBigPS new_len (xs:xss)
1792 else do
1793 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1794 buf{ bufWPtr=0, bufRPtr=0 }
1795 case maybe_buf of
1796 -- Nothing indicates we caught an EOF, and we may have a
1797 -- partial line to return.
1798 Nothing -> do
1799 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1800 if new_len > 0
1801 then mkBigPS new_len (xs:xss)
1802 else ioe_EOF
1803 Just new_buf ->
1804 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1805
1806 -- find the end-of-line character, if there is one
1807 findEOL r w raw
1808 | r == w = return w
1809 | otherwise = do
1810 (c,r') <- readCharFromBuffer raw r
1811 if c == '\n'
1812 then return r -- NB. not r': don't include the '\n'
1813 else findEOL r' w raw
1814
1815 maybeFillReadBuffer fd is_line is_stream buf = catch
1816 (do buf' <- fillReadBuffer fd is_line is_stream buf
1817 return (Just buf'))
1818 (\e -> if isEOFError e then return Nothing else ioError e)
1819
1820 -- TODO, rewrite to use normal memcpy
1821 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1822 mkPS buf start end = do
1823 let len = end - start
1824 fp <- mallocByteString len
1825 withForeignPtr fp $ \p -> do
1826 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1827 return (PS fp 0 len)
1828
1829 mkBigPS :: Int -> [ByteString] -> IO ByteString
1830 mkBigPS _ [ps] = return ps
1831 mkBigPS _ pss = return $! concat (P.reverse pss)
1832
1833 #endif
1834
1835 -- ---------------------------------------------------------------------
1836 -- Block IO
1837
1838 -- | Outputs a 'ByteString' to the specified 'Handle'.
1839 hPut :: Handle -> ByteString -> IO ()
1840 hPut _ (PS _ _ 0) = return ()
1841 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1842 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1843
1844 -- | Write a ByteString to stdout
1845 putStr :: ByteString -> IO ()
1846 putStr = hPut stdout
1847
1848 -- | Write a ByteString to stdout, appending a newline byte
1849 putStrLn :: ByteString -> IO ()
1850 putStrLn ps = hPut stdout ps >> hPut stdout nl
1851 where nl = packByte 0x0a
1852
1853 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1854 -- is far more efficient than reading the characters into a 'String'
1855 -- and then using 'pack'.
1856 hGet :: Handle -> Int -> IO ByteString
1857 hGet _ 0 = return empty
1858 hGet h i = do fp <- mallocByteString i
1859 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1860 return $ PS fp 0 l
1861
1862 #if defined(__GLASGOW_HASKELL__)
1863 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1864 -- waiting for data to become available, instead it returns only whatever data
1865 -- is available.
1866 hGetNonBlocking :: Handle -> Int -> IO ByteString
1867 hGetNonBlocking _ 0 = return empty
1868 hGetNonBlocking h i = do
1869 fp <- mallocByteString i
1870 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1871 return $ PS fp 0 l
1872 #endif
1873
1874 -- | Read entire handle contents into a 'ByteString'.
1875 --
1876 -- As with 'hGet', the string representation in the file is assumed to
1877 -- be ISO-8859-1.
1878 --
1879 hGetContents :: Handle -> IO ByteString
1880 hGetContents h = do
1881 let start_size = 1024
1882 p <- mallocArray start_size
1883 i <- hGetBuf h p start_size
1884 if i < start_size
1885 then do p' <- reallocArray p i
1886 fp <- newForeignFreePtr p'
1887 return $ PS fp 0 i
1888 else f p start_size
1889 where
1890 f p s = do
1891 let s' = 2 * s
1892 p' <- reallocArray p s'
1893 i <- hGetBuf h (p' `plusPtr` s) s
1894 if i < s
1895 then do let i' = s + i
1896 p'' <- reallocArray p' i'
1897 fp <- newForeignFreePtr p''
1898 return $ PS fp 0 i'
1899 else f p' s'
1900
1901 -- | getContents. Equivalent to hGetContents stdin
1902 getContents :: IO ByteString
1903 getContents = hGetContents stdin
1904
1905 -- | Read an entire file strictly into a 'ByteString'. This is far more
1906 -- efficient than reading the characters into a 'String' and then using
1907 -- 'pack'. It also may be more efficient than opening the file and
1908 -- reading it using hGet.
1909 readFile :: FilePath -> IO ByteString
1910 readFile f = do
1911 h <- openBinaryFile f ReadMode
1912 l <- hFileSize h
1913 s <- hGet h $ fromIntegral l
1914 hClose h
1915 return s
1916
1917 -- | Write a 'ByteString' to a file.
1918 writeFile :: FilePath -> ByteString -> IO ()
1919 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
1920 (\h -> hPut h ps)
1921
1922 {-
1923 --
1924 -- Disable until we can move it into a portable .hsc file
1925 --
1926
1927 -- | Like readFile, this reads an entire file directly into a
1928 -- 'ByteString', but it is even more efficient. It involves directly
1929 -- mapping the file to memory. This has the advantage that the contents
1930 -- of the file never need to be copied. Also, under memory pressure the
1931 -- page may simply be discarded, while in the case of readFile it would
1932 -- need to be written to swap. If you read many small files, mmapFile
1933 -- will be less memory-efficient than readFile, since each mmapFile
1934 -- takes up a separate page of memory. Also, you can run into bus
1935 -- errors if the file is modified. As with 'readFile', the string
1936 -- representation in the file is assumed to be ISO-8859-1.
1937 --
1938 -- On systems without mmap, this is the same as a readFile.
1939 --
1940 mmapFile :: FilePath -> IO ByteString
1941 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
1942
1943 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
1944 mmap f = do
1945 h <- openBinaryFile f ReadMode
1946 l <- fromIntegral `fmap` hFileSize h
1947 -- Don't bother mmaping small files because each mmapped file takes up
1948 -- at least one full VM block.
1949 if l < mmap_limit
1950 then do thefp <- mallocByteString l
1951 withForeignPtr thefp $ \p-> hGetBuf h p l
1952 hClose h
1953 return (thefp, l)
1954 else do
1955 -- unix only :(
1956 fd <- fromIntegral `fmap` handleToFd h
1957 p <- my_mmap l fd
1958 fp <- if p == nullPtr
1959 then do thefp <- mallocByteString l
1960 withForeignPtr thefp $ \p' -> hGetBuf h p' l
1961 return thefp
1962 else do
1963 -- The munmap leads to crashes on OpenBSD.
1964 -- maybe there's a use after unmap in there somewhere?
1965 #if !defined(__OpenBSD__)
1966 let unmap = c_munmap p l >> return ()
1967 #else
1968 let unmap = return ()
1969 #endif
1970 fp <- FC.newForeignPtr p unmap
1971 return fp
1972 c_close fd
1973 hClose h
1974 return (fp, l)
1975 where mmap_limit = 16*1024
1976 -}
1977
1978 #if defined(__GLASGOW_HASKELL__)
1979 --
1980 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
1981 --
1982 getArgs :: IO [ByteString]
1983 getArgs =
1984 alloca $ \ p_argc ->
1985 alloca $ \ p_argv -> do
1986 getProgArgv p_argc p_argv
1987 p <- fromIntegral `fmap` peek p_argc
1988 argv <- peek p_argv
1989 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
1990 #endif
1991
1992 -- ---------------------------------------------------------------------
1993 -- Internal utilities
1994
1995 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
1996 -- silently truncate to 8 bits Chars > '\255'. They are provided as
1997 -- convenience for ByteString construction.
1998 w2c :: Word8 -> Char
1999 #if !defined(__GLASGOW_HASKELL__)
2000 w2c = chr . fromIntegral
2001 #else
2002 w2c = unsafeChr . fromIntegral
2003 #endif
2004 {-# INLINE w2c #-}
2005
2006 c2w :: Char -> Word8
2007 c2w = fromIntegral . ord
2008 {-# INLINE c2w #-}
2009
2010 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2011 -- is padded with a null byte.
2012 mallocByteString :: Int -> IO (ForeignPtr Word8)
2013 mallocByteString l = do
2014 fp <- mallocForeignPtrArray (l+1)
2015 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2016 return fp
2017
2018 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2019 -- argument gives the final size of the ByteString. Unlike 'generate'
2020 -- the ByteString is not reallocated if the final size is less than the
2021 -- estimated size. Also, unlike 'generate' ByteString's created this way
2022 -- are managed on the Haskell heap.
2023 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2024 create l write_ptr = inlinePerformIO $ do
2025 fp <- mallocByteString (l+1)
2026 withForeignPtr fp $ \p -> write_ptr p
2027 return $ PS fp 0 l
2028 {-# INLINE create #-}
2029
2030 -- | Perform an operation with a temporary ByteString
2031 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2032 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2033 {-# INLINE withPtr #-}
2034
2035 -- Common up near identical calls to `error' to reduce the number
2036 -- constant strings created when compiled:
2037 errorEmptyList :: String -> a
2038 errorEmptyList fun = moduleError fun "empty ByteString"
2039 {-# NOINLINE errorEmptyList #-}
2040
2041 moduleError :: String -> String -> a
2042 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2043 {-# NOINLINE moduleError #-}
2044
2045 -- 'findIndexOrEnd' is a variant of findIndex, that returns the length
2046 -- of the string if no element is found, rather than Nothing.
2047 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
2048 STRICT2(findIndexOrEnd)
2049 findIndexOrEnd f ps
2050 | null ps = 0
2051 | f (unsafeHead ps) = 0
2052 | otherwise = 1 + findIndexOrEnd f (unsafeTail ps)
2053 {-# INLINE findIndexOrEnd #-}
2054
2055 -- Find from the end of the string using predicate
2056 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2057 STRICT2(findFromEndUntil)
2058 findFromEndUntil f ps@(PS x s l) =
2059 if null ps then 0
2060 else if f (last ps) then l
2061 else findFromEndUntil f (PS x s (l-1))
2062
2063 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2064 -- it exposes lots of things to further inlining
2065 --
2066 {-# INLINE inlinePerformIO #-}
2067 inlinePerformIO :: IO a -> a
2068 #if defined(__GLASGOW_HASKELL__)
2069 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2070 #else
2071 inlinePerformIO = unsafePerformIO
2072 #endif
2073
2074 {-# INLINE newForeignFreePtr #-}
2075 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2076 #if defined(__GLASGOW_HASKELL__)
2077 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2078 #else
2079 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2080 #endif
2081
2082 -- ---------------------------------------------------------------------
2083 --
2084 -- Standard C functions
2085 --
2086
2087 foreign import ccall unsafe "string.h strlen" c_strlen
2088 :: CString -> CInt
2089
2090 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2091 :: CInt -> IO (Ptr Word8)
2092
2093 foreign import ccall unsafe "static stdlib.h free" c_free
2094 :: Ptr Word8 -> IO ()
2095
2096 #if !defined(__GLASGOW_HASKELL__)
2097 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2098 :: FunPtr (Ptr Word8 -> IO ())
2099 #endif
2100
2101 foreign import ccall unsafe "string.h memset" memset
2102 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2103
2104 foreign import ccall unsafe "string.h memchr" memchr
2105 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2106
2107 foreign import ccall unsafe "string.h memcmp" memcmp
2108 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2109
2110 foreign import ccall unsafe "string.h memcpy" memcpy
2111 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2112
2113 -- ---------------------------------------------------------------------
2114 --
2115 -- Uses our C code
2116 --
2117
2118 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2119 :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2120
2121 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2122 :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2123
2124 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2125 :: Ptr Word8 -> CInt -> Word8
2126
2127 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2128 :: Ptr Word8 -> CInt -> Word8
2129
2130 foreign import ccall unsafe "static fpstring.h count" c_count
2131 :: Ptr Word8 -> CInt -> Word8 -> CInt
2132
2133 -- ---------------------------------------------------------------------
2134 -- MMap
2135
2136 {-
2137 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2138 :: Int -> Int -> IO (Ptr Word8)
2139
2140 foreign import ccall unsafe "static unistd.h close" c_close
2141 :: Int -> IO Int
2142
2143 # if !defined(__OpenBSD__)
2144 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2145 :: Ptr Word8 -> Int -> IO Int
2146 # endif
2147 -}
2148
2149 -- ---------------------------------------------------------------------
2150 -- Internal GHC Haskell magic
2151
2152 #if defined(__GLASGOW_HASKELL__)
2153 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2154 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2155
2156 foreign import ccall unsafe "__hscore_memcpy_src_off"
2157 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2158 #endif
2159
2160 -- ---------------------------------------------------------------------
2161 --
2162 -- Functional array fusion for ByteStrings.
2163 --
2164 -- From the Data Parallel Haskell project,
2165 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2166 --
2167
2168 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2169 -- the fact that no bottoms of this type are ever constructed; hence, we can
2170 -- assume @(_ :: NoAL) `seq` x = x@.
2171 --
2172 data NoAL = NoAL
2173
2174 -- | Special forms of loop arguments
2175 --
2176 -- * These are common special cases for the three function arguments of gen
2177 -- and loop; we give them special names to make it easier to trigger RULES
2178 -- applying in the special cases represented by these arguments. The
2179 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2180 -- two simplifier phases.
2181 --
2182 -- * In the case where the accumulator is not needed, it is better to always
2183 -- explicitly return a value `()', rather than just copy the input to the
2184 -- output, as the former gives GHC better local information.
2185 --
2186
2187 -- | Element function expressing a mapping only
2188 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2189 mapEFL f = \_ e -> (noAL, (Just $ f e))
2190 #if defined(__GLASGOW_HASKELL__)
2191 {-# INLINE [1] mapEFL #-}
2192 #endif
2193
2194 -- | Element function implementing a filter function only
2195 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2196 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2197 #if defined(__GLASGOW_HASKELL__)
2198 {-# INLINE [1] filterEFL #-}
2199 #endif
2200
2201 -- |Element function expressing a reduction only
2202 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2203 foldEFL f = \a e -> (f a e, Nothing)
2204 #if defined(__GLASGOW_HASKELL__)
2205 {-# INLINE [1] foldEFL #-}
2206 #endif
2207
2208 -- | A strict foldEFL.
2209 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2210 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2211 #if defined(__GLASGOW_HASKELL__)
2212 {-# INLINE [1] foldEFL' #-}
2213 #endif
2214
2215 -- | No accumulator
2216 noAL :: NoAL
2217 noAL = NoAL
2218 #if defined(__GLASGOW_HASKELL__)
2219 {-# INLINE [1] noAL #-}
2220 #endif
2221
2222 -- | Projection functions that are fusion friendly (as in, we determine when
2223 -- they are inlined)
2224 loopArr :: (ByteString, acc) -> ByteString
2225 loopArr (arr, _) = arr
2226 #if defined(__GLASGOW_HASKELL__)
2227 {-# INLINE [1] loopArr #-}
2228 #endif
2229
2230 loopAcc :: (ByteString, acc) -> acc
2231 loopAcc (_, acc) = acc
2232 #if defined(__GLASGOW_HASKELL__)
2233 {-# INLINE [1] loopAcc #-}
2234 #endif
2235
2236 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2237 loopSndAcc (arr, (_, acc)) = (arr, acc)
2238 #if defined(__GLASGOW_HASKELL__)
2239 {-# INLINE [1] loopSndAcc #-}
2240 #endif
2241
2242 ------------------------------------------------------------------------
2243
2244 --
2245 -- size, and then percentage.
2246 --
2247
2248 -- | Iteration over over ByteStrings
2249 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2250 -> acc -- ^ initial acc value
2251 -> ByteString -- ^ input ByteString
2252 -> (ByteString, acc)
2253
2254 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2255 fp <- mallocByteString i
2256 (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2257 (acc, i') <- go (a `plusPtr` s) p start
2258 if i' == i
2259 then return (fp,i',acc) -- no realloc for map
2260 else do fp_ <- mallocByteString i' -- realloc
2261 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2262 return (fp_,i',acc)
2263
2264 return (PS ptr 0 n, acc)
2265 where
2266 go p ma = trans 0 0
2267 where
2268 STRICT3(trans)
2269 trans a_off ma_off acc
2270 | a_off >= i = return (acc, ma_off)
2271 | otherwise = do
2272 x <- peekByteOff p a_off
2273 let (acc', oe) = f acc x
2274 ma_off' <- case oe of
2275 Nothing -> return ma_off
2276 Just e -> do pokeByteOff ma ma_off e
2277 return $ ma_off + 1
2278 trans (a_off+1) ma_off' acc'
2279
2280 #if defined(__GLASGOW_HASKELL__)
2281 {-# INLINE [1] loopU #-}
2282 #endif
2283
2284 infixr 9 `fuseEFL`
2285
2286 -- |Fuse to flat loop functions
2287 fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8))
2288 -> (a2 -> Word8 -> (a2, Maybe Word8))
2289 -> (a1, a2)
2290 -> Word8
2291 -> ((a1, a2), Maybe Word8)
2292 fuseEFL f g (acc1, acc2) e1 =
2293 case f acc1 e1 of
2294 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2295 (acc1', Just e2) ->
2296 case g acc2 e2 of
2297 (acc2', res) -> ((acc1', acc2'), res)
2298
2299 {-# RULES
2300
2301 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2302 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2303 loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2304
2305 "loopArr/loopSndAcc" forall x.
2306 loopArr (loopSndAcc x) = loopArr x
2307
2308 "seq/NoAL" forall (u::NoAL) e.
2309 u `seq` e = e
2310
2311 #-}
2312