Sync with FPS head, including the following patches:
[packages/pretty.git] / Data / ByteString.hs
1 {-# OPTIONS_GHC -cpp -fffi -fglasgow-exts #-}
2 --
3 -- Module : ByteString
4 -- Copyright : (c) The University of Glasgow 2001,
5 -- (c) David Roundy 2003-2005,
6 -- (c) Simon Marlow 2005
7 -- (c) Don Stewart 2005-2006
8 -- (c) Bjorn Bringert 2006
9 --
10 -- Array fusion code:
11 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
12 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy
13 --
14 -- License : BSD-style
15 --
16 -- Maintainer : dons@cse.unsw.edu.au
17 -- Stability : experimental
18 -- Portability : portable, requires ffi and cpp
19 -- Tested with : GHC 6.4.1 and Hugs March 2005
20 --
21
22 --
23 -- | A time and space-efficient implementation of byte vectors using
24 -- packed Word8 arrays, suitable for high performance use, both in terms
25 -- of large data quantities, or high speed requirements. Byte vectors
26 -- are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
27 -- and can be passed between C and Haskell with little effort.
28 --
29 -- This module is intended to be imported @qualified@, to avoid name
30 -- clashes with Prelude functions. eg.
31 --
32 -- > import qualified Data.ByteString as B
33 --
34 -- Original GHC implementation by Bryan O\'Sullivan. Rewritten to use
35 -- UArray by Simon Marlow. Rewritten to support slices and use
36 -- ForeignPtr by David Roundy. Polished and extended by Don Stewart.
37 --
38
39 module Data.ByteString (
40
41 -- * The @ByteString@ type
42 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
43
44 -- * Introducing and eliminating 'ByteString's
45 empty, -- :: ByteString
46 packByte, -- :: Word8 -> ByteString
47 pack, -- :: [Word8] -> ByteString
48 unpack, -- :: ByteString -> [Word8]
49 packWith, -- :: (a -> Word8) -> [a] -> ByteString
50 unpackWith, -- :: (Word8 -> a) -> ByteString -> [a]
51
52 -- * Basic interface
53 cons, -- :: Word8 -> ByteString -> ByteString
54 snoc, -- :: ByteString -> Word8 -> ByteString
55 null, -- :: ByteString -> Bool
56 length, -- :: ByteString -> Int
57 head, -- :: ByteString -> Word8
58 tail, -- :: ByteString -> ByteString
59 last, -- :: ByteString -> Word8
60 init, -- :: ByteString -> ByteString
61 append, -- :: ByteString -> ByteString -> ByteString
62
63 -- * Special ByteStrings
64 inits, -- :: ByteString -> [ByteString]
65 tails, -- :: ByteString -> [ByteString]
66 elems, -- :: ByteString -> [ByteString]
67
68 -- * Transformating ByteStrings
69 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
70 reverse, -- :: ByteString -> ByteString
71 intersperse, -- :: Word8 -> ByteString -> ByteString
72 transpose, -- :: [ByteString] -> [ByteString]
73 map', -- :: (Word8 -> Word8) -> ByteString -> ByteString
74
75 -- * Reducing 'ByteString's
76 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
77 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
78 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
79 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
80 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
81 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
82
83 -- ** Special folds
84 concat, -- :: [ByteString] -> ByteString
85 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
86 any, -- :: (Word8 -> Bool) -> ByteString -> Bool
87 all, -- :: (Word8 -> Bool) -> ByteString -> Bool
88 maximum, -- :: ByteString -> Word8
89 minimum, -- :: ByteString -> Word8
90 mapIndexed, -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
91
92 -- * Building ByteStrings
93 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
94 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
95
96 -- * Generating and unfolding ByteStrings
97 replicate, -- :: Int -> Word8 -> ByteString
98 unfoldrN, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
99
100 -- * Substrings
101
102 -- ** Breaking strings
103 take, -- :: Int -> ByteString -> ByteString
104 unsafeTake, -- :: Int -> ByteString -> ByteString
105 drop, -- :: Int -> ByteString -> ByteString
106 unsafeDrop, -- :: Int -> ByteString -> ByteString
107 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
108 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
109 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
110 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
111 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
112 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
113
114 -- ** Breaking and dropping on specific bytes
115 breakByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
116 spanByte, -- :: Word8 -> ByteString -> (ByteString, ByteString)
117 breakFirst, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
118 breakLast, -- :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
119
120 -- ** Breaking into many substrings
121 split, -- :: Word8 -> ByteString -> [ByteString]
122 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
123 tokens, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
124 group, -- :: ByteString -> [ByteString]
125 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
126
127 -- ** Joining strings
128 join, -- :: ByteString -> [ByteString] -> ByteString
129 joinWithByte, -- :: Word8 -> ByteString -> ByteString -> ByteString
130
131 -- * Indexing ByteStrings
132 index, -- :: ByteString -> Int -> Word8
133 elemIndex, -- :: Word8 -> ByteString -> Maybe Int
134 elemIndices, -- :: Word8 -> ByteString -> [Int]
135 elemIndexLast, -- :: Word8 -> ByteString -> Maybe Int
136 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
137 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
138 count, -- :: Word8 -> ByteString -> Int
139 findIndexOrEnd, -- :: (Word8 -> Bool) -> ByteString -> Int
140
141 -- * Ordered ByteStrings
142 sort, -- :: ByteString -> ByteString
143
144 -- * Searching ByteStrings
145
146 -- ** Searching by equality
147 -- | These functions use memchr(3) to efficiently search the ByteString
148
149 elem, -- :: Word8 -> ByteString -> Bool
150 notElem, -- :: Word8 -> ByteString -> Bool
151 filterByte, -- :: Word8 -> ByteString -> ByteString
152 filterNotByte, -- :: Word8 -> ByteString -> ByteString
153
154 -- ** Searching with a predicate
155 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
156 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
157 filter', -- :: (Word8 -> Bool) -> ByteString -> ByteString
158
159 -- ** Prefixes and suffixes
160 -- | These functions use memcmp(3) to efficiently compare substrings
161 isPrefixOf, -- :: ByteString -> ByteString -> Bool
162 isSuffixOf, -- :: ByteString -> ByteString -> Bool
163
164 -- ** Search for arbitrary substrings
165 isSubstringOf, -- :: ByteString -> ByteString -> Bool
166 findSubstring, -- :: ByteString -> ByteString -> Maybe Int
167 findSubstrings, -- :: ByteString -> ByteString -> [Int]
168
169 -- * Zipping and unzipping ByteStrings
170 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
171 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
172 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
173
174 -- * Unchecked access
175 unsafeHead, -- :: ByteString -> Word8
176 unsafeTail, -- :: ByteString -> ByteString
177 unsafeIndex, -- :: ByteString -> Int -> Word8
178
179 -- * Low level introduction and elimination
180 generate, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
181 create, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
182 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> ByteString
183 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
184 skipIndex, -- :: ByteString -> Int
185
186 -- ** Packing CStrings and pointers
187 packCString, -- :: CString -> ByteString
188 packCStringLen, -- :: CString -> ByteString
189 packMallocCString, -- :: CString -> ByteString
190
191 #if defined(__GLASGOW_HASKELL__)
192 packCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
193 packAddress, -- :: Addr# -> ByteString
194 unsafePackAddress, -- :: Int -> Addr# -> ByteString
195 unsafeFinalize, -- :: ByteString -> IO ()
196 #endif
197
198 -- ** Using ByteStrings as CStrings
199 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
200 unsafeUseAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
201 unsafeUseAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
202
203 -- ** Copying ByteStrings
204 -- | These functions perform memcpy(3) operations
205 copy, -- :: ByteString -> ByteString
206 copyCString, -- :: CString -> ByteString
207 copyCStringLen, -- :: CStringLen -> ByteString
208
209 -- * I\/O with @ByteString@s
210
211 -- ** Standard input and output
212
213 #if defined(__GLASGOW_HASKELL__)
214 getLine, -- :: IO ByteString
215 #endif
216 getContents, -- :: IO ByteString
217 putStr, -- :: ByteString -> IO ()
218 putStrLn, -- :: ByteString -> IO ()
219
220 -- ** Files
221 readFile, -- :: FilePath -> IO ByteString
222 writeFile, -- :: FilePath -> ByteString -> IO ()
223 -- mmapFile, -- :: FilePath -> IO ByteString
224
225 -- ** I\/O with Handles
226 #if defined(__GLASGOW_HASKELL__)
227 getArgs, -- :: IO [ByteString]
228 hGetLine, -- :: Handle -> IO ByteString
229 hGetLines, -- :: Handle -> IO [ByteString]
230 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
231 #endif
232 hGetContents, -- :: Handle -> IO ByteString
233 hGet, -- :: Handle -> Int -> IO ByteString
234 hPut, -- :: Handle -> ByteString -> IO ()
235
236 -- * Fusion utilities
237 #if defined(__GLASGOW_HASKELL__)
238 unpackList, -- eek, otherwise it gets thrown away by the simplifier
239 #endif
240
241 noAL, NoAL, loopArr, loopAcc, loopSndAcc,
242 loopU, mapEFL, filterEFL, foldEFL, foldEFL', fuseEFL, scanEFL,
243
244 ) where
245
246 import qualified Prelude as P
247 import Prelude hiding (reverse,head,tail,last,init,null
248 ,length,map,lines,foldl,foldr,unlines
249 ,concat,any,take,drop,splitAt,takeWhile
250 ,dropWhile,span,break,elem,filter,maximum
251 ,minimum,all,concatMap,foldl1,foldr1
252 ,scanl,scanl1,readFile,writeFile,replicate
253 ,getContents,getLine,putStr,putStrLn
254 ,zip,zipWith,unzip,notElem)
255
256 import qualified Data.List as List
257
258 import Data.Char
259 import Data.Word (Word8)
260 import Data.Maybe (listToMaybe)
261 import Data.Array (listArray)
262 import qualified Data.Array as Array ((!))
263
264 -- Control.Exception.bracket not available in yhc or nhc
265 import Control.Exception (bracket, assert)
266 import Control.Monad (when)
267
268 import Foreign.C.String (CString, CStringLen)
269 import Foreign.C.Types (CSize,CInt)
270 import Foreign.ForeignPtr
271 import Foreign.Marshal.Array
272 import Foreign.Ptr
273 import Foreign.Storable (Storable(..))
274
275 -- hGetBuf and hPutBuf not available in yhc or nhc
276 import System.IO (stdin,stdout,hClose,hFileSize
277 ,hGetBuf,hPutBuf,openBinaryFile
278 ,Handle,IOMode(..))
279
280 #if !defined(__GLASGOW_HASKELL__)
281 import System.IO.Unsafe
282 #endif
283
284 #if defined(__GLASGOW_HASKELL__)
285
286 import Data.Generics (Data(..), Typeable(..))
287
288 import System.IO (hGetBufNonBlocking)
289 import System.IO.Error (isEOFError)
290
291 import Foreign.Marshal (alloca)
292 import qualified Foreign.Concurrent as FC (newForeignPtr)
293
294 import GHC.Handle
295 import GHC.Prim (realWorld#, Addr#, Word#, (+#), writeWord8OffAddr#)
296 import GHC.Base (build, unsafeChr)
297 import GHC.Word hiding (Word8)
298 import GHC.Ptr (Ptr(..))
299 import GHC.ST (ST(..))
300 import GHC.IOBase
301
302 #endif
303
304 -- CFILES stuff is Hugs only
305 {-# CFILES cbits/fpstring.c #-}
306
307 -- -----------------------------------------------------------------------------
308 --
309 -- Useful macros, until we have bang patterns
310 --
311
312 #define STRICT1(f) f a | a `seq` False = undefined
313 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
314 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
315 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
316 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
317
318 -- -----------------------------------------------------------------------------
319
320 -- | A space-efficient representation of a Word8 vector, supporting many
321 -- efficient operations. A 'ByteString' contains 8-bit characters only.
322 --
323 -- Instances of Eq, Ord, Read, Show, Data, Typeable
324 --
325 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8)
326 {-# UNPACK #-} !Int
327 {-# UNPACK #-} !Int
328
329 #if defined(__GLASGOW_HASKELL__)
330 deriving (Data, Typeable)
331 #endif
332
333 instance Eq ByteString
334 where (==) = eq
335
336 instance Ord ByteString
337 where compare = compareBytes
338
339 instance Show ByteString where
340 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
341
342 instance Read ByteString where
343 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
344
345 {-
346 instance Arbitrary PackedString where
347 arbitrary = P.pack `fmap` arbitrary
348 coarbitrary s = coarbitrary (P.unpack s)
349 -}
350
351 -- | /O(n)/ Equality on the 'ByteString' type.
352 eq :: ByteString -> ByteString -> Bool
353 eq a@(PS p s l) b@(PS p' s' l')
354 | l /= l' = False -- short cut on length
355 | p == p' && s == s' = True -- short cut for the same string
356 | otherwise = compareBytes a b == EQ
357 {-# INLINE eq #-}
358
359 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices.
360 compareBytes :: ByteString -> ByteString -> Ordering
361 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
362 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings
363 | x1 == x2 && s1 == s2 && l1 == l2 = EQ -- short cut for the same string
364 | otherwise = inlinePerformIO $
365 withForeignPtr x1 $ \p1 ->
366 withForeignPtr x2 $ \p2 -> do
367 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
368 return $ case i `compare` 0 of
369 EQ -> l1 `compare` l2
370 x -> x
371 {-# INLINE compareBytes #-}
372
373 {-
374 --
375 -- About 4x slower over 32M
376 --
377 compareBytes :: ByteString -> ByteString -> Ordering
378 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) = inlinePerformIO $
379 withForeignPtr fp1 $ \p1 ->
380 withForeignPtr fp2 $ \p2 ->
381 cmp (p1 `plusPtr` off1)
382 (p2 `plusPtr` off2) 0 len1 len2
383
384 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
385 STRICT5(cmp)
386 cmp p1 p2 n len1 len2
387 | n == len1 = if n == len2 then return EQ else return LT
388 | n == len2 = return GT
389 | otherwise = do
390 (a :: Word8) <- peekByteOff p1 n
391 (b :: Word8) <- peekByteOff p2 n
392 case a `compare` b of
393 EQ -> cmp p1 p2 (n+1) len1 len2
394 LT -> return LT
395 GT -> return GT
396 {-# INLINE compareBytes #-}
397 -}
398
399 -- -----------------------------------------------------------------------------
400 -- Introducing and eliminating 'ByteString's
401
402 -- | /O(1)/ The empty 'ByteString'
403 empty :: ByteString
404 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0
405 {-# NOINLINE empty #-}
406
407 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
408 packByte :: Word8 -> ByteString
409 packByte c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do
410 withForeignPtr fp $ \p -> poke p c
411 return $ PS fp 0 1
412 {-# INLINE packByte #-}
413
414 --
415 -- XXX The unsafePerformIO is critical!
416 --
417 -- Otherwise:
418 --
419 -- packByte 255 `compare` packByte 127
420 --
421 -- is compiled to:
422 --
423 -- case mallocByteString 2 of
424 -- ForeignPtr f internals ->
425 -- case writeWord8OffAddr# f 0 255 of _ ->
426 -- case writeWord8OffAddr# f 0 127 of _ ->
427 -- case eqAddr# f f of
428 -- False -> case compare (GHC.Prim.plusAddr# f 0)
429 -- (GHC.Prim.plusAddr# f 0)
430 --
431 --
432
433 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
434 --
435 -- For applications with large numbers of string literals, pack can be a
436 -- bottleneck. In such cases, consider using packAddress (GHC only).
437 pack :: [Word8] -> ByteString
438
439 #if !defined(__GLASGOW_HASKELL__)
440
441 pack str = create (P.length str) $ \p -> go p str
442 where
443 go _ [] = return ()
444 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
445
446 #else /* hack away */
447
448 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
449 where
450 go _ _ [] = return ()
451 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
452
453 writeByte p i c = ST $ \s# ->
454 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
455
456 #endif
457
458 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
459 unpack :: ByteString -> [Word8]
460
461 #if !defined(__GLASGOW_HASKELL__)
462
463 unpack (PS _ _ 0) = []
464 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
465 go (p `plusPtr` s) (l - 1) []
466 where
467 STRICT3(go)
468 go p 0 acc = peek p >>= \e -> return (e : acc)
469 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
470 {-# INLINE unpack #-}
471
472 #else
473
474 unpack ps = build (unpackFoldr ps)
475 {-# INLINE unpack #-}
476
477 unpackList :: ByteString -> [Word8]
478 unpackList (PS fp off len) = withPtr fp $ \p -> do
479 let STRICT3(loop)
480 loop _ (-1) acc = return acc
481 loop q n acc = do
482 a <- peekByteOff q n
483 loop q (n-1) (a : acc)
484 loop (p `plusPtr` off) (len-1) []
485
486 {-# RULES
487 "unpack-list" [1] forall p . unpackFoldr p (:) [] = unpackList p
488 #-}
489
490 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
491 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
492 let STRICT3(loop)
493 loop _ (-1) acc = return acc
494 loop q n acc = do
495 a <- peekByteOff q n
496 loop q (n-1) (a `f` acc)
497 loop (p `plusPtr` off) (len-1) ch
498 {-# INLINE [0] unpackFoldr #-}
499
500 -- TODO just use normal foldr here.
501
502 #endif
503
504 ------------------------------------------------------------------------
505
506 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
507 -- conversion function
508 packWith :: (a -> Word8) -> [a] -> ByteString
509 packWith k str = create (P.length str) $ \p -> go p str
510 where
511 STRICT2(go)
512 go _ [] = return ()
513 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
514 {-# INLINE packWith #-}
515 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
516
517 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
518 unpackWith :: (Word8 -> a) -> ByteString -> [a]
519 unpackWith _ (PS _ _ 0) = []
520 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
521 go (p `plusPtr` s) (l - 1) []
522 where
523 STRICT3(go)
524 go p 0 acc = peek p >>= \e -> return (k e : acc)
525 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
526 {-# INLINE unpackWith #-}
527 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
528
529 -- ---------------------------------------------------------------------
530 -- Basic interface
531
532 -- | /O(1)/ Test whether a ByteString is empty.
533 null :: ByteString -> Bool
534 null (PS _ _ l) = l == 0
535 {-# INLINE null #-}
536
537 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
538 length :: ByteString -> Int
539 length (PS _ _ l) = l
540
541 #if defined(__GLASGOW_HASKELL__)
542 {-# INLINE [1] length #-}
543 #endif
544
545 {-#
546
547 -- Translate length into a loop.
548 -- Performace ok, but allocates too much, so disable for now.
549
550 "length/loop" forall f acc s .
551 length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s))
552
553 #-}
554
555 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
556 -- complexity, as it requires a memcpy.
557 cons :: Word8 -> ByteString -> ByteString
558 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do
559 poke p c
560 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
561 {-# INLINE cons #-}
562
563 -- todo fuse
564
565 -- | /O(n)/ Append a byte to the end of a 'ByteString'
566 snoc :: ByteString -> Word8 -> ByteString
567 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do
568 memcpy p (f `plusPtr` s) (fromIntegral l)
569 poke (p `plusPtr` l) c
570 {-# INLINE snoc #-}
571
572 -- todo fuse
573
574 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
575 head :: ByteString -> Word8
576 head ps@(PS x s _)
577 | null ps = errorEmptyList "head"
578 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
579 {-# INLINE head #-}
580
581 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
582 tail :: ByteString -> ByteString
583 tail (PS p s l)
584 | l <= 0 = errorEmptyList "tail"
585 | otherwise = PS p (s+1) (l-1)
586 {-# INLINE tail #-}
587
588 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
589 last :: ByteString -> Word8
590 last ps@(PS x s l)
591 | null ps = errorEmptyList "last"
592 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
593 {-# INLINE last #-}
594
595 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
596 init :: ByteString -> ByteString
597 init (PS p s l)
598 | l <= 0 = errorEmptyList "init"
599 | otherwise = PS p s (l-1)
600 {-# INLINE init #-}
601
602 -- | /O(n)/ Append two ByteStrings
603 append :: ByteString -> ByteString -> ByteString
604 append xs ys | null xs = ys
605 | null ys = xs
606 | otherwise = concat [xs,ys]
607 {-# INLINE append #-}
608
609 -- ---------------------------------------------------------------------
610 -- Transformations
611
612 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
613 -- element of @xs@. This function is subject to array fusion.
614 map :: (Word8 -> Word8) -> ByteString -> ByteString
615 map f = loopArr . loopU (mapEFL f) noAL
616 {-# INLINE map #-}
617
618 -- | /O(n)/ Like 'map', but not fuseable. The benefit is that it is
619 -- slightly faster for one-shot cases.
620 map' :: (Word8 -> Word8) -> ByteString -> ByteString
621 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
622 np <- mallocByteString (len+1)
623 withForeignPtr np $ \p -> do
624 map_ 0 (a `plusPtr` s) p
625 return (PS np 0 len)
626 where
627
628 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
629 STRICT3(map_)
630 map_ n p1 p2
631 | n >= len = return ()
632 | otherwise = do
633 x <- peekByteOff p1 n
634 pokeByteOff p2 n (f x)
635 map_ (n+1) p1 p2
636 {-# INLINE map' #-}
637
638 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
639 reverse :: ByteString -> ByteString
640 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
641 c_reverse p (f `plusPtr` s) (fromIntegral l)
642
643 {-
644 reverse = pack . P.reverse . unpack
645 -}
646
647 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
648 -- 'ByteString' and \`intersperses\' that byte between the elements of
649 -- the 'ByteString'. It is analogous to the intersperse function on
650 -- Lists.
651 intersperse :: Word8 -> ByteString -> ByteString
652 intersperse c ps@(PS x s l)
653 | length ps < 2 = ps
654 | otherwise = create (2*l-1) $ \p -> withForeignPtr x $ \f ->
655 c_intersperse p (f `plusPtr` s) (fromIntegral l) c
656
657 {-
658 intersperse c = pack . List.intersperse c . unpack
659 -}
660
661 -- | The 'transpose' function transposes the rows and columns of its
662 -- 'ByteString' argument.
663 transpose :: [ByteString] -> [ByteString]
664 transpose ps = P.map pack (List.transpose (P.map unpack ps))
665
666 -- ---------------------------------------------------------------------
667 -- Reducing 'ByteString's
668
669 -- | 'foldl', applied to a binary operator, a starting value (typically
670 -- the left-identity of the operator), and a ByteString, reduces the
671 -- ByteString using the binary operator, from left to right.
672 -- This function is subject to array fusion.
673 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
674 foldl f z = loopAcc . loopU (foldEFL f) z
675 {-# INLINE foldl #-}
676
677 {-
678 --
679 -- About twice as fast with 6.4.1, but not fuseable
680 -- A simple fold . map is enough to make it worth while.
681 --
682 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
683 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
684 where
685 STRICT3(lgo)
686 lgo z p q | p == q = return z
687 | otherwise = do c <- peek p
688 lgo (f z c) (p `plusPtr` 1) q
689 -}
690
691 -- | 'foldl\'' is like foldl, but strict in the accumulator.
692 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
693 foldl' f z = loopAcc . loopU (foldEFL' f) z
694 {-# INLINE foldl' #-}
695
696 -- | 'foldr', applied to a binary operator, a starting value
697 -- (typically the right-identity of the operator), and a ByteString,
698 -- reduces the ByteString using the binary operator, from right to left.
699 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
700 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
701 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
702 where
703 STRICT2(go)
704 go p q | p == q = return z
705 | otherwise = do c <- peek p
706 ws <- go (p `plusPtr` 1) q
707 return $ c `k` ws
708
709 -- | 'foldl1' is a variant of 'foldl' that has no starting value
710 -- argument, and thus must be applied to non-empty 'ByteStrings'.
711 -- This function is subject to array fusion.
712 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
713 foldl1 f ps
714 | null ps = errorEmptyList "foldl1"
715 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
716
717 -- | A strict version of 'foldl1'
718 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
719 foldl1' f ps
720 | null ps = errorEmptyList "foldl1'"
721 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
722
723 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
724 -- and thus must be applied to non-empty 'ByteString's
725 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
726 foldr1 f ps
727 | null ps = errorEmptyList "foldr1"
728 | otherwise = foldr f (last ps) (init ps)
729
730 -- ---------------------------------------------------------------------
731 -- Special folds
732
733 -- | /O(n)/ Concatenate a list of ByteStrings.
734 concat :: [ByteString] -> ByteString
735 concat [] = empty
736 concat [ps] = ps
737 concat xs = create len $ \ptr -> go xs ptr
738 where len = P.sum . P.map length $ xs
739 STRICT2(go)
740 go [] _ = return ()
741 go (PS p s l:ps) ptr = do
742 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
743 go ps (ptr `plusPtr` l)
744
745 -- | Map a function over a 'ByteString' and concatenate the results
746 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
747 concatMap f = foldr (append . f) empty
748 -- A silly function for ByteStrings anyway.
749
750 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
751 -- any element of the 'ByteString' satisfies the predicate.
752 any :: (Word8 -> Bool) -> ByteString -> Bool
753 any _ (PS _ _ 0) = False
754 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
755 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
756 where
757 STRICT2(go)
758 go p q | p == q = return False
759 | otherwise = do c <- peek p
760 if f c then return True
761 else go (p `plusPtr` 1) q
762
763 -- todo fuse
764
765 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
766 -- if all elements of the 'ByteString' satisfy the predicate.
767 all :: (Word8 -> Bool) -> ByteString -> Bool
768 all _ (PS _ _ 0) = True
769 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
770 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
771 where
772 STRICT2(go)
773 go p q | p == q = return True -- end of list
774 | otherwise = do c <- peek p
775 if f c
776 then go (p `plusPtr` 1) q
777 else return False
778 -- todo fuse
779
780 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
781 maximum :: ByteString -> Word8
782 maximum xs@(PS x s l)
783 | null xs = errorEmptyList "maximum"
784 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
785 return $ c_maximum (p `plusPtr` s) (fromIntegral l)
786 {-# INLINE maximum #-}
787
788 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
789 minimum :: ByteString -> Word8
790 minimum xs@(PS x s l)
791 | null xs = errorEmptyList "minimum"
792 | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
793 return $ c_minimum (p `plusPtr` s) (fromIntegral l)
794 {-# INLINE minimum #-}
795
796 -- fusion is too slow here (10x)
797
798 {-
799 maximum xs@(PS x s l)
800 | null xs = errorEmptyList "maximum"
801 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
802 w <- peek p
803 maximum_ (p `plusPtr` s) 0 l w
804
805 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
806 STRICT4(maximum_)
807 maximum_ ptr n m c
808 | n >= m = return c
809 | otherwise = do w <- peekByteOff ptr n
810 maximum_ ptr (n+1) m (if w > c then w else c)
811
812 minimum xs@(PS x s l)
813 | null xs = errorEmptyList "minimum"
814 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do
815 w <- peek p
816 minimum_ (p `plusPtr` s) 0 l w
817
818 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8
819 STRICT4(minimum_)
820 minimum_ ptr n m c
821 | n >= m = return c
822 | otherwise = do w <- peekByteOff ptr n
823 minimum_ ptr (n+1) m (if w < c then w else c)
824 -}
825
826 -- | /O(n)/ map Word8 functions, provided with the index at each position
827 mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString
828 mapIndexed k (PS ps s l) = create l $ \p -> withForeignPtr ps $ \f ->
829 go 0 (f `plusPtr` s) p (f `plusPtr` s `plusPtr` l)
830 where
831 go :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
832 STRICT4(go)
833 go n f t p | f == p = return ()
834 | otherwise = do w <- peek f
835 ((poke t) . k n) w
836 go (n+1) (f `plusPtr` 1) (t `plusPtr` 1) p
837
838 -- ---------------------------------------------------------------------
839 -- Building ByteStrings
840
841 -- | 'scanl' is similar to 'foldl', but returns a list of successive
842 -- reduced values from the left. This function will fuse.
843 --
844 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
845 --
846 -- Note that
847 --
848 -- > last (scanl f z xs) == foldl f z xs.
849 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
850 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -- extra space
851 {-# INLINE scanl #-}
852
853 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
854 -- This function will fuse.
855 --
856 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
857 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
858 scanl1 f ps
859 | null ps = empty
860 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
861 {-# INLINE scanl1 #-}
862
863 -- ---------------------------------------------------------------------
864 -- Unfolds and replicates
865
866 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
867 -- the value of every element. The following holds:
868 --
869 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
870 --
871 -- This implemenation uses @memset(3)@
872 replicate :: Int -> Word8 -> ByteString
873 replicate w c | w <= 0 = empty
874 | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return ()
875
876 {-
877 -- About 5x slower
878 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w
879 where
880 STRICT2(go)
881 go _ 0 = return w
882 go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1)
883 -}
884
885 -- | /O(n)/ The 'unfoldrN' function is analogous to the List \'unfoldr\'.
886 -- 'unfoldrN' builds a ByteString from a seed value. The function takes
887 -- the element and returns 'Nothing' if it is done producing the
888 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
889 -- prepending to the ByteString and @b@ is used as the next element in a
890 -- recursive call.
891 --
892 -- To preven unfoldrN having /O(n^2)/ complexity (as prepending a
893 -- character to a ByteString is /O(n)/, this unfoldr requires a maximum
894 -- final size of the ByteString as an argument. 'cons' can then be
895 -- implemented in /O(1)/ (i.e. a 'poke'), and the unfoldr itself has
896 -- linear complexity. The depth of the recursion is limited to this
897 -- size, but may be less. For lazy, infinite unfoldr, use
898 -- 'Data.List.unfoldr' (from 'Data.List').
899 --
900 -- Examples:
901 --
902 -- > unfoldrN 10 (\x -> Just (x, chr (ord x + 1))) '0' == "0123456789"
903 --
904 -- The following equation connects the depth-limited unfoldr to the List unfoldr:
905 --
906 -- > unfoldrN n == take n $ List.unfoldr
907 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> ByteString
908 unfoldrN i f w
909 | i <= 0 = empty
910 | otherwise = inlinePerformIO $ generate i $ \p -> go p w 0
911 where
912 STRICT3(go)
913 go q c n | n == i = return n -- stop if we reach `i'
914 | otherwise = case f c of
915 Nothing -> return n
916 Just (a,new_c) -> do
917 poke q a
918 go (q `plusPtr` 1) new_c (n+1)
919
920 -- ---------------------------------------------------------------------
921 -- Substrings
922
923 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
924 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
925 take :: Int -> ByteString -> ByteString
926 take n ps@(PS x s l)
927 | n <= 0 = empty
928 | n >= l = ps
929 | otherwise = PS x s n
930 {-# INLINE take #-}
931
932 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
933 -- elements, or @[]@ if @n > 'length' xs@.
934 drop :: Int -> ByteString -> ByteString
935 drop n ps@(PS x s l)
936 | n <= 0 = ps
937 | n >= l = empty
938 | otherwise = PS x (s+n) (l-n)
939 {-# INLINE drop #-}
940
941 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
942 splitAt :: Int -> ByteString -> (ByteString, ByteString)
943 splitAt n ps@(PS x s l)
944 | n <= 0 = (empty, ps)
945 | n >= l = (ps, empty)
946 | otherwise = (PS x s n, PS x (s+n) (l-n))
947 {-# INLINE splitAt #-}
948
949 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
950 -- returns the longest prefix (possibly empty) of @xs@ of elements that
951 -- satisfy @p@.
952 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
953 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
954 {-# INLINE takeWhile #-}
955
956 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
957 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
958 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
959 {-# INLINE dropWhile #-}
960
961 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
962 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
963 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
964 {-# INLINE break #-}
965
966 -- | 'breakByte' breaks its ByteString argument at the first occurence
967 -- of the specified byte. It is more efficient than 'break' as it is
968 -- implemented with @memchr(3)@. I.e.
969 --
970 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
971 --
972 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
973 breakByte c p = case elemIndex c p of
974 Nothing -> (p,empty)
975 Just n -> (unsafeTake n p, unsafeDrop n p)
976 {-# INLINE breakByte #-}
977
978 -- | 'spanByte' breaks its ByteString argument at the first
979 -- occurence of a byte other than its argument. It is more efficient
980 -- than 'span (==)'
981 --
982 -- > span (=='c') "abcd" == spanByte 'c' "abcd"
983 --
984 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
985 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
986 go (p `plusPtr` s) 0
987 where
988 STRICT2(go)
989 go p i | i >= l = return (ps, empty)
990 | otherwise = do c' <- peekByteOff p i
991 if c /= c'
992 then return (unsafeTake i ps, unsafeDrop i ps)
993 else go p (i+1)
994 {-# INLINE spanByte #-}
995
996 -- | /O(n)/ 'breakFirst' breaks the given ByteString on the first
997 -- occurence of @w@. It behaves like 'break', except the delimiter is
998 -- not returned, and @Nothing@ is returned if the delimiter is not in
999 -- the ByteString. I.e.
1000 --
1001 -- > breakFirst 'b' "aabbcc" == Just ("aa","bcc")
1002 --
1003 -- > breakFirst c xs ==
1004 -- > let (x,y) = break (== c) xs
1005 -- > in if null y then Nothing else Just (x, drop 1 y))
1006 --
1007 breakFirst :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
1008 breakFirst c p = case elemIndex c p of
1009 Nothing -> Nothing
1010 Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p)
1011 {-# INLINE breakFirst #-}
1012
1013 -- | /O(n)/ 'breakLast' behaves like breakFirst, but from the end of the
1014 -- ByteString.
1015 --
1016 -- > breakLast ('b') (pack "aabbcc") == Just ("aab","cc")
1017 --
1018 -- and the following are equivalent:
1019 --
1020 -- > breakLast 'c' "abcdef"
1021 -- > let (x,y) = break (=='c') (reverse "abcdef")
1022 -- > in if null x then Nothing else Just (reverse (drop 1 y), reverse x)
1023 --
1024 breakLast :: Word8 -> ByteString -> Maybe (ByteString,ByteString)
1025 breakLast c p = case elemIndexLast c p of
1026 Nothing -> Nothing
1027 Just n -> Just (unsafeTake n p, unsafeDrop (n+1) p)
1028 {-# INLINE breakLast #-}
1029
1030 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
1031 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
1032 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1033 span p ps = break (not . p) ps
1034 {-# INLINE span #-}
1035
1036 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
1037 -- We have
1038 --
1039 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
1040 --
1041 -- and
1042 --
1043 -- > spanEnd (not . isSpace) ps
1044 -- > ==
1045 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
1046 --
1047 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
1048 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
1049
1050 -- | /O(n)/ Splits a 'ByteString' into components delimited by
1051 -- separators, where the predicate returns True for a separator element.
1052 -- The resulting components do not contain the separators. Two adjacent
1053 -- separators result in an empty component in the output. eg.
1054 --
1055 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
1056 -- > splitWith (=='a') [] == []
1057 --
1058 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
1059
1060 #if defined(__GLASGOW_HASKELL__)
1061 splitWith _pred (PS _ _ 0) = []
1062 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
1063 where pred# c# = pred_ (W8# c#)
1064
1065 STRICT4(splitWith0)
1066 splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
1067 splitLoop pred' p 0 off' len' fp'
1068
1069 splitLoop :: (Word# -> Bool)
1070 -> Ptr Word8
1071 -> Int -> Int -> Int
1072 -> ForeignPtr Word8
1073 -> IO [ByteString]
1074
1075 splitLoop pred' p idx' off' len' fp'
1076 | pred' `seq` p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1077 | idx' >= len' = return [PS fp' off' idx']
1078 | otherwise = do
1079 w <- peekElemOff p (off'+idx')
1080 if pred' (case w of W8# w# -> w#)
1081 then return (PS fp' off' idx' :
1082 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
1083 else splitLoop pred' p (idx'+1) off' len' fp'
1084 {-# INLINE splitWith #-}
1085
1086 #else
1087 splitWith _ (PS _ _ 0) = []
1088 splitWith p ps = loop p ps
1089 where
1090 STRICT2(loop)
1091 loop q qs = if null rest then [chunk]
1092 else chunk : loop q (unsafeTail rest)
1093 where (chunk,rest) = break q qs
1094 #endif
1095
1096 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
1097 -- argument, consuming the delimiter. I.e.
1098 --
1099 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
1100 -- > split 'a' "aXaXaXa" == ["","X","X","X"]
1101 -- > split 'x' "x" == ["",""]
1102 --
1103 -- and
1104 --
1105 -- > join [c] . split c == id
1106 -- > split == splitWith . (==)
1107 --
1108 -- As for all splitting functions in this library, this function does
1109 -- not copy the substrings, it just constructs new 'ByteStrings' that
1110 -- are slices of the original.
1111 --
1112 split :: Word8 -> ByteString -> [ByteString]
1113 split _ (PS _ _ 0) = []
1114 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1115 let ptr = p `plusPtr` s
1116
1117 STRICT1(loop)
1118 loop n = do
1119 let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n))
1120 if q == nullPtr
1121 then [PS x (s+n) (l-n)]
1122 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
1123
1124 return (loop 0)
1125 {-# INLINE split #-}
1126
1127 {-
1128 -- slower. but stays inside Haskell.
1129 split _ (PS _ _ 0) = []
1130 split (W8# w#) (PS fp off len) = splitWith' off len fp
1131 where
1132 splitWith' off' len' fp' = withPtr fp $ \p ->
1133 splitLoop p 0 off' len' fp'
1134
1135 splitLoop :: Ptr Word8
1136 -> Int -> Int -> Int
1137 -> ForeignPtr Word8
1138 -> IO [ByteString]
1139
1140 STRICT5(splitLoop)
1141 splitLoop p idx' off' len' fp'
1142 | p `seq` idx' `seq` off' `seq` len' `seq` fp' `seq` False = undefined
1143 | idx' >= len' = return [PS fp' off' idx']
1144 | otherwise = do
1145 (W8# x#) <- peekElemOff p (off'+idx')
1146 if word2Int# w# ==# word2Int# x#
1147 then return (PS fp' off' idx' :
1148 splitWith' (off'+idx'+1) (len'-idx'-1) fp')
1149 else splitLoop p (idx'+1) off' len' fp'
1150 -}
1151
1152 -- | Like 'splitWith', except that sequences of adjacent separators are
1153 -- treated as a single separator. eg.
1154 --
1155 -- > tokens (=='a') "aabbaca" == ["bb","c"]
1156 --
1157 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
1158 tokens f = P.filter (not.null) . splitWith f
1159 {-# INLINE tokens #-}
1160
1161 -- | The 'group' function takes a ByteString and returns a list of
1162 -- ByteStrings such that the concatenation of the result is equal to the
1163 -- argument. Moreover, each sublist in the result contains only equal
1164 -- elements. For example,
1165 --
1166 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
1167 --
1168 -- It is a special case of 'groupBy', which allows the programmer to
1169 -- supply their own equality test. It is about 40% faster than
1170 -- /groupBy (==)/
1171 group :: ByteString -> [ByteString]
1172 group xs
1173 | null xs = []
1174 | otherwise = ys : group zs
1175 where
1176 (ys, zs) = spanByte (unsafeHead xs) xs
1177
1178 -- | The 'groupBy' function is the non-overloaded version of 'group'.
1179 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
1180 groupBy k xs
1181 | null xs = []
1182 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
1183 where
1184 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
1185
1186 -- | /O(n)/ The 'join' function takes a 'ByteString' and a list of
1187 -- 'ByteString's and concatenates the list after interspersing the first
1188 -- argument between each element of the list.
1189 join :: ByteString -> [ByteString] -> ByteString
1190 join filler pss = concat (splice pss)
1191 where
1192 splice [] = []
1193 splice [x] = [x]
1194 splice (x:y:xs) = x:filler:splice (y:xs)
1195 {-# INLINE join #-}
1196
1197 --
1198 -- | /O(n)/ joinWithByte. An efficient way to join to two ByteStrings
1199 -- with a char. Around 4 times faster than the generalised join.
1200 --
1201 joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString
1202 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr ->
1203 withForeignPtr ffp $ \fp ->
1204 withForeignPtr fgp $ \gp -> do
1205 memcpy ptr (fp `plusPtr` s) (fromIntegral l)
1206 poke (ptr `plusPtr` l) c
1207 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
1208 where
1209 len = length f + length g + 1
1210 {-# INLINE joinWithByte #-}
1211
1212 -- ---------------------------------------------------------------------
1213 -- Indexing ByteStrings
1214
1215 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
1216 index :: ByteString -> Int -> Word8
1217 index ps n
1218 | n < 0 = moduleError "index" ("negative index: " ++ show n)
1219 | n >= length ps = moduleError "index" ("index too large: " ++ show n
1220 ++ ", length = " ++ show (length ps))
1221 | otherwise = ps `unsafeIndex` n
1222 {-# INLINE index #-}
1223
1224 -- | /O(n)/ The 'elemIndex' function returns the index of the first
1225 -- element in the given 'ByteString' which is equal to the query
1226 -- element, or 'Nothing' if there is no such element.
1227 -- This implementation uses memchr(3).
1228 elemIndex :: Word8 -> ByteString -> Maybe Int
1229 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1230 let p' = p `plusPtr` s
1231 q = memchr p' c (fromIntegral l)
1232 return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p'
1233 {-# INLINE elemIndex #-}
1234
1235 -- | /O(n)/ The 'elemIndexLast' function returns the last index of the
1236 -- element in the given 'ByteString' which is equal to the query
1237 -- element, or 'Nothing' if there is no such element. The following
1238 -- holds:
1239 --
1240 -- > elemIndexLast c xs ==
1241 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
1242 --
1243 elemIndexLast :: Word8 -> ByteString -> Maybe Int
1244 elemIndexLast ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
1245 go (p `plusPtr` s) (l-1)
1246 where
1247 STRICT2(go)
1248 go p i | i < 0 = return Nothing
1249 | otherwise = do ch' <- peekByteOff p i
1250 if ch == ch'
1251 then return $ Just i
1252 else go p (i-1)
1253 {-# INLINE elemIndexLast #-}
1254
1255 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
1256 -- the indices of all elements equal to the query element, in ascending order.
1257 -- This implementation uses memchr(3).
1258 elemIndices :: Word8 -> ByteString -> [Int]
1259 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
1260 let ptr = p `plusPtr` s
1261
1262 STRICT1(loop)
1263 loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n))
1264 in if q == nullPtr
1265 then []
1266 else let i = q `minusPtr` ptr
1267 in i : loop (i+1)
1268 return (loop 0)
1269 {-# INLINE elemIndices #-}
1270
1271 {-
1272 -- much slower
1273 elemIndices :: Word8 -> ByteString -> [Int]
1274 elemIndices c ps = loop 0 ps
1275 where STRICT2(loop)
1276 loop _ ps' | null ps' = []
1277 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
1278 | otherwise = loop (n+1) (unsafeTail ps')
1279 -}
1280
1281 -- | count returns the number of times its argument appears in the ByteString
1282 --
1283 -- > count = length . elemIndices
1284 --
1285 -- But more efficiently than using length on the intermediate list.
1286 count :: Word8 -> ByteString -> Int
1287 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1288 return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
1289 {-# INLINE count #-}
1290
1291 {-
1292 --
1293 -- around 30% slower
1294 --
1295 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
1296 go (p `plusPtr` s) (fromIntegral m) 0
1297 where
1298 go :: Ptr Word8 -> CSize -> Int -> IO Int
1299 STRICT3(go)
1300 go p l i = do
1301 let q = memchr p w l
1302 if q == nullPtr
1303 then return i
1304 else do let k = fromIntegral $ q `minusPtr` p
1305 go (q `plusPtr` 1) (l-k-1) (i+1)
1306 -}
1307
1308 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
1309 -- returns the index of the first element in the ByteString
1310 -- satisfying the predicate.
1311 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
1312 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1313 where
1314 STRICT2(go)
1315 go ptr n | n >= l = return Nothing
1316 | otherwise = do w <- peek ptr
1317 if k w
1318 then return (Just n)
1319 else go (ptr `plusPtr` 1) (n+1)
1320 {-# INLINE findIndex #-}
1321
1322 -- | The 'findIndices' function extends 'findIndex', by returning the
1323 -- indices of all elements satisfying the predicate, in ascending order.
1324 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
1325 findIndices p ps = loop 0 ps
1326 where
1327 STRICT2(loop)
1328 loop n qs | null qs = []
1329 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
1330 | otherwise = loop (n+1) (unsafeTail qs)
1331
1332 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
1333 -- of the string if no element is found, rather than Nothing.
1334 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
1335 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
1336 where
1337 STRICT2(go)
1338 go ptr n | n >= l = return l
1339 | otherwise = do w <- peek ptr
1340 if k w
1341 then return n
1342 else go (ptr `plusPtr` 1) (n+1)
1343 {-# INLINE findIndexOrEnd #-}
1344
1345 -- ---------------------------------------------------------------------
1346 -- Searching ByteStrings
1347
1348 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
1349 elem :: Word8 -> ByteString -> Bool
1350 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
1351 {-# INLINE elem #-}
1352
1353 -- | /O(n)/ 'notElem' is the inverse of 'elem'
1354 notElem :: Word8 -> ByteString -> Bool
1355 notElem c ps = not (elem c ps)
1356 {-# INLINE notElem #-}
1357
1358 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
1359 -- returns a ByteString containing those characters that satisfy the
1360 -- predicate. This function is subject to array fusion.
1361 filter :: (Word8 -> Bool) -> ByteString -> ByteString
1362 filter p = loopArr . loopU (filterEFL p) noAL
1363 {-# INLINE filter #-}
1364
1365 -- | /O(n)/ 'filter\'' is a non-fuseable version of filter, that may be
1366 -- around 2x faster for some one-shot applications.
1367 filter' :: (Word8 -> Bool) -> ByteString -> ByteString
1368 filter' k ps@(PS x s l)
1369 | null ps = ps
1370 | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do
1371 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
1372 return (t `minusPtr` p) -- actual length
1373 where
1374 STRICT3(go)
1375 go f t end | f == end = return t
1376 | otherwise = do
1377 w <- peek f
1378 if k w
1379 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
1380 else go (f `plusPtr` 1) t end
1381 {-# INLINE filter' #-}
1382
1383 --
1384 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
1385 -- case of filtering a single byte. It is more efficient to use
1386 -- /filterByte/ in this case.
1387 --
1388 -- > filterByte == filter . (==)
1389 --
1390 -- filterByte is around 10x faster, and uses much less space, than its
1391 -- filter equivalent
1392 filterByte :: Word8 -> ByteString -> ByteString
1393 filterByte w ps = replicate (count w ps) w
1394 {-# INLINE filterByte #-}
1395
1396 --
1397 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
1398 -- case of filtering a single byte out of a list. It is more efficient
1399 -- to use /filterNotByte/ in this case.
1400 --
1401 -- > filterNotByte == filter . (/=)
1402 --
1403 -- filterNotByte is around 2x faster than its filter equivalent.
1404 filterNotByte :: Word8 -> ByteString -> ByteString
1405 filterNotByte w = filter' (/= w)
1406 {-# INLINE filterNotByte #-}
1407
1408 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
1409 -- and returns the first element in matching the predicate, or 'Nothing'
1410 -- if there is no such element.
1411 --
1412 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
1413 --
1414 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
1415 find f p = case findIndex f p of
1416 Just n -> Just (p `unsafeIndex` n)
1417 _ -> Nothing
1418 {-# INLINE find #-}
1419
1420 {-
1421 --
1422 -- fuseable, but we don't want to walk the whole array.
1423 --
1424 find k = foldl findEFL Nothing
1425 where findEFL a@(Just _) _ = a
1426 findEFL _ c | k c = Just c
1427 | otherwise = Nothing
1428 -}
1429
1430 -- ---------------------------------------------------------------------
1431 -- Searching for substrings
1432
1433 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
1434 -- iff the first is a prefix of the second.
1435 isPrefixOf :: ByteString -> ByteString -> Bool
1436 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
1437 | l1 == 0 = True
1438 | l2 < l1 = False
1439 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1440 withForeignPtr x2 $ \p2 -> do
1441 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
1442 return (i == 0)
1443
1444 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
1445 -- iff the first is a suffix of the second.
1446 --
1447 -- The following holds:
1448 --
1449 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
1450 --
1451 -- However, the real implemenation uses memcmp to compare the end of the
1452 -- string only, with no reverse required..
1453 isSuffixOf :: ByteString -> ByteString -> Bool
1454 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
1455 | l1 == 0 = True
1456 | l2 < l1 = False
1457 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
1458 withForeignPtr x2 $ \p2 -> do
1459 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
1460 return (i == 0)
1461
1462 -- | Check whether one string is a substring of another. @isSubstringOf
1463 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
1464 isSubstringOf :: ByteString -- ^ String to search for.
1465 -> ByteString -- ^ String to search in.
1466 -> Bool
1467 isSubstringOf p s = not $ P.null $ findSubstrings p s
1468
1469 -- | Get the first index of a substring in another string,
1470 -- or 'Nothing' if the string is not found.
1471 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
1472 findSubstring :: ByteString -- ^ String to search for.
1473 -> ByteString -- ^ String to seach in.
1474 -> Maybe Int
1475 findSubstring = (listToMaybe .) . findSubstrings
1476
1477 -- | Find the indexes of all (possibly overlapping) occurances of a
1478 -- substring in a string. This function uses the Knuth-Morris-Pratt
1479 -- string matching algorithm.
1480 findSubstrings :: ByteString -- ^ String to search for.
1481 -> ByteString -- ^ String to seach in.
1482 -> [Int]
1483
1484 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
1485 where
1486 patc x = pat `unsafeIndex` x
1487 strc x = str `unsafeIndex` x
1488
1489 -- maybe we should make kmpNext a UArray before using it in search?
1490 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
1491 kmpNextL p _ | null p = []
1492 kmpNextL p j = let j' = next (unsafeHead p) j + 1
1493 ps = unsafeTail p
1494 x = if not (null ps) && unsafeHead ps == patc j'
1495 then kmpNext Array.! j' else j'
1496 in x:kmpNextL ps j'
1497 search i j = match ++ rest -- i: position in string, j: position in pattern
1498 where match = if j == m then [(i - j)] else []
1499 rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
1500 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
1501 | otherwise = j
1502
1503 -- ---------------------------------------------------------------------
1504 -- Zipping
1505
1506 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
1507 -- corresponding pairs of bytes. If one input ByteString is short,
1508 -- excess elements of the longer ByteString are discarded. This is
1509 -- equivalent to a pair of 'unpack' operations.
1510 zip :: ByteString -> ByteString -> [(Word8,Word8)]
1511 zip ps qs
1512 | null ps || null qs = []
1513 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
1514
1515 -- | 'zipWith' generalises 'zip' by zipping with the function given as
1516 -- the first argument, instead of a tupling function. For example,
1517 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
1518 -- corresponding sums.
1519 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
1520 zipWith f ps qs
1521 | null ps || null qs = []
1522 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
1523
1524 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
1525 -- ByteStrings. Note that this performs two 'pack' operations.
1526 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
1527 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
1528 {-# INLINE unzip #-}
1529
1530 -- ---------------------------------------------------------------------
1531 -- Special lists
1532
1533 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1534 inits :: ByteString -> [ByteString]
1535 inits (PS x s l) = [PS x s n | n <- [0..l]]
1536
1537 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1538 tails :: ByteString -> [ByteString]
1539 tails p | null p = [empty]
1540 | otherwise = p : tails (unsafeTail p)
1541
1542 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
1543
1544 -- | /O(n)/ breaks a ByteString to a list of ByteStrings, one byte each.
1545 elems :: ByteString -> [ByteString]
1546 elems (PS _ _ 0) = []
1547 elems (PS x s l) = (PS x s 1:elems (PS x (s+1) (l-1)))
1548 {-# INLINE elems #-}
1549
1550 -- ---------------------------------------------------------------------
1551 -- ** Ordered 'ByteString's
1552
1553 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
1554 sort :: ByteString -> ByteString
1555 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do
1556
1557 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
1558 withForeignPtr input (\x -> countEach arr (x `plusPtr` s) l)
1559
1560 let STRICT2(go)
1561 go 256 _ = return ()
1562 go i ptr = do n <- peekElemOff arr i
1563 when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
1564 go (i + 1) (ptr `plusPtr` (fromIntegral n))
1565 go 0 p
1566
1567 -- "countEach counts str l" counts the number of occurences of each Word8 in
1568 -- str, and stores the result in counts.
1569 countEach :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
1570 STRICT3(countEach)
1571 countEach counts str l = go 0
1572 where
1573 STRICT1(go)
1574 go i | i == l = return ()
1575 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
1576 x <- peekElemOff counts k
1577 pokeElemOff counts k (x + 1)
1578 go (i + 1)
1579
1580 {-
1581 sort :: ByteString -> ByteString
1582 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do
1583 memcpy p (f `plusPtr` s) l
1584 c_qsort p l -- inplace
1585 -}
1586
1587 {-
1588 sort = pack . List.sort . unpack
1589 -}
1590
1591 -- | The 'sortBy' function is the non-overloaded version of 'sort'.
1592 --
1593 -- Try some linear sorts: radix, counting
1594 -- Or mergesort.
1595 --
1596 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
1597 -- sortBy f ps = undefined
1598
1599 -- ---------------------------------------------------------------------
1600 --
1601 -- Extensions to the basic interface
1602 --
1603
1604 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
1605 -- check for the empty case, so there is an obligation on the programmer
1606 -- to provide a proof that the ByteString is non-empty.
1607 unsafeHead :: ByteString -> Word8
1608 unsafeHead (PS x s _) = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
1609 {-# INLINE unsafeHead #-}
1610
1611 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
1612 -- check for the empty case. As with 'unsafeHead', the programmer must
1613 -- provide a separate proof that the ByteString is non-empty.
1614 unsafeTail :: ByteString -> ByteString
1615 unsafeTail (PS ps s l) = PS ps (s+1) (l-1)
1616 {-# INLINE unsafeTail #-}
1617
1618 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
1619 -- This omits the bounds check, which means there is an accompanying
1620 -- obligation on the programmer to ensure the bounds are checked in some
1621 -- other way.
1622 unsafeIndex :: ByteString -> Int -> Word8
1623 unsafeIndex (PS x s _) i = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
1624 {-# INLINE unsafeIndex #-}
1625
1626 -- | A variety of 'take' which omits the checks on @n@ so there is an
1627 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1628 unsafeTake :: Int -> ByteString -> ByteString
1629 unsafeTake n (PS x s l) =
1630 assert (0 <= n && n <= l) $ PS x s n
1631 {-# INLINE unsafeTake #-}
1632
1633 -- | A variety of 'drop' which omits the checks on @n@ so there is an
1634 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
1635 unsafeDrop :: Int -> ByteString -> ByteString
1636 unsafeDrop n (PS x s l) =
1637 assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
1638 {-# INLINE unsafeDrop #-}
1639
1640 -- ---------------------------------------------------------------------
1641 -- Low level constructors
1642
1643 #if defined(__GLASGOW_HASKELL__)
1644 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
1645 -- Addr\# (an arbitrary machine address assumed to point outside the
1646 -- garbage-collected heap) into a @ByteString@. A much faster way to
1647 -- create an Addr\# is with an unboxed string literal, than to pack a
1648 -- boxed string. A unboxed string literal is compiled to a static @char
1649 -- []@ by GHC. Establishing the length of the string requires a call to
1650 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
1651 -- is the case with "string"# literals in GHC). Use 'unsafePackAddress'
1652 -- if you know the length of the string statically.
1653 --
1654 -- An example:
1655 --
1656 -- > literalFS = packAddress "literal"#
1657 --
1658 packAddress :: Addr# -> ByteString
1659 packAddress addr# = inlinePerformIO $ do
1660 p <- newForeignPtr_ cstr
1661 return $ PS p 0 (fromIntegral $ c_strlen cstr)
1662 where
1663 cstr = Ptr addr#
1664 {-# INLINE packAddress #-}
1665
1666 -- | /O(1)/ 'unsafePackAddress' provides constant-time construction of
1667 -- 'ByteStrings' -- which is ideal for string literals. It packs a
1668 -- null-terminated sequence of bytes into a 'ByteString', given a raw
1669 -- 'Addr\#' to the string, and the length of the string. Make sure the
1670 -- length is correct, otherwise use the safer 'packAddress' (where the
1671 -- length will be calculated once at runtime).
1672 unsafePackAddress :: Int -> Addr# -> ByteString
1673 unsafePackAddress len addr# = inlinePerformIO $ do
1674 p <- newForeignPtr_ cstr
1675 return $ PS p 0 len
1676 where cstr = Ptr addr#
1677
1678 #endif
1679
1680 -- | /O(1)/ Build a ByteString from a ForeignPtr
1681 fromForeignPtr :: ForeignPtr Word8 -> Int -> ByteString
1682 fromForeignPtr fp l = PS fp 0 l
1683
1684 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
1685 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int)
1686 toForeignPtr (PS ps s l) = (ps, s, l)
1687
1688 -- | /O(1)/ 'skipIndex' returns the internal skipped index of the
1689 -- current 'ByteString' from any larger string it was created from, as
1690 -- an 'Int'.
1691 skipIndex :: ByteString -> Int
1692 skipIndex (PS _ s _) = s
1693 {-# INLINE skipIndex #-}
1694
1695 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
1696 -- finalizer associated to it. The ByteString length is calculated using
1697 -- /strlen(3)/, and thus the complexity is a /O(n)/.
1698 packCString :: CString -> ByteString
1699 packCString cstr = inlinePerformIO $ do
1700 fp <- newForeignPtr_ (castPtr cstr)
1701 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1702
1703 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
1704 -- have /no/ finalizer associated with it. This operation has /O(1)/
1705 -- complexity as we already know the final size, so no /strlen(3)/ is
1706 -- required.
1707 packCStringLen :: CStringLen -> ByteString
1708 packCStringLen (ptr,len) = inlinePerformIO $ do
1709 fp <- newForeignPtr_ (castPtr ptr)
1710 return $ PS fp 0 (fromIntegral len)
1711
1712 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
1713 -- have a @free(3)@ finalizer associated to it.
1714 packMallocCString :: CString -> ByteString
1715 packMallocCString cstr = inlinePerformIO $ do
1716 fp <- newForeignFreePtr (castPtr cstr)
1717 return $ PS fp 0 (fromIntegral $ c_strlen cstr)
1718
1719 #if defined(__GLASGOW_HASKELL__)
1720 -- | /O(1)/ Construct a 'ByteString' given a C Ptr Word8 buffer, a
1721 -- length, and an IO action representing a finalizer. This function is
1722 -- not available on Hugs.
1723 --
1724 packCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
1725 packCStringFinalizer p l f = do
1726 fp <- FC.newForeignPtr p f
1727 return $ PS fp 0 l
1728
1729 -- | Explicitly run the finaliser associated with a 'ByteString'.
1730 -- Further references to this value may generate invalid memory
1731 -- references. This operation is unsafe, as there may be other
1732 -- 'ByteStrings' referring to the same underlying pages. If you use
1733 -- this, you need to have a proof of some kind that all 'ByteString's
1734 -- ever generated from the underlying byte array are no longer live.
1735 unsafeFinalize :: ByteString -> IO ()
1736 unsafeFinalize (PS p _ _) = finalizeForeignPtr p
1737
1738 #endif
1739
1740 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.
1741 -- The @CString@ should not be freed afterwards. This is a memcpy(3).
1742 useAsCString :: ByteString -> (CString -> IO a) -> IO a
1743 useAsCString (PS ps s l) = bracket alloc (c_free.castPtr)
1744 where
1745 alloc = withForeignPtr ps $ \p -> do
1746 buf <- c_malloc (fromIntegral l+1)
1747 memcpy (castPtr buf) (castPtr p `plusPtr` s) (fromIntegral l)
1748 poke (buf `plusPtr` l) (0::Word8)
1749 return $ castPtr buf
1750
1751 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CString@.
1752 -- Warning: modifying the @CString@ will affect the @ByteString@.
1753 -- Why is this function unsafe? It relies on the null byte at the end of
1754 -- the ByteString to be there. This is /not/ the case if your ByteString
1755 -- has been spliced from a larger string (i.e. with take or drop).
1756 -- Unless you can guarantee the null byte, you should use the safe
1757 -- version, which will copy the string first.
1758 --
1759 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
1760 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
1761
1762 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
1763 -- This is mainly useful to allow the rest of the data pointed
1764 -- to by the 'ByteString' to be garbage collected, for example
1765 -- if a large string has been read in, and only a small part of it
1766 -- is needed in the rest of the program.
1767 copy :: ByteString -> ByteString
1768 copy (PS x s l) = create l $ \p -> withForeignPtr x $ \f ->
1769 memcpy p (f `plusPtr` s) (fromIntegral l)
1770
1771 -- | /O(n)/ Duplicate a CString as a ByteString. Useful if you know the
1772 -- CString is going to be deallocated from C land.
1773 copyCString :: CString -> IO ByteString
1774 copyCString cstr = copyCStringLen (cstr, (fromIntegral $ c_strlen cstr))
1775
1776 -- | /O(n)/ Same as copyCString, but saves a strlen call when the length is known.
1777 copyCStringLen :: CStringLen -> IO ByteString
1778 copyCStringLen (cstr, len) = do
1779 fp <- mallocForeignPtrArray (len+1)
1780 withForeignPtr fp $ \p -> do
1781 memcpy p (castPtr cstr) (fromIntegral len)
1782 poke (p `plusPtr` len) (0 :: Word8)
1783 return $! PS fp 0 len
1784
1785 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
1786 -- Warning: modifying the @CStringLen@ will affect the @ByteString@.
1787 -- This is analogous to unsafeUseAsCString, and comes with the same
1788 -- safety requirements.
1789 --
1790 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
1791 unsafeUseAsCStringLen (PS ps s l) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s,l)
1792
1793 -- | Given the maximum size needed and a function to make the contents
1794 -- of a ByteString, generate makes the 'ByteString'. The generating
1795 -- function is required to return the actual final size (<= the maximum
1796 -- size), and the resulting byte array is realloced to this size. The
1797 -- string is padded at the end with a null byte.
1798 --
1799 -- generate is the main mechanism for creating custom, efficient
1800 -- ByteString functions, using Haskell or C functions to fill the space.
1801 --
1802 generate :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
1803 generate i f = do
1804 fp <- mallocByteString i
1805 (ptr,n) <- withForeignPtr fp $ \p -> do
1806 i' <- f p
1807 if i' == i
1808 then return (fp,i')
1809 else do fp_ <- mallocByteString i' -- realloc
1810 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
1811 return (fp_,i')
1812 return (PS ptr 0 n)
1813
1814 {-
1815 --
1816 -- On the C malloc heap. Less fun.
1817 --
1818 generate i f = do
1819 p <- mallocArray (i+1)
1820 i' <- f p
1821 p' <- reallocArray p (i'+1)
1822 poke (p' `plusPtr` i') (0::Word8) -- XXX so CStrings work
1823 fp <- newForeignFreePtr p'
1824 return $ PS fp 0 i'
1825 -}
1826
1827 -- ---------------------------------------------------------------------
1828 -- line IO
1829
1830 #if defined(__GLASGOW_HASKELL__)
1831
1832 -- | getLine, read a line from stdin.
1833 getLine :: IO ByteString
1834 getLine = hGetLine stdin
1835
1836 -- | Lazily construct a list of lines of ByteStrings. This will be much
1837 -- better on memory consumption than using lines =<< getContents.
1838 hGetLines :: Handle -> IO [ByteString]
1839 hGetLines h = go
1840 where
1841 go = unsafeInterleaveIO $ do
1842 ms <- catch (hGetLine h >>= return . Just)
1843 (\_ -> return Nothing)
1844 case ms of
1845 Nothing -> return []
1846 Just s -> do ss <- go
1847 return (s:ss)
1848
1849 -- | hGetLine. read a ByteString from a handle
1850 hGetLine :: Handle -> IO ByteString
1851 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
1852 case haBufferMode handle_ of
1853 NoBuffering -> error "no buffering"
1854 _other -> hGetLineBuffered handle_
1855
1856 where
1857 hGetLineBuffered handle_ = do
1858 let ref = haBuffer handle_
1859 buf <- readIORef ref
1860 hGetLineBufferedLoop handle_ ref buf 0 []
1861
1862 hGetLineBufferedLoop handle_ ref
1863 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
1864 len `seq` do
1865 off <- findEOL r w raw
1866 let new_len = len + off - r
1867 xs <- mkPS raw r off
1868
1869 -- if eol == True, then off is the offset of the '\n'
1870 -- otherwise off == w and the buffer is now empty.
1871 if off /= w
1872 then do if (w == off + 1)
1873 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1874 else writeIORef ref buf{ bufRPtr = off + 1 }
1875 mkBigPS new_len (xs:xss)
1876 else do
1877 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
1878 buf{ bufWPtr=0, bufRPtr=0 }
1879 case maybe_buf of
1880 -- Nothing indicates we caught an EOF, and we may have a
1881 -- partial line to return.
1882 Nothing -> do
1883 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
1884 if new_len > 0
1885 then mkBigPS new_len (xs:xss)
1886 else ioe_EOF
1887 Just new_buf ->
1888 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
1889
1890 -- find the end-of-line character, if there is one
1891 findEOL r w raw
1892 | r == w = return w
1893 | otherwise = do
1894 (c,r') <- readCharFromBuffer raw r
1895 if c == '\n'
1896 then return r -- NB. not r': don't include the '\n'
1897 else findEOL r' w raw
1898
1899 maybeFillReadBuffer fd is_line is_stream buf = catch
1900 (do buf' <- fillReadBuffer fd is_line is_stream buf
1901 return (Just buf'))
1902 (\e -> if isEOFError e then return Nothing else ioError e)
1903
1904 -- TODO, rewrite to use normal memcpy
1905 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
1906 mkPS buf start end = do
1907 let len = end - start
1908 fp <- mallocByteString len
1909 withForeignPtr fp $ \p -> do
1910 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
1911 return (PS fp 0 len)
1912
1913 mkBigPS :: Int -> [ByteString] -> IO ByteString
1914 mkBigPS _ [ps] = return ps
1915 mkBigPS _ pss = return $! concat (P.reverse pss)
1916
1917 #endif
1918
1919 -- ---------------------------------------------------------------------
1920 -- Block IO
1921
1922 -- | Outputs a 'ByteString' to the specified 'Handle'.
1923 hPut :: Handle -> ByteString -> IO ()
1924 hPut _ (PS _ _ 0) = return ()
1925 hPut h (PS ps 0 l) = withForeignPtr ps $ \p-> hPutBuf h p l
1926 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
1927
1928 -- | Write a ByteString to stdout
1929 putStr :: ByteString -> IO ()
1930 putStr = hPut stdout
1931
1932 -- | Write a ByteString to stdout, appending a newline byte
1933 putStrLn :: ByteString -> IO ()
1934 putStrLn ps = hPut stdout ps >> hPut stdout nl
1935 where nl = packByte 0x0a
1936
1937 -- | Read a 'ByteString' directly from the specified 'Handle'. This
1938 -- is far more efficient than reading the characters into a 'String'
1939 -- and then using 'pack'.
1940 hGet :: Handle -> Int -> IO ByteString
1941 hGet _ 0 = return empty
1942 hGet h i = do fp <- mallocByteString i
1943 l <- withForeignPtr fp $ \p-> hGetBuf h p i
1944 return $ PS fp 0 l
1945
1946 #if defined(__GLASGOW_HASKELL__)
1947 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
1948 -- waiting for data to become available, instead it returns only whatever data
1949 -- is available.
1950 hGetNonBlocking :: Handle -> Int -> IO ByteString
1951 hGetNonBlocking _ 0 = return empty
1952 hGetNonBlocking h i = do
1953 fp <- mallocByteString i
1954 l <- withForeignPtr fp $ \p -> hGetBufNonBlocking h p i
1955 return $ PS fp 0 l
1956 #endif
1957
1958 -- | Read entire handle contents into a 'ByteString'.
1959 -- This function reads chunks at a time, doubling the chunksize on each
1960 -- read. The final buffer is then realloced to the appropriate size. For
1961 -- files > half of available memory, this may lead to memory exhaustion.
1962 -- Consider using 'readFile' in this case.
1963 --
1964 -- As with 'hGet', the string representation in the file is assumed to
1965 -- be ISO-8859-1.
1966 --
1967 hGetContents :: Handle -> IO ByteString
1968 hGetContents h = do
1969 let start_size = 1024
1970 p <- mallocArray start_size
1971 i <- hGetBuf h p start_size
1972 if i < start_size
1973 then do p' <- reallocArray p i
1974 fp <- newForeignFreePtr p'
1975 return $ PS fp 0 i
1976 else f p start_size
1977 where
1978 f p s = do
1979 let s' = 2 * s
1980 p' <- reallocArray p s'
1981 i <- hGetBuf h (p' `plusPtr` s) s
1982 if i < s
1983 then do let i' = s + i
1984 p'' <- reallocArray p' i'
1985 fp <- newForeignFreePtr p''
1986 return $ PS fp 0 i'
1987 else f p' s'
1988
1989 -- | getContents. Equivalent to hGetContents stdin
1990 getContents :: IO ByteString
1991 getContents = hGetContents stdin
1992
1993 -- | Read an entire file strictly into a 'ByteString'. This is far more
1994 -- efficient than reading the characters into a 'String' and then using
1995 -- 'pack'. It also may be more efficient than opening the file and
1996 -- reading it using hGet.
1997 readFile :: FilePath -> IO ByteString
1998 readFile f = do
1999 h <- openBinaryFile f ReadMode
2000 l <- hFileSize h
2001 s <- hGet h $ fromIntegral l
2002 hClose h
2003 return s
2004
2005 -- | Write a 'ByteString' to a file.
2006 writeFile :: FilePath -> ByteString -> IO ()
2007 writeFile f ps = bracket (openBinaryFile f WriteMode) hClose
2008 (\h -> hPut h ps)
2009
2010 {-
2011 --
2012 -- Disable until we can move it into a portable .hsc file
2013 --
2014
2015 -- | Like readFile, this reads an entire file directly into a
2016 -- 'ByteString', but it is even more efficient. It involves directly
2017 -- mapping the file to memory. This has the advantage that the contents
2018 -- of the file never need to be copied. Also, under memory pressure the
2019 -- page may simply be discarded, while in the case of readFile it would
2020 -- need to be written to swap. If you read many small files, mmapFile
2021 -- will be less memory-efficient than readFile, since each mmapFile
2022 -- takes up a separate page of memory. Also, you can run into bus
2023 -- errors if the file is modified. As with 'readFile', the string
2024 -- representation in the file is assumed to be ISO-8859-1.
2025 --
2026 -- On systems without mmap, this is the same as a readFile.
2027 --
2028 mmapFile :: FilePath -> IO ByteString
2029 mmapFile f = mmap f >>= \(fp,l) -> return $ PS fp 0 l
2030
2031 mmap :: FilePath -> IO (ForeignPtr Word8, Int)
2032 mmap f = do
2033 h <- openBinaryFile f ReadMode
2034 l <- fromIntegral `fmap` hFileSize h
2035 -- Don't bother mmaping small files because each mmapped file takes up
2036 -- at least one full VM block.
2037 if l < mmap_limit
2038 then do thefp <- mallocByteString l
2039 withForeignPtr thefp $ \p-> hGetBuf h p l
2040 hClose h
2041 return (thefp, l)
2042 else do
2043 -- unix only :(
2044 fd <- fromIntegral `fmap` handleToFd h
2045 p <- my_mmap l fd
2046 fp <- if p == nullPtr
2047 then do thefp <- mallocByteString l
2048 withForeignPtr thefp $ \p' -> hGetBuf h p' l
2049 return thefp
2050 else do
2051 -- The munmap leads to crashes on OpenBSD.
2052 -- maybe there's a use after unmap in there somewhere?
2053 #if !defined(__OpenBSD__)
2054 let unmap = c_munmap p l >> return ()
2055 #else
2056 let unmap = return ()
2057 #endif
2058 fp <- FC.newForeignPtr p unmap
2059 return fp
2060 c_close fd
2061 hClose h
2062 return (fp, l)
2063 where mmap_limit = 16*1024
2064 -}
2065
2066 #if defined(__GLASGOW_HASKELL__)
2067 --
2068 -- | A ByteString equivalent for getArgs. More efficient for large argument lists
2069 --
2070 getArgs :: IO [ByteString]
2071 getArgs =
2072 alloca $ \ p_argc ->
2073 alloca $ \ p_argv -> do
2074 getProgArgv p_argc p_argv
2075 p <- fromIntegral `fmap` peek p_argc
2076 argv <- peek p_argv
2077 P.map packCString `fmap` peekArray (p - 1) (advancePtr argv 1)
2078 #endif
2079
2080 -- ---------------------------------------------------------------------
2081 -- Internal utilities
2082
2083 -- Unsafe conversion between 'Word8' and 'Char'. These are nops, and
2084 -- silently truncate to 8 bits Chars > '\255'. They are provided as
2085 -- convenience for ByteString construction.
2086 w2c :: Word8 -> Char
2087 #if !defined(__GLASGOW_HASKELL__)
2088 w2c = chr . fromIntegral
2089 #else
2090 w2c = unsafeChr . fromIntegral
2091 #endif
2092 {-# INLINE w2c #-}
2093
2094 c2w :: Char -> Word8
2095 c2w = fromIntegral . ord
2096 {-# INLINE c2w #-}
2097
2098 -- Wrapper of mallocForeignPtrArray. Any ByteString allocated this way
2099 -- is padded with a null byte.
2100 mallocByteString :: Int -> IO (ForeignPtr Word8)
2101 mallocByteString l = do
2102 fp <- mallocForeignPtrArray (l+1)
2103 withForeignPtr fp $ \p -> poke (p `plusPtr` l) (0::Word8)
2104 return fp
2105
2106 -- | A way of creating ForeignPtrs outside the IO monad. The @Int@
2107 -- argument gives the final size of the ByteString. Unlike 'generate'
2108 -- the ByteString is not reallocated if the final size is less than the
2109 -- estimated size. Also, unlike 'generate' ByteString's created this way
2110 -- are managed on the Haskell heap.
2111 create :: Int -> (Ptr Word8 -> IO ()) -> ByteString
2112 create l write_ptr = inlinePerformIO $ do
2113 fp <- mallocByteString (l+1)
2114 withForeignPtr fp $ \p -> write_ptr p
2115 return $ PS fp 0 l
2116 {-# INLINE create #-}
2117
2118 -- | Perform an operation with a temporary ByteString
2119 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
2120 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
2121 {-# INLINE withPtr #-}
2122
2123 -- Common up near identical calls to `error' to reduce the number
2124 -- constant strings created when compiled:
2125 errorEmptyList :: String -> a
2126 errorEmptyList fun = moduleError fun "empty ByteString"
2127 {-# NOINLINE errorEmptyList #-}
2128
2129 moduleError :: String -> String -> a
2130 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
2131 {-# NOINLINE moduleError #-}
2132
2133 -- Find from the end of the string using predicate
2134 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
2135 STRICT2(findFromEndUntil)
2136 findFromEndUntil f ps@(PS x s l) =
2137 if null ps then 0
2138 else if f (last ps) then l
2139 else findFromEndUntil f (PS x s (l-1))
2140
2141 -- Just like inlinePerformIO, but we inline it. Big performance gains as
2142 -- it exposes lots of things to further inlining
2143 --
2144 {-# INLINE inlinePerformIO #-}
2145 inlinePerformIO :: IO a -> a
2146 #if defined(__GLASGOW_HASKELL__)
2147 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
2148 #else
2149 inlinePerformIO = unsafePerformIO
2150 #endif
2151
2152 {-# INLINE newForeignFreePtr #-}
2153 newForeignFreePtr :: Ptr Word8 -> IO (ForeignPtr Word8)
2154 #if defined(__GLASGOW_HASKELL__)
2155 newForeignFreePtr p = FC.newForeignPtr p (c_free p)
2156 #else
2157 newForeignFreePtr p = newForeignPtr c_free_finalizer p
2158 #endif
2159
2160 -- ---------------------------------------------------------------------
2161 --
2162 -- Standard C functions
2163 --
2164
2165 foreign import ccall unsafe "string.h strlen" c_strlen
2166 :: CString -> CInt
2167
2168 foreign import ccall unsafe "stdlib.h malloc" c_malloc
2169 :: CInt -> IO (Ptr Word8)
2170
2171 foreign import ccall unsafe "static stdlib.h free" c_free
2172 :: Ptr Word8 -> IO ()
2173
2174 #if !defined(__GLASGOW_HASKELL__)
2175 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
2176 :: FunPtr (Ptr Word8 -> IO ())
2177 #endif
2178
2179 foreign import ccall unsafe "string.h memset" memset
2180 :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
2181
2182 foreign import ccall unsafe "string.h memchr" memchr
2183 :: Ptr Word8 -> Word8 -> CSize -> Ptr Word8
2184
2185 foreign import ccall unsafe "string.h memcmp" memcmp
2186 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
2187
2188 foreign import ccall unsafe "string.h memcpy" memcpy
2189 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
2190
2191 -- ---------------------------------------------------------------------
2192 --
2193 -- Uses our C code
2194 --
2195
2196 foreign import ccall unsafe "static fpstring.h reverse" c_reverse
2197 :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
2198
2199 foreign import ccall unsafe "static fpstring.h intersperse" c_intersperse
2200 :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
2201
2202 foreign import ccall unsafe "static fpstring.h maximum" c_maximum
2203 :: Ptr Word8 -> CInt -> Word8
2204
2205 foreign import ccall unsafe "static fpstring.h minimum" c_minimum
2206 :: Ptr Word8 -> CInt -> Word8
2207
2208 foreign import ccall unsafe "static fpstring.h count" c_count
2209 :: Ptr Word8 -> CInt -> Word8 -> CInt
2210
2211 -- ---------------------------------------------------------------------
2212 -- MMap
2213
2214 {-
2215 foreign import ccall unsafe "static fpstring.h my_mmap" my_mmap
2216 :: Int -> Int -> IO (Ptr Word8)
2217
2218 foreign import ccall unsafe "static unistd.h close" c_close
2219 :: Int -> IO Int
2220
2221 # if !defined(__OpenBSD__)
2222 foreign import ccall unsafe "static sys/mman.h munmap" c_munmap
2223 :: Ptr Word8 -> Int -> IO Int
2224 # endif
2225 -}
2226
2227 -- ---------------------------------------------------------------------
2228 -- Internal GHC Haskell magic
2229
2230 #if defined(__GLASGOW_HASKELL__)
2231 foreign import ccall unsafe "RtsAPI.h getProgArgv"
2232 getProgArgv :: Ptr CInt -> Ptr (Ptr CString) -> IO ()
2233
2234 foreign import ccall unsafe "__hscore_memcpy_src_off"
2235 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
2236 #endif
2237
2238 -- ---------------------------------------------------------------------
2239 --
2240 -- Functional array fusion for ByteStrings.
2241 --
2242 -- From the Data Parallel Haskell project,
2243 -- http://www.cse.unsw.edu.au/~chak/project/dph/
2244 --
2245
2246 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
2247 -- the fact that no bottoms of this type are ever constructed; hence, we can
2248 -- assume @(_ :: NoAL) `seq` x = x@.
2249 --
2250 data NoAL = NoAL
2251
2252 -- | Special forms of loop arguments
2253 --
2254 -- * These are common special cases for the three function arguments of gen
2255 -- and loop; we give them special names to make it easier to trigger RULES
2256 -- applying in the special cases represented by these arguments. The
2257 -- "INLINE [1]" makes sure that these functions are only inlined in the last
2258 -- two simplifier phases.
2259 --
2260 -- * In the case where the accumulator is not needed, it is better to always
2261 -- explicitly return a value `()', rather than just copy the input to the
2262 -- output, as the former gives GHC better local information.
2263 --
2264
2265 -- | Element function expressing a mapping only
2266 mapEFL :: (Word8 -> Word8) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2267 mapEFL f = \_ e -> (noAL, (Just $ f e))
2268 #if defined(__GLASGOW_HASKELL__)
2269 {-# INLINE [1] mapEFL #-}
2270 #endif
2271
2272 -- | Element function implementing a filter function only
2273 filterEFL :: (Word8 -> Bool) -> (NoAL -> Word8 -> (NoAL, Maybe Word8))
2274 filterEFL p = \_ e -> if p e then (noAL, Just e) else (noAL, Nothing)
2275 #if defined(__GLASGOW_HASKELL__)
2276 {-# INLINE [1] filterEFL #-}
2277 #endif
2278
2279 -- |Element function expressing a reduction only
2280 foldEFL :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2281 foldEFL f = \a e -> (f a e, Nothing)
2282 #if defined(__GLASGOW_HASKELL__)
2283 {-# INLINE [1] foldEFL #-}
2284 #endif
2285
2286 -- | A strict foldEFL.
2287 foldEFL' :: (acc -> Word8 -> acc) -> (acc -> Word8 -> (acc, Maybe Word8))
2288 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a', Nothing)
2289 #if defined(__GLASGOW_HASKELL__)
2290 {-# INLINE [1] foldEFL' #-}
2291 #endif
2292
2293 -- | Element function expressing a prefix reduction only
2294 --
2295 scanEFL :: (Word8 -> Word8 -> Word8) -> Word8 -> Word8 -> (Word8, Maybe Word8)
2296 scanEFL f = \a e -> (f a e, Just a)
2297 #if defined(__GLASGOW_HASKELL__)
2298 {-# INLINE [1] scanEFL #-}
2299 #endif
2300
2301 -- | No accumulator
2302 noAL :: NoAL
2303 noAL = NoAL
2304 #if defined(__GLASGOW_HASKELL__)
2305 {-# INLINE [1] noAL #-}
2306 #endif
2307
2308 -- | Projection functions that are fusion friendly (as in, we determine when
2309 -- they are inlined)
2310 loopArr :: (ByteString, acc) -> ByteString
2311 loopArr (arr, _) = arr
2312 #if defined(__GLASGOW_HASKELL__)
2313 {-# INLINE [1] loopArr #-}
2314 #endif
2315
2316 loopAcc :: (ByteString, acc) -> acc
2317 loopAcc (_, acc) = acc
2318 #if defined(__GLASGOW_HASKELL__)
2319 {-# INLINE [1] loopAcc #-}
2320 #endif
2321
2322 loopSndAcc :: (ByteString, (acc1, acc2)) -> (ByteString, acc2)
2323 loopSndAcc (arr, (_, acc)) = (arr, acc)
2324 #if defined(__GLASGOW_HASKELL__)
2325 {-# INLINE [1] loopSndAcc #-}
2326 #endif
2327
2328 ------------------------------------------------------------------------
2329
2330 --
2331 -- size, and then percentage.
2332 --
2333
2334 -- | Iteration over over ByteStrings
2335 loopU :: (acc -> Word8 -> (acc, Maybe Word8)) -- ^ mapping & folding, once per elem
2336 -> acc -- ^ initial acc value
2337 -> ByteString -- ^ input ByteString
2338 -> (ByteString, acc)
2339
2340 loopU f start (PS z s i) = inlinePerformIO $ withForeignPtr z $ \a -> do
2341 fp <- mallocByteString i
2342 (ptr,n,acc) <- withForeignPtr fp $ \p -> do
2343 (acc, i') <- go (a `plusPtr` s) p start
2344 if i' == i
2345 then return (fp,i',acc) -- no realloc for map
2346 else do fp_ <- mallocByteString i' -- realloc
2347 withForeignPtr fp_ $ \p' -> memcpy p' p (fromIntegral i')
2348 return (fp_,i',acc)
2349
2350 return (PS ptr 0 n, acc)
2351 where
2352 go p ma = trans 0 0
2353 where
2354 STRICT3(trans)
2355 trans a_off ma_off acc
2356 | a_off >= i = return (acc, ma_off)
2357 | otherwise = do
2358 x <- peekByteOff p a_off
2359 let (acc', oe) = f acc x
2360 ma_off' <- case oe of
2361 Nothing -> return ma_off
2362 Just e -> do pokeByteOff ma ma_off e
2363 return $ ma_off + 1
2364 trans (a_off+1) ma_off' acc'
2365
2366 #if defined(__GLASGOW_HASKELL__)
2367 {-# INLINE [1] loopU #-}
2368 #endif
2369
2370 infixr 9 `fuseEFL`
2371
2372 -- |Fuse to flat loop functions
2373 fuseEFL :: (a1 -> Word8 -> (a1, Maybe Word8))
2374 -> (a2 -> Word8 -> (a2, Maybe Word8))
2375 -> (a1, a2)
2376 -> Word8
2377 -> ((a1, a2), Maybe Word8)
2378 fuseEFL f g (acc1, acc2) e1 =
2379 case f acc1 e1 of
2380 (acc1', Nothing) -> ((acc1', acc2), Nothing)
2381 (acc1', Just e2) ->
2382 case g acc2 e2 of
2383 (acc2', res) -> ((acc1', acc2'), res)
2384
2385 {-# RULES
2386
2387 "loop/loop fusion!" forall em1 em2 start1 start2 arr.
2388 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
2389 loopSndAcc (loopU (em1 `fuseEFL` em2) (start1, start2) arr)
2390
2391 "loopArr/loopSndAcc" forall x.
2392 loopArr (loopSndAcc x) = loopArr x
2393
2394 "seq/NoAL" forall (u::NoAL) e.
2395 u `seq` e = e
2396
2397 #-}
2398