Avoid copying if possible in `concat`
[packages/bytestring.git] / Data / ByteString / Internal.hs
1 {-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-}
2 {-# LANGUAGE UnliftedFFITypes, MagicHash,
3 UnboxedTuples, DeriveDataTypeable #-}
4 #if __GLASGOW_HASKELL__ >= 703
5 {-# LANGUAGE Unsafe #-}
6 #endif
7 {-# OPTIONS_HADDOCK hide #-}
8
9 -- |
10 -- Module : Data.ByteString.Internal
11 -- Copyright : (c) Don Stewart 2006-2008
12 -- (c) Duncan Coutts 2006-2012
13 -- License : BSD-style
14 -- Maintainer : dons00@gmail.com, duncan@community.haskell.org
15 -- Stability : unstable
16 -- Portability : non-portable
17 --
18 -- A module containing semi-public 'ByteString' internals. This exposes the
19 -- 'ByteString' representation and low level construction functions. As such
20 -- all the functions in this module are unsafe. The API is also not stable.
21 --
22 -- Where possible, applications should instead use the functions from the normal
23 -- public interface modules, such as "Data.ByteString.Unsafe". Packages that
24 -- extend the ByteString system at a low level will need to use this module.
25 --
26 module Data.ByteString.Internal (
27
28 -- * The @ByteString@ type and representation
29 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
30
31 -- * Conversion with lists: packing and unpacking
32 packBytes, packUptoLenBytes, unsafePackLenBytes,
33 packChars, packUptoLenChars, unsafePackLenChars,
34 unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict,
35 unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
36 unsafePackAddress,
37
38 -- * Low level imperative construction
39 create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
40 createUptoN, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
41 createAndTrim, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
42 createAndTrim', -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
43 unsafeCreate, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
44 unsafeCreateUptoN, -- :: Int -> (Ptr Word8 -> IO Int) -> ByteString
45 mallocByteString, -- :: Int -> IO (ForeignPtr a)
46
47 -- * Conversion to and from ForeignPtrs
48 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> Int -> ByteString
49 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
50
51 -- * Utilities
52 nullForeignPtr, -- :: ForeignPtr Word8
53 checkedAdd, -- :: String -> Int -> Int -> Int
54
55 -- * Standard C Functions
56 c_strlen, -- :: CString -> IO CSize
57 c_free_finalizer, -- :: FunPtr (Ptr Word8 -> IO ())
58
59 memchr, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
60 memcmp, -- :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt
61 memcpy, -- :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
62 memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
63
64 -- * cbits functions
65 c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
66 c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
67 c_maximum, -- :: Ptr Word8 -> CULong -> IO Word8
68 c_minimum, -- :: Ptr Word8 -> CULong -> IO Word8
69 c_count, -- :: Ptr Word8 -> CULong -> Word8 -> IO CULong
70
71 -- * Chars
72 w2c, c2w, isSpaceWord8, isSpaceChar8,
73
74 -- * Deprecated and unmentionable
75 accursedUnutterablePerformIO, -- :: IO a -> a
76 inlinePerformIO -- :: IO a -> a
77 ) where
78
79 import Prelude hiding (concat, null)
80 import qualified Data.List as List
81
82 import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
83 import Foreign.Ptr (Ptr, FunPtr, plusPtr)
84 import Foreign.Storable (Storable(..))
85
86 #if MIN_VERSION_base(4,5,0) || __GLASGOW_HASKELL__ >= 703
87 import Foreign.C.Types (CInt(..), CSize(..), CULong(..))
88 #else
89 import Foreign.C.Types (CInt, CSize, CULong)
90 #endif
91
92 import Foreign.C.String (CString)
93
94 #if MIN_VERSION_base(4,9,0)
95 import Data.Semigroup (Semigroup((<>)))
96 #endif
97 #if !(MIN_VERSION_base(4,8,0))
98 import Data.Monoid (Monoid(..))
99 #endif
100
101 import Control.DeepSeq (NFData(rnf))
102
103 import Data.String (IsString(..))
104
105 import Control.Exception (assert)
106
107 import Data.Char (ord)
108 import Data.Word (Word8)
109
110 import Data.Typeable (Typeable)
111 import Data.Data (Data(..), mkNoRepType)
112
113 import GHC.Base (nullAddr#,realWorld#,unsafeChr)
114
115 #if MIN_VERSION_base(4,4,0)
116 import GHC.CString (unpackCString#)
117 #else
118 import GHC.Base (unpackCString#)
119 #endif
120
121 import GHC.Prim (Addr#)
122
123 #if __GLASGOW_HASKELL__ >= 611
124 import GHC.IO (IO(IO),unsafeDupablePerformIO)
125 #else
126 import GHC.IOBase (IO(IO),RawBuffer,unsafeDupablePerformIO)
127 #endif
128
129 import GHC.ForeignPtr (ForeignPtr(ForeignPtr)
130 ,newForeignPtr_, mallocPlainForeignPtrBytes)
131 import GHC.Ptr (Ptr(..), castPtr)
132
133 -- CFILES stuff is Hugs only
134 {-# CFILES cbits/fpstring.c #-}
135
136 -- -----------------------------------------------------------------------------
137
-- | A space-efficient vector of 'Word8' values that supports many
-- efficient operations.
--
-- The contents are 8-bit bytes; the operations exported from
-- "Data.ByteString.Char8" interpret the very same bytes as 8-bit
-- characters.
--
-- All three fields are strict and unpacked into the constructor.
--
data ByteString
    = PS {-# UNPACK #-} !(ForeignPtr Word8) -- payload
         {-# UNPACK #-} !Int                -- offset
         {-# UNPACK #-} !Int                -- length
    deriving (Typeable)
149
-- | Equality is delegated to 'eq'.
instance Eq ByteString where
    a == b = eq a b

-- | Ordering is delegated to 'compareBytes'.
instance Ord ByteString where
    compare a b = compareBytes a b
155
#if MIN_VERSION_base(4,9,0)
-- | '<>' is 'append'.
instance Semigroup ByteString where
    a <> b = append a b
#endif

-- | The identity is a zero-length 'ByteString' built on 'nullForeignPtr';
-- 'mconcat' uses the dedicated 'concat' instead of repeated appends.
instance Monoid ByteString where
    mempty  = PS nullForeignPtr 0 0
#if MIN_VERSION_base(4,9,0)
    -- Defer to the Semigroup instance where one exists.
    mappend = (<>)
#else
    mappend = append
#endif
    mconcat = concat
169
-- | Matching on the constructor suffices: every field of 'PS' is strict.
instance NFData ByteString where
    rnf (PS _ _ _) = ()

-- | Shown like the 'String' obtained from 'unpackChars'.
instance Show ByteString where
    showsPrec prec bs rest = showsPrec prec (unpackChars bs) rest

-- | Reads a 'String' and packs every successful parse with 'packChars'.
instance Read ByteString where
    readsPrec prec input = do
        (chars, rest) <- readsPrec prec input
        return (packChars chars, rest)

-- | String literals are converted with 'packChars'.
instance IsString ByteString where
    fromString = packChars
181
-- | The representation is kept abstract: folding goes through the list of
-- bytes ('unpackBytes' / 'packBytes'), while 'toConstr' and 'gunfold' are
-- unsupported and raise an error.
instance Data ByteString where
    gfoldl k z bs = z packBytes `k` unpackBytes bs
    toConstr _    = error "Data.ByteString.ByteString.toConstr"
    gunfold _ _   = error "Data.ByteString.ByteString.gunfold"
    dataTypeOf _  = mkNoRepType "Data.ByteString.ByteString"
187
188 ------------------------------------------------------------------------
189 -- Packing and unpacking from lists
190
-- | Pack a list of bytes. The list is measured with 'List.length' first so
-- that a buffer of exactly that size can be filled by 'unsafePackLenBytes'.
packBytes :: [Word8] -> ByteString
packBytes bytes = unsafePackLenBytes n bytes
  where
    n = List.length bytes
193
-- | Pack a 'String'. The list is measured with 'List.length' first and then
-- written by 'unsafePackLenChars'.
packChars :: [Char] -> ByteString
packChars str = unsafePackLenChars n str
  where
    n = List.length str

-- Inlining is delayed until phase 0; presumably this gives the rewrite rule
-- below a chance to match calls on string literals first.
{-# INLINE [0] packChars #-}

{-# RULES
"ByteString packChars/packAddress" forall s .
   packChars (unpackCString# s) = accursedUnutterablePerformIO (unsafePackAddress s)
 #-}
203
-- | Pack a list of bytes into a freshly created buffer of @len@ bytes.
--
-- The writer walks the whole list and never compares against @len@, so the
-- caller must guarantee that the list holds no more than @len@ elements.
unsafePackLenBytes :: Int -> [Word8] -> ByteString
unsafePackLenBytes len bytes = unsafeCreate len (fill bytes)
  where
    -- Write one byte per step, advancing the destination pointer.
    fill []       !_   = return ()
    fill (b:rest) !dst = do
        poke dst b
        fill rest (dst `plusPtr` 1)
210
-- | Pack a 'String' into a freshly created buffer of @len@ bytes, converting
-- every character with 'c2w'.
--
-- The writer walks the whole list and never compares against @len@, so the
-- caller must guarantee that the list holds no more than @len@ elements.
unsafePackLenChars :: Int -> [Char] -> ByteString
unsafePackLenChars len str = unsafeCreate len (fill str)
  where
    -- Write one converted character per step, advancing the destination.
    fill []       !_   = return ()
    fill (c:rest) !dst = do
        poke dst (c2w c)
        fill rest (dst `plusPtr` 1)
217
218
-- | /O(n)/ Wrap a null-terminated sequence of bytes, pointed to by an
-- Addr\# (an arbitrary machine address assumed to point outside the
-- garbage-collected heap), as a @ByteString@. No bytes are copied: the
-- result refers to the original memory.
--
-- Producing an Addr\# from an unboxed string literal is much faster than
-- packing a boxed string, since GHC compiles such a literal to a static
-- @char []@. The length is established by calling @strlen(3)@, so the
-- Addr\# must point to a null-terminated buffer (as is the case with
-- "string"# literals in GHC). Use 'unsafePackAddressLen' if you know the
-- length of the string statically.
--
-- An example:
--
-- > literalFS = unsafePackAddress "literal"#
--
-- This function is /unsafe/. If you modify the buffer pointed to by the
-- original Addr\#, the modification will be visible in the resulting
-- @ByteString@, breaking referential transparency.
--
-- Note that this also will not work if the data contains embedded '\0'
-- characters, as @strlen@ will then return too short a length.
--
unsafePackAddress :: Addr# -> IO ByteString
unsafePackAddress addr# = do
    let str = Ptr addr# :: CString
    fp  <- newForeignPtr_ (castPtr str)
    len <- c_strlen str
    return (PS fp 0 (fromIntegral len))
{-# INLINE unsafePackAddress #-}
249
250
-- | Pack at most @len@ bytes from the front of a list. Returns the packed
-- 'ByteString' together with the part of the list that was not consumed.
packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8])
packUptoLenBytes len bytes =
    unsafeCreateUptoN' len (\buf -> fill buf len bytes)
  where
    -- @room@ is the number of bytes that still fit into the buffer.
    fill !dst !room input = case input of
        []            -> return (len - room, [])
        _ | room == 0 -> return (len, input)
        (b:rest)      -> do
            poke dst b
            fill (dst `plusPtr` 1) (room - 1) rest
258
-- | Pack at most @len@ characters from the front of a 'String', converting
-- each with 'c2w'. Returns the packed 'ByteString' together with the part
-- of the string that was not consumed.
packUptoLenChars :: Int -> [Char] -> (ByteString, [Char])
packUptoLenChars len str =
    unsafeCreateUptoN' len (\buf -> fill buf len str)
  where
    -- @room@ is the number of bytes that still fit into the buffer.
    fill !dst !room input = case input of
        []            -> return (len - room, [])
        _ | room == 0 -> return (len, input)
        (c:rest)      -> do
            poke dst (c2w c)
            fill (dst `plusPtr` 1) (room - 1) rest
266
267 -- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
268 -- we would like to write a tight loop that just blats the list into memory, on
269 -- the other hand we want it to be unpacked lazily so we don't end up with a
270 -- massive list data structure in memory.
271 --
272 -- Our strategy is to combine both: we will unpack lazily in reasonable sized
273 -- chunks, where each chunk is unpacked strictly.
274 --
275 -- unpackBytes and unpackChars do the lazy loop, while unpackAppendBytes and
276 -- unpackAppendChars do the chunks strictly.
277
-- | Convert a 'ByteString' into a list of bytes, unpacking lazily chunk by
-- chunk via 'unpackAppendBytesLazy'.
unpackBytes :: ByteString -> [Word8]
unpackBytes bs = bs `unpackAppendBytesLazy` []
280
-- | Convert a 'ByteString' into a 'String', unpacking lazily chunk by chunk
-- via 'unpackAppendCharsLazy'.
unpackChars :: ByteString -> [Char]
unpackChars bs = bs `unpackAppendCharsLazy` []
283
-- | Unpack a 'ByteString' in front of the list @tl@. Strings longer than
-- 100 bytes are split: the first 100 bytes are unpacked strictly and the
-- remainder is left as a lazy recursive call.
unpackAppendBytesLazy :: ByteString -> [Word8] -> [Word8]
unpackAppendBytesLazy (PS fp off len) tl
  | len > chunk = unpackAppendBytesStrict (PS fp off chunk) rest
  | otherwise   = unpackAppendBytesStrict (PS fp off len) tl
  where
    chunk = 100
    rest  = unpackAppendBytesLazy (PS fp (off + chunk) (len - chunk)) tl
290
291 -- Why 100 bytes you ask? Because on a 64bit machine the list we allocate
292 -- takes just shy of 4k which seems like a reasonable amount.
293 -- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes)
294
-- | Unpack a 'ByteString' as characters in front of the list @tl@. Strings
-- longer than 100 bytes are split: the first 100 bytes are unpacked
-- strictly and the remainder is left as a lazy recursive call.
unpackAppendCharsLazy :: ByteString -> [Char] -> [Char]
unpackAppendCharsLazy (PS fp off len) tl
  | len > chunk = unpackAppendCharsStrict (PS fp off chunk) rest
  | otherwise   = unpackAppendCharsStrict (PS fp off len) tl
  where
    chunk = 100
    rest  = unpackAppendCharsLazy (PS fp (off + chunk) (len - chunk)) tl
301
302 -- For these unpack functions, since we're unpacking the whole list strictly we
303 -- build up the result list in an accumulator. This means we have to build up
304 -- the list starting at the end. So our traversal starts at the end of the
305 -- buffer and loops down until we hit the sentinel:
306
-- | Strictly unpack the whole 'ByteString' in front of the list @tl@.
--
-- The bytes are read from the last one down to the first, consing each
-- onto the accumulator, so the result comes out in buffer order.
unpackAppendBytesStrict :: ByteString -> [Word8] -> [Word8]
unpackAppendBytesStrict (PS fp off len) tl =
    accursedUnutterablePerformIO $ withForeignPtr fp $ \base ->
        let stop = base `plusPtr` (off - 1)   -- one before the first byte
            end  = stop `plusPtr` len         -- the last byte
        in  walk stop end tl
  where
    walk !stop !cur acc
      | cur /= stop = do
            b <- peek cur
            walk stop (cur `plusPtr` (-1)) (b : acc)
      | otherwise   = return acc
316
-- | Strictly unpack the whole 'ByteString' as characters in front of the
-- list @tl@.
--
-- The bytes are read from the last one down to the first, converting each
-- with 'w2c' and consing it onto the accumulator, so the result comes out
-- in buffer order.
unpackAppendCharsStrict :: ByteString -> [Char] -> [Char]
unpackAppendCharsStrict (PS fp off len) tl =
    accursedUnutterablePerformIO $ withForeignPtr fp $ \base ->
        let stop = base `plusPtr` (off - 1)   -- one before the first byte
            end  = stop `plusPtr` len         -- the last byte
        in  walk stop end tl
  where
    walk !stop !cur acc
      | cur /= stop = do
            b <- peek cur
            walk stop (cur `plusPtr` (-1)) (w2c b : acc)
      | otherwise   = return acc
326
327 ------------------------------------------------------------------------
328
-- | A 'ForeignPtr' wrapping the null address; it marks the empty
-- 'ByteString'. Its contents field is an 'error' thunk, so it must never
-- be forced.
nullForeignPtr :: ForeignPtr Word8
nullForeignPtr = ForeignPtr nullAddr# contents --TODO: should ForeignPtrContents be strict?
  where
    contents = error "nullForeignPtr"
332
333 -- ---------------------------------------------------------------------
334 -- Low level constructors
335
-- | /O(1)/ Build a 'ByteString' from a 'ForeignPtr', an offset and a
-- length.
--
-- If you do not need the offset parameter then you should be using
-- 'Data.ByteString.Unsafe.unsafePackCStringLen' or
-- 'Data.ByteString.Unsafe.unsafePackCStringFinalizer' instead.
--
fromForeignPtr :: ForeignPtr Word8
               -> Int -- ^ Offset
               -> Int -- ^ Length
               -> ByteString
fromForeignPtr = \fp off len -> PS fp off len
{-# INLINE fromForeignPtr #-}
348
-- | /O(1)/ Take a 'ByteString' apart into its underlying 'ForeignPtr',
-- offset and length.
toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -- ^ (ptr, offset, length)
toForeignPtr (PS fp off len) = (fp, off, len)
{-# INLINE toForeignPtr #-}
353
-- | Build a 'ByteString' outside the 'IO' monad by running 'create' under
-- 'unsafeDupablePerformIO'. The @Int@ argument is the final size of the
-- 'ByteString'.
unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString
unsafeCreate len fill = unsafeDupablePerformIO $ create len fill
{-# INLINE unsafeCreate #-}
359
-- | Like 'unsafeCreate', except that the @Int@ argument is only an upper
-- bound on the size; the filling action returns the actual size. Unlike
-- 'createAndTrim', the buffer is not reallocated when the final size is
-- smaller than the bound.
unsafeCreateUptoN :: Int -> (Ptr Word8 -> IO Int) -> ByteString
unsafeCreateUptoN maxLen fill = unsafeDupablePerformIO $ createUptoN maxLen fill
{-# INLINE unsafeCreateUptoN #-}
367
-- | Variant of 'unsafeCreateUptoN' whose filling action also returns an
-- extra result, which is handed back next to the 'ByteString'.
unsafeCreateUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> (ByteString, a)
unsafeCreateUptoN' maxLen fill = unsafeDupablePerformIO $ createUptoN' maxLen fill
{-# INLINE unsafeCreateUptoN' #-}
371
-- | Allocate a buffer of @len@ bytes, run the action @fill@ on it to write
-- the contents, and return the buffer as a 'ByteString' of length @len@.
create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
create len fill = do
    fp <- mallocByteString len
    withForeignPtr fp fill
    let bs = PS fp 0 len
    -- Evaluate the constructor before returning it.
    bs `seq` return bs
{-# INLINE create #-}
379
-- | Allocate a buffer of @maxLen@ bytes and run the action @fill@ on it.
-- The action returns the number of bytes actually used, which becomes the
-- length of the result; the buffer itself is kept at its allocated size.
-- An assertion checks that the reported size does not exceed @maxLen@.
createUptoN :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createUptoN maxLen fill = do
    fp   <- mallocByteString maxLen
    used <- withForeignPtr fp fill
    assert (used <= maxLen) (return $! PS fp 0 used)
{-# INLINE createUptoN #-}
388
-- | Like 'createUptoN', but the action @fill@ returns an extra result next
-- to the number of bytes used; that result is passed back alongside the
-- 'ByteString'.
createUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> IO (ByteString, a)
createUptoN' maxLen fill = do
    fp          <- mallocByteString maxLen
    (used, res) <- withForeignPtr fp fill
    assert (used <= maxLen) (return (PS fp 0 used, res))
{-# INLINE createUptoN' #-}
396
-- | Given the maximum size needed and an action that writes the contents,
-- 'createAndTrim' builds the 'ByteString'. The action must return the
-- actual final size (<= the maximum size).
--
-- If the whole buffer was used it is returned as is; otherwise the used
-- prefix is copied into a fresh buffer of exactly that size.
--
-- 'createAndTrim' is the main mechanism for creating custom, efficient
-- 'ByteString' functions, using Haskell or C functions to fill the space.
--
createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createAndTrim maxLen fill = do
    fp <- mallocByteString maxLen
    withForeignPtr fp $ \buf -> do
        used <- fill buf
        let full = assert (used <= maxLen) (used >= maxLen)
        if not full
            then create used (\dst -> memcpy dst buf used)
            else return $! PS fp 0 maxLen
{-# INLINE createAndTrim #-}
414
-- | Like 'createAndTrim', but the action returns a triple of an offset
-- into the buffer, the number of bytes used and an extra result.
--
-- When fewer than @maxLen@ bytes were used, the bytes starting at the
-- offset are copied into a fresh buffer of exactly that size. When the
-- whole buffer was used it is returned unchanged from position 0; the
-- offset is not consulted in that case.
createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
createAndTrim' maxLen fill = do
    fp <- mallocByteString maxLen
    withForeignPtr fp $ \buf -> do
        (off, used, res) <- fill buf
        let full = assert (used <= maxLen) (used >= maxLen)
        if not full
            then do
                bs <- create used (\dst -> memcpy dst (buf `plusPtr` off) used)
                return (bs, res)
            else return (PS fp 0 maxLen, res)
425
-- | Allocate the buffer for a 'ByteString'; on GHC this is
-- 'mallocPlainForeignPtrBytes'.
--
mallocByteString :: Int -> IO (ForeignPtr a)
mallocByteString = \size -> mallocPlainForeignPtrBytes size
{-# INLINE mallocByteString #-}
431
432 ------------------------------------------------------------------------
433 -- Implementations for Eq, Ord and Monoid instances
434
-- | Equality on 'ByteString's. Different lengths mean unequal, and the same
-- buffer at the same offset (with equal lengths) means equal; only
-- otherwise are the bytes compared with 'compareBytes'.
eq :: ByteString -> ByteString -> Bool
eq a@(PS fpA offA lenA) b@(PS fpB offB lenB)
  | lenA /= lenB = False    -- short cut on length
  | sameBuffer   = True     -- short cut for the same string
  | otherwise    = case compareBytes a b of
                     EQ -> True
                     _  -> False
  where
    sameBuffer = fpA == fpB && offA == offB
{-# INLINE eq #-}
-- ^ still needed
442
-- | Lexicographic comparison of two 'ByteString's. The common prefix is
-- compared with 'memcmp'; if it is identical, the shorter string sorts
-- first.
compareBytes :: ByteString -> ByteString -> Ordering
compareBytes (PS fpA offA lenA) (PS fpB offB lenB)
  | lenA == 0 && lenB == 0 = EQ  -- short cut for empty strings
  | otherwise =
      accursedUnutterablePerformIO $
        withForeignPtr fpA $ \pa ->
        withForeignPtr fpB $ \pb -> do
          r <- memcmp (pa `plusPtr` offA) (pb `plusPtr` offB) (min lenA lenB)
          return $! verdict r
  where
    -- Turn the sign of the memcmp result into an Ordering, falling back to
    -- the lengths when the common prefix is equal.
    verdict r
      | r < 0     = LT
      | r > 0     = GT
      | otherwise = compare lenA lenB
453
-- | Concatenate two 'ByteString's.
--
-- If either argument is empty the other one is returned unchanged, without
-- copying. Otherwise a new buffer is created and both payloads are copied
-- into it with 'memcpy'.
--
-- The combined length is computed with 'checkedAdd', so an 'Int' overflow
-- raises a size-overflow error (as 'concat' already does) instead of
-- wrapping around to a negative allocation size.
append :: ByteString -> ByteString -> ByteString
append (PS _ _ 0) b                  = b
append a          (PS _ _ 0)         = a
append (PS fp1 off1 len1) (PS fp2 off2 len2) =
    unsafeCreate (checkedAdd "append" len1 len2) $ \destptr1 -> do
      -- The second payload goes directly after the first one.
      let destptr2 = destptr1 `plusPtr` len1
      withForeignPtr fp1 $ \p1 -> memcpy destptr1 (p1 `plusPtr` off1) len1
      withForeignPtr fp2 $ \p2 -> memcpy destptr2 (p2 `plusPtr` off2) len2
462
-- | Concatenate a list of 'ByteString's, avoiding a copy where possible.
concat :: [ByteString] -> ByteString
concat = \chunks -> scanEmpty chunks chunks
  -- A first pass over the input list answers two questions:
  --
  --   1. Is a copy needed at all? @concat []@, @concat [mempty, "hello"]@
  --      and @concat ["hello", mempty, mempty]@ can all be answered
  --      without copying.
  --   2. If a copy is needed, how large will the result be?
  --
  -- When a copy is needed, a buffer of that size is created and a second
  -- pass copies the chunks into it. Foreign calls are not entirely free,
  -- so empty chunks are skipped while copying.
  --
  -- The original list is threaded through scanEmpty, scanSingle and
  -- sumLens as an explicit argument because copyAll needs it again.
  -- Passing it explicitly avoids capturing it in these functions'
  -- closures, which would cause unnecessary closure allocation.
  where
    -- Only empty chunks seen so far: the result may still be empty.
    scanEmpty _     []                = mempty
    scanEmpty input (PS _ _ 0 : rest) = scanEmpty input rest
    scanEmpty input (bs       : rest) = scanSingle input bs rest

    -- Exactly one non-empty chunk seen so far: the result may still be
    -- that single chunk.
    scanSingle _     only []                  = only
    scanSingle input only (PS _ _ 0   : rest) = scanSingle input only rest
    scanSingle input only (PS _ _ len : rest) =
        sumLens input (checkedAdd "concat" firstLen len) rest
      where PS _ _ firstLen = only

    -- General case: add up the total length, then allocate and copy.
    sumLens input !acc (PS _ _ len : rest) =
        sumLens input (checkedAdd "concat" acc len) rest
    sumLens input acc [] =
        unsafeCreate acc $ \dst -> copyAll input dst

    -- Copy every non-empty chunk to consecutive positions of the buffer.
    copyAll []                     !_   = return ()
    copyAll (PS _ _ 0      : rest) !dst = copyAll rest dst
    copyAll (PS fp off len : rest) !dst = do
        withForeignPtr fp $ \src -> memcpy dst (src `plusPtr` off) len
        copyAll rest (dst `plusPtr` len)
{-# NOINLINE concat #-}

{-# RULES
"ByteString concat [] -> mempty"
   concat [] = mempty
"ByteString concat [bs] -> bs" forall x.
   concat [x] = x
 #-}
513
-- | Add two non-negative numbers. A negative sum is treated as overflow
-- and reported through 'overflowError', using the first argument as the
-- name of the calling function.
checkedAdd :: String -> Int -> Int -> Int
checkedAdd fun x y =
    let total = x + y
    in  if total < 0 then overflowError fun else total
{-# INLINE checkedAdd #-}
521
522 ------------------------------------------------------------------------
523
-- | Convert a 'Word8' to the 'Char' with the same code point. Should
-- compile to a no-op.
w2c :: Word8 -> Char
w2c = \w -> unsafeChr (fromIntegral w)
{-# INLINE w2c #-}
528
-- | Unsafe conversion from 'Char' to 'Word8'. Characters above @'\255'@
-- are silently truncated to their low 8 bits. Provided as a convenience
-- for 'ByteString' construction.
c2w :: Char -> Word8
c2w = \c -> fromIntegral (ord c)
{-# INLINE c2w #-}
535
-- | Selects the bytes that correspond to white-space characters in the
-- Latin-1 range. The alternatives are ordered by frequency.
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 w = case w of
    0x20 -> True    -- space
    0x0A -> True    -- LF, \n
    0x09 -> True    -- HT, \t
    0x0C -> True    -- FF, \f
    0x0D -> True    -- CR, \r
    0x0B -> True    -- VT, \v
    0xA0 -> True    -- spotted by QC..
    _    -> False
{-# INLINE isSpaceWord8 #-}
548
-- | Selects the white-space characters in the Latin-1 range.
isSpaceChar8 :: Char -> Bool
isSpaceChar8 c = case c of
    ' '    -> True
    '\t'   -> True
    '\n'   -> True
    '\r'   -> True
    '\f'   -> True
    '\v'   -> True
    '\xa0' -> True
    _      -> False
{-# INLINE isSpaceChar8 #-}
560
-- | Raise the size-overflow error on behalf of the function named @fun@.
overflowError :: String -> a
overflowError fun = error msg
  where
    msg = "Data.ByteString." ++ fun ++ ": size overflow"
563
564 ------------------------------------------------------------------------
565
-- | This \"function\" has a superficial similarity to 'unsafePerformIO' but
-- it is in fact a malevolent agent of chaos. It unpicks the seams of reality
-- (and the 'IO' monad) so that the normal rules no longer apply. It lulls you
-- into thinking it is reasonable, but when you are not looking it stabs you
-- in the back and aliases all of your mutable buffers. The carcasses of many
-- a seasoned Haskell programmer lie strewn at its feet.
--
-- Mechanically, it applies the state-passing function inside the 'IO'
-- action directly to 'realWorld#' and throws the resulting state away.
--
-- Witness the trail of destruction:
--
-- * <https://github.com/haskell/bytestring/commit/71c4b438c675aa360c79d79acc9a491e7bbc26e7>
--
-- * <https://github.com/haskell/bytestring/commit/210c656390ae617d9ee3b8bcff5c88dd17cef8da>
--
-- * <https://ghc.haskell.org/trac/ghc/ticket/3486>
--
-- * <https://ghc.haskell.org/trac/ghc/ticket/3487>
--
-- * <https://ghc.haskell.org/trac/ghc/ticket/7270>
--
-- Do not talk about \"safe\"! You do not know what is safe!
--
-- Yield not to its blasphemous call! Flee traveller! Flee or you will be
-- corrupted and devoured!
--
{-# INLINE accursedUnutterablePerformIO #-}
accursedUnutterablePerformIO :: IO a -> a
accursedUnutterablePerformIO (IO act) =
    case act realWorld# of
      (# _, result #) -> result
593
-- | Deprecated former name of 'accursedUnutterablePerformIO'.
inlinePerformIO :: IO a -> a
inlinePerformIO = \io -> accursedUnutterablePerformIO io
{-# INLINE inlinePerformIO #-}
{-# DEPRECATED inlinePerformIO "If you think you know what you are doing, use 'unsafePerformIO'. If you are sure you know what you are doing, use 'unsafeDupablePerformIO'. If you enjoy sharing an address space with a malevolent agent of chaos, try 'accursedUnutterablePerformIO'." #-}
598
599 -- ---------------------------------------------------------------------
600 --
601 -- Standard C functions
602 --
603
-- | @strlen@ from @string.h@, imported as an @unsafe@ foreign call.
foreign import ccall unsafe "string.h strlen"
    c_strlen :: CString -> IO CSize

-- | The address of @free@ from @stdlib.h@, typed as a function on
-- @Ptr Word8@ (by its name, intended for use as a finalizer).
foreign import ccall unsafe "static stdlib.h &free"
    c_free_finalizer :: FunPtr (Ptr Word8 -> IO ())
609
-- | Raw binding to @memchr@ from @string.h@; the byte is passed as a 'CInt'.
foreign import ccall unsafe "string.h memchr"
    c_memchr :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)

-- | 'c_memchr' taking the byte to look for as a 'Word8'.
memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
memchr buf byte size = c_memchr buf (fromIntegral byte) size
615
-- | Raw binding to @memcmp@ from @string.h@.
foreign import ccall unsafe "string.h memcmp"
    c_memcmp :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt

-- | 'c_memcmp' taking the number of bytes as an 'Int', which is converted
-- to 'CSize' with 'fromIntegral'.
memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt
memcmp lhs rhs n = c_memcmp lhs rhs (fromIntegral n)
621
-- | Raw binding to @memcpy@ from @string.h@.
foreign import ccall unsafe "string.h memcpy"
    c_memcpy :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)

-- | 'c_memcpy' taking the number of bytes as an 'Int' (converted to 'CSize'
-- with 'fromIntegral') and discarding the returned pointer. The first
-- pointer is the destination, the second the source.
memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
memcpy dst src n = do
    _ <- c_memcpy dst src (fromIntegral n)
    return ()
627
628 {-
629 foreign import ccall unsafe "string.h memmove" c_memmove
630 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
631
632 memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
633 memmove p q s = do c_memmove p q s
634 return ()
635 -}
636
-- | Raw binding to @memset@ from @string.h@; the fill byte is passed as a
-- 'CInt'.
foreign import ccall unsafe "string.h memset"
    c_memset :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)

-- | 'c_memset' taking the fill byte as a 'Word8'.
memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
memset buf byte size = c_memset buf (fromIntegral byte) size
642
643 -- ---------------------------------------------------------------------
644 --
645 -- Uses our C code
646 --
647
-- Bindings to the package's own C helpers declared in @fpstring.h@. Their
-- implementation is not visible from this module; the one-line summaries
-- below are inferred from the names only.
-- NOTE(review): confirm argument order (destination vs. source) against
-- cbits/fpstring.c.

-- | @fps_reverse@: presumably writes a reversed copy of a buffer.
foreign import ccall unsafe "static fpstring.h fps_reverse"
    c_reverse :: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()

-- | @fps_intersperse@: presumably copies a buffer with a byte placed
-- between its elements.
foreign import ccall unsafe "static fpstring.h fps_intersperse"
    c_intersperse :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()

-- | @fps_maximum@: presumably the largest byte of a buffer.
foreign import ccall unsafe "static fpstring.h fps_maximum"
    c_maximum :: Ptr Word8 -> CULong -> IO Word8

-- | @fps_minimum@: presumably the smallest byte of a buffer.
foreign import ccall unsafe "static fpstring.h fps_minimum"
    c_minimum :: Ptr Word8 -> CULong -> IO Word8

-- | @fps_count@: presumably the number of occurrences of a byte in a
-- buffer.
foreign import ccall unsafe "static fpstring.h fps_count"
    c_count :: Ptr Word8 -> CULong -> Word8 -> IO CULong