Avoid copying if possible in `concat`
[packages/bytestring.git] / Data / ByteString / Lazy / Internal.hs
1 {-# LANGUAGE CPP #-}
2 {-# LANGUAGE DeriveDataTypeable #-}
3 #if __GLASGOW_HASKELL__ >= 703
4 {-# LANGUAGE Unsafe #-}
5 #endif
6 {-# OPTIONS_HADDOCK hide #-}
7
8 -- |
9 -- Module : Data.ByteString.Lazy.Internal
10 -- Copyright : (c) Don Stewart 2006-2008
11 -- (c) Duncan Coutts 2006-2011
12 -- License : BSD-style
13 -- Maintainer : dons00@gmail.com, duncan@community.haskell.org
14 -- Stability : unstable
15 -- Portability : non-portable
16 --
17 -- A module containing semi-public 'ByteString' internals. This exposes
18 -- the 'ByteString' representation and low level construction functions.
19 -- Modules which extend the 'ByteString' system will need to use this module
20 -- while ideally most users will be able to make do with the public interface
21 -- modules.
22 --
23 module Data.ByteString.Lazy.Internal (
24
25 -- * The lazy @ByteString@ type and representation
26 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
27 chunk,
28 foldrChunks,
29 foldlChunks,
30
31 -- * Data type invariant and abstraction function
32 invariant,
33 checkInvariant,
34
35 -- * Chunk allocation sizes
36 defaultChunkSize,
37 smallChunkSize,
38 chunkOverhead,
39
40 -- * Conversion with lists: packing and unpacking
41 packBytes, packChars,
42 unpackBytes, unpackChars,
43
44 ) where
45
46 import Prelude hiding (concat)
47
48 import qualified Data.ByteString.Internal as S
49 import qualified Data.ByteString as S (length, take, drop)
50
51 import Data.Word (Word8)
52 import Foreign.Storable (Storable(sizeOf))
53
54 #if MIN_VERSION_base(4,9,0)
55 import Data.Semigroup (Semigroup((<>)))
56 #endif
57 #if !(MIN_VERSION_base(4,8,0))
58 import Data.Monoid (Monoid(..))
59 #endif
60 import Control.DeepSeq (NFData, rnf)
61
62 import Data.String (IsString(..))
63
64 import Data.Typeable (Typeable)
65 import Data.Data (Data(..), mkNoRepType)
66
67 -- | A space-efficient representation of a 'Word8' vector, supporting many
68 -- efficient operations.
69 --
70 -- A lazy 'ByteString' contains 8-bit bytes, or by using the operations
71 -- from "Data.ByteString.Lazy.Char8" it can be interpreted as containing
72 -- 8-bit characters.
73 --
74 data ByteString = Empty | Chunk {-# UNPACK #-} !S.ByteString ByteString
75 deriving (Typeable)
76 -- See 'invariant' function later in this module for internal invariants.
77
78 instance Eq ByteString where
79 (==) = eq
80
81 instance Ord ByteString where
82 compare = cmp
83
84 #if MIN_VERSION_base(4,9,0)
85 instance Semigroup ByteString where
86 (<>) = append
87 #endif
88
89 instance Monoid ByteString where
90 mempty = Empty
91 #if MIN_VERSION_base(4,9,0)
92 mappend = (<>)
93 #else
94 mappend = append
95 #endif
96 mconcat = concat
97
98 instance NFData ByteString where
99 rnf Empty = ()
100 rnf (Chunk _ b) = rnf b
101
102 instance Show ByteString where
103 showsPrec p ps r = showsPrec p (unpackChars ps) r
104
105 instance Read ByteString where
106 readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ]
107
108 instance IsString ByteString where
109 fromString = packChars
110
111 instance Data ByteString where
112 gfoldl f z txt = z packBytes `f` unpackBytes txt
113 toConstr _ = error "Data.ByteString.Lazy.ByteString.toConstr"
114 gunfold _ _ = error "Data.ByteString.Lazy.ByteString.gunfold"
115 dataTypeOf _ = mkNoRepType "Data.ByteString.Lazy.ByteString"
116
117 ------------------------------------------------------------------------
118 -- Packing and unpacking from lists
119
120 packBytes :: [Word8] -> ByteString
121 packBytes cs0 =
122 packChunks 32 cs0
123 where
124 packChunks n cs = case S.packUptoLenBytes n cs of
125 (bs, []) -> chunk bs Empty
126 (bs, cs') -> Chunk bs (packChunks (min (n * 2) smallChunkSize) cs')
127
128 packChars :: [Char] -> ByteString
129 packChars cs0 = packChunks 32 cs0
130 where
131 packChunks n cs = case S.packUptoLenChars n cs of
132 (bs, []) -> chunk bs Empty
133 (bs, cs') -> Chunk bs (packChunks (min (n * 2) smallChunkSize) cs')
134
135 unpackBytes :: ByteString -> [Word8]
136 unpackBytes Empty = []
137 unpackBytes (Chunk c cs) = S.unpackAppendBytesLazy c (unpackBytes cs)
138
139 unpackChars :: ByteString -> [Char]
140 unpackChars Empty = []
141 unpackChars (Chunk c cs) = S.unpackAppendCharsLazy c (unpackChars cs)
142
143 ------------------------------------------------------------------------
144
145 -- | The data type invariant:
146 -- Every ByteString is either 'Empty' or consists of non-null 'S.ByteString's.
147 -- All functions must preserve this, and the QC properties must check this.
148 --
149 invariant :: ByteString -> Bool
150 invariant Empty = True
151 invariant (Chunk (S.PS _ _ len) cs) = len > 0 && invariant cs
152
153 -- | In a form that checks the invariant lazily.
154 checkInvariant :: ByteString -> ByteString
155 checkInvariant Empty = Empty
156 checkInvariant (Chunk c@(S.PS _ _ len) cs)
157 | len > 0 = Chunk c (checkInvariant cs)
158 | otherwise = error $ "Data.ByteString.Lazy: invariant violation:"
159 ++ show (Chunk c cs)
160
161 ------------------------------------------------------------------------
162
163 -- | Smart constructor for 'Chunk'. Guarantees the data type invariant.
164 chunk :: S.ByteString -> ByteString -> ByteString
165 chunk c@(S.PS _ _ len) cs | len == 0 = cs
166 | otherwise = Chunk c cs
167 {-# INLINE chunk #-}
168
169 -- | Consume the chunks of a lazy ByteString with a natural right fold.
170 foldrChunks :: (S.ByteString -> a -> a) -> a -> ByteString -> a
171 foldrChunks f z = go
172 where go Empty = z
173 go (Chunk c cs) = f c (go cs)
174 {-# INLINE foldrChunks #-}
175
176 -- | Consume the chunks of a lazy ByteString with a strict, tail-recursive,
177 -- accumulating left fold.
178 foldlChunks :: (a -> S.ByteString -> a) -> a -> ByteString -> a
179 foldlChunks f z = go z
180 where go a _ | a `seq` False = undefined
181 go a Empty = a
182 go a (Chunk c cs) = go (f a c) cs
183 {-# INLINE foldlChunks #-}
184
185 ------------------------------------------------------------------------
186
187 -- The representation uses lists of packed chunks. When we have to convert from
188 -- a lazy list to the chunked representation, then by default we use this
189 -- chunk size. Some functions give you more control over the chunk size.
190 --
191 -- Measurements here:
192 -- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
193 --
194 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
195 -- The following value assumes people have something greater than 128k,
196 -- and need to share the cache with other programs.
197
198 -- | The chunk size used for I\/O. Currently set to 32k, less the memory management overhead
199 defaultChunkSize :: Int
200 defaultChunkSize = 32 * k - chunkOverhead
201 where k = 1024
202
203 -- | The recommended chunk size. Currently set to 4k, less the memory management overhead
204 smallChunkSize :: Int
205 smallChunkSize = 4 * k - chunkOverhead
206 where k = 1024
207
208 -- | The memory management overhead. Currently this is tuned for GHC only.
209 chunkOverhead :: Int
210 chunkOverhead = 2 * sizeOf (undefined :: Int)
211
212 ------------------------------------------------------------------------
213 -- Implementations for Eq, Ord and Monoid instances
214
215 eq :: ByteString -> ByteString -> Bool
216 eq Empty Empty = True
217 eq Empty _ = False
218 eq _ Empty = False
219 eq (Chunk a as) (Chunk b bs) =
220 case compare (S.length a) (S.length b) of
221 LT -> a == S.take (S.length a) b && eq as (Chunk (S.drop (S.length a) b) bs)
222 EQ -> a == b && eq as bs
223 GT -> S.take (S.length b) a == b && eq (Chunk (S.drop (S.length b) a) as) bs
224
225 cmp :: ByteString -> ByteString -> Ordering
226 cmp Empty Empty = EQ
227 cmp Empty _ = LT
228 cmp _ Empty = GT
229 cmp (Chunk a as) (Chunk b bs) =
230 case compare (S.length a) (S.length b) of
231 LT -> case compare a (S.take (S.length a) b) of
232 EQ -> cmp as (Chunk (S.drop (S.length a) b) bs)
233 result -> result
234 EQ -> case compare a b of
235 EQ -> cmp as bs
236 result -> result
237 GT -> case compare (S.take (S.length b) a) b of
238 EQ -> cmp (Chunk (S.drop (S.length b) a) as) bs
239 result -> result
240
241 append :: ByteString -> ByteString -> ByteString
242 append xs ys = foldrChunks Chunk ys xs
243
244 concat :: [ByteString] -> ByteString
245 concat css0 = to css0
246 where
247 go Empty css = to css
248 go (Chunk c cs) css = Chunk c (go cs css)
249 to [] = Empty
250 to (cs:css) = go cs css