wibble
[packages/binary.git] / src / Data / Binary.hs
1 -----------------------------------------------------------------------------
2 -- |
3 -- Module : Data.Binary
4 -- Copyright : Lennart Kolmodin
5 -- License : BSD3-style (see LICENSE)
6 --
7 -- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
8 -- Stability : unstable
9 -- Portability : FFI + (currently) flexible instances
10 --
11 -- Binary serialisation of values to and from lazy ByteStrings.
12 --
13 -----------------------------------------------------------------------------
14
15 module Data.Binary (
16
17 -- * The Binary class
18 Binary(..)
19
20 -- * The Get and Put monads
21 , Get
22 , Put
23
24 -- * Binary serialisation
25 , encode -- :: Binary a => a -> ByteString
26 , decode -- :: Binary a => ByteString -> a
27
28 -- * IO functions for serialisation
29 , hEncode -- :: Binary a => Handle -> a -> IO ()
30 , hDecode -- :: Binary a => Handle -> IO a
31 , encodeFile -- :: Binary a => FilePath -> a -> IO ()
32 , decodeFile -- :: Binary a => FilePath -> IO a
33
34 ) where
35
36 import Data.Binary.Put
37 import Data.Binary.Get
38
39 import Control.Monad
40 import Foreign
41
42 import Data.Char (ord, chr)
43 import Data.ByteString.Lazy (ByteString)
44 import qualified Data.ByteString.Lazy as L
45
46 -- and needed for the instances:
47 import qualified Data.ByteString as B
48 import qualified Data.Map as Map
49 import qualified Data.Set as Set
50 import qualified Data.IntMap as IntMap
51 import qualified Data.IntSet as IntSet
52
53 import Data.Array (Array)
54 import Data.Array.IArray
55 import Data.Array.Unboxed
56 import Data.List (unfoldr)
57 -- import Data.Queue
58 import qualified Data.Tree as T
59 import qualified Data.Sequence as Seq
60
61 import System.IO
62
63 ------------------------------------------------------------------------
64
65 -- | The @Binary@ class provides 'put' and 'get', methods to encode and
66 -- decode a value to a lazy bytestring.
67 --
68 -- New instances for binary should have the following property:
69 --
70 -- > get . put == id
71 --
72 -- A range of instances are provided for basic Haskell types. To
73 -- serialise a custom type, an instance of Binary for that type is
74 -- required. For example, suppose we have a data structure:
75 --
76 -- > data Exp = IntE Int
77 -- > | OpE String Exp Exp
78 -- > deriving Show
79 --
80 -- We can encode values of this type into bytestrings using the
81 -- following instance, which proceeds by recursively breaking down the
82 -- structure to serialise:
83 --
84 -- > instance Binary Exp where
85 -- > put (IntE i) = do put (0 :: Word8)
86 -- > put i
87 -- > put (OpE s e1 e2) = do put (1 :: Word8)
88 -- > put s
89 -- > put e1
90 -- > put e2
91 -- >
92 -- > get = do t <- get :: Get Word8
93 -- > case t of
94 -- > 0 -> do i <- get
95 -- > return (IntE i)
96 -- > 1 -> do s <- get
97 -- > e1 <- get
98 -- > e2 <- get
99 -- > return (OpE s e1 e2)
100 --
101 -- Note how we write an initial tag byte to indicate each variant of the
102 -- data type.
103 --
104 -- To serialise this to a bytestring, we use 'encode', which packs the
105 -- data structure into a binary format, in a lazy bytestring
106 --
107 -- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
108 -- > > let v = encode e
109 --
110 -- Where 'v' is a binary encoded data structure. To reconstruct the
111 -- original data, we use 'decode'
112 --
113 -- > > decode v :: Exp
114 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
115 --
116 -- The lazy ByteString that results from 'encode' can be written to
117 -- disk, and read from disk using Data.ByteString.Lazy IO functions,
118 -- such as hPutStr or writeFile:
119 --
120 -- > > writeFile "/tmp/exp.txt" (encode e)
121 --
122 -- And read back with:
123 --
124 -- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
125 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
126 --
127 -- We can also directly serialise a value to and from a Handle, or a file:
128 --
129 -- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
130 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
131 --
132 -- And write a value to disk
133 --
134 -- > > encodeFile "/tmp/a.txt" v
135 --
136 class Binary t where
137 -- | Encode a value in the Put monad.
138 put :: t -> Put ()
139 -- | Decode a value in the Get monad
140 get :: Get t
141
142 ------------------------------------------------------------------------
143 -- Wrappers to run the underlying monad
144
145 -- | Encode a value using binary serialisation to a lazy ByteString.
146 --
147 encode :: Binary a => a -> ByteString
148 encode = runPut . put
149
150 -- | Decode a value from a lazy ByteString, reconstructing the original structure.
151 --
152 decode :: Binary a => ByteString -> a
153 decode = runGet get
154
155 ------------------------------------------------------------------------
156 -- Convenience IO operations
157
158 -- | Serialise a value to a file
159 encodeFile :: Binary a => FilePath -> a -> IO ()
160 encodeFile f v = L.writeFile f (encode v)
161
162 -- | Reconstruct a value previously written to a file
163 decodeFile :: Binary a => FilePath -> IO a
164 decodeFile f = liftM decode (L.readFile f)
165
166 -- | Serialise a value to a Handle
167 hEncode :: Binary a => Handle -> a -> IO ()
168 hEncode h v = L.hPut h (encode v)
169
170 -- | Reconstruct a value from a Handle
171 hDecode :: Binary a => Handle -> IO a
172 hDecode h = liftM decode (L.hGetContents h)
173
174 ------------------------------------------------------------------------
175 -- Simple instances
176
177 instance Binary () where
178 put () = return ()
179 get = return ()
180
181 instance Binary Ordering where
182 put = putWord8 . fromIntegral . fromEnum
183 get = liftM (toEnum . fromIntegral) getWord8
184
185 ------------------------------------------------------------------------
186 -- Words and Ints
187
188 instance Binary Word8 where
189 put = putWord8
190 get = getWord8
191
192 instance Binary Bool where
193 put = putWord8 . fromIntegral . fromEnum
194 get = liftM (toEnum . fromIntegral) getWord8
195
196 instance Binary Word16 where
197 put = putWord16be
198 get = getWord16be
199
200 instance Binary Word32 where
201 put = putWord32be
202 get = getWord32be
203
204 instance Binary Word64 where
205 put = putWord64be
206 get = getWord64be
207
208 instance Binary Int8 where
209 put i = put (fromIntegral i :: Word8)
210 get = liftM fromIntegral (get :: Get Word8)
211
212 instance Binary Int16 where
213 put i = put (fromIntegral i :: Word16)
214 get = liftM fromIntegral (get :: Get Word16)
215
216 instance Binary Int32 where
217 put i = put (fromIntegral i :: Word32)
218 get = liftM fromIntegral (get :: Get Word32)
219
220 instance Binary Int64 where
221 put i = put (fromIntegral i :: Word64)
222 get = liftM fromIntegral (get :: Get Word64)
223
224 instance Binary Int where
225 put i = put (fromIntegral i :: Int32)
226 get = liftM fromIntegral (get :: Get Int32)
227
228 {-
229
230 instance Binary Integer where
231 put (S# i) = putWord8 0 >> put (I# i)
232 put (J# s ba) = do
233 putWord8 1
234 put (I# s)
235 let sz = sizeofByteArray# ba -- in *bytes*
236 put (I# sz) -- in *bytes*
237 putByteArray a# sz#
238
239 get = do
240 b <- getWord8
241 case b of
242 0 -> do (I# i) <- get
243 return (S# i)
244 _ -> do (I# s) <- get
245 sz <- get
246 (BA a) <- getByteArray sz
247 return (J# s a)
248
249 data ByteArray = BA ByteArray#
250 -}
251
252 ------------------------------------------------------------------------
253 -- Char
254
255 -- TODO profile, benchmark and test this instance
256 instance Binary Char where
257 put a | c <= 0x7f = put (fromIntegral c :: Word8)
258 | c <= 0x7ff = do put (0xc0 .|. y)
259 put (0x80 .|. z)
260 | c <= 0xffff = do put (0xe0 .|. x)
261 put (0x80 .|. y)
262 put (0x80 .|. z)
263 | c <= 0x10ffff = do put (0xf0 .|. w)
264 put (0x80 .|. x)
265 put (0x80 .|. y)
266 put (0x80 .|. z)
267 | otherwise = error "Not a valid Unicode code point"
268 where
269 c = ord a
270 z, y, x, w :: Word8
271 z = fromIntegral (c .&. 0x3f)
272 y = fromIntegral (shiftR c 6 .&. 0x3f)
273 x = fromIntegral (shiftR c 12 .&. 0x3f)
274 w = fromIntegral (shiftR c 18 .&. 0x7)
275
276 get = do
277 let getByte = liftM (fromIntegral :: Word8 -> Int) get
278 shiftL6 = flip shiftL 6 :: Int -> Int
279 w <- getByte
280 r <- case () of
281 _ | w < 0x80 -> return w
282 | w < 0xe0 -> do
283 x <- liftM (xor 0x80) getByte
284 return (x .|. shiftL6 (xor 0xc0 w))
285 | w < 0xf0 -> do
286 x <- liftM (xor 0x80) getByte
287 y <- liftM (xor 0x80) getByte
288 return (y .|. shiftL6 (x .|. shiftL6
289 (xor 0xe0 w)))
290 | otherwise -> do
291 x <- liftM (xor 0x80) getByte
292 y <- liftM (xor 0x80) getByte
293 z <- liftM (xor 0x80) getByte
294 return (z .|. shiftL6 (y .|. shiftL6
295 (x .|. shiftL6 (xor 0x80 w))))
296 return $! chr r
297
298 ------------------------------------------------------------------------
299 -- Instances for the first few tuples
300
301 instance (Binary a, Binary b) => Binary (a,b) where
302 put (a,b) = put a >> put b
303 get = do a <- get
304 b <- get
305 return (a,b)
306
307 --
308 -- And then the recursive cases
309 --
310
311 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
312 put (a,b,c) = put (a, (b,c))
313 get = do (a,(b,c)) <- get ; return (a,b,c)
314
315 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
316 put (a,b,c,d) = put (a,(b,c,d))
317 get = do (a,(b,c,d)) <- get ; return (a,b,c,d)
318
319 instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
320 put (a,b,c,d,e) = put (a,(b,c,d,e))
321 get = do (a,(b,c,d,e)) <- get ; return (a,b,c,d,e)
322
323 instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
324 => Binary (a,b,c,d,e,f) where
325 put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
326 get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
327
328 instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
329 => Binary (a,b,c,d,e,f,g) where
330 put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
331 get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
332
333 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
334 Binary f, Binary g, Binary h)
335 => Binary (a,b,c,d,e,f,g,h) where
336 put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
337 get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
338
339 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
340 Binary f, Binary g, Binary h, Binary i)
341 => Binary (a,b,c,d,e,f,g,h,i) where
342 put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
343 get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
344
345 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
346 Binary f, Binary g, Binary h, Binary i, Binary j)
347 => Binary (a,b,c,d,e,f,g,h,i,j) where
348 put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
349 get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
350
351 ------------------------------------------------------------------------
352 -- Container types
353
354 instance Binary a => Binary [a] where
355 put l = put (length l) >> mapM_ put l
356 get = do n <- get :: Get Int
357 replicateM n get
358
359 instance (Binary a) => Binary (Maybe a) where
360 put Nothing = putWord8 0
361 put (Just x) = putWord8 1 >> put x
362 get = do
363 w <- getWord8
364 case w of
365 0 -> return Nothing
366 _ -> liftM Just get
367
368 instance (Binary a, Binary b) => Binary (Either a b) where
369 put (Left a) = putWord8 0 >> put a
370 put (Right b) = putWord8 1 >> put b
371 get = do
372 w <- getWord8
373 case w of
374 0 -> liftM Left get
375 _ -> liftM Right get
376
377 ------------------------------------------------------------------------
378 -- ByteStrings (have specially efficient instances)
379
380 instance Binary B.ByteString where
381 put bs = do put (B.length bs)
382 putByteString bs
383 get = get >>= getByteString
384
385 instance Binary ByteString where
386 put bs = do put (L.length bs)
387 putLazyByteString bs
388 get = get >>= getLazyByteString
389
390 ------------------------------------------------------------------------
391 -- Maps and Sets
392
393 instance (Ord a, Binary a) => Binary (Set.Set a) where
394 put = put . Set.toAscList
395 get = liftM Set.fromDistinctAscList get
396
397 instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
398 put = put . Map.toAscList
399 get = liftM Map.fromDistinctAscList get
400
401 instance Binary IntSet.IntSet where
402 put = put . IntSet.toAscList
403 get = liftM IntSet.fromDistinctAscList get
404
405 instance (Binary e) => Binary (IntMap.IntMap e) where
406 put = put . IntMap.toAscList
407 get = liftM IntMap.fromDistinctAscList get
408
409 ------------------------------------------------------------------------
410 -- Queues and Sequences
411
412 {-
413 instance (Binary e) => Binary (Queue e) where
414 put = put . queueToList
415 get = fmap listToQueue get
416 -}
417
418 instance (Binary e) => Binary (Seq.Seq e) where
419 -- any better way to do this?
420 put s = put . flip unfoldr s $ \seq ->
421 case Seq.viewl seq of
422 Seq.EmptyL -> Nothing
423 (Seq.:<) e seq' -> Just (e,seq')
424 get = fmap Seq.fromList get
425
426 ------------------------------------------------------------------------
427 -- Trees
428
429 instance (Binary e) => Binary (T.Tree e) where
430 put (T.Node r s) = put r >> put s
431 get = liftM2 T.Node get get
432
433 ------------------------------------------------------------------------
434 -- Arrays
435
436 instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
437 put a = do
438 put (bounds a)
439 put (elems a)
440 get = do
441 bs <- get
442 es <- get
443 return (listArray bs es)
444
445 -- todo handle UArray i Bool specially?
446 --
447 -- N.B.
448 --
449 -- Non type-variable argument in the constraint: IArray UArray e
450 -- (Use -fglasgow-exts to permit this)
451 --
452 instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
453 put a = do
454 put (bounds a)
455 put (elems a)
456 get = do
457 bs <- get
458 es <- get
459 return (listArray bs es)