Tune to beat NewBinary
[packages/binary.git] / src / Data / Binary.hs
1 -----------------------------------------------------------------------------
2 -- |
3 -- Module : Data.Binary
4 -- Copyright : Lennart Kolmodin
5 -- License : BSD3-style (see LICENSE)
6 --
7 -- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
8 -- Stability : unstable
9 -- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
10 --
11 -- Binary serialisation of Haskell values to and from lazy ByteStrings.
12 --
13 -----------------------------------------------------------------------------
14
15 module Data.Binary (
16
17 -- * The Binary class
18 Binary(..)
19
20 -- $example
21
22 -- * The Get and Put monads
23 , Get
24 , Put
25
26 -- * Useful helpers for writing instances
27 , putWord8
28 , getWord8
29
30 -- * Binary serialisation
31 , encode -- :: Binary a => a -> ByteString
32 , decode -- :: Binary a => ByteString -> a
33
34 -- * IO functions for serialisation
35 , encodeFile -- :: Binary a => FilePath -> a -> IO ()
36 , decodeFile -- :: Binary a => FilePath -> IO a
37
38 -- * Lazy put and get
39 , lazyPut
40 , lazyGet
41
42 , module Data.Word -- useful
43
44 ) where
45
46 import Data.Word
47
48 import Data.Binary.Put
49 import Data.Binary.Get
50
51 import Control.Monad
52 import Foreign
53 import System.IO
54
55 import Data.ByteString.Lazy (ByteString)
56 import qualified Data.ByteString.Lazy as L
57
58 import Data.Char (chr,ord)
59 import Data.List (unfoldr)
60
61 -- And needed for the instances:
62 import qualified Data.ByteString as B
63 import qualified Data.Map as Map
64 import qualified Data.Set as Set
65 import qualified Data.IntMap as IntMap
66 import qualified Data.IntSet as IntSet
67
68 import qualified Data.Tree as T
69
70 import Data.Array.Unboxed
71
72 --
73 -- This isn't available in older Hugs or older GHC
74 --
75 #if __GLASGOW_HASKELL__ >= 606
76 import qualified Data.Sequence as Seq
77 #endif
78
79 ------------------------------------------------------------------------
80
81 -- | The @Binary@ class provides 'put' and 'get', methods to encode and
82 -- decode a Haskell value to a lazy ByteString. It mirrors the Read and
83 -- Show classes for textual representation of Haskell types, and is
84 -- suitable for serialising Haskell values to disk, over the network.
85 --
86 -- For parsing and generating simple external binary formats (e.g. C
87 -- structures), Binary may be used, but in general is not suitable
88 -- for complex protocols. Instead use the Put and Get primitives
89 -- directly.
90 --
91 -- Instances of Binary should satisfy the following property:
92 --
93 -- > get . put == id
94 --
95 -- A range of instances are provided for basic Haskell types.
96 --
97 class Binary t where
98 -- | Encode a value in the Put monad.
99 put :: t -> Put
100 -- | Decode a value in the Get monad
101 get :: Get t
102
103 -- $example
104 -- To serialise a custom type, an instance of Binary for that type is
105 -- required. For example, suppose we have a data structure:
106 --
107 -- > data Exp = IntE Int
108 -- > | OpE String Exp Exp
109 -- > deriving Show
110 --
111 -- We can encode values of this type into bytestrings using the
112 -- following instance, which proceeds by recursively breaking down the
113 -- structure to serialise:
114 --
115 -- > instance Binary Exp where
116 -- > put (IntE i) = do put (0 :: Word8)
117 -- > put i
118 -- > put (OpE s e1 e2) = do put (1 :: Word8)
119 -- > put s
120 -- > put e1
121 -- > put e2
122 -- >
123 -- > get = do t <- get :: Get Word8
124 -- > case t of
125 -- > 0 -> do i <- get
126 -- > return (IntE i)
127 -- > 1 -> do s <- get
128 -- > e1 <- get
129 -- > e2 <- get
130 -- > return (OpE s e1 e2)
131 --
132 -- Note how we write an initial tag byte to indicate each variant of the
133 -- data type.
134 --
135 -- To serialise this to a bytestring, we use 'encode', which packs the
136 -- data structure into a binary format, in a lazy bytestring
137 --
138 -- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
139 -- > > let v = encode e
140 --
141 -- Where 'v' is a binary encoded data structure. To reconstruct the
142 -- original data, we use 'decode'
143 --
144 -- > > decode v :: Exp
145 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
146 --
147 -- The lazy ByteString that results from 'encode' can be written to
148 -- disk, and read from disk using Data.ByteString.Lazy IO functions,
149 -- such as hPutStr or writeFile:
150 --
151 -- > > writeFile "/tmp/exp.txt" (encode e)
152 --
153 -- And read back with:
154 --
155 -- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
156 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
157 --
158 -- We can also directly serialise a value to and from a Handle, or a file:
159 --
160 -- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
161 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
162 --
163 -- And write a value to disk
164 --
165 -- > > encodeFile "/tmp/a.txt" v
166 --
167
168 ------------------------------------------------------------------------
169 -- Wrappers to run the underlying monad
170
171 -- | Encode a value using binary serialisation to a lazy ByteString.
172 --
173 encode :: Binary a => a -> ByteString
174 encode = runPut . put
175
176 -- | Decode a value from a lazy ByteString, reconstructing the original structure.
177 --
178 decode :: Binary a => ByteString -> a
179 decode = runGet get
180
181 ------------------------------------------------------------------------
182 -- Convenience IO operations
183
184 -- | Lazily serialise a value to a file
185 --
186 -- This is just a convenience function, it's defined simply as:
187 --
188 -- > encodeFile f = B.writeFile f . encode
189 --
190 -- So for example if you wanted to compress as well, you could use:
191 --
192 -- > B.writeFile f . compress . encode
193 --
194 encodeFile :: Binary a => FilePath -> a -> IO ()
195 encodeFile f v = L.writeFile f (encode v)
196
197 -- | Lazily reconstruct a value previously written to a file
198 --
199 -- This is just a convenience function, it's defined simply as:
200 --
201 -- > decodeFile f = return . decode =<< B.readFile f
202 --
203 -- So for example if you wanted to decompress as well, you could use:
204 --
205 -- > return . decode . decompress =<< B.readFile f
206 --
207 decodeFile :: Binary a => FilePath -> IO a
208 decodeFile f = liftM decode (L.readFile f)
209
210 ------------------------------------------------------------------------
211 -- Lazy put and get
212
213 lazyPut :: (Binary a) => a -> Put
214 lazyPut a = put (encode a)
215
216 lazyGet :: (Binary a) => Get a
217 lazyGet = fmap decode get
218
219 ------------------------------------------------------------------------
220 -- Simple instances
221
222 -- The () type need never be written to disk: values of singleton type
223 -- can be reconstructed from the type alone
224 instance Binary () where
225 put () = return ()
226 get = return ()
227
228 -- Bools are encoded as a byte in the range 0 .. 1
229 instance Binary Bool where
230 put = putWord8 . fromIntegral . fromEnum
231 get = liftM (toEnum . fromIntegral) getWord8
232
233 -- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
234 instance Binary Ordering where
235 put = putWord8 . fromIntegral . fromEnum
236 get = liftM (toEnum . fromIntegral) getWord8
237
238 ------------------------------------------------------------------------
239 -- Words and Ints
240
241 -- Words8s are written as bytes
242 instance Binary Word8 where
243 put = putWord8
244 get = getWord8
245
246 -- Words16s are written as 2 bytes in big-endian (network) order
247 instance Binary Word16 where
248 put = putWord16be
249 get = getWord16be
250
251 -- Words32s are written as 4 bytes in big-endian (network) order
252 instance Binary Word32 where
253 put = putWord32be
254 get = getWord32be
255
256 -- Words64s are written as 8 bytes in big-endian (network) order
257 instance Binary Word64 where
258 put = putWord64be
259 get = getWord64be
260
261 -- Int8s are written as a single byte.
262 instance Binary Int8 where
263 put i = put (fromIntegral i :: Word8)
264 get = liftM fromIntegral (get :: Get Word8)
265
266 -- Int16s are written as a 2 bytes in big endian format
267 instance Binary Int16 where
268 put i = put (fromIntegral i :: Word16)
269 get = liftM fromIntegral (get :: Get Word16)
270
271 -- Int32s are written as a 4 bytes in big endian format
272 instance Binary Int32 where
273 put i = put (fromIntegral i :: Word32)
274 get = liftM fromIntegral (get :: Get Word32)
275
276 -- Int64s are written as a 4 bytes in big endian format
277 instance Binary Int64 where
278 put i = put (fromIntegral i :: Word64)
279 get = liftM fromIntegral (get :: Get Word64)
280
281 ------------------------------------------------------------------------
282
283 -- Words are are written as Word64s, that is, 8 bytes in big endian format
284 instance Binary Word where
285 put i = put (fromIntegral i :: Word64)
286 get = liftM fromIntegral (get :: Get Word64)
287
288 -- Ints are are written as Int64s, that is, 8 bytes in big endian format
289 instance Binary Int where
290 put i = put (fromIntegral i :: Int64)
291 get = liftM fromIntegral (get :: Get Int64)
292
293 ------------------------------------------------------------------------
294 --
295 -- Integer. We try to do this efficiently on GHC, and on Hugs we'll have
296 -- to serialise to a list of Word8 plus a length.
297 --
298 -- Portable, and pretty efficient, serialisation of Integer
299 --
300
301 -- Integers are encoded in two ways: if they fit inside a machine word,
302 -- they're written as a byte tag, and that word. If the Integer value is
303 -- too large to fit in a word, it is written as a byte array, along with
304 -- a sign and length field.
305
306 instance Binary Integer where
307
308 put n | n >= lo && n <= hi = do
309 putWord8 0
310 put (fromIntegral n :: Int) -- fast path
311 where
312 hi = fromIntegral (maxBound :: Int) :: Integer
313 lo = fromIntegral (minBound :: Int) :: Integer
314
315 put n = do
316 putWord8 1
317 put sign
318 put (unroll (abs n)) -- unroll the bytes
319 where
320 sign = fromIntegral (signum n) :: Word8
321
322 get = do
323 tag <- get :: Get Word8
324 case tag of
325 0 -> liftM fromIntegral (get :: Get Int)
326 _ -> do sign <- get
327 bytes <- get
328 let v = roll bytes
329 return $! if sign == (1 :: Word8) then v else - v
330
331 --
332 -- Unfold an Integer to a list of its bytes
333 --
334 unroll :: Integer -> [Word8]
335 unroll = unfoldr step
336 where
337 step 0 = Nothing
338 step i = Just (fromIntegral (i .&. 0xff), i `shiftR` 8)
339
340 --
341 -- Fold a list of bytes back in to an Integer
342 --
343 roll :: [Word8] -> Integer
344 roll = foldr (\b a -> a `shiftL` 8 .|. fromIntegral b) 0
345
346 {-
347
348 --
349 -- An efficient, raw serialisation for Integer (GHC only)
350 --
351
352 -- TODO This instance is not architecture portable. GMP stores numbers as
353 -- arrays of machine sized words, so the byte format is not portable across
354 -- architectures with different endianess and word size.
355
356 import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
357 import GHC.Base hiding (ord, chr)
358 import GHC.Prim
359 import GHC.Ptr (Ptr(..))
360 import GHC.IOBase (IO(..))
361
362 instance Binary Integer where
363 put (S# i) = putWord8 0 >> put (I# i)
364 put (J# s ba) = do
365 putWord8 1
366 put (I# s)
367 put (BA ba)
368
369 get = do
370 b <- getWord8
371 case b of
372 0 -> do (I# i#) <- get
373 return (S# i#)
374 _ -> do (I# s#) <- get
375 (BA a#) <- get
376 return (J# s# a#)
377
378 instance Binary ByteArray where
379
380 -- Pretty safe.
381 put (BA ba) =
382 let sz = sizeofByteArray# ba -- (primitive) in *bytes*
383 addr = byteArrayContents# ba
384 bs = unsafePackAddress (I# sz) addr
385 in put bs -- write as a ByteString. easy, yay!
386
387 -- Pretty scary. Should be quick though
388 get = do
389 (fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
390 assert (off == 0) $ return $ unsafePerformIO $ do
391 (MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
392 let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
393 withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
394 freezeByteArray arr
395
396 -- wrapper for ByteArray#
397 data ByteArray = BA {-# UNPACK #-} !ByteArray#
398 data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
399
400 newByteArray :: Int# -> IO MBA
401 newByteArray sz = IO $ \s ->
402 case newPinnedByteArray# sz s of { (# s', arr #) ->
403 (# s', MBA arr #) }
404
405 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
406 freezeByteArray arr = IO $ \s ->
407 case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
408 (# s', BA arr' #) }
409
410 -}
411
412 ------------------------------------------------------------------------
413
414 -- Char is serialised as UTF-8
415 instance Binary Char where
416 put a | c <= 0x7f = put (fromIntegral c :: Word8)
417 | c <= 0x7ff = do put (0xc0 .|. y)
418 put (0x80 .|. z)
419 | c <= 0xffff = do put (0xe0 .|. x)
420 put (0x80 .|. y)
421 put (0x80 .|. z)
422 | c <= 0x10ffff = do put (0xf0 .|. w)
423 put (0x80 .|. x)
424 put (0x80 .|. y)
425 put (0x80 .|. z)
426 | otherwise = error "Not a valid Unicode code point"
427 where
428 c = ord a
429 z, y, x, w :: Word8
430 z = fromIntegral (c .&. 0x3f)
431 y = fromIntegral (shiftR c 6 .&. 0x3f)
432 x = fromIntegral (shiftR c 12 .&. 0x3f)
433 w = fromIntegral (shiftR c 18 .&. 0x7)
434
435 get = do
436 let getByte = liftM (fromIntegral :: Word8 -> Int) get
437 shiftL6 = flip shiftL 6 :: Int -> Int
438 w <- getByte
439 r <- case () of
440 _ | w < 0x80 -> return w
441 | w < 0xe0 -> do
442 x <- liftM (xor 0x80) getByte
443 return (x .|. shiftL6 (xor 0xc0 w))
444 | w < 0xf0 -> do
445 x <- liftM (xor 0x80) getByte
446 y <- liftM (xor 0x80) getByte
447 return (y .|. shiftL6 (x .|. shiftL6
448 (xor 0xe0 w)))
449 | otherwise -> do
450 x <- liftM (xor 0x80) getByte
451 y <- liftM (xor 0x80) getByte
452 z <- liftM (xor 0x80) getByte
453 return (z .|. shiftL6 (y .|. shiftL6
454 (x .|. shiftL6 (xor 0xf0 w))))
455 return $! chr r
456
457 ------------------------------------------------------------------------
458 -- Instances for the first few tuples
459
460 instance (Binary a, Binary b) => Binary (a,b) where
461 put (a,b) = put a >> put b
462 get = do a <- get
463 b <- get
464 return (a,b)
465
466 --
467 -- And then the recursive cases
468 --
469
470 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
471 put (a,b,c) = put (a, (b,c))
472 get = do (a,(b,c)) <- get ; return (a,b,c)
473
474 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
475 put (a,b,c,d) = put (a,(b,c,d))
476 get = do (a,(b,c,d)) <- get ; return (a,b,c,d)
477
478 instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
479 put (a,b,c,d,e) = put (a,(b,c,d,e))
480 get = do (a,(b,c,d,e)) <- get ; return (a,b,c,d,e)
481
482 instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
483 => Binary (a,b,c,d,e,f) where
484 put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
485 get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
486
487 instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
488 => Binary (a,b,c,d,e,f,g) where
489 put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
490 get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
491
492 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
493 Binary f, Binary g, Binary h)
494 => Binary (a,b,c,d,e,f,g,h) where
495 put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
496 get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
497
498 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
499 Binary f, Binary g, Binary h, Binary i)
500 => Binary (a,b,c,d,e,f,g,h,i) where
501 put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
502 get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
503
504 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
505 Binary f, Binary g, Binary h, Binary i, Binary j)
506 => Binary (a,b,c,d,e,f,g,h,i,j) where
507 put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
508 get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
509
510 ------------------------------------------------------------------------
511 -- Container types
512
513 instance Binary a => Binary [a] where
514 put l = put (length l) >> mapM_ put l
515 get = do n <- get :: Get Int
516 replicateM n get
517
518 instance (Binary a) => Binary (Maybe a) where
519 put Nothing = putWord8 0
520 put (Just x) = putWord8 1 >> put x
521 get = do
522 w <- getWord8
523 case w of
524 0 -> return Nothing
525 _ -> liftM Just get
526
527 instance (Binary a, Binary b) => Binary (Either a b) where
528 put (Left a) = putWord8 0 >> put a
529 put (Right b) = putWord8 1 >> put b
530 get = do
531 w <- getWord8
532 case w of
533 0 -> liftM Left get
534 _ -> liftM Right get
535
536 ------------------------------------------------------------------------
537 -- ByteStrings (have specially efficient instances)
538
539 instance Binary B.ByteString where
540 put bs = do put (B.length bs)
541 putByteString bs
542 get = get >>= getByteString
543
544 -- Using old versions of fps, this is a type synonym, and non portable
545 instance Binary ByteString where
546 put bs = do put (fromIntegral (L.length bs) :: Int)
547 putLazyByteString bs
548 get = get >>= getLazyByteString
549
550 ------------------------------------------------------------------------
551 -- Maps and Sets
552
553 instance (Ord a, Binary a) => Binary (Set.Set a) where
554 put = put . Set.toAscList
555 get = liftM Set.fromDistinctAscList get
556
557 instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
558 put = put . Map.toAscList
559 get = liftM Map.fromDistinctAscList get
560
561 instance Binary IntSet.IntSet where
562 put = put . IntSet.toAscList
563 get = liftM IntSet.fromDistinctAscList get
564
565 instance (Binary e) => Binary (IntMap.IntMap e) where
566 put = put . IntMap.toAscList
567 get = liftM IntMap.fromDistinctAscList get
568
569 ------------------------------------------------------------------------
570 -- Queues and Sequences
571
572 #if __GLASGOW_HASKELL__ >= 606
573 --
574 -- This is valid Hugs, but you need the most recent Hugs
575 --
576
577 instance (Binary e) => Binary (Seq.Seq e) where
578 -- any better way to do this?
579 put s = put . flip unfoldr s $ \sq ->
580 case Seq.viewl sq of
581 Seq.EmptyL -> Nothing
582 (Seq.:<) e sq' -> Just (e,sq')
583 get = fmap Seq.fromList get
584
585 #endif
586
587 ------------------------------------------------------------------------
588 -- Trees
589
590 instance (Binary e) => Binary (T.Tree e) where
591 put (T.Node r s) = put r >> put s
592 get = liftM2 T.Node get get
593
594 ------------------------------------------------------------------------
595 -- Arrays
596
597 instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
598 put a = do
599 put (bounds a)
600 put (elems a)
601 get = do
602 bs <- get
603 es <- get
604 return (listArray bs es)
605
606 -- The IArray UArray e constraint is non portable.
607 instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
608 put a = do
609 put (bounds a)
610 put (elems a)
611 get = do
612 bs <- get
613 es <- get
614 return (listArray bs es)