3716458fcc7103bbe6853abbe6f46f17083cf9a6
[packages/binary.git] / src / Data / Binary.hs
1 -----------------------------------------------------------------------------
2 -- |
3 -- Module : Data.Binary
4 -- Copyright : Lennart Kolmodin
5 -- License : BSD3-style (see LICENSE)
6 --
7 -- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
8 -- Stability : unstable
9 -- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
10 --
11 -- Binary serialisation of values to and from lazy ByteStrings.
12 --
13 -----------------------------------------------------------------------------
14
15 module Data.Binary (
16
17 -- * The Binary class
18 Binary(..)
19
20 -- * The Get and Put monads
21 , Get
22 , Put
23
24 -- * Useful helpers for writing instances
25 , putWord8
26 , getWord8
27
28 -- * Binary serialisation
29 , encode -- :: Binary a => a -> ByteString
30 , decode -- :: Binary a => ByteString -> a
31
32 -- * IO functions for serialisation
33 , encodeFile -- :: Binary a => FilePath -> a -> IO ()
34 , decodeFile -- :: Binary a => FilePath -> IO a
35
36 -- * Lazy put and get
37 , lazyPut
38 , lazyGet
39
40 ) where
41
42 import Data.Binary.Put
43 import Data.Binary.Get
44
45 import Control.Monad
46 import Foreign
47 import System.IO
48
49 import Data.ByteString.Lazy (ByteString)
50 import qualified Data.ByteString.Lazy as L
51
52 import Data.Char (chr,ord)
53 import Data.List (unfoldr)
54
55 -- And needed for the instances:
56 import qualified Data.ByteString as B
57 import qualified Data.Map as Map
58 import qualified Data.Set as Set
59 import qualified Data.IntMap as IntMap
60 import qualified Data.IntSet as IntSet
61
62 import qualified Data.Tree as T
63
64 import Data.Array.Unboxed
65
66 --
67 -- This isn't available in older Hugs or older GHC
68 --
69 #if __GLASGOW_HASKELL__ >= 606
70 import qualified Data.Sequence as Seq
71 #endif
72
73 ------------------------------------------------------------------------
74
75 -- | The @Binary@ class provides 'put' and 'get', methods to encode and
76 -- decode a value to a lazy bytestring.
77 --
78 -- New instances for binary should have the following property:
79 --
80 -- > get . put == id
81 --
82 -- A range of instances are provided for basic Haskell types. To
83 -- serialise a custom type, an instance of Binary for that type is
84 -- required. For example, suppose we have a data structure:
85 --
86 -- > data Exp = IntE Int
87 -- > | OpE String Exp Exp
88 -- > deriving Show
89 --
90 -- We can encode values of this type into bytestrings using the
91 -- following instance, which proceeds by recursively breaking down the
92 -- structure to serialise:
93 --
94 -- > instance Binary Exp where
95 -- > put (IntE i) = do put (0 :: Word8)
96 -- > put i
97 -- > put (OpE s e1 e2) = do put (1 :: Word8)
98 -- > put s
99 -- > put e1
100 -- > put e2
101 -- >
102 -- > get = do t <- get :: Get Word8
103 -- > case t of
104 -- > 0 -> do i <- get
105 -- > return (IntE i)
106 -- > 1 -> do s <- get
107 -- > e1 <- get
108 -- > e2 <- get
109 -- > return (OpE s e1 e2)
110 --
111 -- Note how we write an initial tag byte to indicate each variant of the
112 -- data type.
113 --
114 -- To serialise this to a bytestring, we use 'encode', which packs the
115 -- data structure into a binary format, in a lazy bytestring
116 --
117 -- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
118 -- > > let v = encode e
119 --
120 -- Where 'v' is a binary encoded data structure. To reconstruct the
121 -- original data, we use 'decode'
122 --
123 -- > > decode v :: Exp
124 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
125 --
126 -- The lazy ByteString that results from 'encode' can be written to
127 -- disk, and read from disk using Data.ByteString.Lazy IO functions,
128 -- such as hPutStr or writeFile:
129 --
130 -- > > writeFile "/tmp/exp.txt" (encode e)
131 --
132 -- And read back with:
133 --
134 -- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
135 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
136 --
137 -- We can also directly serialise a value to and from a Handle, or a file:
138 --
139 -- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
140 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
141 --
142 -- And write a value to disk
143 --
144 -- > > encodeFile "/tmp/a.txt" v
145 --
146 class Binary t where
147 -- | Encode a value in the Put monad.
148 put :: t -> Put
149 -- | Decode a value in the Get monad
150 get :: Get t
151
152 ------------------------------------------------------------------------
153 -- Wrappers to run the underlying monad
154
155 -- | Encode a value using binary serialisation to a lazy ByteString.
156 --
157 encode :: Binary a => a -> ByteString
158 encode = runPut . put
159
160 -- | Decode a value from a lazy ByteString, reconstructing the original structure.
161 --
162 decode :: Binary a => ByteString -> a
163 decode = runGet get
164
165 ------------------------------------------------------------------------
166 -- Convenience IO operations
167
168 -- | Lazily serialise a value to a file
169 --
170 -- This is just a convenience function, it's defined simply as:
171 --
172 -- > encodeFile f = B.writeFile f . encode
173 --
174 -- So for example if you wanted to compress as well, you could use:
175 --
176 -- > B.writeFile f . compress . encode
177 --
178 encodeFile :: Binary a => FilePath -> a -> IO ()
179 encodeFile f v = L.writeFile f (encode v)
180
181 -- | Lazily reconstruct a value previously written to a file
182 --
183 -- This is just a convenience function, it's defined simply as:
184 --
185 -- > decodeFile f = return . decode =<< B.readFile f
186 --
187 -- So for example if you wanted to decompress as well, you could use:
188 --
189 -- > return . decode . decompress =<< B.readFile f
190 --
191 decodeFile :: Binary a => FilePath -> IO a
192 decodeFile f = liftM decode (L.readFile f)
193
194 ------------------------------------------------------------------------
195 -- Lazy put and get
196
197 lazyPut :: (Binary a) => a -> Put
198 lazyPut a = put (encode a)
199
200 lazyGet :: (Binary a) => Get a
201 lazyGet = fmap decode get
202
203 ------------------------------------------------------------------------
204 -- Simple instances
205
206 -- The () type need never be written to disk: values of singleton type
207 -- can be reconstructed from the type alone
208 instance Binary () where
209 put () = return ()
210 get = return ()
211
212 -- Bools are encoded as a byte in the range 0 .. 1
213 instance Binary Bool where
214 put = putWord8 . fromIntegral . fromEnum
215 get = liftM (toEnum . fromIntegral) getWord8
216
217 -- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
218 instance Binary Ordering where
219 put = putWord8 . fromIntegral . fromEnum
220 get = liftM (toEnum . fromIntegral) getWord8
221
222 ------------------------------------------------------------------------
223 -- Words and Ints
224
225 -- Words8s are written as bytes
226 instance Binary Word8 where
227 put = putWord8
228 get = getWord8
229
230 -- Words16s are written as 2 bytes in big-endian (network) order
231 instance Binary Word16 where
232 put = putWord16be
233 get = getWord16be
234
235 -- Words32s are written as 4 bytes in big-endian (network) order
236 instance Binary Word32 where
237 put = putWord32be
238 get = getWord32be
239
240 -- Words64s are written as 8 bytes in big-endian (network) order
241 instance Binary Word64 where
242 put = putWord64be
243 get = getWord64be
244
245 -- Int8s are written as a single byte.
246 instance Binary Int8 where
247 put i = put (fromIntegral i :: Word8)
248 get = liftM fromIntegral (get :: Get Word8)
249
250 -- Int16s are written as a 2 bytes in big endian format
251 instance Binary Int16 where
252 put i = put (fromIntegral i :: Word16)
253 get = liftM fromIntegral (get :: Get Word16)
254
255 -- Int32s are written as a 4 bytes in big endian format
256 instance Binary Int32 where
257 put i = put (fromIntegral i :: Word32)
258 get = liftM fromIntegral (get :: Get Word32)
259
260 -- Int64s are written as a 4 bytes in big endian format
261 instance Binary Int64 where
262 put i = put (fromIntegral i :: Word64)
263 get = liftM fromIntegral (get :: Get Word64)
264
265 ------------------------------------------------------------------------
266
267 -- Words are are written as Word64s, that is, 8 bytes in big endian format
268 instance Binary Word where
269 put i = put (fromIntegral i :: Word64)
270 get = liftM fromIntegral (get :: Get Word64)
271
272 -- Ints are are written as Int64s, that is, 8 bytes in big endian format
273 instance Binary Int where
274 put i = put (fromIntegral i :: Int64)
275 get = liftM fromIntegral (get :: Get Int64)
276
277 ------------------------------------------------------------------------
278 --
279 -- Integer. We try to do this efficiently on GHC, and on Hugs we'll have
280 -- to serialise to a list of Word8 plus a length.
281 --
282 -- Portable, and pretty efficient, serialisation of Integer
283 --
284
285 -- Integers are encoded in two ways: if they fit inside a machine word,
286 -- they're written as a byte tag, and that word. If the Integer value is
287 -- too large to fit in a word, it is written as a byte array, along with
288 -- a sign and length field.
289
290 instance Binary Integer where
291
292 put n | n >= lo && n <= hi = do
293 putWord8 0
294 put (fromIntegral n :: Int) -- fast path
295 where
296 hi = fromIntegral (maxBound :: Int) :: Integer
297 lo = fromIntegral (minBound :: Int) :: Integer
298
299 put n = do
300 putWord8 1
301 put sign
302 put (unroll (abs n)) -- unroll the bytes
303 where
304 sign = fromIntegral (signum n) :: Word8
305
306 get = do
307 tag <- get :: Get Word8
308 case tag of
309 0 -> liftM fromIntegral (get :: Get Int)
310 _ -> do sign <- get
311 bytes <- get
312 let v = roll bytes
313 return $! if sign == (1 :: Word8) then v else - v
314
315 --
316 -- Unfold an Integer to a list of its bytes
317 --
318 unroll :: Integer -> [Word8]
319 unroll = unfoldr step
320 where
321 step 0 = Nothing
322 step i = Just (fromIntegral (i .&. 0xff), i `shiftR` 8)
323
324 --
325 -- Fold a list of bytes back in to an Integer
326 --
327 roll :: [Word8] -> Integer
328 roll = foldr (\b a -> a `shiftL` 8 .|. fromIntegral b) 0
329
330 {-
331
332 --
333 -- An efficient, raw serialisation for Integer (GHC only)
334 --
335
336 -- TODO This instance is not architecture portable. GMP stores numbers as
337 -- arrays of machine sized words, so the byte format is not portable across
338 -- architectures with different endianess and word size.
339
340 import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
341 import GHC.Base hiding (ord, chr)
342 import GHC.Prim
343 import GHC.Ptr (Ptr(..))
344 import GHC.IOBase (IO(..))
345
346 instance Binary Integer where
347 put (S# i) = putWord8 0 >> put (I# i)
348 put (J# s ba) = do
349 putWord8 1
350 put (I# s)
351 put (BA ba)
352
353 get = do
354 b <- getWord8
355 case b of
356 0 -> do (I# i#) <- get
357 return (S# i#)
358 _ -> do (I# s#) <- get
359 (BA a#) <- get
360 return (J# s# a#)
361
362 instance Binary ByteArray where
363
364 -- Pretty safe.
365 put (BA ba) =
366 let sz = sizeofByteArray# ba -- (primitive) in *bytes*
367 addr = byteArrayContents# ba
368 bs = unsafePackAddress (I# sz) addr
369 in put bs -- write as a ByteString. easy, yay!
370
371 -- Pretty scary. Should be quick though
372 get = do
373 (fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
374 assert (off == 0) $ return $ unsafePerformIO $ do
375 (MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
376 let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
377 withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
378 freezeByteArray arr
379
380 -- wrapper for ByteArray#
381 data ByteArray = BA {-# UNPACK #-} !ByteArray#
382 data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
383
384 newByteArray :: Int# -> IO MBA
385 newByteArray sz = IO $ \s ->
386 case newPinnedByteArray# sz s of { (# s', arr #) ->
387 (# s', MBA arr #) }
388
389 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
390 freezeByteArray arr = IO $ \s ->
391 case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
392 (# s', BA arr' #) }
393
394 -}
395
396 ------------------------------------------------------------------------
397
398 -- Char is serialised as UTF-8
399 instance Binary Char where
400 put a | c <= 0x7f = put (fromIntegral c :: Word8)
401 | c <= 0x7ff = do put (0xc0 .|. y)
402 put (0x80 .|. z)
403 | c <= 0xffff = do put (0xe0 .|. x)
404 put (0x80 .|. y)
405 put (0x80 .|. z)
406 | c <= 0x10ffff = do put (0xf0 .|. w)
407 put (0x80 .|. x)
408 put (0x80 .|. y)
409 put (0x80 .|. z)
410 | otherwise = error "Not a valid Unicode code point"
411 where
412 c = ord a
413 z, y, x, w :: Word8
414 z = fromIntegral (c .&. 0x3f)
415 y = fromIntegral (shiftR c 6 .&. 0x3f)
416 x = fromIntegral (shiftR c 12 .&. 0x3f)
417 w = fromIntegral (shiftR c 18 .&. 0x7)
418
419 get = do
420 let getByte = liftM (fromIntegral :: Word8 -> Int) get
421 shiftL6 = flip shiftL 6 :: Int -> Int
422 w <- getByte
423 r <- case () of
424 _ | w < 0x80 -> return w
425 | w < 0xe0 -> do
426 x <- liftM (xor 0x80) getByte
427 return (x .|. shiftL6 (xor 0xc0 w))
428 | w < 0xf0 -> do
429 x <- liftM (xor 0x80) getByte
430 y <- liftM (xor 0x80) getByte
431 return (y .|. shiftL6 (x .|. shiftL6
432 (xor 0xe0 w)))
433 | otherwise -> do
434 x <- liftM (xor 0x80) getByte
435 y <- liftM (xor 0x80) getByte
436 z <- liftM (xor 0x80) getByte
437 return (z .|. shiftL6 (y .|. shiftL6
438 (x .|. shiftL6 (xor 0xf0 w))))
439 return $! chr r
440
441 ------------------------------------------------------------------------
442 -- Instances for the first few tuples
443
444 instance (Binary a, Binary b) => Binary (a,b) where
445 put (a,b) = put a >> put b
446 get = do a <- get
447 b <- get
448 return (a,b)
449
450 --
451 -- And then the recursive cases
452 --
453
454 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
455 put (a,b,c) = put (a, (b,c))
456 get = do (a,(b,c)) <- get ; return (a,b,c)
457
458 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
459 put (a,b,c,d) = put (a,(b,c,d))
460 get = do (a,(b,c,d)) <- get ; return (a,b,c,d)
461
462 instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
463 put (a,b,c,d,e) = put (a,(b,c,d,e))
464 get = do (a,(b,c,d,e)) <- get ; return (a,b,c,d,e)
465
466 instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
467 => Binary (a,b,c,d,e,f) where
468 put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
469 get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
470
471 instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
472 => Binary (a,b,c,d,e,f,g) where
473 put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
474 get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
475
476 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
477 Binary f, Binary g, Binary h)
478 => Binary (a,b,c,d,e,f,g,h) where
479 put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
480 get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
481
482 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
483 Binary f, Binary g, Binary h, Binary i)
484 => Binary (a,b,c,d,e,f,g,h,i) where
485 put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
486 get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
487
488 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
489 Binary f, Binary g, Binary h, Binary i, Binary j)
490 => Binary (a,b,c,d,e,f,g,h,i,j) where
491 put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
492 get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
493
494 ------------------------------------------------------------------------
495 -- Container types
496
497 instance Binary a => Binary [a] where
498 put l = put (length l) >> mapM_ put l
499 get = do n <- get :: Get Int
500 replicateM n get
501
502 instance (Binary a) => Binary (Maybe a) where
503 put Nothing = putWord8 0
504 put (Just x) = putWord8 1 >> put x
505 get = do
506 w <- getWord8
507 case w of
508 0 -> return Nothing
509 _ -> liftM Just get
510
511 instance (Binary a, Binary b) => Binary (Either a b) where
512 put (Left a) = putWord8 0 >> put a
513 put (Right b) = putWord8 1 >> put b
514 get = do
515 w <- getWord8
516 case w of
517 0 -> liftM Left get
518 _ -> liftM Right get
519
520 ------------------------------------------------------------------------
521 -- ByteStrings (have specially efficient instances)
522
523 instance Binary B.ByteString where
524 put bs = do put (B.length bs)
525 putByteString bs
526 get = get >>= getByteString
527
528 -- Using old versions of fps, this is a type synonym, and non portable
529 instance Binary ByteString where
530 put bs = do put (fromIntegral (L.length bs) :: Int)
531 putLazyByteString bs
532 get = get >>= getLazyByteString
533
534 ------------------------------------------------------------------------
535 -- Maps and Sets
536
537 instance (Ord a, Binary a) => Binary (Set.Set a) where
538 put = put . Set.toAscList
539 get = liftM Set.fromDistinctAscList get
540
541 instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
542 put = put . Map.toAscList
543 get = liftM Map.fromDistinctAscList get
544
545 instance Binary IntSet.IntSet where
546 put = put . IntSet.toAscList
547 get = liftM IntSet.fromDistinctAscList get
548
549 instance (Binary e) => Binary (IntMap.IntMap e) where
550 put = put . IntMap.toAscList
551 get = liftM IntMap.fromDistinctAscList get
552
553 ------------------------------------------------------------------------
554 -- Queues and Sequences
555
556 #if __GLASGOW_HASKELL__ >= 606
557 --
558 -- This is valid Hugs, but you need the most recent Hugs
559 --
560
561 instance (Binary e) => Binary (Seq.Seq e) where
562 -- any better way to do this?
563 put s = put . flip unfoldr s $ \sq ->
564 case Seq.viewl sq of
565 Seq.EmptyL -> Nothing
566 (Seq.:<) e sq' -> Just (e,sq')
567 get = fmap Seq.fromList get
568
569 #endif
570
571 ------------------------------------------------------------------------
572 -- Trees
573
574 instance (Binary e) => Binary (T.Tree e) where
575 put (T.Node r s) = put r >> put s
576 get = liftM2 T.Node get get
577
578 ------------------------------------------------------------------------
579 -- Arrays
580
581 instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
582 put a = do
583 put (bounds a)
584 put (elems a)
585 get = do
586 bs <- get
587 es <- get
588 return (listArray bs es)
589
590 -- The IArray UArray e constraint is non portable.
591 instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
592 put a = do
593 put (bounds a)
594 put (elems a)
595 get = do
596 bs <- get
597 es <- get
598 return (listArray bs es)