From 98e494afed3c73f88ff1d57a9ca46b1f6ddbd1b9 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Fri, 17 Feb 2017 17:30:16 -0800
Subject: [PATCH] Improve Haddock documentation for compact.
Test Plan: none
Reviewers: simonmar, bgamari, austin
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D3148

libraries/compact/Data/Compact.hs  89 +++++++++++++++++++++++
libraries/compact/Data/Compact/Internal.hs  42 ++++++++
libraries/compact/Data/Compact/Serialized.hs  14 ++
3 files changed, 110 insertions(+), 35 deletions(-)
diff git a/libraries/compact/Data/Compact.hs b/libraries/compact/Data/Compact.hs
index 42245c5..ce6bf2b 100644
 a/libraries/compact/Data/Compact.hs
+++ b/libraries/compact/Data/Compact.hs
@@ 15,10 +15,48 @@
 Stability : unstable
 Portability : nonportable (GHC Extensions)

 This module provides a data structure, called a Compact, for
 holding fully evaluated data in a consecutive block of memory.
+ This module provides a data structure, called a 'Compact', for
+ holding immutable, fully evaluated data in a consecutive block of memory.
+ Compact regions are good for two things:

 /Since: 1.0.0/
+ 1. Data in a compact region is not traversed during GC; any
+ incoming pointer to a compact region keeps the entire region
+ live. Thus, if you put a long-lived data structure in a compact
+ region, you may save a lot of cycles during major collections,
+ since you will no longer be (uselessly) retraversing this
+ data structure.
+
+ 2. Because the data is stored contiguously, you can easily
+ dump the memory to disk and/or send it over the network.
+ For applications that are not bandwidth bound (GHC's heap
+ representation can be as much as a 4x expansion over a
+ binary serialization), this can lead to substantial speed ups.
+
+ For example, suppose you have a function @loadBigStruct :: IO BigStruct@,
+ which loads a large data structure from the file system. First,
+ ensure that @BigStruct@ is immutable by defining an 'NFData' instance
+ for it. Then, you can "compact" the structure with the following code:
+
+ @
+ do r <- 'compact' =<< loadBigStruct
+ let x = 'getCompact' r :: BigStruct
+ -- Do things with x
+ @
+
+ Note that 'compact' will not preserve internal sharing; use
+ 'compactWithSharing' (which is 10x slower) if you have cycles and/or
+ must preserve sharing. The 'Compact' pointer @r@ can be used
+ to add more data to a compact region; see 'compactAdd' or
+ 'compactAddWithSharing'.
+
+ The implementation of compact regions is described by:
+
+ * Edward Z. Yang, Giovanni Campagna, Ömer Ağacan, Ahmed El-Hassany, Abhishek
+ Kulkarni, Ryan Newton. \"/Efficient Communication and Collection with Compact
+ Normal Forms/\". In Proceedings of the 20th ACM SIGPLAN International
+ Conference on Functional Programming. September 2015.
+
+ This library is supported by GHC 8.2 and later.
module Data.Compact (
 * The Compact type
@@ 47,15 +85,21 @@ import GHC.Types
import Data.Compact.Internal as Internal
  Retrieve the object that was stored in a 'Compact'
+  Retrieve a direct pointer to the value pointed at by a 'Compact' reference.
+ If you have used 'compactAdd', there may be multiple 'Compact' references
+ into the same compact region. Upholds the property:
+
+ > inCompact c (getCompact c) == True
+
getCompact :: Compact a > a
getCompact (Compact _ obj _) = obj
  Compact a value. /O(size of unshared data)/

 If the structure contains any internal sharing, the shared data
 will be duplicated during the compaction process. Loops if the
 structure contains cycles.
+ will be duplicated during the compaction process. This will
+ not terminate if the structure contains cycles (use 'compactWithSharing'
+ instead).

 The NFData constraint is just to ensure that the object contains no
 functions, 'compact' does not actually use it. If your object
@@ 80,39 +124,56 @@ compact = Internal.compactSized 31268 False
compactWithSharing :: NFData a => a > IO (Compact a)
compactWithSharing = Internal.compactSized 31268 True
  Add a value to an existing 'Compact'. Behaves exactly like
 'compact' with respect to sharing and the 'NFData' constraint.
+  Add a value to an existing 'Compact'. This will help you avoid
+ copying when the value contains pointers into the compact region,
+ but remember that after compaction this value will only be deallocated
+ with the entire compact region.
+
+ Behaves exactly like 'compact' with respect to sharing and the 'NFData'
+ constraint.
+
compactAdd :: NFData a => Compact b > a > IO (Compact a)
compactAdd (Compact compact# _ lock) a = withMVar lock $ \_ > IO $ \s >
case compactAdd# compact# a s of { (# s1, pk #) >
(# s1, Compact compact# pk lock #) }
  Add a value to an existing 'Compact'. Behaves exactly like
 'compactWithSharing' with respect to sharing and the 'NFData'
 constraint.
+  Add a value to an existing 'Compact', like 'compactAdd', but
+ behaving exactly like 'compactWithSharing' with respect to
+ sharing and the 'NFData' constraint.
+
compactAddWithSharing :: NFData a => Compact b > a > IO (Compact a)
compactAddWithSharing (Compact compact# _ lock) a =
withMVar lock $ \_ > IO $ \s >
case compactAddWithSharing# compact# a s of { (# s1, pk #) >
(# s1, Compact compact# pk lock #) }

  Check if the second argument is inside the 'Compact'
+  Check if the second argument is inside the passed 'Compact'.
+
inCompact :: Compact b > a > IO Bool
inCompact (Compact buffer _ _) !val =
IO (\s > case compactContains# buffer val s of
(# s', v #) > (# s', isTrue# v #) )
  Check if the argument is in any 'Compact'
+  Check if the argument is in any 'Compact'. If true, the value in question
+ is also fully evaluated, since any value in a compact region must
+ be fully evaluated.
+
isCompact :: a > IO Bool
isCompact !val =
IO (\s > case compactContainsAny# val s of
(# s', v #) > (# s', isTrue# v #) )
+  Returns the size in bytes of the compact region.
+
compactSize :: Compact a > IO Word
compactSize (Compact buffer _ lock) = withMVar lock $ \_ > IO $ \s0 >
case compactSize# buffer s0 of (# s1, sz #) > (# s1, W# sz #)
+  *Experimental.* This function doesn't actually resize a compact
+ region; rather, it changes the default block size which we allocate
+ when the current block runs out of space, and also appends a block
+ to the compact region.
+
compactResize :: Compact a > Word > IO ()
compactResize (Compact oldBuffer _ lock) (W# new_size) =
withMVar lock $ \_ > IO $ \s >
diff git a/libraries/compact/Data/Compact/Internal.hs b/libraries/compact/Data/Compact/Internal.hs
index 2780d19..2857a9d 100644
 a/libraries/compact/Data/Compact/Internal.hs
+++ b/libraries/compact/Data/Compact/Internal.hs
@@ 13,11 +13,9 @@
 Stability : unstable
 Portability : nonportable (GHC Extensions)

 This module provides a data structure, called a Compact, for
 holding fully evaluated data in a consecutive block of memory.

 This is a private implementation detail of the package and should
 not be imported directly.
+ This module provides some internal functions and representation for dealing
+ with compact regions, which may be of interest if you need higher
+ performance.

 /Since: 1.0.0/
@@ 50,13 +48,20 @@ import GHC.Types
 separate copy of the data.

 The cost of compaction is similar to the cost of GC for the same
 data, but it is performed only once. However, retaining internal
 sharing during the compaction process is very costly, so it is
 optional; there are two ways to create a 'Compact': 'compact' and
 'compactWithSharing'.

 Data can be added to an existing 'Compact' with 'compactAdd' or
 'compactAddWithSharing'.
+ data, but it is performed only once. However, because
+ "Data.Compact.compact" does not stop-the-world, retaining internal
+ sharing during the compaction process is very costly. The user
+ can choose whether to 'compact' or 'compactWithSharing'.
+
+ When you have a @'Compact' a@, you can get a pointer to the actual object
+ in the region using "Data.Compact.getCompact". The 'Compact' type
+ serves as a handle on the region itself; you can use this handle
+ to add data to a specific 'Compact' with 'compactAdd' or
+ 'compactAddWithSharing' (giving you a new handle which corresponds
+ to the same compact region, but points to the newly added object
+ in the region). At the moment, due to technical reasons,
+ it's not possible to get the @'Compact' a@ if you only have an @a@,
+ so make sure you hold on to the handle as necessary.

 Data in a compact doesn't ever move, so compacting data is also a
 way to pin arbitrary data structures in memory.
@@ 70,8 +75,8 @@ import GHC.Types
 address (the address might be stored in a C data structure, for
 example), so we can't make a copy of it to store in the 'Compact'.

 * Mutable objects also cannot be compacted, because subsequent
 mutation would destroy the property that a compact is
+ * Objects with mutable pointer fields also cannot be compacted,
+ because subsequent mutation would destroy the property that a compact is
 self-contained.

 If compaction encounters any of the above, a 'CompactionFailed'
@@ 83,6 +88,10 @@ data Compact a = Compact Compact# a (MVar ())
 The MVar here is to enforce mutual exclusion among writers.
 Note: the MVar protects the Compact# only, not the pure value 'a'
+  Make a new 'Compact' object, given a pointer to the true
+ underlying region. You must uphold the invariant that @a@ lives
+ in the compact region.
+
mkCompact
:: Compact# > a > State# RealWorld > (# State# RealWorld, Compact a #)
mkCompact compact# a s =
@@ 91,6 +100,11 @@ mkCompact compact# a s =
where
unIO (IO a) = a
+  Transfer @a@ into a new compact region, with a preallocated size,
+ possibly preserving sharing or not. If you know how big the data
+ structure in question is, you can save time by picking an appropriate
+ block size for the compact region.
+
compactSized :: NFData a => Int > Bool > a > IO (Compact a)
compactSized (I# size) share a = IO $ \s0 >
case compactNew# (int2Word# size) s0 of { (# s1, compact# #) >
diff git a/libraries/compact/Data/Compact/Serialized.hs b/libraries/compact/Data/Compact/Serialized.hs
index bdc2aff..bf2b4f7 100644
 a/libraries/compact/Data/Compact/Serialized.hs
+++ b/libraries/compact/Data/Compact/Serialized.hs
@@ 14,9 +14,6 @@
 Stability : unstable
 Portability : nonportable (GHC Extensions)

 This module provides a data structure, called a Compact, for
 holding fully evaluated data in a consecutive block of memory.

 This module contains support for serializing a Compact for network
 transmission and on-disk storage.

@@ 45,7 +42,7 @@ import Control.DeepSeq(NFData, force)
import Data.Compact.Internal
 A serialized version of the 'Compact' metadata (each block with
+  A serialized version of the 'Compact' metadata (each block with
 address and size and the address of the root). This structure is
 meant to be sent alongside the actual 'Compact' data. It can be
 sent out of band in advance if the data is to be sent over RDMA
@@ 58,7 +55,6 @@ data SerializedCompact a = SerializedCompact
addrIsNull :: Addr# > Bool
addrIsNull addr = isTrue# (nullAddr# `eqAddr#` addr)

compactGetFirstBlock :: Compact# > IO (Ptr a, Word)
compactGetFirstBlock buffer =
IO (\s > case compactGetFirstBlock# buffer s of
@@ 85,10 +81,11 @@ mkBlockList buffer = compactGetFirstBlock buffer >>= go
 before func had a chance to copy everything into its own
 buffers/sockets/whatever
 Serialize the 'Compact', and call the provided function with
+  Serialize the 'Compact', and call the provided function with
 with the 'Compact' serialized representation. The resulting
 action will be executed synchronously before this function
 completes.
+
{# NOINLINE withSerializedCompact #}
withSerializedCompact :: NFData c => Compact a >
(SerializedCompact a > IO c) > IO c
@@ 115,7 +112,7 @@ fixupPointers firstBlock rootAddr s =
(# root #) > case mkCompact buffer root s' of
(# s'', c #) > (# s'', Just c #)
 Deserialize a 'SerializedCompact' into a inmemory 'Compact'. The
+  Deserialize a 'SerializedCompact' into an in-memory 'Compact'. The
 provided function will be called with the address and size of each
 newly allocated block in succession, and should fill the memory
 from the external source (eg. by reading from a socket or from disk)
@@ 191,6 +188,9 @@ sanityCheckByteStrings (SerializedCompact scl _) bsl = go scl bsl
go ((_, size):scs) (bs:bss) =
fromIntegral size == ByteString.length bs && go scs bss
+  Convenience function for importing a compact region that is represented
+ by a list of strict 'ByteString's.
+
importCompactByteStrings :: SerializedCompact a > [ByteString.ByteString] >
IO (Maybe (Compact a))
importCompactByteStrings serialized stringList =

1.9.1