dph-prim-seq: better docs for vectors module, and add to cabal file
[packages/dph.git] / dph-prim-seq / Data / Array / Parallel / Unlifted / Sequential / Vectors.hs
1 {-# LANGUAGE BangPatterns, FlexibleInstances, UndecidableInstances, CPP #-}
2 #include "fusion-phases.h"
3
4 -- | Irregular two dimensional arrays.
5 --
6 -- * TODO: The inner arrays should be unboxed so we don't get an unboxing overhead
7 -- for every call to unsafeIndex2. This might need an extension to the GHC
8 -- runtime if we alwo want to convert a U.Vector directly to this form.
9 --
10 -- * TODO: We currently only allow primitive types to be in a Vectors, but
11 -- in future we'll want `Vectors` of tuples etc.
12 --
13 module Data.Array.Parallel.Unlifted.Sequential.Vectors
14 ( Vectors(..)
15 , Unboxes
16 , empty
17 , singleton
18 , unsafeIndex
19 , unsafeIndex2
20 , unsafeIndexUnpack
21 , append
22 , fromVector
23 , toVector
24
25 , unsafeExtracts
26 , unsafeStreamVectors)
27 where
28 import qualified Data.Primitive.ByteArray as P
29 import qualified Data.Primitive.Array as P
30 import qualified Data.Primitive.Types as P
31
32 import qualified Data.Vector.Generic as G
33 import qualified Data.Vector.Fusion.Stream as S
34 import qualified Data.Vector.Fusion.Stream.Size as S
35 import qualified Data.Vector.Fusion.Stream.Monadic as M
36 import qualified Data.Vector.Primitive as R
37 import qualified Data.Vector.Unboxed as U
38 import qualified Data.Vector as V
39 import Data.Vector.Unboxed (Unbox)
40
41 import qualified Data.Array.Parallel.Unlifted.Sequential.USegd as USegd
42 import qualified Data.Array.Parallel.Unlifted.Sequential.USSegd as USSegd
43 import Data.Array.Parallel.Unlifted.Sequential.USSegd (USSegd(..))
44 import System.IO.Unsafe
45 import Prelude hiding (length)
46 import Debug.Trace
47 import Data.Word
48
49 -- | Class of element types that can be used in a `Vectors`
50 class R.Prim a => Unboxes a
51 instance Unboxes Int
52 instance Unboxes Word8
53 instance Unboxes Float
54 instance Unboxes Double
55
56
57 -- | A 2-dimensional array,
58 -- where the inner arrays can all have different lengths.
59 data Vectors a
60 = Vectors
61 {-# UNPACK #-} !Int -- number of inner vectors
62 {-# UNPACK #-} !P.ByteArray -- starting index of each vector in its chunk
63 {-# UNPACK #-} !P.ByteArray -- lengths of each inner vector
64 {-# UNPACK #-} !(P.Array P.ByteArray) -- chunks
65
66 instance (Unboxes a, Unbox a, Show a) => Show (Vectors a) where
67 show = show . toVector
68 {-# NOINLINE show #-}
69
70 -- | Construct an empty `Vectors` with no arrays of no elements.
71 empty :: Vectors a
72 empty
73 = unsafePerformIO
74 $ do mba <- P.newByteArray 0
75 ba <- P.unsafeFreezeByteArray mba
76 marr <- P.newArray 0 ba
77 arr <- P.unsafeFreezeArray marr
78 return $ Vectors 0 ba ba arr
79 {-# INLINE_U empty #-}
80
81
82 -- | Construct a `Vectors` containing data from a single unboxed array.
83 singleton :: Unboxes a => R.Vector a -> Vectors a
84 singleton vec
85 = unsafePerformIO
86 $ do R.MVector start len mbaData <- R.unsafeThaw vec
87 baData <- P.unsafeFreezeByteArray mbaData
88
89 mbaStarts <- P.newByteArray 1
90 P.writeByteArray mbaStarts 0 start
91 baStarts <- P.unsafeFreezeByteArray mbaStarts
92
93 mbaLengths <- P.newByteArray 1
94 P.writeByteArray mbaLengths 0 len
95 baLengths <- P.unsafeFreezeByteArray mbaLengths
96
97 maChunks <- P.newArray 1 baData
98 aChunks <- P.unsafeFreezeArray maChunks
99
100 return $ Vectors 1 baStarts baLengths aChunks
101 {-# INLINE_U singleton #-}
102
103
104 -- | Yield the number of vectors in a `Vectors`.
105 length :: Unboxes a => Vectors a -> Int
106 length (Vectors len _ _ _) = len
107 {-# INLINE_U length #-}
108
109
110 -- | Take one of the outer vectors from a `Vectors`.
111 unsafeIndex :: (Unboxes a, Unbox a) => Vectors a -> Int -> U.Vector a
112 unsafeIndex (Vectors _ starts lens arrs) ix
113 = G.convert
114 $ unsafePerformIO
115 $ do let start = P.indexByteArray starts ix
116 let len = P.indexByteArray lens ix
117 let arr = P.indexArray arrs ix
118 marr <- P.unsafeThawByteArray arr
119 let mvec = R.MVector start len marr
120 R.unsafeFreeze mvec
121 {-# INLINE_U unsafeIndex #-}
122
123
124 -- | Retrieve a single element from a `Vectors`,
125 -- given the outer and inner indices.
126 unsafeIndex2 :: Unboxes a => Vectors a -> Int -> Int -> a
127 unsafeIndex2 (Vectors _ starts lens arrs) ix1 ix2
128 = (arrs `P.indexArray` ix1) `P.indexByteArray` (starts `P.indexByteArray` ix1 + ix2)
129 {-# INLINE_U unsafeIndex2 #-}
130
131
132 -- | Retrieve an inner array from a `Vectors`, returning the array data,
133 -- starting index in the data, and vector length.
134 unsafeIndexUnpack :: Unboxes a => Vectors a -> Int -> (P.ByteArray, Int, Int)
135 unsafeIndexUnpack (Vectors n starts lens arrs) ix
136 = ( arrs `P.indexArray` ix
137 , starts `P.indexByteArray` ix
138 , lens `P.indexByteArray` ix)
139 {-# INLINE_U unsafeIndexUnpack #-}
140
141
142 -- | Append two `Vectors`.
143 --
144 -- * Important: appending two `Vectors` involes work proportional to
145 -- the length of the outer arrays, not the size of the inner ones.
146 append :: Unboxes a => Vectors a -> Vectors a -> Vectors a
147 append (Vectors len1 starts1 lens1 chunks1)
148 (Vectors len2 starts2 lens2 chunks2)
149 = unsafePerformIO
150 $ do let len' = len1 + len2
151
152 -- append starts into result
153 let lenStarts1 = P.sizeofByteArray starts1
154 let lenStarts2 = P.sizeofByteArray starts2
155 maStarts <- P.newByteArray (lenStarts1 + lenStarts2)
156 P.copyByteArray maStarts 0 starts1 0 lenStarts1
157 P.copyByteArray maStarts lenStarts1 starts2 0 lenStarts2
158 starts' <- P.unsafeFreezeByteArray maStarts
159
160 -- append lens into result
161 let lenLens1 = P.sizeofByteArray lens1
162 let lenLens2 = P.sizeofByteArray lens2
163 maLens <- P.newByteArray (lenLens1 + lenLens2)
164 P.copyByteArray maLens 0 lens1 0 lenLens1
165 P.copyByteArray maLens lenStarts1 lens2 0 lenLens2
166 lens' <- P.unsafeFreezeByteArray maLens
167
168 -- append arrs into result
169 maChunks <- P.newArray len' (error "Vectors: append argh!")
170 P.copyArray maChunks 0 chunks1 0 len1
171 P.copyArray maChunks len1 chunks2 0 len2
172 chunks' <- P.unsafeFreezeArray maChunks
173
174 return $ Vectors len' starts' lens' chunks'
175 {-# INLINE_U append #-}
176
177
178 -- | Convert a boxed vector of unboxed vectors to a `Vectors`.
179 fromVector :: (Unboxes a, Unbox a) => V.Vector (U.Vector a) -> Vectors a
180 fromVector vecs
181 = unsafePerformIO
182 $ do let len = V.length vecs
183 let (barrs, vstarts, vlens) = V.unzip3 $ V.map unpackUVector vecs
184 let (baStarts, _, _) = unpackUVector $ V.convert vstarts
185 let (baLens, _, _) = unpackUVector $ V.convert vlens
186 mchunks <- P.newArray len (error "Vectors: fromVector argh!")
187 V.zipWithM_
188 (\i vec
189 -> let (ba, _, _) = unpackUVector vec
190 in P.writeArray mchunks i ba)
191 (V.enumFromN 0 len)
192 vecs
193
194 chunks <- P.unsafeFreezeArray mchunks
195 return $ Vectors len baStarts baLens chunks
196 {-# INLINE_U fromVector #-}
197
198
199 -- | Convert a `Vectors` to a boxed vector of unboxed vectors.
200 toVector :: (Unboxes a, Unbox a) => Vectors a -> V.Vector (U.Vector a)
201 toVector vectors
202 = V.map (unsafeIndex vectors)
203 $ V.enumFromN 0 (length vectors)
204 {-# INLINE_U toVector #-}
205
206
207 -- | Unpack an unboxed vector into array data, starting index, and vector length.
208 unpackUVector :: (Unbox a, P.Prim a) => U.Vector a -> (P.ByteArray, Int, Int)
209 unpackUVector vec
210 = unsafePerformIO
211 $ do let pvec = V.convert vec
212 R.MVector start len mba <- R.unsafeThaw pvec
213 ba <- P.unsafeFreezeByteArray mba
214 return (ba, start, len)
215 {-# INLINE_U unpackUVector #-}
216
217
218 -- | Pack some array data, starting index and vector length unto an unboxed vector.
219 packUVector :: (Unbox a, P.Prim a) => P.ByteArray -> Int -> Int -> U.Vector a
220 packUVector ba start len
221 = unsafePerformIO
222 $ do mba <- P.unsafeThawByteArray ba
223 pvec <- R.unsafeFreeze $ R.MVector start len mba
224 return $ G.convert pvec
225 {-# INLINE_U packUVector #-}
226
227
228 -- | Copy segments from a `Vectors` and concatenate them into a new array.
229 unsafeExtracts
230 :: (Unboxes a, U.Unbox a)
231 => USSegd -> Vectors a -> U.Vector a
232
233 unsafeExtracts ussegd vectors
234 = G.unstream $ unsafeStreamVectors ussegd vectors
235 {-# INLINE_U unsafeExtracts #-}
236
237
238 -- Stream -----------------------------------------------------------------------------------------
239 -- | Stream segments from a `Vectors`.
240 --
241 -- * There must be at least one segment in the `USSegd`, but this is not checked.
242 --
243 -- * No bounds checking is done for the `USSegd`.
244 unsafeStreamVectors :: Unboxes a => USSegd -> Vectors a -> S.Stream a
245 unsafeStreamVectors ussegd@(USSegd _ segStarts segSources usegd) vectors
246 = segStarts `seq` segSources `seq` usegd `seq` vectors `seq`
247 let -- Length of each segment
248 !segLens = USegd.takeLengths usegd
249
250 -- Total number of segments.
251 !segsTotal = USSegd.length ussegd
252
253 -- Total number of elements to stream.
254 !elements = USegd.takeElements usegd
255
256 -- seg, ix of that seg in usegd, length of seg, elem in seg
257 {-# INLINE_INNER fnSeg #-}
258 fnSeg (ixSeg, baSeg, ixEnd, ixElem)
259 = ixSeg `seq` baSeg `seq`
260 if ixElem >= ixEnd -- Was that the last elem in the current seg?
261 then if ixSeg + 1 >= segsTotal -- Was that last seg?
262
263 -- That was the last seg, we're done.
264 then return $ S.Done
265
266 -- Move to the next seg.
267 else let ixSeg' = ixSeg + 1
268 sourceSeg = U.unsafeIndex segSources ixSeg'
269 startSeg = U.unsafeIndex segStarts ixSeg'
270 lenSeg = U.unsafeIndex segLens ixSeg'
271 (arr, startArr, lenArr) = unsafeIndexUnpack vectors sourceSeg
272 in return $ S.Skip
273 ( ixSeg'
274 , arr
275 , startArr + startSeg + lenSeg
276 , startArr + startSeg)
277
278 -- Stream the next element from the segment.
279 else let !result = P.indexByteArray baSeg ixElem
280 in return $ S.Yield result (ixSeg, baSeg, ixEnd, ixElem + 1)
281
282 -- Starting state of the stream.
283 !initState
284 = let sourceSeg = U.unsafeIndex segSources 0
285 startSeg = U.unsafeIndex segStarts 0
286 lenSeg = U.unsafeIndex segLens 0
287 (arr, startArr, lenArr) = unsafeIndexUnpack vectors sourceSeg
288 in ( 0 -- starting segment id
289 , arr -- starting segment data
290 , startArr + startSeg + lenSeg -- segment end
291 , startArr + startSeg) -- segment start ix
292
293 -- It's important that we set the result stream size, so Data.Vector
294 -- doesn't need to add code to grow the result when it overflows.
295 in M.Stream fnSeg initState (S.Exact elements)
296 {-# INLINE_STREAM unsafeStreamVectors #-}
297