Add locked zipwith code
[packages/dph.git] / dph-prim-interface / interface / DPH_Interface.h
1 import Data.Array.Parallel.Base (Tag, tagToInt, fromBool)
2 import qualified GHC.Base
3 import Prelude ((.), ($), Num(..), Eq(..), seq, snd)
4 import qualified Prelude
5
6 instance Elt Int
7 instance Elt Word8
8 instance Elt Bool
9 instance Elt Float
10 instance Elt Double
11 instance (Elt a, Elt b) => Elt (a, b)
12
13 instance Elts Int
14 instance Elts Word8
15 instance Elts Float
16 instance Elts Double
17
18 infixl 9 `index`
19 infixr 5 +:+
20
21 -- TRAGIC HACKS ===============================================================
22 -- These hacky rules solve the replicate problem for the SMVM benchmark,
23 -- with dph-lifted-copy but are very fragile and ad-hoc.
24 --
25 -- Programs written with the new dph-lifted-vseg library don't need rules like
26 -- this, so we should dump them at some stage.
27 {-# RULES
28
29 "map/zipWith (+)/enumFromStepLen" forall m n is.
30 map (dph_mod_index m) (zipWith ((+) :: Int -> Int -> Int)
31 (enumFromStepLen 0 m n) is)
32 = map (dph_mod_index m) is
33
34 "map dph_mod_index/enumFromStepLenEach" forall k l is n1 n2.
35 map (dph_mod_index k)
36 (enumFromStepLenEach l is (replicate n1 (GHC.Base.I# 1#)) (replicate n2 k))
37 = enumFromStepLenEach l (map (dph_mod_index k) is)
38 (replicate n1 (GHC.Base.I# 1#))
39 (replicate n2 k)
40
41 "map dph_mod_index/replicate_s" forall k segd xs.
42 map (dph_mod_index k) (replicate_s segd xs)
43 = replicate_s segd (map (dph_mod_index k) xs)
44
45 "map dph_mod_index/enumFromStepLen" forall k# i n.
46 map (dph_mod_index (GHC.Base.I# k#)) (enumFromStepLen i (GHC.Base.I# k#) n)
47 = replicate n i
48
49 "enumFromStepLenEach/replicate x 3" forall k m n1 n2 n3.
50 enumFromStepLenEach m (replicate n1 (GHC.Base.I# 0#))
51 (replicate n2 (GHC.Base.I# 1#))
52 (replicate n3 k)
53 = generate_cheap m (dph_mod_index k)
54
55 "bpermute/generate_cheap" forall n f xs.
56 bpermute (generate_cheap n f) xs
57 = map f xs
58 #-}
59 -- ============================================================================
60
61
62 -- Constructors ===============================================================
63 -- | O(1). Construct an array with no elements.
64 empty :: Elt a => Array a
65 {-# INLINE_BACKEND empty #-}
66
67
68 -- Generate ---------------------------
69 -- | Generate a new array given its length and a function to compute each element.
70 generate :: Elt a => Int -> (Int -> a) -> Array a
71 generate n f = map f (enumFromTo 0 (n-1))
72 {-# INLINE_BACKEND generate #-}
73
74
75 generate_cheap :: Elt a => Int -> (Int -> a) -> Array a
76 generate_cheap n f = map f (enumFromTo 0 (n-1))
77 {-# INLINE_BACKEND generate_cheap #-}
78
79
80 -- Replicate --------------------------
81 -- | O(length result).
82 -- Construct a new array by replicating a single element the given
83 -- number of times.
84 replicate :: Elt a => Int -> a -> Array a
85 {-# INLINE CONLIKE PHASE_BACKEND replicate #-}
86
87 {-# RULES
88
89 "seq/replicate" forall n x y.
90 seq (replicate n x) y = n `seq` x `seq` y
91
92 #-}
93
94
95 -- | O(length result). Segmented replicate.
96 --
97 -- Elements of the array are replicated according to the lengths of the
98 -- segments defined by the `Segd`.
99 replicate_s :: Elt a => Segd -> Array a -> Array a
100 {-# INLINE CONLIKE PHASE_BACKEND replicate_s #-}
101
102
103 -- | O(length result). Regular segmented replicate.
104 --
105 -- Like `replicate_s`, but all segments are assumed to have the given length.
106 replicate_rs :: Elt a => Int -> Array a -> Array a
107 {-# INLINE CONLIKE PHASE_BACKEND replicate_rs #-}
108
109
110 {-# RULES
111
112 "replicate_s/replicate"
113 forall segd k x
114 . replicate_s segd (replicate k x)
115 = replicate (elementsSegd segd) x
116
117 "replicate_s->replicate_rs"
118 forall n m idxs nm xs
119 . replicate_s (mkSegd (replicate n m) idxs nm) xs
120 = replicate_rs m xs
121
122 "replicate_rs/replicate"
123 forall m n x
124 . replicate_rs m (replicate n x) = replicate (m*n) x
125
126 #-}
127
128
129 -- Repeat -----------------------------
130 -- | O(length result). Construct an array by copying a portion of another array.
131 repeat :: Elt a
132 => Int -- ^ Number of times to repeat the source.
133 -> Int -- ^ Length of source (can be less than the provided array).
134 -> Array a -- ^ Array elements to repeat.
135 -> Array a
136 {-# INLINE_BACKEND repeat #-}
137
138
139 {-# RULES
140
141 "repeat/enumFromStepLen[Int]" forall i j k n len.
142 repeat n len (enumFromStepLen i j k)
143 = generate_cheap len (\m -> i + ((m `Prelude.rem` k) * j))
144
145 #-}
146
147
148 -- Append -----------------------------
149 -- | O(length result). Append two arrays.
150 (+:+) :: Elt a => Array a -> Array a -> Array a
151 {-# INLINE_BACKEND (+:+) #-}
152
153
154 -- | O(length result). Segmented append.
155 append_s
156 :: Elt a
157 => Segd -- ^ Segment descriptor of result aarray.
158 -> Segd -- ^ Segment descriptor of first array.
159 -> Array a -- ^ Data of first array.
160 -> Segd -- ^ Segment descriptor of second array.
161 -> Array a -- ^ Data of second array.
162 -> Array a
163 {-# INLINE_BACKEND append_s #-}
164
165
166 {-# RULES
167
168 "append_s->interleave" forall n k idxs1 idxs2 idxs3 m1 m2 m3 xs ys.
169 append_s (mkSegd (replicate n k) idxs1 m1)
170 (mkSegd (replicate n (GHC.Base.I# 1#)) idxs2 m2) xs
171 (mkSegd (replicate n (GHC.Base.I# 1#)) idxs3 m3) ys
172 = interleave xs ys
173
174 #-}
175
176 append_vs
177 :: (Elt a, Elts a)
178 => Segd -- ^ Segment descriptor of result aarray.
179 -> VSegd -- ^ Segment descriptor of first array.
180 -> Arrays a -- ^ Data of first array.
181 -> VSegd -- ^ Segment descriptor of second array.
182 -> Arrays a -- ^ Data of second array.
183 -> Array a
184 {-# INLINE_BACKEND append_vs #-}
185
186
187
188 -- Indexed ----------------------------
189 -- | O(length result). Tag each element of an array with its index.
190 --
191 -- @indexed [42, 93, 13] = [(0, 42), (1, 93), (2, 13)]@
192 indexed :: Elt a => Array a -> Array (Int, a)
193 {-# INLINE_BACKEND indexed #-}
194
195
196 -- Indices ----------------------------
197 -- | O(length result). Segmented indices.
198 --
199 -- Construct an array containing containing the segments defined by the
200 -- given `Segd`.
201 --
202 -- Each segment will contain the elements @[0..len-1]@ where @len@ is the
203 -- length of that segment.
204 indices_s :: Segd -> Array Int
205 {-# INLINE_BACKEND indices_s #-}
206
207
208 -- Enumerations -----------------------
209 enumFromTo :: Int -> Int -> Array Int
210 {-# INLINE_BACKEND enumFromTo #-}
211
212 enumFromThenTo :: Int -> Int -> Int -> Array Int
213 {-# INLINE_BACKEND enumFromThenTo #-}
214
215 enumFromStepLen :: Int -> Int -> Int -> Array Int
216 {-# INLINE_BACKEND enumFromStepLen #-}
217
218 enumFromStepLenEach :: Int -> Array Int -> Array Int -> Array Int -> Array Int
219 {-# INLINE_BACKEND enumFromStepLenEach #-}
220
221
222 -- Projections ================================================================
223 -- | O(1). Yield the number of elements in an array.
224 length :: Elt a => Array a -> Int
225 {-# INLINE_BACKEND length #-}
226
227
228 -- | O(1). Retrieve a numbered element from an array.
229 --
230 -- The first argument gives a source-code location for out-of-bounds errors.
231 index :: Elt a => Prelude.String -> Array a -> Int -> a
232 {-# INLINE_BACKEND index #-}
233
234
235 -- | O(length result). Scattered indexing from a single `Array`.
236 --
237 -- This is an alias for `bpermute`.
238 indexs :: Elt a
239 => Array a
240 -> Array Int
241 -> Array a
242 {-# INLINE_BACKEND indexs #-}
243
244
245 -- | O(length result). Scattered indexing through a `VSegd`.
246 --
247 -- The index array contains pairs of segment id and the index within that
248 -- segment.
249 --
250 -- We use the `VSegd` to map the pairs to 2D indices within the `Arrays`,
251 -- and return an array of the resulting elements.
252 indexs_avs
253 :: (Elt a, Elts a)
254 => Arrays a -- ^ Irregular 2D array of elements.
255 -> VSegd -- ^ Maps (segment id, segment index) pairs
256 -- to 2D indices in the `Arrays`
257 -> Array (Int, Int) -- ^ Pairs of (segment id, segment index).
258 -> Array a
259 {-# INLINE_BACKEND indexs_avs #-}
260
261
262 -- | O(length result). Extract a subrange of elements from an array.
263 --
264 -- @extract [23, 42, 93, 50, 27] 1 3 = [42, 93, 50]@
265 extract :: Elt a
266 => Array a -- ^ Source array.
267 -> Int -- ^ Starting index in source array.
268 -> Int -- ^ Length of result array.
269 -> Array a
270 {-# INLINE_BACKEND extract #-}
271
272
273 -- | O(length result). Extract segments defined by a `SSegd` from a vector of arrays.
274 --
275 -- NOTE: This is a transitory interface, and will be removed in future versions.
276 -- Use `extracts_ass` instead.
277 extracts_nss
278 :: Elt a
279 => SSegd
280 -> VV.Vector (Array a)
281 -> Array a
282 {-# INLINE_BACKEND extracts_nss #-}
283
284
285 -- | O(length result). Extract segments defined by a `SSegd`.
286 --
287 -- Extract all the segments defined by the `SSegd` from the `Arrays`,
288 -- returning them concatenated in a fresh `Array`.
289 extracts_ass
290 :: (Elt a, Elts a)
291 => SSegd -- ^ `SSegd` defining the slices to extract.
292 -> Arrays a -- ^ Source arrays.
293 -> Array a
294 {-# INLINE_BACKEND extracts_ass #-}
295
296
297 -- | O(length result). Extract segments defined by a `VSegd`.
298 --
299 -- Extract all the segments defined by the `VSegd` from the `Arrays`,
300 -- returning them concatenated in a fresh `Array`.
301 extracts_avs
302 :: (Elt a, Elts a)
303 => VSegd -- ^ `VSegd` defining the slices to extract.
304 -> Arrays a -- ^ Source arrays.
305 -> Array a
306 {-# INLINE_BACKEND extracts_avs #-}
307
308
309 -- | O(length result). Drop elements from the front of an array,
310 -- returning the latter portion.
311 drop :: Elt a => Int -> Array a -> Array a
312 {-# INLINE_BACKEND drop #-}
313
314 {-# RULES
315
316 "indexs_avs/singletons/replicatedVSegd"
317 forall arr len reps srcixs
318 . indexs_avs (singletons arr) (replicatedVSegd len reps) srcixs
319 = indexs arr (map snd srcixs)
320
321 #-}
322
323
324 -- Update =====================================================================
325 -- | O(length result).
326 -- Copy the source array while replacing some elements by new ones in the result.
327 update :: Elt a
328 => Array a -- ^ Source array.
329 -> Array (Int, a) -- ^ Index and value of new elements.
330 -> Array a
331 {-# INLINE_BACKEND update #-}
332
333
334 -- Permutation ================================================================
335 -- | O(length result). Forwards permutation of array elements.
336 --
337 permute :: Elt a
338 => Array a -- ^ Source array.
339 -> Array Int -- ^ Indices in the destination to copy elements to.
340 -> Array a
341 {-# INLINE_BACKEND permute #-}
342
343
344 -- | O(length result). Backwards permutation of array elements.
345 --
346 -- @bpermute [50, 60, 20, 30] [0, 3, 2] = [50, 30, 20]@
347 bpermute
348 :: Elt a
349 => Array a -- ^ Source array.
350 -> Array Int -- ^ Indices in the source to copy elements from.
351 -> Array a
352 {-# INLINE_BACKEND bpermute #-}
353
354
355 -- | Combination of map and bpermute.
356 --
357 -- The advantage of using this combined version is that we don't need
358 -- to apply the parameter function to source elements that don't appear
359 -- in the result.
360 mbpermute :: (Elt a, Elt b) => (a->b) -> Array a -> Array Int -> Array b
361 {-# INLINE_BACKEND mbpermute #-}
362
363
364 -- | Default backwards permutation.
365 --
366 -- The values of the index-value pairs are written into the position in the
367 -- result array that is indicated by the corresponding index.
368 --
369 -- All positions not covered by the index-value pairs will have the value
370 -- determined by the initialiser function for that index position.
371 --
372 bpermuteDft:: Elt e => Int -> (Int -> e) -> Array (Int, e) -> Array e
373 {-# INLINE_BACKEND bpermuteDft #-}
374
375 {-# RULES
376
377 "bpermute/repeat" forall n len xs is.
378 bpermute (repeat n len xs) is
379 = len `Prelude.seq` bpermute xs (map (dph_mod_index len) is)
380
381 "bpermute/bpermute" forall xs is js.
382 bpermute (bpermute xs is) js = bpermute xs (bpermute is js)
383
384 #-}
385
386
387 -- Zipping and Unzipping ======================================================
388 -- | O(1). Zip two arrays into an array of pairs.
389 -- If one array is short, excess elements of the longer array are discarded.
390 zip :: (Elt a, Elt b) => Array a -> Array b -> Array (a, b)
391 {-# INLINE CONLIKE PHASE_BACKEND zip #-}
392
393
394 -- | O(1). Zip three arrays into an array of triples.
395 -- If one array is short, excess elements of the longer arrays are discarded.
396 zip3 :: (Elt a, Elt b, Elt c) => Array a -> Array b -> Array c -> Array (a, b, c)
397 {-# INLINE CONLIKE PHASE_BACKEND zip3 #-}
398
399
400 -- | O(1). Unzip an array of pairs into a pair of arrays.
401 unzip :: (Elt a, Elt b) => Array (a, b) -> (Array a, Array b)
402 {-# INLINE_BACKEND unzip #-}
403
404
405 -- | O(1). Unzip an array of triples into a triple of arrays.
406 unzip3 :: (Elt a, Elt b, Elt c) => Array (a, b, c) -> (Array a, Array b, Array c)
407 {-# INLINE_BACKEND unzip3 #-}
408
409
410 -- | O(1). Take the first elements of an array of pairs.
411 fsts :: (Elt a, Elt b) => Array (a, b) -> Array a
412 {-# INLINE_BACKEND fsts #-}
413
414
415 -- | O(1). Take the second elements of an array of pairs.
416 snds :: (Elt a, Elt b) => Array (a, b) -> Array b
417 {-# INLINE_BACKEND snds #-}
418
419
420 -- Maps and zipWith ===========================================================
421 -- | Apply a worker function to each element of an array, yielding a new array.
422 map :: (Elt a, Elt b)
423 => (a -> b) -> Array a -> Array b
424 {-# INLINE_BACKEND map #-}
425
426
427 -- | Apply a worker function to correponding elements of two arrays.
428 zipWith :: (Elt a, Elt b, Elt c)
429 => (a -> b -> c) -> Array a -> Array b -> Array c
430 {-# INLINE_BACKEND zipWith #-}
431
432
433 -- | Apply a worker function to corresponding elements of three arrays.
434 zipWith3 :: (Elt a, Elt b, Elt c, Elt d)
435 => (a -> b -> c -> d) -> Array a -> Array b -> Array c -> Array d
436 zipWith3 f xs ys zs
437 = zipWith (\(x, y) z -> f x y z)
438 (zip xs ys)
439 zs
440 {-# INLINE zipWith3 #-}
441
442
443 -- | Apply a worker function to corresponding elements of four arrays.
444 zipWith4 :: (Elt a, Elt b, Elt c, Elt d, Elt e)
445 => (a -> b -> c -> d -> e)
446 -> Array a -> Array b -> Array c -> Array d -> Array e
447 zipWith4 f as bs cs ds
448 = zipWith (\(a, b) (c, d) -> f a b c d)
449 (zip as bs)
450 (zip cs ds)
451 {-# INLINE zipWith4 #-}
452
453 -- | Apply a worker function to corresponding elements of five arrays.
454 zipWith5 :: (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f)
455 => (a -> b -> c -> d -> e -> f)
456 -> Array a -> Array b -> Array c -> Array d -> Array e -> Array f
457 {-# INLINE zipWith5 #-}
458 zipWith5 f as bs cs ds es
459 = zipWith (\(a, b) ((c, d),e) -> f a b c d e)
460 (zip as bs)
461 (zip (zip cs ds) es)
462
463 -- | Apply a worker function to corresponding elements of six arrays.
464 zipWith6 :: (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f, Elt g)
465 => (a -> b -> c -> d -> e -> f ->g)
466 -> Array a -> Array b -> Array c -> Array d -> Array e -> Array f -> Array g
467 {-# INLINE zipWith6 #-}
468 zipWith6 fn as bs cs ds es fs
469 = zipWith (\((a, b), c) ((d, e), f) -> fn a b c d e f)
470 (zip (zip as bs) cs)
471 (zip (zip ds es) fs)
472
473 -- | Apply a worker function to corresponding elements of seven arrays.
474 zipWith7 :: (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f, Elt g, Elt h)
475 => (a -> b -> c -> d -> e -> f -> g ->h)
476 -> Array a -> Array b -> Array c -> Array d -> Array e -> Array f -> Array g
477 -> Array h
478 {-# INLINE zipWith7 #-}
479 zipWith7 fn as bs cs ds es fs gs
480 = zipWith (\((a, b), c) ((d, e), (f, g)) -> fn a b c d e f g)
481 (zip (zip as bs) cs)
482 (zip (zip ds es) (zip fs gs))
483
484 -- | Apply a worker function to corresponding elements of six arrays.
485 zipWith8 :: (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f, Elt g, Elt h, Elt i)
486 => (a -> b -> c -> d -> e -> f -> g ->h -> i)
487 -> Array a -> Array b -> Array c -> Array d -> Array e -> Array f -> Array g
488 -> Array h -> Array i
489 {-# INLINE zipWith8 #-}
490 zipWith8 fn as bs cs ds es fs gs hs
491 = zipWith (\((a, b), (c, d)) ((e, f), (g, h)) -> fn a b c d e f g h)
492 (zip (zip as bs) (zip cs ds))
493 (zip (zip es fs) (zip gs hs))
494
495 {-# RULES
496
497 "zipWith/replicate" forall f m n x y.
498 zipWith f (replicate m x) (replicate n y) = replicate m (f x y)
499
500 "zipWith/plusInt0_1" forall n xs.
501 zipWith (+) (replicate n (GHC.Base.I# 0#)) xs = xs
502
503 "zipWith/plusInt0_2" forall n xs.
504 zipWith (+) xs (replicate n (GHC.Base.I# 0#)) = xs
505
506 "zipWith(plusInt)/enumFromStepLen" forall i1 k1 n1 i2 k2 n2.
507 zipWith ((+) :: Int -> Int -> Int)
508 (enumFromStepLen i1 k1 n1)
509 (enumFromStepLen i2 k2 n2)
510 = enumFromStepLen (i1+i2) (k1+k2) n1
511 #-}
512
513
514 -- == Locked ZipWiths =========================================================
515 -- | Apply a worker function to correponding elements of two arrays.
516 lockedZipWith :: (Elt a, Elt b, Elt c)
517 => (a -> b -> c) -> Array a -> Array b -> Array c
518 {-# INLINE_BACKEND lockedZipWith #-}
519
520
521 -- | Apply a worker function to corresponding elements of three arrays.
522 lockedZipWith3 :: (Elt a, Elt b, Elt c, Elt d)
523 => (a -> b -> c -> d) -> Array a -> Array b -> Array c -> Array d
524 {-# INLINE_BACKEND lockedZipWith3 #-}
525
526
527 -- | Apply a worker function to corresponding elements of four arrays.
528 lockedZipWith4 :: (Elt a, Elt b, Elt c, Elt d, Elt e)
529 => (a -> b -> c -> d -> e)
530 -> Array a -> Array b -> Array c -> Array d -> Array e
531 {-# INLINE_BACKEND lockedZipWith4 #-}
532
533
534 -- | Apply a worker function to corresponding elements of five arrays.
535 lockedZipWith5 :: (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f)
536 => (a -> b -> c -> d -> e -> f)
537 -> Array a -> Array b -> Array c -> Array d -> Array e -> Array f
538 {-# INLINE_BACKEND lockedZipWith5 #-}
539
540
541 -- | Apply a worker function to corresponding elements of six arrays.
542 lockedZipWith6 :: (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f, Elt g)
543 => (a -> b -> c -> d -> e -> f ->g)
544 -> Array a -> Array b -> Array c -> Array d -> Array e -> Array f -> Array g
545 {-# INLINE_BACKEND lockedZipWith6 #-}
546
547
548 -- | Apply a worker function to corresponding elements of seven arrays.
549 lockedZipWith7 :: (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f, Elt g, Elt h)
550 => (a -> b -> c -> d -> e -> f -> g ->h)
551 -> Array a -> Array b -> Array c -> Array d -> Array e -> Array f -> Array g
552 -> Array h
553 {-# INLINE_BACKEND lockedZipWith7 #-}
554
555
556 -- | Apply a worker function to corresponding elements of six arrays.
557 lockedZipWith8 :: (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f, Elt g, Elt h, Elt i)
558 => (a -> b -> c -> d -> e -> f -> g ->h -> i)
559 -> Array a -> Array b -> Array c -> Array d -> Array e -> Array f -> Array g
560 -> Array h -> Array i
561 {-# INLINE_BACKEND lockedZipWith8 #-}
562
563
564 -- Folds and Scans ============================================================
565 -- Scans ------------------------------
566 -- | Similar to `foldl` but return an array of the intermediate states, including
567 -- the final state that is computed by `foldl`.
568 scan :: Elt a => (a -> a -> a) -> a -> Array a -> Array a
569 {-# INLINE_BACKEND scan #-}
570
571
572 -- Fold -------------------------------
573 -- | Undirected fold over an array.
574 --
575 -- * The worker function must be associative.
576 --
577 -- * The provided starting element must be neutral with respect to the worker.
578 -- For example 0 is neutral wrt (+) and 1 is neutral wrt (*).
579 --
580 fold :: Elt a => (a -> a -> a) -> a -> Array a -> a
581 {-# INLINE_BACKEND fold #-}
582
583
584 -- | Undirected segmented fold.
585 --
586 -- All segments are folded individually, and the result contains one
587 -- element for each segment.
588 --
589 -- Same preconditions as `fold`.
590 fold_s :: Elt a => (a -> a -> a) -> a -> Segd -> Array a -> Array a
591 {-# INLINE_BACKEND fold_s #-}
592
593
594 -- | Undirected scattered segmented fold.
595 --
596 -- Like `fold_s`, but the segments can be scattered through an `Arrays`.
597 --
598 -- Same preconditions as `fold`.
599 fold_ss :: (Elts a, Elt a)
600 => (a -> a -> a) -> a -> SSegd -> Arrays a -> Array a
601 {-# INLINE_BACKEND fold_ss #-}
602
603
604 -- | Regular segmented fold.
605 --
606 -- All segements have the given length.
607 --
608 -- Same preconditions as `fold`.
609 fold_r :: Elt a => (a -> a -> a) -> a -> Int -> Array a -> Array a
610 {-# INLINE_BACKEND fold_r #-}
611
612
613 -- | Undirected fold over virtual segments.
614 --
615 -- The physical segments defined by the `VSegd` are folded individually,
616 -- and these results are replicated according to the virtual segment
617 -- id table of the `VSegd`. The result contains as many elements as there
618 -- virtual segments.
619 --
620 -- Same preconditions as `fold`.
621 fold_vs :: (Elts a, Elt a)
622 => (a -> a -> a) -> a -> VSegd -> Arrays a -> Array a
623 fold_vs f x vsegd arrs
624 = let -- Fold each physical segment individually
625 psegResults = fold_ss f x (takeSSegdOfVSegd vsegd) arrs
626
627 -- Replicate the physical results accorsing to the vsegids
628 in bpermute psegResults (takeVSegidsOfVSegd vsegd)
629 {-# INLINE_BACKEND fold_vs #-}
630
631
632 -- When we know the array data is manifest and/or contiguous then we want
633 -- to avoid using the extended information in the VSegd and SSegd types.
634 {-# RULES
635
636 "fold_ss/promoteSegdToSSegd" forall f x segd arr.
637 fold_ss f x (promoteSegdToSSegd segd) (singletons arr)
638 = fold_s f x segd arr
639
640 "fold_vs/promoteSegdToVSegd" forall f x segd arr.
641 fold_vs f x (promoteSegdToVSegd segd) (singletons arr)
642 = fold_s f x segd arr
643
644 "fold_vs/promoteSSegdToVSegd" forall f x ssegd arrs.
645 fold_vs f x (promoteSSegdToVSegd ssegd) arrs
646 = fold_ss f x ssegd arrs
647
648 #-}
649
650
651 -- Fold1 -------------------------------
652 -- | Undirected fold, using the first element to initialise the state.
653 --
654 -- * The worker function must be associative.
655 --
656 -- * The provided starting element must be neutral with respect to the worker.
657 -- For example 0 is neutral wrt (+) and 1 is neutral wrt (*).
658 --
659 -- * If the array contains no elements then you'll get a bounds check `error`.
660 --
661 fold1 :: Elt a => (a -> a -> a) -> Array a -> a
662 {-# INLINE_BACKEND fold1 #-}
663
664
665 -- | Like `fold_s`, but using the first element of each segment to initialise
666 -- the state of that segment.
667 --
668 -- Same preconditions as `fold1`.
669 fold1_s :: Elt a => (a -> a -> a) -> Segd -> Array a -> Array a
670 {-# INLINE_BACKEND fold1_s #-}
671
672
673 -- | Like `fold_ss`, but using the first element of each segment to intialise
674 -- the state of that segment.
675 --
676 -- Same preconditions as `fold1`.
677 fold1_ss :: (Elts a, Elt a)
678 => (a -> a -> a) -> SSegd -> Arrays a -> Array a
679 {-# INLINE_BACKEND fold1_ss #-}
680
681
682 -- | Like `fold_vs`, but using the first element of each segment to initialise
683 -- the state of that segment.
684 --
685 -- Same preconditions as `fold1`.
686 fold1_vs :: (Elts a, Elt a)
687 => (a -> a -> a) -> VSegd -> Arrays a -> Array a
688 fold1_vs f vsegd arrs
689 = let -- Fold each physical segment individually
690 psegResults = fold1_ss f (takeSSegdOfVSegd vsegd) arrs
691
692 -- Replicate the physical results accorsing to the vsegids
693 in bpermute psegResults (takeVSegidsOfVSegd vsegd)
694 {-# INLINE_BACKEND fold1_vs #-}
695
696
697 -- When we know the array data is manifest and/or contiguous then we want
698 -- to avoid using the extended information in the VSegd and SSegd types.
699 {-# RULES
700
701 "fold1_ss/promoteSegdToSSegd" forall f segd arr.
702 fold1_ss f (promoteSegdToSSegd segd) (singletons arr)
703 = fold1_s f segd arr
704
705 "fold1_vs/promoteSegdToVSegd" forall f segd arr.
706 fold1_vs f (promoteSegdToVSegd segd) (singletons arr)
707 = fold1_s f segd arr
708
709 "fold1_vs/promoteSSegdToVSegd" forall f ssegd arrs.
710 fold1_vs f (promoteSSegdToVSegd ssegd) arrs
711 = fold1_ss f ssegd arrs
712
713 #-}
714
715
716 -- Sums -------------------------------
717 -- | Same as @fold (+) 0@
718 sum :: (Num a, Elt a) => Array a -> a
719 {-# INLINE_BACKEND sum #-}
720
721 -- | Same as @fold_s (+) 0@
722 sum_s :: (Num a, Elt a) => Segd -> Array a -> Array a
723 sum_s = fold_s (Prelude.+) 0
724 {-# INLINE sum_s #-}
725
726 -- | Same as @fold_ss (+) 0@
727 sum_ss :: (Num a, Elts a, Elt a)
728 => SSegd -> Arrays a -> Array a
729 sum_ss = fold_ss (Prelude.+) 0
730 {-# INLINE sum_ss #-}
731
732 -- | Same as @fold_r (+) 0@
733 sum_r :: (Num a, Elt a) => Int -> Array a -> Array a
734 {-# INLINE_BACKEND sum_r #-}
735
736
737 -- Count ------------------------------
738 -- | Count the number of elements in array that are equal to the given value.
739 count :: (Elt a, Eq a) => Array a -> a -> Int
740 count xs !x = sum (map (tagToInt . fromBool . (==) x) xs)
741 {-# INLINE_BACKEND count #-}
742
743
744 -- | Segmented count.
745 count_s :: (Elt a, Eq a) => Segd -> Array a -> a -> Array Int
746 count_s segd xs !x
747 = sum_s segd (map (tagToInt . fromBool . (==) x) xs)
748 {-# INLINE_BACKEND count_s #-}
749
750
751 -- | Scattered segmented count.
752 --
753 -- NOTE: This is a transitory interface, and will be removed in future versions.
754 --- TODO: Make this take an `Arrays` instead of a Vector.
755 count_ss :: (Elt a, Eq a) => SSegd -> VV.Vector (Array a) -> a -> Array Int
756 {-# INLINE_BACKEND count_ss #-}
757 count_ss ssegd xs !x
758 = sum_ss ssegd (fromVectors $ VV.map (map (tagToInt . fromBool . (==) x)) xs)
759
760
761 -- And --------------------------------
762 -- | O(length source). Compute the conjunction of all elements in a boolean array.
763 and :: Array Bool -> Bool
764 {-# INLINE_BACKEND and #-}
765
766 {-# RULES
767
768 "seq/sum" forall xs e.
769 seq (sum xs) e = seq xs e
770
771 "seq/scan<Int> (+)" forall (i :: Int) xs e.
772 seq (scan (+) i xs) e = i `seq` xs `seq` e
773
774 "scan/replicate" forall (z :: Int) n x.
775 scan (+) z (replicate n x)
776 = enumFromStepLen z x n
777
778 "sum/replicate_rs" forall n xs.
779 sum (replicate_rs n xs) = sum xs * n
780
781 "count/replicate_s" forall segd xs tag.
782 count (replicate_s segd xs) tag
783 = sum (packByTag (lengthsSegd segd) xs tag)
784
785 "fold_s/replicate1" forall f z n idxs n' xs.
786 fold_s f z (mkSegd (replicate n (GHC.Base.I# 1#)) idxs n') xs = xs
787
788 "fold_s/replicate" forall f z m n idxs mn xs.
789 fold_s f z (mkSegd (replicate m n) idxs mn) xs
790 = fold_r f z n xs
791
792 "count/seq" forall xs x y. seq (count xs x) y = seq xs (seq x y)
793
794 #-}
795
796
797 -- Pack and Filter ============================================================
798 -- | O(length result).
799 -- Extract elements of an array where the associated flag is true.
800 pack :: Elt a => Array a -> Array Bool -> Array a
801 {-# INLINE_BACKEND pack #-}
802
803
804 -- | O(length result).
805 -- Select the elements of an array that have a corresponding tag.
806 --
807 -- @packByTag [12, 24, 42, 93] [1, 0, 0, 1] 0 = [24, 42]@
808 --
809 packByTag
810 :: Elt a
811 => Array a -- ^ data values
812 -> Array Tag -- ^ tag values
813 -> Tag -- ^ the tag of values to select
814 -> Array a -- ^ data values that had that tag
815
816 packByTag xs tags !tag
817 = fsts (filter (\p -> Prelude.snd p == tag) (zip xs tags))
818 {-# INLINE_BACKEND packByTag #-}
819
820
821 -- | Extract the elements from an array that match the given predicate.
822 filter :: Elt a => (a -> Bool) -> Array a -> Array a
823 {-# INLINE_BACKEND filter #-}
824
825 -- | Compute an array of flags indicating which elements match a given value.
826 --
827 -- @pick [4, 5, 3, 6, 5, 2, 5] 5 = [F, T, F, F, T, F, T]@
828 pick :: (Elt a, Eq a) => Array a -> a -> Array Bool
829 pick xs !x = map (x ==) xs
830 {-# INLINE pick #-}
831
832
833 {-# RULES
834
835 "tagZeroes" UNTIL_PHASE_BACKEND forall xs n.
836 map fromBool (zipWith GHC.Base.eqInt xs (replicate n (GHC.Base.I# 0#)))
837 = tagZeroes xs
838
839 "replicate_s/tagZeroes" forall lens idxs n.
840 replicate_s (mkSegd lens idxs n) (tagZeroes lens)
841 = replicate n 0
842
843 "packByTag/replicate" forall xs n t u.
844 packByTag xs (replicate n t) u = if t == u then xs else empty
845
846 "packByTag/bpermute" forall xs is tags n.
847 packByTag (bpermute xs is) tags n
848 = bpermute xs (packByTag is tags n)
849
850 #-}
851
852
853 -- Combine and Interleave =====================================================
854 -- | Combine two arrays,
855 -- using a flags array to tell us where to get each element from.
856 --
857 -- @combine [T, F, F, T, T, F] [1, 2, 3] [4, 5, 6] = [1, 4, 5, 2, 3, 6]@
858 combine :: Elt a => Array Bool -> Array a -> Array a -> Array a
859 {-# INLINE_BACKEND combine #-}
860
861
862 -- | Like `combine`, but use a precomputed selector to speed up the process.
863 --
864 -- See the description of `mkSel2` for how this works.
865 --
866 combine2 :: Elt a => Array Tag -> SelRep2 -> Array a -> Array a -> Array a
867 {-# INLINE_BACKEND combine2 #-}
868
869
870 -- | Interleave the elements of two arrays.
871 --
872 -- @interleave [1, 2, 3] [4, 5, 6] = [1, 4, 2, 5, 3, 6]@
873 interleave :: Elt a => Array a -> Array a -> Array a
874 {-# INLINE_BACKEND interleave #-}
875
876
877 -- Selectors ==================================================================
878 -- | O(1). Construct a selector.
879 --
880 -- A selector is a description of how to perform a `combine` operation.
881 --
882 -- Suppose we are evaluating the following expression:
883 --
884 -- @combine [F,F,T,F,T,T] [1,2,3] [4,5,6] = [4,5,1,6,2,3]@
885 --
886 -- This is difficult to parallelise. For each element in the result, the
887 -- source array we get this element from depends on the tag values associated
888 -- with all previous elements.
889 --
890 -- However, if we going to apply `combine` several times with the same flags array,
891 -- we can precompute a selector that tells us where to get each element.
892 -- The selector contains the original flags, as well as the source index telling
893 -- us where to get each element for the result array.
894 --
895 -- For example:
896 --
897 -- @tagsToIndices2 [F,F,T,F,T,T] -- tags
898 -- = [0,1,0,2,1,2] -- indices
899 -- @
900 --
901 -- This says get the first element from index 0 in the second array,
902 -- then from index 1 in the second array,
903 -- then index 0 in the first array ...
904 --
905 -- The selector then consists of both the @tag@ and @indices@ arrays.
906 --
907 mkSel2 :: Array Tag -- ^ Tags array.
908 -> Array Int -- ^ Indices array.
909 -> Int -- ^ Number of elements taken from first source array.
910 -> Int -- ^ Number of elements taken from second source array.
911 -> SelRep2 -- ^ Parallel selector representation.
912 -> Sel2
913 {-# INLINE CONLIKE PHASE_BACKEND mkSel2 #-}
914
915
916 -- | O(1). Yield the tags array of a selector.
917 tagsSel2 :: Sel2 -> Array Tag
918 {-# INLINE_BACKEND tagsSel2 #-}
919
920
921 -- | O(1). Yield the indices array of a selector.
922 indicesSel2 :: Sel2 -> Array Int
923 {-# INLINE_BACKEND indicesSel2 #-}
924
925
926 -- | O(1). Yield the number of elements that will be taken from the first array.
927 elementsSel2_0 :: Sel2 -> Int
928 {-# INLINE_BACKEND elementsSel2_0 #-}
929
930
931 -- | O(1). Yield the number of elements that will be taken from the second array.
932 elementsSel2_1 :: Sel2 -> Int
933 {-# INLINE_BACKEND elementsSel2_1 #-}
934
935
936 -- | O(1). Yield the parallel representation of a selector.
937 repSel2 :: Sel2 -> SelRep2
938 {-# INLINE_BACKEND repSel2 #-}
939
940
941 -- Selector Representations ===================================================
942 -- | O(n). Construct a parallel selector representation.
943 --
944 -- A `SelRep2` describes how to distribute the two data vectors
945 -- corresponding to a `Sel2` across several PEs.
946 --
947 -- Suppose we want to perform the following `combine` operation:
948 --
949 -- @
950 -- combine [F,F,T,T,F,T,F,F,T] [A0,A1,A2,A3,A4] [B0,B1,B2,B3]
951 -- = [A0,A1,B0,B1,A2,B2,A3,A4,B3]
952 -- @
953 --
954 -- The first array is the flags array, that says which of the data arrays to
955 -- get each successive element from. As `combine` is difficult to compute
956 -- in parallel, if we are going to perform several combines with the same
957 -- flags array, we can precompute a selector that tells us where to get each
958 -- element. The selector contains the original flags, as well as the source
959 -- index telling us where to get each element for the result array.
960 --
961 -- @
962 -- flags: [F,F,T,T,F,T,F,F,T]
963 -- indices: [0,1,0,1,2,2,3,4,3]
964 -- @
965 --
966 -- Suppose we want to distribute the combine operation across 3 PEs. It's
967 -- easy to split the selector like so:
968 --
969 -- @
970 -- PE0 PE1 PE2
971 -- flags: [F,F,T] [T,F,T] [F,F,T]
972 -- indices: [0,1,0] [1,2,2] [3,4,3]
973 -- @
974 --
975 -- We now need to split the two data arrays. Each PE needs slices of the data
976 -- arrays that correspond to the parts of the selector that were given to it.
977 -- For the current example we get:
978 --
979 -- @
980 -- PE0 PE1 PE2
981 -- data_A: [A0,A1] [A2] [A3,A4]
982 -- data_B: [B0] [B1,B2] [B3]
983 -- @
984 --
985 -- The `SelRep2` contains the starting index and length of each of of these
986 -- slices:
987 --
988 -- @
989 -- PE0 PE1 PE2
990 -- ((0, 0), (2, 1)) ((2, 1), (1, 2)) ((3, 3), (2, 1))
991 -- indices lens indices lens indices lens
992 -- @
993
994 mkSelRep2 :: Array Tag -> SelRep2
995 {-# INLINE CONLIKE PHASE_BACKEND mkSelRep2 #-}
996
997
998 -- | O(1). Take the `indices` field from a `SelRep2`.
999 indicesSelRep2 :: Array Tag -> SelRep2 -> Array Int
1000 {-# INLINE_BACKEND indicesSelRep2 #-}
1001
1002
1003 -- | O(1). Yield the number of elements to take from the first array.
1004 elementsSelRep2_0 :: Array Tag -> SelRep2 -> Int
1005 {-# INLINE_BACKEND elementsSelRep2_0 #-}
1006
1007
1008 -- | O(1). Yield the number of elements to take from the second array.
1009 elementsSelRep2_1 :: Array Tag -> SelRep2 -> Int
1010 {-# INLINE_BACKEND elementsSelRep2_1 #-}
1011
1012
1013 -- | O(n). Compute a selector from a tags array.
1014 tagsToSel2 :: Array Tag -> Sel2
1015 tagsToSel2 tags
1016 = let rep = mkSelRep2 tags
1017 in mkSel2 tags (indicesSelRep2 tags rep)
1018 (elementsSelRep2_0 tags rep)
1019 (elementsSelRep2_1 tags rep)
1020 rep
1021 {-# INLINE tagsToSel2 #-}
1022
1023
1024 {-# RULES
1025
1026 "tagsSel2/mkSel2"
1027 forall ts is n0 n1 r. tagsSel2 (mkSel2 ts is n0 n1 r) = ts
1028
1029 "indicesSel2/mkSel2"
1030 forall ts is n0 n1 r. indicesSel2 (mkSel2 ts is n0 n1 r) = is
1031
1032 "elementsSel2_0/mkSel2"
1033 forall ts is n0 n1 r. elementsSel2_0 (mkSel2 ts is n0 n1 r) = n0
1034
1035 "elementsSel2_1/mkSel2"
1036 forall ts is n0 n1 r. elementsSel2_1 (mkSel2 ts is n0 n1 r) = n1
1037
1038 "repSel2/mkSel2"
1039 forall ts is n0 n1 r. repSel2 (mkSel2 ts is n0 n1 r) = r
1040
1041 #-}
1042
1043
1044 -- Segment Descriptors ========================================================
1045 -- | O(max(segs, threads) . log segs). Construct a segment descriptor.
1046 --
1047 -- A segment desciptor defines an irregular 2D array based on a flat, 1D array
1048 -- of elements. The defined array is a nested array of segments, where every
1049 -- segment covers some of the elements from the flat array.
1050 --
1051 -- * The starting indices must be equal to @init (scanl (+) 0 lengths)@
1052 --
1053 -- * If you don't want to cover all the elements from the flat arrary then
1054 -- use a `SSegd` instead.
1055 --
1056 -- Example:
1057 --
1058 -- @
1059 -- flat array data: [1 2 3 4 5 6 7 8]
1060 -- (segmentation) --- ----- - ---
1061 -- segd lengths: [2, 3, 1, 2]
1062 -- indices: [0, 2, 5, 6]
1063 -- elements: 8
1064 -- @
1065 ---
1066 -- * This ensures that the indices are monotonically increasing,
1067 -- and all elements from the flat array are covered by some segment.
1068 --
1069 -- * We need this because in the implementation of `lengthsToSegd`, we binary
1070 -- search the indices to determine which segment an element of the
1071 -- flat array belongs to. It also means that the segment indices can always
1072 -- be reconstructed from the segment lengths by `lengthsToSegd`.
1073 --
1074 --
1075 mkSegd :: Array Int -- ^ (lengths) Segment lengths.
1076 -> Array Int -- ^ (indices) Segment indices.
1077 -> Int -- ^ Total number of elements.
1078 -> Segd
1079 {-# INLINE CONLIKE PHASE_BACKEND mkSegd #-}
1080
1081
1082 -- | Check whether a `Segd` is well formed.
1083 validSegd :: Segd -> Bool
1084 {-# NOINLINE validSegd #-}
1085 -- NOINLINE because it's only used during debugging.
1086
1087
1088 -- | O(1). Construct an empty `Segd`.
1089 emptySegd :: Segd
1090 {-# INLINE_BACKEND emptySegd #-}
1091
1092
1093 -- | O(1). Construct a `Segd` containing a single segment of the given length.
1094 singletonSegd :: Int -> Segd
1095 {-# INLINE_BACKEND singletonSegd #-}
1096
1097
1098 -- | O(max(segs, threads) . log segs).
1099 -- Construct a `Segd` from an array of segment lengths.
1100 lengthsToSegd :: Array Int -> Segd
1101 lengthsToSegd ns = mkSegd ns (scan (+) 0 ns) (sum ns)
1102 {-# INLINE_BACKEND lengthsToSegd #-}
1103
1104
1105 -- | O(1). Yield the length of a `Segd`.
1106 lengthSegd :: Segd -> Int
1107 {-# INLINE_BACKEND lengthSegd #-}
1108
1109
1110 -- | O(1). Yield the segment lengths of a `Segd`.
1111 lengthsSegd :: Segd -> Array Int
1112 {-# INLINE_BACKEND lengthsSegd #-}
1113
1114
1115 -- | O(1). Yield the segment starting indices of a `Segd`.
1116 indicesSegd :: Segd -> Array Int
1117 {-# INLINE_BACKEND indicesSegd #-}
1118
1119
1120 -- | O(1). Yield the total number of elements defined by a `Segd`.
1121 elementsSegd :: Segd -> Int
1122 {-# INLINE_BACKEND elementsSegd #-}
1123
1124 -- | O(max(segs, threads) . log segs).
1125 -- Add the lengths of corresponding segments in two descriptors.
1126 --
1127 -- @plusSegd [lens: 2 3 1] [lens: 3 1 1] = [lens: 5 4 2]@
1128 --
1129 plusSegd :: Segd -> Segd -> Segd
1130 plusSegd segd1 segd2
1131 = mkSegd (zipWith (+) (lengthsSegd segd1) (lengthsSegd segd2))
1132 (zipWith (+) (indicesSegd segd1) (indicesSegd segd2))
1133 (elementsSegd segd1 `dph_plus` elementsSegd segd2)
1134 {-# INLINE_BACKEND plusSegd #-}
1135
1136
1137 {-# RULES
1138
1139 "lengthsSegd/mkSegd" forall lens idxs n.
1140 lengthsSegd (mkSegd lens idxs n) = lens
1141
1142 "indicesSegd/mkSegd" forall lens idxs n.
1143 indicesSegd (mkSegd lens idxs n) = idxs
1144
1145 "elementsSegd/mkSegd" forall lens idxs n.
1146 elementsSegd (mkSegd lens idxs n) = n
1147
1148 "seq/elementsSegd" forall segd e.
1149 seq (elementsSegd segd) e = seq segd e
1150
1151 "seq/mkSegd" forall lens idxs n e.
1152 seq (mkSegd lens idxs n) e = lens `seq` idxs `seq` n `seq` e
1153
1154 #-}
1155
1156 -- Scattered Segment Descriptors ==============================================
1157 -- | Construct a Scattered Segment Descriptor.
1158 --
1159 -- A `SSegd` is an extension of a `Segd` that that allows the segments to be
1160 -- scattered through multiple flat arrays.
1161 --
1162 -- Each segment is associated with a source id that indicates what
1163 -- flat array it is in, along with the starting index in that flat array.
1164 --
1165 -- * The segments need not cover the entire flat array.
1166 --
1167 -- * Different segments may point to the same elements.
1168 --
1169 mkSSegd :: Array Int -- ^ (starts) Starting index of each segment within its flat array.
1170 -> Array Int -- ^ (sources) Source id of flat array to get each segment from.
1171 -> Segd -- ^ Plain segment descriptor giving the lengths
1172 -- of the segments.
1173 -> SSegd
1174 {-# INLINE CONLIKE PHASE_BACKEND mkSSegd #-}
1175
1176
1177 -- | Check whether a `Segd` is well formed.
1178 validSSegd :: SSegd -> Bool
1179 {-# NOINLINE validSSegd #-}
1180 -- NOINLINE because it's only used during debugging.
1181
1182
1183 -- | O(1). Construct an empty `SSegd`.
1184 emptySSegd :: SSegd
1185 {-# INLINE_BACKEND emptySSegd #-}
1186
1187
1188 -- | O(1). Construct a `Segd` containing a single segment of the given length.
1189 singletonSSegd :: Int -> SSegd
1190 {-# INLINE_BACKEND singletonSSegd #-}
1191
1192
1193 -- | O(segs). Promote a `Segd` to a `SSegd`,
1194 -- assuming all segments are contiguous and come from a single array.
1195 promoteSegdToSSegd :: Segd -> SSegd
1196 {-# INLINE_BACKEND promoteSegdToSSegd #-}
1197
1198
1199 -- | O(1). True when a `SSegd` has been constructed by promoting a `SSegd`.
1200 --
1201 -- In this case all the data elements are in one contiguous flat
1202 -- array, and consumers can avoid looking at the real starts and
1203 -- sources fields.
1204 isContiguousSSegd :: SSegd -> Bool
1205 {-# INLINE_BACKEND isContiguousSSegd #-}
1206
1207
1208 -- | O(1). Yield the length of a `SSegd`.
1209 lengthOfSSegd :: SSegd -> Int
1210 {-# INLINE_BACKEND lengthOfSSegd #-}
1211
1212
1213 -- | O(1). Yield the segment lengths of a `SSegd`.
1214 lengthsOfSSegd :: SSegd -> Array Int
1215 {-# INLINE_BACKEND lengthsOfSSegd #-}
1216
1217
1218 -- | O(1). Yield the indices field of a `SSegd`.
1219 indicesOfSSegd :: SSegd -> Array Int
1220 {-# INLINE_BACKEND indicesOfSSegd #-}
1221
1222
1223 -- | O(1). Yield the starts field of a `SSegd`.
1224 startsOfSSegd :: SSegd -> Array Int
1225 {-# INLINE_BACKEND startsOfSSegd #-}
1226
1227
1228 -- | O(1). Yield the sources field of a `SSegd`.
1229 sourcesOfSSegd :: SSegd -> Array Int
1230 {-# INLINE_BACKEND sourcesOfSSegd #-}
1231
1232
1233 -- | O(1). Get the length, segment index, starting index, and source id of a segment.
1234 getSegOfSSegd :: SSegd -> Int -> (Int, Int, Int, Int)
1235 {-# INLINE_BACKEND getSegOfSSegd #-}
1236
1237
1238 -- | Produce a segment descriptor that describes the result of appending two
1239 -- segmented arrays.
1240 appendSSegd :: SSegd -> Int -> SSegd -> Int -> SSegd
1241 {-# INLINE_BACKEND appendSSegd #-}
1242
1243
1244 -- Virtual Segment descriptors ================================================
1245 -- | Construct a Virtual Segment Descriptor.
1246 --
1247 -- A `VSegd` is an extension of a `SSegd` that allows data from the underlying
1248 -- flat array to be shared between segments. For example, you can define an array
1249 -- of 10 virtual segments that all have the same length and elements as a
1250 -- single physical segment.
1251 --
1252 -- * Internally we maintain the invariant that all physical segments must be
1253 -- reachable by some virtual segment. This is needed to ensure that operations
1254 -- such as `fold_ss` segmented fold have the right complexity.
1255 --
1256 -- * If you don't need the invariant then you can sidestep the code that
1257 -- maintains it by using the redundant versions of the following operators,
1258 -- and sometimes get faster code.
1259 --
1260 mkVSegd :: Array Int -- ^ (vsegids) Mapping from virtual to physical segments.
1261 -> SSegd -- ^ Scattered Segment descriptor defining the
1262 -- physical segments.
1263 -> VSegd
1264 {-# INLINE_BACKEND mkVSegd #-}
1265
1266
1267 -- | Check whether a `Segd` is well formed.
1268 validVSegd :: VSegd -> Bool
1269 {-# NOINLINE validVSegd #-}
1270 -- NOINLINE because it's only used during debugging.
1271
1272
1273 -- | O(1). Construct an empty `SSegd`.
1274 emptyVSegd :: VSegd
1275 {-# INLINE_BACKEND emptyVSegd #-}
1276
1277
1278 -- | O(1). Construct a `VSegd` containing a single segment of the given length.
1279 singletonVSegd :: Int -> VSegd
1280 {-# INLINE_BACKEND singletonVSegd #-}
1281
1282
1283 -- | O(len). Construct a `VSegd` that describes an array where all virtual
1284 -- segments point to the same physical segment.
1285 replicatedVSegd
1286 :: Int -- ^ Length of segment.
1287 -> Int -- ^ Number of times replicated.
1288 -> VSegd
1289 {-# INLINE_BACKEND replicatedVSegd #-}
1290
1291
1292 -- | O(segs). Promote a plain `Segd` to a `VSegd`.
1293 --
1294 -- The result contains one virtual segment for every physical segment
1295 -- the provided `Segd`.
1296 promoteSegdToVSegd :: Segd -> VSegd
1297 {-# INLINE CONLIKE PHASE_BACKEND promoteSegdToVSegd #-}
1298
1299
1300 -- | O(segs). Promote a plain `SSegd` to a `VSegd`.
1301 --
1302 -- The result contains one virtual segment for every physical segment
1303 -- the provided `SSegd`.
1304 promoteSSegdToVSegd :: SSegd -> VSegd
1305 {-# INLINE CONLIKE PHASE_BACKEND promoteSSegdToVSegd #-}
1306
1307
1308 -- | O(1). If true then the segments are all unshared, and the @vsegids@ field
1309 -- be just @[0..len-1]@.
1310 --
1311 -- Consumers can check this field to avoid demanding the @vsegids@ field.
1312 -- This can avoid the need for it to be constructed in the first place,
1313 -- due to lazy evaluation.
1314 isManifestVSegd :: VSegd -> Bool
1315 {-# INLINE_BACKEND isManifestVSegd #-}
1316
1317
1318 -- | O(1). If true then the @starts@ field is identical to the @indices@ field
1319 -- and the sourceids are all 0s.
1320 --
1321 -- In this case all the data elements are in one contiguous flat array, and
1322 -- consumers can avoid looking at the real starts and sources fields.
1323 isContiguousVSegd :: VSegd -> Bool
1324 {-# INLINE_BACKEND isContiguousVSegd #-}
1325
1326
1327 -- | O(1). Yield the length of a `VSegd`.
1328 lengthOfVSegd :: VSegd -> Int
1329 {-# INLINE_BACKEND lengthOfVSegd #-}
1330
1331
1332 -- | O(1). Yield the vsegids of a `VSegd`.
1333 takeVSegidsOfVSegd :: VSegd -> Array Int
1334 {-# INLINE_BACKEND takeVSegidsOfVSegd #-}
1335
1336
1337 -- | O(1). Yield the vsegids of a `VSegd`, but don't require that every physical
1338 -- segment is referenced by some virtual segment.
1339 --
1340 -- If you're just performing indexing and don't need the invariant that all
1341 -- physical segments are reachable from some virtual segment, then use this
1342 -- version as it's faster. This sidesteps the code that maintains the invariant.
1343 --
1344 -- The stated O(1) complexity assumes that the array has already been fully
1345 -- evalauted. If this is not the case then we can avoid demanding the result
1346 -- of a prior computation on the @vsegids@, thus reducing the cost attributed
1347 -- to that prior computation.
1348 --
1349 takeVSegidsRedundantOfVSegd :: VSegd -> Array Int
1350 {-# INLINE_BACKEND takeVSegidsRedundantOfVSegd #-}
1351
1352
1353 -- | O(1). Yield the `SSegd` of a `VSegd`.
1354 takeSSegdOfVSegd :: VSegd -> SSegd
1355 {-# INLINE_BACKEND takeSSegdOfVSegd #-}
1356
1357
1358 -- | O(1). Yield the `SSegd` of a `VSegd`, but don't require that every physical
1359 -- segment is referenced by some virtual segment.
1360 --
1361 -- See the note in `takeVSegidsRedundantOfVSegd`.
1362 takeSSegdRedundantOfVSegd :: VSegd -> SSegd
1363 {-# INLINE_BACKEND takeSSegdRedundantOfVSegd #-}
1364
1365
1366 -- | O(1). Yield the segment lengths of a `VSegd`.
1367 takeLengthsOfVSegd :: VSegd -> Array Int
1368 {-# INLINE_BACKEND takeLengthsOfVSegd #-}
1369
1370
1371 -- | O(1). Get the length, starting index, and source id of a segment.
1372 getSegOfVSegd :: VSegd -> Int -> (Int, Int, Int)
1373 {-# INLINE_BACKEND getSegOfVSegd #-}
1374
1375
1376 -- | O(segs).
1377 -- Yield a `SSegd` that describes each segment of a `VSegd` individually.
1378 --
1379 -- By doing this we lose information about which virtual segments
1380 -- correspond to the same physical segments.
1381 --
1382 -- /WARNING/: Trying to take the `SSegd` of a nested array that has been
1383 -- constructed with replication can cause index space overflow. This is
1384 -- because the virtual size of the corresponding flat data can be larger
1385 -- than physical memory. If this happens then indices fields and
1386 -- element count in the result will be invalid.
1387 --
1388 unsafeDemoteToSSegdOfVSegd :: VSegd -> SSegd
1389 {-# INLINE_BACKEND unsafeDemoteToSSegdOfVSegd #-}
1390
1391
1392 -- | O(segs). Yield a `Segd` that describes each segment of a `VSegd` individually.
1393 --
1394 -- By doing this we lose information about which virtual segments
1395 -- correspond to the same physical segments.
1396 --
1397 -- See the warning in `unsafeDemoteToSSegdOfVSegd`.
1398 unsafeDemoteToSegdOfVSegd :: VSegd -> Segd
1399 {-# INLINE_BACKEND unsafeDemoteToSegdOfVSegd #-}
1400
1401
1402 -- | Update the @vsegids@ of a `VSegd`, and then cull the physical
1403 -- segment descriptor so that all physical segments are reachable from
1404 -- some virtual segment.
1405 --
1406 updateVSegsOfVSegd :: (Array Int -> Array Int) -> VSegd -> VSegd
1407 {-# INLINE_BACKEND updateVSegsOfVSegd #-}
1408
1409
1410 -- | Update the @vsegids@ of `VSegd`, where the result is guaranteed to
1411 -- cover all physical segments.
1412 --
1413 -- Using this version avoids performing the 'cull' operation which
1414 -- discards unreachable physical segments.
1415 --
1416 -- * The resulting vsegids must cover all physical segments.
1417 -- If they do not then there will be physical segments that are not
1418 -- reachable from some virtual segment, and subsequent operations
1419 -- like @fold_ss@ will have the wrong work complexity.
1420 --
1421 updateVSegsReachableOfVSegd :: (Array Int -> Array Int) -> VSegd -> VSegd
1422 {-# INLINE_BACKEND updateVSegsReachableOfVSegd #-}
1423
1424
1425 -- | Produce a virtual segment descriptor that describes the result of
1426 -- appending two segmented arrays.
1427 appendVSegd
1428 :: VSegd -- ^ Descriptor of first array.
1429 -> Int -- ^ Number of flat physical arrays for first descriptor.
1430 -> VSegd -- ^ Descriptor of second array.
1431 -> Int -- ^ Number of flat physical arrays for second descriptor.
1432 -> VSegd
1433 {-# INLINE_BACKEND appendVSegd #-}
1434
1435
1436 -- | Combine two virtual segment descriptors.
1437 combine2VSegd
1438 :: Sel2 -- ^ Selector for the combine operation.
1439 -> VSegd -- ^ Descriptor of first array.
1440 -> Int -- ^ Number of flat physical arrays for first descriptor.
1441 -> VSegd -- ^ Descriptor of second array.
1442 -> Int -- ^ Number of flat physical arrays for second descriptor.
1443 -> VSegd
1444 {-# INLINE_BACKEND combine2VSegd #-}
1445
1446 {-# RULES
1447
1448 "updateVSegsOfVSegd/updateVSegsOfVSegd"
1449 forall f g vsegd
1450 . updateVSegsOfVSegd f (updateVSegsOfVSegd g vsegd)
1451 = updateVSegsOfVSegd (f . g) vsegd
1452
1453 "updateVSegsOfVSegd/replicate_s/replicateVSegd"
1454 forall segd len reps
1455 . updateVSegsOfVSegd (replicate_s segd) (replicatedVSegd len reps)
1456 = replicatedVSegd len (elementsSegd segd)
1457
1458 #-}
1459
1460
1461 -- Irregular 2D arrays --------------------------------------------------------
1462
1463 -- | O(1). Construct an empty `Arrays` with no elements.
1464 emptys :: Arrays a
1465 {-# INLINE_BACKEND emptys #-}
1466
1467
1468 -- | O(1). Construct an `Arrays` consisting of a single `Array`.
1469 singletons :: (Elt a, Elts a) => Array a -> Arrays a
1470 {-# INLINE_BACKEND singletons #-}
1471
1472
1473 -- | O(1). Yield the number of `Array` in an `Arrays`.
1474 lengths :: Elts a => Arrays a -> Int
1475 {-# INLINE_BACKEND lengths #-}
1476
1477
1478 -- | O(1). Take one of the outer `Array` from an `Arrays`.
1479 unsafeIndexs :: (Elt a, Elts a) => Arrays a -> Int -> Array a
1480 {-# INLINE_BACKEND unsafeIndexs #-}
1481
1482
1483 -- | O(1). Retrieve a single element from an `Arrays`,
1484 -- given the outer and inner indices.
1485 unsafeIndex2s :: (Elt a, Elts a) => Arrays a -> Int -> Int -> a
1486 {-# INLINE_BACKEND unsafeIndex2s #-}
1487
1488
1489 -- | O(n). Append two `Arrays`, using work proportional to the length
1490 -- of the outer array.
1491 appends :: (Elt a, Elts a) => Arrays a -> Arrays a -> Arrays a
1492 {-# INLINE_BACKEND appends #-}
1493
1494
1495 -- | O(number of inner arrays).
1496 -- Convert a boxed vector of `Array` to an `Arrays`.
1497 fromVectors :: (Elt a, Elts a) => VV.Vector (Array a) -> Arrays a
1498 {-# INLINE_BACKEND fromVectors #-}
1499
1500
1501 -- | O(number of inner arrays).
1502 -- Convert an `Arrays` to a boxed vector of `Array`.
1503 toVectors :: (Elt a, Elts a) => Arrays a -> VV.Vector (Array a)
1504 {-# INLINE_BACKEND toVectors #-}
1505
1506
1507 -- Random Arrays --------------------------------------------------------------
1508 -- | Generate an array of the given length full of random data.
1509 -- Good for testing.
1510 randoms :: (Elt a, System.Random.Random a, System.Random.RandomGen g)
1511 => Int -> g -> Array a
1512 {-# INLINE_BACKEND randoms #-}
1513
1514 -- | Generate an array of the given length full of random data.
1515 -- Good for testing.
1516 randomRs :: (Elt a, System.Random.Random a, System.Random.RandomGen g)
1517 => Int -> (a,a) -> g -> Array a
1518 {-# INLINE_BACKEND randomRs #-}
1519
1520
1521 -- Array IO -------------------------------------------------------------------
1522 instance IOElt Int
1523 instance IOElt Double
1524 instance (IOElt a, IOElt b) => IOElt (a, b)
1525
1526
1527 -- | Write an array to a file.
1528 hPut :: IOElt a => Handle -> Array a -> IO ()
1529 {-# INLINE_BACKEND hPut #-}
1530
1531
1532 -- | Read an array from a file.
1533 hGet :: IOElt a => Handle -> IO (Array a)
1534 {-# INLINE_BACKEND hGet #-}
1535
1536
1537 -- | Convert an array to a list of elements.
1538 toList :: Elt a => Array a -> [a]
1539 {-# INLINE_BACKEND toList #-}
1540
1541
1542 -- | Convert a list of elements to an array.
1543 fromList :: Elt a => [a] -> Array a
1544 {-# INLINE_BACKEND fromList #-}
1545
1546
1547 -- Aliases for primitive operations -------------------------------------------
1548 -- We rename these so we can write rules based on the names, and still
1549 -- control exactly when they get inlined.
1550
1551 dph_mod_index :: Int -> Int -> Int
1552 dph_mod_index by idx = idx `GHC.Base.remInt` by
1553 {-# INLINE_BACKEND dph_mod_index #-}
1554
1555 dph_plus :: Int -> Int -> Int
1556 dph_plus x y = x Prelude.+ y
1557 {-# INLINE_BACKEND dph_plus #-}
1558
1559 {-# RULES
1560
1561 "dph_plus" forall m n.
1562 dph_plus (GHC.Base.I# m) (GHC.Base.I# n) = GHC.Base.I# m Prelude.+ GHC.Base.I# n
1563
1564 #-}
1565
1566
1567 tagZeroes :: Array Int -> Array Tag
1568 tagZeroes xs = map (\x -> fromBool (x==0)) xs
1569 {-# INLINE CONLIKE PHASE_BACKEND tagZeroes #-}
1570