From c3594778c6efed9a521007cbdc98dad56f001689 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Donnacha=20Ois=C3=ADn=20Kidney?=
Date: Thu, 11 Jan 2018 20:13:41 +0000
Subject: [PATCH] Added notes on skew-heap attempt (#490)
* Added notes on skew-heap attempt
* added reference to notes in comments
---
Data/Sequence/Internal.hs | 6 +++
Data/Sequence/sorting.md | 100 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 106 insertions(+)
create mode 100644 Data/Sequence/sorting.md
diff --git a/Data/Sequence/Internal.hs b/Data/Sequence/Internal.hs
index 71f4126..fc78701 100644
--- a/Data/Sequence/Internal.hs
+++ b/Data/Sequence/Internal.hs
@@ -4418,6 +4418,9 @@ zipWith4 f s1 s2 s3 s4 = zipWith' ($) (zipWith3' f s1' s2' s3') s4'
--
-- mail@doisinkidney.com, 4/30/17
------------------------------------------------------------------------
+-- Further notes are available in the file sorting.md (in this
+-- directory).
+------------------------------------------------------------------------
-- | \( O(n \log n) \). 'sort' sorts the specified 'Seq' by the natural
-- ordering of its elements. The sort is stable.
@@ -4436,6 +4439,9 @@ sortBy cmp xs = fromList2 (length xs) (Data.List.sortBy cmp (toList xs))
-- | \( O(n \log n) \). 'unstableSort' sorts the specified 'Seq' by
-- the natural ordering of its elements, but the sort is not stable.
-- This algorithm is frequently faster and uses less memory than 'sort'.
+
+-- Notes on the implementation and choice of heap are available in
+-- the file sorting.md (in this directory).
unstableSort :: Ord a => Seq a -> Seq a
unstableSort = unstableSortBy compare
diff --git a/Data/Sequence/sorting.md b/Data/Sequence/sorting.md
new file mode 100644
index 0000000..86f53a2
--- /dev/null
+++ b/Data/Sequence/sorting.md
@@ -0,0 +1,100 @@
+# Sorting
+
+Data.Sequence exports two methods of sorting: stable and unstable. The stable sort is simply a call to Data.List.Sort, whereas the unstable sort constructs a pairing heap, and uses it to perform heap sort.
+
+The pairing heap seems to particularly suit the structure of the finger tree, as other heaps have not managed to beat it. Specifically, when compared to a skew heap:
+
+```haskell
+unstableSortBy :: (a -> a -> Ordering) -> Seq a -> Seq a
+unstableSortBy cmp (Seq xs) =
+ execState (replicateA (size xs) (popMin cmp)) (toSkew cmp (Seq xs))
+
+data Skew a = Nil | Br a !(Skew a) !(Skew a)
+
+popMin :: (e -> e -> Ordering) -> State (Skew e) e
+popMin cmp = State unrollPQ'
+ where
+ {-# INLINE unrollPQ' #-}
+ unrollPQ' (Br x ls rs) = (mergeSkew cmp ls rs, x)
+
+toSkew :: (e -> e -> Ordering) -> Seq e -> Skew e
+toSkew cmp (Seq xs') = toSkewTree cmp (\(Elem a) -> Br a Nil Nil) xs'
+ where
+ toSkewTree :: (b -> b -> Ordering) -> (a -> Skew b) -> FingerTree a -> Skew b
+ toSkewTree _ _ EmptyT = Nil
+ toSkewTree _ f (Single xs) = f xs
+ toSkewTree cmp f (Deep n pr m sf) = pr' <+> sf' <+> m'
+ where
+ pr' = toSkewDigit cmp f pr
+ sf' = toSkewDigit cmp f sf
+ m' = toSkewTree cmp (toSkewNode cmp f) m
+ (<+>) = mergeSkew cmp
+ toSkewDigit :: (b -> b -> Ordering) -> (a -> Skew b) -> Digit a -> Skew b
+ toSkewDigit cmp f dig =
+ case dig of
+ One a -> f a
+ Two a b -> f a <+> f b
+ Three a b c -> f a <+> f b <+> f c
+ Four a b c d -> (f a <+> f b) <+> (f c <+> f d)
+ where
+ (<+>) = mergeSkew cmp
+ toSkewNode cmp f node =
+ case node of
+ Node2 _ a b -> f a <+> f b
+ Node3 _ a b c -> f a <+> f b <+> f c
+ where
+ (<+>) = mergeSkew cmp
+
+mergeSkew :: (a -> a -> Ordering) -> Skew a -> Skew a -> Skew a
+mergeSkew cmp Nil ys = ys
+mergeSkew cmp xs Nil = xs
+mergeSkew cmp h1@(Br x lx rx) h2@(Br y ly ry)
+ | cmp x y == GT = Br y (mergeSkew cmp h1 ry) ly
+ | otherwise = Br x (mergeSkew cmp h2 rx) lx
+```
+
+The pairing heap implementation is faster in every aspect:
+
+```
+benchmarking 1000000/unsorted/pairing
+time 2.005 s (NaN s .. 2.102 s)
+ 1.000 RÂ² (0.998 RÂ² .. 1.000 RÂ²)
+mean 2.069 s (2.060 s .. 2.075 s)
+std dev 9.340 ms (0.0 s .. 10.67 ms)
+variance introduced by outliers: 19% (moderately inflated)
+
+benchmarking 1000000/unsorted/skew
+time 2.042 s (1.637 s .. 2.267 s)
+ 0.995 RÂ² (0.990 RÂ² .. NaN RÂ²)
+mean 2.165 s (2.065 s .. 2.217 s)
+std dev 87.10 ms (0.0 s .. 91.26 ms)
+variance introduced by outliers: 19% (moderately inflated)
+
+benchmarking 1000000/ascending/pairing
+time 191.4 ms (187.8 ms .. 193.5 ms)
+ 1.000 RÂ² (0.999 RÂ² .. 1.000 RÂ²)
+mean 197.0 ms (194.7 ms .. 200.0 ms)
+std dev 3.221 ms (2.441 ms .. 3.924 ms)
+variance introduced by outliers: 14% (moderately inflated)
+
+benchmarking 1000000/ascending/skew
+time 232.3 ms (227.0 ms .. 238.9 ms)
+ 0.999 RÂ² (0.997 RÂ² .. 1.000 RÂ²)
+mean 233.9 ms (230.6 ms .. 236.2 ms)
+std dev 3.678 ms (2.790 ms .. 4.777 ms)
+variance introduced by outliers: 14% (moderately inflated)
+
+benchmarking 1000000/descending/pairing
+time 204.6 ms (190.2 ms .. 214.1 ms)
+ 0.998 RÂ² (0.991 RÂ² .. 1.000 RÂ²)
+mean 208.4 ms (204.1 ms .. 210.6 ms)
+std dev 4.051 ms (1.299 ms .. 5.288 ms)
+variance introduced by outliers: 14% (moderately inflated)
+
+benchmarking 1000000/descending/skew
+time 229.9 ms (212.7 ms .. 240.1 ms)
+ 0.998 RÂ² (0.996 RÂ² .. 1.000 RÂ²)
+mean 238.8 ms (231.3 ms .. 241.4 ms)
+std dev 5.006 ms (269.0 Î¼s .. 6.151 ms)
+variance introduced by outliers: 16% (moderately inflated)
+```
--
1.9.1