5b84b37a4073d2ec9d0468a3a79f15f008970d54
[packages/containers.git] / Data / Sequence / Internal / Sorting.hs
1 {-# LANGUAGE BangPatterns #-}
2
3 {-# OPTIONS_HADDOCK not-home #-}
4
5 -- |
6 --
7 -- = WARNING
8 --
9 -- This module is considered __internal__.
10 --
11 -- The Package Versioning Policy __does not apply__.
12 --
13 -- The contents of this module may change __in any way whatsoever__
14 -- and __without any warning__ between minor versions of this package.
15 --
16 -- Authors importing this module are expected to track development
17 -- closely.
18 --
19 -- = Description
20 --
21 -- This module provides the various sorting implementations for
22 -- "Data.Sequence". Further notes are available in the file sorting.md
23 -- (in this directory).
24
25 module Data.Sequence.Internal.Sorting
26 (
27 -- * Sort Functions
28 sort
29 ,sortBy
30 ,sortOn
31 ,unstableSort
32 ,unstableSortBy
33 ,unstableSortOn
34 ,
35 -- * Heaps
36 -- $heaps
37 Queue(..)
38 ,QList(..)
39 ,IndexedQueue(..)
40 ,IQList(..)
41 ,TaggedQueue(..)
42 ,TQList(..)
43 ,IndexedTaggedQueue(..)
44 ,ITQList(..)
45 ,
46 -- * Merges
47 -- $merges
48 mergeQ
49 ,mergeIQ
50 ,mergeTQ
51 ,mergeITQ
52 ,
53 -- * popMin
54 -- $popMin
55 popMinQ
56 ,popMinIQ
57 ,popMinTQ
58 ,popMinITQ
59 ,
60 -- * Building
61 -- $building
62 buildQ
63 ,buildIQ
64 ,buildTQ
65 ,buildITQ
66 ,
67 -- * Special folds
68 -- $folds
69 foldToMaybeTree
70 ,foldToMaybeWithIndexTree)
71 where
72
73 import Data.Sequence.Internal
74 (Elem(..), Seq(..), Node(..), Digit(..), Sized(..), FingerTree(..),
75 replicateA, foldDigit, foldNode, foldWithIndexDigit,
76 foldWithIndexNode)
77 import Utils.Containers.Internal.State (State(..), execState)
78 -- | \( O(n \log n) \). 'sort' sorts the specified 'Seq' by the natural
79 -- ordering of its elements. The sort is stable. If stability is not
80 -- required, 'unstableSort' can be slightly faster.
81 --
82 -- @since 0.3.0
83 sort :: Ord a => Seq a -> Seq a
84 sort = sortBy compare
85
86 -- | \( O(n \log n) \). 'sortBy' sorts the specified 'Seq' according to the
87 -- specified comparator. The sort is stable. If stability is not required,
88 -- 'unstableSortBy' can be slightly faster.
89 --
90 -- @since 0.3.0
91 sortBy :: (a -> a -> Ordering) -> Seq a -> Seq a
92 sortBy cmp (Seq xs) =
93 maybe
94 (Seq EmptyT)
95 (execState (replicateA (size xs) (State (popMinIQ cmp))))
96 (buildIQ cmp (\s (Elem x) -> IQ s x IQNil) 0 xs)
97
98 -- | \( O(n \log n) \). 'sortOn' sorts the specified 'Seq' by comparing
99 -- the results of a key function applied to each element. @'sortOn' f@ is
100 -- equivalent to @'sortBy' ('compare' ``Data.Function.on`` f)@, but has the
101 -- performance advantage of only evaluating @f@ once for each element in the
102 -- input list. This is called the decorate-sort-undecorate paradigm, or
103 -- Schwartzian transform.
104 --
105 -- An example of using 'sortOn' might be to sort a 'Seq' of strings
106 -- according to their length:
107 --
108 -- > sortOn length (fromList ["alligator", "monkey", "zebra"]) == fromList ["zebra", "monkey", "alligator"]
109 --
110 -- If, instead, 'sortBy' had been used, 'length' would be evaluated on
111 -- every comparison, giving \( O(n \log n) \) evaluations, rather than
112 -- \( O(n) \).
113 --
114 -- If @f@ is very cheap (for example a record selector, or 'fst'),
115 -- @'sortBy' ('compare' ``Data.Function.on`` f)@ will be faster than
116 -- @'sortOn' f@.
117 --
118 -- @since 0.5.11
119 sortOn :: Ord b => (a -> b) -> Seq a -> Seq a
120 sortOn f (Seq xs) =
121 maybe
122 (Seq EmptyT)
123 (execState (replicateA (size xs) (State (popMinITQ compare))))
124 (buildITQ compare (\s (Elem x) -> ITQ s (f x) x ITQNil) 0 xs)
125
126 -- | \( O(n \log n) \). 'unstableSort' sorts the specified 'Seq' by
127 -- the natural ordering of its elements, but the sort is not stable.
128 -- This algorithm is frequently faster and uses less memory than 'sort'.
129
130 -- Notes on the implementation and choice of heap are available in
131 -- the file sorting.md (in this directory).
132 --
133 -- @since 0.3.0
134 unstableSort :: Ord a => Seq a -> Seq a
135 unstableSort = unstableSortBy compare
136
137 -- | \( O(n \log n) \). A generalization of 'unstableSort', 'unstableSortBy'
138 -- takes an arbitrary comparator and sorts the specified sequence.
139 -- The sort is not stable. This algorithm is frequently faster and
140 -- uses less memory than 'sortBy'.
141 --
142 -- @since 0.3.0
143 unstableSortBy :: (a -> a -> Ordering) -> Seq a -> Seq a
144 unstableSortBy cmp (Seq xs) =
145 maybe
146 (Seq EmptyT)
147 (execState (replicateA (size xs) (State (popMinQ cmp))))
148 (buildQ cmp (\(Elem x) -> Q x Nil) xs)
149
150 -- | \( O(n \log n) \). 'unstableSortOn' sorts the specified 'Seq' by
151 -- comparing the results of a key function applied to each element.
152 -- @'unstableSortOn' f@ is equivalent to @'unstableSortBy' ('compare' ``Data.Function.on`` f)@,
153 -- but has the performance advantage of only evaluating @f@ once for each
154 -- element in the input list. This is called the
155 -- decorate-sort-undecorate paradigm, or Schwartzian transform.
156 --
157 -- An example of using 'unstableSortOn' might be to sort a 'Seq' of strings
158 -- according to their length:
159 --
160 -- > unstableSortOn length (fromList ["alligator", "monkey", "zebra"]) == fromList ["zebra", "monkey", "alligator"]
161 --
162 -- If, instead, 'unstableSortBy' had been used, 'length' would be evaluated on
163 -- every comparison, giving \( O(n \log n) \) evaluations, rather than
164 -- \( O(n) \).
165 --
166 -- If @f@ is very cheap (for example a record selector, or 'fst'),
167 -- @'unstableSortBy' ('compare' ``Data.Function.on`` f)@ will be faster than
168 -- @'unstableSortOn' f@.
169 --
170 -- @since 0.5.11
171 unstableSortOn :: Ord b => (a -> b) -> Seq a -> Seq a
172 unstableSortOn f (Seq xs) =
173 maybe
174 (Seq EmptyT)
175 (execState (replicateA (size xs) (State (popMinTQ compare))))
176 (buildTQ compare (\(Elem x) -> TQ (f x) x TQNil) xs)
177
178 ------------------------------------------------------------------------
179 -- $heaps
180 --
181 -- The following are definitions for various specialized pairing heaps.
182 --
183 -- All of the heaps are defined to be non-empty, which speeds up the
184 -- merge functions.
185 ------------------------------------------------------------------------
186
187 -- | A simple pairing heap.
188 data Queue e = Q !e (QList e)
189 data QList e
190 = Nil
191 | QCons {-# UNPACK #-} !(Queue e)
192 (QList e)
193
194 -- | A pairing heap tagged with the original position of elements,
195 -- to allow for stable sorting.
196 data IndexedQueue e =
197 IQ {-# UNPACK #-} !Int !e (IQList e)
198 data IQList e
199 = IQNil
200 | IQCons {-# UNPACK #-} !(IndexedQueue e)
201 (IQList e)
202
203 -- | A pairing heap tagged with some key for sorting elements, for use
204 -- in 'unstableSortOn'.
205 data TaggedQueue a b =
206 TQ !a b (TQList a b)
207 data TQList a b
208 = TQNil
209 | TQCons {-# UNPACK #-} !(TaggedQueue a b)
210 (TQList a b)
211
212 -- | A pairing heap tagged with both a key and the original position
213 -- of its elements, for use in 'sortOn'.
214 data IndexedTaggedQueue e a =
215 ITQ {-# UNPACK #-} !Int !e a (ITQList e a)
216 data ITQList e a
217 = ITQNil
218 | ITQCons {-# UNPACK #-} !(IndexedTaggedQueue e a)
219 (ITQList e a)
220
221 infixr 8 `ITQCons`, `TQCons`, `QCons`, `IQCons`
222
223 ------------------------------------------------------------------------
224 -- $merges
225 --
226 -- The following are definitions for "merge" for each of the heaps
227 -- above. Each takes a comparison function which is used to order the
228 -- elements.
229 ------------------------------------------------------------------------
230
231 -- | 'mergeQ' merges two 'Queue's.
232 mergeQ :: (a -> a -> Ordering) -> Queue a -> Queue a -> Queue a
233 mergeQ cmp q1@(Q x1 ts1) q2@(Q x2 ts2)
234 | cmp x1 x2 == GT = Q x2 (q1 `QCons` ts2)
235 | otherwise = Q x1 (q2 `QCons` ts1)
236
237 -- | 'mergeTQ' merges two 'TaggedQueue's, based on the tag value.
238 mergeTQ :: (a -> a -> Ordering)
239 -> TaggedQueue a b
240 -> TaggedQueue a b
241 -> TaggedQueue a b
242 mergeTQ cmp q1@(TQ x1 y1 ts1) q2@(TQ x2 y2 ts2)
243 | cmp x1 x2 == GT = TQ x2 y2 (q1 `TQCons` ts2)
244 | otherwise = TQ x1 y1 (q2 `TQCons` ts1)
245
246 -- | 'mergeIQ' merges two 'IndexedQueue's, taking into account the
247 -- original position of the elements.
248 mergeIQ :: (a -> a -> Ordering)
249 -> IndexedQueue a
250 -> IndexedQueue a
251 -> IndexedQueue a
252 mergeIQ cmp q1@(IQ i1 x1 ts1) q2@(IQ i2 x2 ts2) =
253 case cmp x1 x2 of
254 LT -> IQ i1 x1 (q2 `IQCons` ts1)
255 EQ | i1 <= i2 -> IQ i1 x1 (q2 `IQCons` ts1)
256 _ -> IQ i2 x2 (q1 `IQCons` ts2)
257
258 -- | 'mergeITQ' merges two 'IndexedTaggedQueue's, based on the tag
259 -- value, taking into account the original position of the elements.
260 mergeITQ
261 :: (a -> a -> Ordering)
262 -> IndexedTaggedQueue a b
263 -> IndexedTaggedQueue a b
264 -> IndexedTaggedQueue a b
265 mergeITQ cmp q1@(ITQ i1 x1 y1 ts1) q2@(ITQ i2 x2 y2 ts2) =
266 case cmp x1 x2 of
267 LT -> ITQ i1 x1 y1 (q2 `ITQCons` ts1)
268 EQ | i1 <= i2 -> ITQ i1 x1 y1 (q2 `ITQCons` ts1)
269 _ -> ITQ i2 x2 y2 (q1 `ITQCons` ts2)
270
271 ------------------------------------------------------------------------
272 -- $popMin
273 --
274 -- The following are definitions for @popMin@, a function which
275 -- constructs a stateful action which pops the smallest element from the
276 -- queue, where "smallest" is according to the supplied comparison
277 -- function.
278 --
279 -- All of the functions fail on an empty queue.
280 --
281 -- Each of these functions is structured something like this:
282 --
283 -- @popMinQ cmp (Q x ts) = (mergeQs ts, x)@
284 --
285 -- The reason the call to @mergeQs@ is lazy is that it will be bottom
286 -- for the last element in the queue, preventing us from evaluating the
287 -- fully sorted sequence.
288 ------------------------------------------------------------------------
289
290 -- | Pop the smallest element from the queue, using the supplied
291 -- comparator.
292 popMinQ :: (e -> e -> Ordering) -> Queue e -> (Queue e, e)
293 popMinQ cmp (Q x xs) = (mergeQs xs, x)
294 where
295 mergeQs (t `QCons` Nil) = t
296 mergeQs (t1 `QCons` t2 `QCons` Nil) = t1 <+> t2
297 mergeQs (t1 `QCons` t2 `QCons` ts) = (t1 <+> t2) <+> mergeQs ts
298 mergeQs Nil = error "popMinQ: tried to pop from empty queue"
299 (<+>) = mergeQ cmp
300
301 -- | Pop the smallest element from the queue, using the supplied
302 -- comparator, deferring to the item's original position when the
303 -- comparator returns 'EQ'.
304 popMinIQ :: (e -> e -> Ordering) -> IndexedQueue e -> (IndexedQueue e, e)
305 popMinIQ cmp (IQ _ x xs) = (mergeQs xs, x)
306 where
307 mergeQs (t `IQCons` IQNil) = t
308 mergeQs (t1 `IQCons` t2 `IQCons` IQNil) = t1 <+> t2
309 mergeQs (t1 `IQCons` t2 `IQCons` ts) = (t1 <+> t2) <+> mergeQs ts
310 mergeQs IQNil = error "popMinQ: tried to pop from empty queue"
311 (<+>) = mergeIQ cmp
312
313 -- | Pop the smallest element from the queue, using the supplied
314 -- comparator on the tag.
315 popMinTQ :: (a -> a -> Ordering) -> TaggedQueue a b -> (TaggedQueue a b, b)
316 popMinTQ cmp (TQ _ x xs) = (mergeQs xs, x)
317 where
318 mergeQs (t `TQCons` TQNil) = t
319 mergeQs (t1 `TQCons` t2 `TQCons` TQNil) = t1 <+> t2
320 mergeQs (t1 `TQCons` t2 `TQCons` ts) = (t1 <+> t2) <+> mergeQs ts
321 mergeQs TQNil = error "popMinQ: tried to pop from empty queue"
322 (<+>) = mergeTQ cmp
323
324 -- | Pop the smallest element from the queue, using the supplied
325 -- comparator on the tag, deferring to the item's original position
326 -- when the comparator returns 'EQ'.
327 popMinITQ :: (e -> e -> Ordering)
328 -> IndexedTaggedQueue e b
329 -> (IndexedTaggedQueue e b, b)
330 popMinITQ cmp (ITQ _ _ x xs) = (mergeQs xs, x)
331 where
332 mergeQs (t `ITQCons` ITQNil) = t
333 mergeQs (t1 `ITQCons` t2 `ITQCons` ITQNil) = t1 <+> t2
334 mergeQs (t1 `ITQCons` t2 `ITQCons` ts) = (t1 <+> t2) <+> mergeQs ts
335 mergeQs ITQNil = error "popMinQ: tried to pop from empty queue"
336 (<+>) = mergeITQ cmp
337
338 ------------------------------------------------------------------------
339 -- $building
340 --
341 -- The following are definitions for functions to build queues, given a
342 -- comparison function.
343 ------------------------------------------------------------------------
344
345 buildQ :: (b -> b -> Ordering) -> (a -> Queue b) -> FingerTree a -> Maybe (Queue b)
346 buildQ cmp = foldToMaybeTree (mergeQ cmp)
347
348 buildIQ
349 :: (b -> b -> Ordering)
350 -> (Int -> Elem y -> IndexedQueue b)
351 -> Int
352 -> FingerTree (Elem y)
353 -> Maybe (IndexedQueue b)
354 buildIQ cmp = foldToMaybeWithIndexTree (mergeIQ cmp)
355
356 buildTQ
357 :: (b -> b -> Ordering)
358 -> (a -> TaggedQueue b c)
359 -> FingerTree a
360 -> Maybe (TaggedQueue b c)
361 buildTQ cmp = foldToMaybeTree (mergeTQ cmp)
362
363 buildITQ
364 :: (b -> b -> Ordering)
365 -> (Int -> Elem y -> IndexedTaggedQueue b c)
366 -> Int
367 -> FingerTree (Elem y)
368 -> Maybe (IndexedTaggedQueue b c)
369 buildITQ cmp = foldToMaybeWithIndexTree (mergeITQ cmp)
370
371 ------------------------------------------------------------------------
372 -- $folds
373 --
374 -- A big part of what makes the heaps fast is that they're non empty,
375 -- so the merge function can avoid an extra case match. To take
376 -- advantage of this, though, we need specialized versions of 'foldMap'
377 -- and 'Data.Sequence.foldMapWithIndex', which can alternate between
378 -- calling the faster semigroup-like merge when folding over non empty
379 -- structures (like 'Node' and 'Digit'), and the
380 -- 'Data.Semirgroup.Option'-like mappend, when folding over structures
381 -- which can be empty (like 'FingerTree').
382 ------------------------------------------------------------------------
383
384 -- | A 'foldMap'-like function, specialized to the
385 -- 'Data.Semigroup.Option' monoid, which takes advantage of the
386 -- internal structure of 'Seq' to avoid wrapping in 'Maybe' at certain
387 -- points.
388 foldToMaybeTree :: (b -> b -> b) -> (a -> b) -> FingerTree a -> Maybe b
389 foldToMaybeTree _ _ EmptyT = Nothing
390 foldToMaybeTree _ f (Single xs) = Just (f xs)
391 foldToMaybeTree (<+>) f (Deep _ pr m sf) =
392 Just (maybe (pr' <+> sf') ((pr' <+> sf') <+>) m')
393 where
394 pr' = foldDigit (<+>) f pr
395 sf' = foldDigit (<+>) f sf
396 m' = foldToMaybeTree (<+>) (foldNode (<+>) f) m
397 {-# INLINE foldToMaybeTree #-}
398
399 -- | A 'Data.Sequence.foldMapWithIndex'-like function, specialized to the
400 -- 'Data.Semigroup.Option' monoid, which takes advantage of the
401 -- internal structure of 'Seq' to avoid wrapping in 'Maybe' at certain
402 -- points.
403 foldToMaybeWithIndexTree :: (b -> b -> b)
404 -> (Int -> Elem y -> b)
405 -> Int
406 -> FingerTree (Elem y)
407 -> Maybe b
408 foldToMaybeWithIndexTree = foldToMaybeWithIndexTree'
409 where
410 {-# SPECIALISE foldToMaybeWithIndexTree' :: (b -> b -> b) -> (Int -> Elem y -> b) -> Int -> FingerTree (Elem y) -> Maybe b #-}
411 {-# SPECIALISE foldToMaybeWithIndexTree' :: (b -> b -> b) -> (Int -> Node y -> b) -> Int -> FingerTree (Node y) -> Maybe b #-}
412 foldToMaybeWithIndexTree'
413 :: Sized a
414 => (b -> b -> b) -> (Int -> a -> b) -> Int -> FingerTree a -> Maybe b
415 foldToMaybeWithIndexTree' _ _ !_s EmptyT = Nothing
416 foldToMaybeWithIndexTree' _ f s (Single xs) = Just (f s xs)
417 foldToMaybeWithIndexTree' (<+>) f s (Deep _ pr m sf) =
418 Just (maybe (pr' <+> sf') ((pr' <+> sf') <+>) m')
419 where
420 pr' = digit (<+>) f s pr
421 sf' = digit (<+>) f sPsprm sf
422 m' = foldToMaybeWithIndexTree' (<+>) (node (<+>) f) sPspr m
423 !sPspr = s + size pr
424 !sPsprm = sPspr + size m
425 {-# SPECIALISE digit :: (b -> b -> b) -> (Int -> Elem y -> b) -> Int -> Digit (Elem y) -> b #-}
426 {-# SPECIALISE digit :: (b -> b -> b) -> (Int -> Node y -> b) -> Int -> Digit (Node y) -> b #-}
427 digit
428 :: Sized a
429 => (b -> b -> b) -> (Int -> a -> b) -> Int -> Digit a -> b
430 digit = foldWithIndexDigit
431 {-# SPECIALISE node :: (b -> b -> b) -> (Int -> Elem y -> b) -> Int -> Node (Elem y) -> b #-}
432 {-# SPECIALISE node :: (b -> b -> b) -> (Int -> Node y -> b) -> Int -> Node (Node y) -> b #-}
433 node
434 :: Sized a
435 => (b -> b -> b) -> (Int -> a -> b) -> Int -> Node a -> b
436 node = foldWithIndexNode
437 {-# INLINE foldToMaybeWithIndexTree #-}