Improved performance of Data.Set
[packages/containers.git] / Data / Set.hs
1 {-# OPTIONS -cpp #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.Set
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of sets.
12 --
13 -- Since many function names (but not the type name) clash with
14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
15 --
16 -- > import Data.Set (Set)
17 -- > import qualified Data.Set as Set
18 --
19 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
20 -- trees of /bounded balance/) as described by:
21 --
22 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
23 -- Journal of Functional Programming 3(4):553-562, October 1993,
24 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
25 --
26 -- * J. Nievergelt and E.M. Reingold,
27 -- \"/Binary search trees of bounded balance/\",
28 -- SIAM journal of computing 2(1), March 1973.
29 --
30 -- Note that the implementation is /left-biased/ -- the elements of a
31 -- first argument are always preferred to the second, for example in
32 -- 'union' or 'insert'. Of course, left-biasing can only be observed
33 -- when equality is an equivalence relation instead of structural
34 -- equality.
35 -----------------------------------------------------------------------------
36
37 module Data.Set (
38 -- * Set type
39 #if !defined(TESTING)
40 Set -- instance Eq,Ord,Show,Read,Data,Typeable
41 #else
42 Set(..)
43 #endif
44
45 -- * Operators
46 , (\\)
47
48 -- * Query
49 , null
50 , size
51 , member
52 , notMember
53 , isSubsetOf
54 , isProperSubsetOf
55
56 -- * Construction
57 , empty
58 , singleton
59 , insert
60 , delete
61
62 -- * Combine
63 , union
64 , unions
65 , difference
66 , intersection
67
68 -- * Filter
69 , filter
70 , partition
71 , split
72 , splitMember
73
74 -- * Map
75 , map
76 , mapMonotonic
77
78 -- * Fold
79 , fold
80
81 -- * Min\/Max
82 , findMin
83 , findMax
84 , deleteMin
85 , deleteMax
86 , deleteFindMin
87 , deleteFindMax
88 , maxView
89 , minView
90
91 -- * Conversion
92
93 -- ** List
94 , elems
95 , toList
96 , fromList
97
98 -- ** Ordered list
99 , toAscList
100 , fromAscList
101 , fromDistinctAscList
102
103 -- * Debugging
104 , showTree
105 , showTreeWith
106 , valid
107
108 #if defined(TESTING)
109 -- Internals (for testing)
110 , bin
111 , balanced
112 , join
113 , merge
114 #endif
115 ) where
116
117 import Prelude hiding (filter,foldr,null,map)
118 import qualified Data.List as List
119 import Data.Monoid (Monoid(..))
120 import Data.Foldable (Foldable(foldMap))
121 #ifndef __GLASGOW_HASKELL__
122 import Data.Typeable (Typeable, typeOf, typeOfDefault)
123 #endif
124 import Data.Typeable (Typeable1(..), TyCon, mkTyCon, mkTyConApp)
125
126 {-
127 -- just for testing
128 import QuickCheck
129 import List (nub,sort)
130 import qualified List
131 -}
132
133 #if __GLASGOW_HASKELL__
134 import Text.Read
135 import Data.Data (Data(..), mkNoRepType, gcast1)
136 #endif
137
138 {--------------------------------------------------------------------
139 Operators
140 --------------------------------------------------------------------}
141 infixl 9 \\ --
142
143 -- | /O(n+m)/. See 'difference'.
144 (\\) :: Ord a => Set a -> Set a -> Set a
145 m1 \\ m2 = difference m1 m2
146 {-# INLINE (\\) #-}
147
148 {--------------------------------------------------------------------
149 Sets are size balanced trees
150 --------------------------------------------------------------------}
151 -- | A set of values @a@.
152 data Set a = Tip
153 | Bin {-# UNPACK #-} !Size a !(Set a) !(Set a)
154
155 type Size = Int
156
157 instance Ord a => Monoid (Set a) where
158 mempty = empty
159 mappend = union
160 mconcat = unions
161
162 instance Foldable Set where
163 foldMap _ Tip = mempty
164 foldMap f (Bin _s k l r) = foldMap f l `mappend` f k `mappend` foldMap f r
165
166 #if __GLASGOW_HASKELL__
167
168 {--------------------------------------------------------------------
169 A Data instance
170 --------------------------------------------------------------------}
171
172 -- This instance preserves data abstraction at the cost of inefficiency.
173 -- We omit reflection services for the sake of data abstraction.
174
175 instance (Data a, Ord a) => Data (Set a) where
176 gfoldl f z set = z fromList `f` (toList set)
177 toConstr _ = error "toConstr"
178 gunfold _ _ = error "gunfold"
179 dataTypeOf _ = mkNoRepType "Data.Set.Set"
180 dataCast1 f = gcast1 f
181
182 #endif
183
184 {--------------------------------------------------------------------
185 Query
186 --------------------------------------------------------------------}
187 -- | /O(1)/. Is this the empty set?
188 null :: Set a -> Bool
189 null Tip = True
190 null (Bin {}) = False
191 {-# INLINE null #-}
192
193 -- | /O(1)/. The number of elements in the set.
194 size :: Set a -> Int
195 size = go
196 where
197 go Tip = 0
198 go (Bin sz _ _ _) = sz
199 {-# INLINE size #-}
200
201 -- | /O(log n)/. Is the element in the set?
202 member :: Ord a => a -> Set a -> Bool
203 member x = x `seq` go
204 where
205 go Tip = False
206 go (Bin _ y l r) = case compare x y of
207 LT -> go l
208 GT -> go r
209 EQ -> True
210 {-# INLINE member #-}
211
212 -- | /O(log n)/. Is the element not in the set?
213 notMember :: Ord a => a -> Set a -> Bool
214 notMember a t = not $ member a t
215 {-# INLINE notMember #-}
216
217 {--------------------------------------------------------------------
218 Construction
219 --------------------------------------------------------------------}
220 -- | /O(1)/. The empty set.
221 empty :: Set a
222 empty = Tip
223 {-# INLINE empty #-}
224
225 -- | /O(1)/. Create a singleton set.
226 singleton :: a -> Set a
227 singleton x = Bin 1 x Tip Tip
228 {-# INLINE singleton #-}
229
230 {--------------------------------------------------------------------
231 Insertion, Deletion
232 --------------------------------------------------------------------}
233 -- | /O(log n)/. Insert an element in a set.
234 -- If the set already contains an element equal to the given value,
235 -- it is replaced with the new value.
236 insert :: Ord a => a -> Set a -> Set a
237 insert x = x `seq` go
238 where
239 go Tip = singleton x
240 go (Bin sz y l r) = case compare x y of
241 LT -> balance y (go l) r
242 GT -> balance y l (go r)
243 EQ -> Bin sz x l r
244 {-# INLINE insert #-}
245
246 -- | /O(log n)/. Delete an element from a set.
247 delete :: Ord a => a -> Set a -> Set a
248 delete x = x `seq` go
249 where
250 go Tip = Tip
251 go (Bin _ y l r) = case compare x y of
252 LT -> balance y (go l) r
253 GT -> balance y l (go r)
254 EQ -> glue l r
255 {-# INLINE delete #-}
256
257 {--------------------------------------------------------------------
258 Subset
259 --------------------------------------------------------------------}
260 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
261 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
262 isProperSubsetOf s1 s2
263 = (size s1 < size s2) && (isSubsetOf s1 s2)
264
265
266 -- | /O(n+m)/. Is this a subset?
267 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
268 isSubsetOf :: Ord a => Set a -> Set a -> Bool
269 isSubsetOf t1 t2
270 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
271
272 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
273 isSubsetOfX Tip _ = True
274 isSubsetOfX _ Tip = False
275 isSubsetOfX (Bin _ x l r) t
276 = found && isSubsetOfX l lt && isSubsetOfX r gt
277 where
278 (lt,found,gt) = splitMember x t
279
280
281 {--------------------------------------------------------------------
282 Minimal, Maximal
283 --------------------------------------------------------------------}
284 -- | /O(log n)/. The minimal element of a set.
285 findMin :: Set a -> a
286 findMin (Bin _ x Tip _) = x
287 findMin (Bin _ _ l _) = findMin l
288 findMin Tip = error "Set.findMin: empty set has no minimal element"
289
290 -- | /O(log n)/. The maximal element of a set.
291 findMax :: Set a -> a
292 findMax (Bin _ x _ Tip) = x
293 findMax (Bin _ _ _ r) = findMax r
294 findMax Tip = error "Set.findMax: empty set has no maximal element"
295
296 -- | /O(log n)/. Delete the minimal element.
297 deleteMin :: Set a -> Set a
298 deleteMin (Bin _ _ Tip r) = r
299 deleteMin (Bin _ x l r) = balance x (deleteMin l) r
300 deleteMin Tip = Tip
301
302 -- | /O(log n)/. Delete the maximal element.
303 deleteMax :: Set a -> Set a
304 deleteMax (Bin _ _ l Tip) = l
305 deleteMax (Bin _ x l r) = balance x l (deleteMax r)
306 deleteMax Tip = Tip
307
308 {--------------------------------------------------------------------
309 Union.
310 --------------------------------------------------------------------}
311 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
312 unions :: Ord a => [Set a] -> Set a
313 unions = foldlStrict union empty
314 {-# INLINE unions #-}
315
316 -- | /O(n+m)/. The union of two sets, preferring the first set when
317 -- equal elements are encountered.
318 -- The implementation uses the efficient /hedge-union/ algorithm.
319 -- Hedge-union is more efficient on (bigset `union` smallset).
320 union :: Ord a => Set a -> Set a -> Set a
321 union Tip t2 = t2
322 union t1 Tip = t1
323 union t1 t2 = hedgeUnion (const LT) (const GT) t1 t2
324 {-# INLINE union #-}
325
326 hedgeUnion :: Ord a
327 => (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a -> Set a
328 hedgeUnion _ _ t1 Tip
329 = t1
330 hedgeUnion cmplo cmphi Tip (Bin _ x l r)
331 = join x (filterGt cmplo l) (filterLt cmphi r)
332 hedgeUnion cmplo cmphi (Bin _ x l r) t2
333 = join x (hedgeUnion cmplo cmpx l (trim cmplo cmpx t2))
334 (hedgeUnion cmpx cmphi r (trim cmpx cmphi t2))
335 where
336 cmpx y = compare x y
337
338 {--------------------------------------------------------------------
339 Difference
340 --------------------------------------------------------------------}
341 -- | /O(n+m)/. Difference of two sets.
342 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
343 difference :: Ord a => Set a -> Set a -> Set a
344 difference Tip _ = Tip
345 difference t1 Tip = t1
346 difference t1 t2 = hedgeDiff (const LT) (const GT) t1 t2
347 {-# INLINE difference #-}
348
349 hedgeDiff :: Ord a
350 => (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a -> Set a
351 hedgeDiff _ _ Tip _
352 = Tip
353 hedgeDiff cmplo cmphi (Bin _ x l r) Tip
354 = join x (filterGt cmplo l) (filterLt cmphi r)
355 hedgeDiff cmplo cmphi t (Bin _ x l r)
356 = merge (hedgeDiff cmplo cmpx (trim cmplo cmpx t) l)
357 (hedgeDiff cmpx cmphi (trim cmpx cmphi t) r)
358 where
359 cmpx y = compare x y
360
361 {--------------------------------------------------------------------
362 Intersection
363 --------------------------------------------------------------------}
364 -- | /O(n+m)/. The intersection of two sets.
365 -- Elements of the result come from the first set, so for example
366 --
367 -- > import qualified Data.Set as S
368 -- > data AB = A | B deriving Show
369 -- > instance Ord AB where compare _ _ = EQ
370 -- > instance Eq AB where _ == _ = True
371 -- > main = print (S.singleton A `S.intersection` S.singleton B,
372 -- > S.singleton B `S.intersection` S.singleton A)
373 --
374 -- prints @(fromList [A],fromList [B])@.
375 intersection :: Ord a => Set a -> Set a -> Set a
376 intersection Tip _ = Tip
377 intersection _ Tip = Tip
378 intersection t1@(Bin s1 x1 l1 r1) t2@(Bin s2 x2 l2 r2) =
379 if s1 >= s2 then
380 let (lt,found,gt) = splitLookup x2 t1
381 tl = intersection lt l2
382 tr = intersection gt r2
383 in case found of
384 Just x -> join x tl tr
385 Nothing -> merge tl tr
386 else let (lt,found,gt) = splitMember x1 t2
387 tl = intersection l1 lt
388 tr = intersection r1 gt
389 in if found then join x1 tl tr
390 else merge tl tr
391
392 {--------------------------------------------------------------------
393 Filter and partition
394 --------------------------------------------------------------------}
395 -- | /O(n)/. Filter all elements that satisfy the predicate.
396 filter :: Ord a => (a -> Bool) -> Set a -> Set a
397 filter p = go
398 where
399 go Tip = Tip
400 go (Bin _ x l r)
401 | p x = join x (go l) (go r)
402 | otherwise = merge (go l) (go r)
403 {-# INLINE filter #-}
404
405 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
406 -- the predicate and one with all elements that don't satisfy the predicate.
407 -- See also 'split'.
408 partition :: Ord a => (a -> Bool) -> Set a -> (Set a,Set a)
409 partition p = go
410 where
411 go Tip = (Tip, Tip)
412 go (Bin _ x l r) = case (go l, go r) of
413 ((l1, l2), (r1, r2))
414 | p x -> (join x l1 r1, merge l2 r2)
415 | otherwise -> (merge l1 r1, join x l2 r2)
416 {-# INLINE partition #-}
417
418 {----------------------------------------------------------------------
419 Map
420 ----------------------------------------------------------------------}
421
422 -- | /O(n*log n)/.
423 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
424 --
425 -- It's worth noting that the size of the result may be smaller if,
426 -- for some @(x,y)@, @x \/= y && f x == f y@
427
428 map :: (Ord a, Ord b) => (a->b) -> Set a -> Set b
429 map f = fromList . List.map f . toList
430 {-# INLINE map #-}
431
432 -- | /O(n)/. The
433 --
434 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
435 -- /The precondition is not checked./
436 -- Semi-formally, we have:
437 --
438 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
439 -- > ==> mapMonotonic f s == map f s
440 -- > where ls = toList s
441
442 mapMonotonic :: (a->b) -> Set a -> Set b
443 mapMonotonic f = go
444 where
445 go Tip = Tip
446 go (Bin sz x l r) = Bin sz (f x) (go l) (go r)
447 {-# INLINE mapMonotonic #-}
448
449 {--------------------------------------------------------------------
450 Fold
451 --------------------------------------------------------------------}
452 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
453 fold :: (a -> b -> b) -> b -> Set a -> b
454 fold = foldr
455 {-# INLINE fold #-}
456
457 -- | /O(n)/. Post-order fold.
458 foldr :: (a -> b -> b) -> b -> Set a -> b
459 foldr f = go
460 where
461 go z Tip = z
462 go z (Bin _ x l r) = go (f x (go z r)) l
463 {-# INLINE foldr #-}
464
465 {--------------------------------------------------------------------
466 List variations
467 --------------------------------------------------------------------}
468 -- | /O(n)/. The elements of a set.
469 elems :: Set a -> [a]
470 elems = toList
471 {-# INLINE elems #-}
472
473 {--------------------------------------------------------------------
474 Lists
475 --------------------------------------------------------------------}
476 -- | /O(n)/. Convert the set to a list of elements.
477 toList :: Set a -> [a]
478 toList = toAscList
479 {-# INLINE toList #-}
480
481 -- | /O(n)/. Convert the set to an ascending list of elements.
482 toAscList :: Set a -> [a]
483 toAscList = foldr (:) []
484 {-# INLINE toAscList #-}
485
486 -- | /O(n*log n)/. Create a set from a list of elements.
487 fromList :: Ord a => [a] -> Set a
488 fromList = foldlStrict ins empty
489 where
490 ins t x = insert x t
491 {-# INLINE fromList #-}
492
493 {--------------------------------------------------------------------
494 Building trees from ascending/descending lists can be done in linear time.
495
496 Note that if [xs] is ascending that:
497 fromAscList xs == fromList xs
498 --------------------------------------------------------------------}
499 -- | /O(n)/. Build a set from an ascending list in linear time.
500 -- /The precondition (input list is ascending) is not checked./
501 fromAscList :: Eq a => [a] -> Set a
502 fromAscList xs
503 = fromDistinctAscList (combineEq xs)
504 where
505 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
506 combineEq xs'
507 = case xs' of
508 [] -> []
509 [x] -> [x]
510 (x:xx) -> combineEq' x xx
511
512 combineEq' z [] = [z]
513 combineEq' z (x:xs')
514 | z==x = combineEq' z xs'
515 | otherwise = z:combineEq' x xs'
516
517
518 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
519 -- /The precondition (input list is strictly ascending) is not checked./
520 fromDistinctAscList :: [a] -> Set a
521 fromDistinctAscList xs
522 = build const (length xs) xs
523 where
524 -- 1) use continutations so that we use heap space instead of stack space.
525 -- 2) special case for n==5 to build bushier trees.
526 build c 0 xs' = c Tip xs'
527 build c 5 xs' = case xs' of
528 (x1:x2:x3:x4:x5:xx)
529 -> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
530 _ -> error "fromDistinctAscList build 5"
531 build c n xs' = seq nr $ build (buildR nr c) nl xs'
532 where
533 nl = n `div` 2
534 nr = n - nl - 1
535
536 buildR n c l (x:ys) = build (buildB l x c) n ys
537 buildR _ _ _ [] = error "fromDistinctAscList buildR []"
538 buildB l x c r zs = c (bin x l r) zs
539
540 {--------------------------------------------------------------------
541 Eq converts the set to a list. In a lazy setting, this
542 actually seems one of the faster methods to compare two trees
543 and it is certainly the simplest :-)
544 --------------------------------------------------------------------}
545 instance Eq a => Eq (Set a) where
546 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
547
548 {--------------------------------------------------------------------
549 Ord
550 --------------------------------------------------------------------}
551
552 instance Ord a => Ord (Set a) where
553 compare s1 s2 = compare (toAscList s1) (toAscList s2)
554
555 {--------------------------------------------------------------------
556 Show
557 --------------------------------------------------------------------}
558 instance Show a => Show (Set a) where
559 showsPrec p xs = showParen (p > 10) $
560 showString "fromList " . shows (toList xs)
561
562 {--------------------------------------------------------------------
563 Read
564 --------------------------------------------------------------------}
565 instance (Read a, Ord a) => Read (Set a) where
566 #ifdef __GLASGOW_HASKELL__
567 readPrec = parens $ prec 10 $ do
568 Ident "fromList" <- lexP
569 xs <- readPrec
570 return (fromList xs)
571
572 readListPrec = readListPrecDefault
573 #else
574 readsPrec p = readParen (p > 10) $ \ r -> do
575 ("fromList",s) <- lex r
576 (xs,t) <- reads s
577 return (fromList xs,t)
578 #endif
579
580 {--------------------------------------------------------------------
581 Typeable/Data
582 --------------------------------------------------------------------}
583
584 #include "Typeable.h"
585 INSTANCE_TYPEABLE1(Set,setTc,"Set")
586
587 {--------------------------------------------------------------------
588 Utility functions that return sub-ranges of the original
589 tree. Some functions take a comparison function as argument to
590 allow comparisons against infinite values. A function [cmplo x]
591 should be read as [compare lo x].
592
593 [trim cmplo cmphi t] A tree that is either empty or where [cmplo x == LT]
594 and [cmphi x == GT] for the value [x] of the root.
595 [filterGt cmp t] A tree where for all values [k]. [cmp k == LT]
596 [filterLt cmp t] A tree where for all values [k]. [cmp k == GT]
597
598 [split k t] Returns two trees [l] and [r] where all values
599 in [l] are <[k] and all keys in [r] are >[k].
600 [splitMember k t] Just like [split] but also returns whether [k]
601 was found in the tree.
602 --------------------------------------------------------------------}
603
604 {--------------------------------------------------------------------
605 [trim lo hi t] trims away all subtrees that surely contain no
606 values between the range [lo] to [hi]. The returned tree is either
607 empty or the key of the root is between @lo@ and @hi@.
608 --------------------------------------------------------------------}
609 trim :: (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a
610 trim _ _ Tip = Tip
611 trim cmplo cmphi t@(Bin _ x l r)
612 = case cmplo x of
613 LT -> case cmphi x of
614 GT -> t
615 _ -> trim cmplo cmphi l
616 _ -> trim cmplo cmphi r
617
618 {--------------------------------------------------------------------
619 [filterGt x t] filter all values >[x] from tree [t]
620 [filterLt x t] filter all values <[x] from tree [t]
621 --------------------------------------------------------------------}
622 filterGt :: (a -> Ordering) -> Set a -> Set a
623 filterGt _ Tip = Tip
624 filterGt cmp (Bin _ x l r)
625 = case cmp x of
626 LT -> join x (filterGt cmp l) r
627 GT -> filterGt cmp r
628 EQ -> r
629 {-# INLINE filterGt #-}
630
631 filterLt :: (a -> Ordering) -> Set a -> Set a
632 filterLt _ Tip = Tip
633 filterLt cmp (Bin _ x l r)
634 = case cmp x of
635 LT -> filterLt cmp l
636 GT -> join x l (filterLt cmp r)
637 EQ -> l
638 {-# INLINE filterLt #-}
639
640 {--------------------------------------------------------------------
641 Split
642 --------------------------------------------------------------------}
643 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
644 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
645 -- comprises the elements of @set@ greater than @x@.
646 split :: Ord a => a -> Set a -> (Set a,Set a)
647 split _ Tip = (Tip,Tip)
648 split x (Bin _ y l r)
649 = case compare x y of
650 LT -> let (lt,gt) = split x l in (lt,join y gt r)
651 GT -> let (lt,gt) = split x r in (join y l lt,gt)
652 EQ -> (l,r)
653
654 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
655 -- element was found in the original set.
656 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
657 splitMember x t = let (l,m,r) = splitLookup x t in
658 (l,maybe False (const True) m,r)
659
660 -- | /O(log n)/. Performs a 'split' but also returns the pivot
661 -- element that was found in the original set.
662 splitLookup :: Ord a => a -> Set a -> (Set a,Maybe a,Set a)
663 splitLookup _ Tip = (Tip,Nothing,Tip)
664 splitLookup x (Bin _ y l r)
665 = case compare x y of
666 LT -> let (lt,found,gt) = splitLookup x l in (lt,found,join y gt r)
667 GT -> let (lt,found,gt) = splitLookup x r in (join y l lt,found,gt)
668 EQ -> (l,Just y,r)
669
670 {--------------------------------------------------------------------
671 Utility functions that maintain the balance properties of the tree.
672 All constructors assume that all values in [l] < [x] and all values
673 in [r] > [x], and that [l] and [r] are valid trees.
674
675 In order of sophistication:
676 [Bin sz x l r] The type constructor.
677 [bin x l r] Maintains the correct size, assumes that both [l]
678 and [r] are balanced with respect to each other.
679 [balance x l r] Restores the balance and size.
680 Assumes that the original tree was balanced and
681 that [l] or [r] has changed by at most one element.
682 [join x l r] Restores balance and size.
683
684 Furthermore, we can construct a new tree from two trees. Both operations
685 assume that all values in [l] < all values in [r] and that [l] and [r]
686 are valid:
687 [glue l r] Glues [l] and [r] together. Assumes that [l] and
688 [r] are already balanced with respect to each other.
689 [merge l r] Merges two trees and restores balance.
690
691 Note: in contrast to Adam's paper, we use (<=) comparisons instead
692 of (<) comparisons in [join], [merge] and [balance].
693 Quickcheck (on [difference]) showed that this was necessary in order
694 to maintain the invariants. It is quite unsatisfactory that I haven't
695 been able to find out why this is actually the case! Fortunately, it
696 doesn't hurt to be a bit more conservative.
697 --------------------------------------------------------------------}
698
699 {--------------------------------------------------------------------
700 Join
701 --------------------------------------------------------------------}
702 join :: a -> Set a -> Set a -> Set a
703 join x Tip r = insertMin x r
704 join x l Tip = insertMax x l
705 join x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
706 | delta*sizeL <= sizeR = balance z (join x l lz) rz
707 | delta*sizeR <= sizeL = balance y ly (join x ry r)
708 | otherwise = bin x l r
709
710
711 -- insertMin and insertMax don't perform potentially expensive comparisons.
712 insertMax,insertMin :: a -> Set a -> Set a
713 insertMax x t
714 = case t of
715 Tip -> singleton x
716 Bin _ y l r
717 -> balance y l (insertMax x r)
718
719 insertMin x t
720 = case t of
721 Tip -> singleton x
722 Bin _ y l r
723 -> balance y (insertMin x l) r
724
725 {--------------------------------------------------------------------
726 [merge l r]: merges two trees.
727 --------------------------------------------------------------------}
728 merge :: Set a -> Set a -> Set a
729 merge Tip r = r
730 merge l Tip = l
731 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
732 | delta*sizeL <= sizeR = balance y (merge l ly) ry
733 | delta*sizeR <= sizeL = balance x lx (merge rx r)
734 | otherwise = glue l r
735
736 {--------------------------------------------------------------------
737 [glue l r]: glues two trees together.
738 Assumes that [l] and [r] are already balanced with respect to each other.
739 --------------------------------------------------------------------}
740 glue :: Set a -> Set a -> Set a
741 glue Tip r = r
742 glue l Tip = l
743 glue l r
744 | size l > size r = let (m,l') = deleteFindMax l in balance m l' r
745 | otherwise = let (m,r') = deleteFindMin r in balance m l r'
746
747
748 -- | /O(log n)/. Delete and find the minimal element.
749 --
750 -- > deleteFindMin set = (findMin set, deleteMin set)
751
752 deleteFindMin :: Set a -> (a,Set a)
753 deleteFindMin t
754 = case t of
755 Bin _ x Tip r -> (x,r)
756 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balance x l' r)
757 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
758
759 -- | /O(log n)/. Delete and find the maximal element.
760 --
761 -- > deleteFindMax set = (findMax set, deleteMax set)
762 deleteFindMax :: Set a -> (a,Set a)
763 deleteFindMax t
764 = case t of
765 Bin _ x l Tip -> (x,l)
766 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balance x l r')
767 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
768
769 -- | /O(log n)/. Retrieves the minimal key of the set, and the set
770 -- stripped of that element, or 'Nothing' if passed an empty set.
771 minView :: Set a -> Maybe (a, Set a)
772 minView Tip = Nothing
773 minView x = Just (deleteFindMin x)
774
775 -- | /O(log n)/. Retrieves the maximal key of the set, and the set
776 -- stripped of that element, or 'Nothing' if passed an empty set.
777 maxView :: Set a -> Maybe (a, Set a)
778 maxView Tip = Nothing
779 maxView x = Just (deleteFindMax x)
780
781 {--------------------------------------------------------------------
782 [balance x l r] balances two trees with value x.
783 The sizes of the trees should balance after decreasing the
784 size of one of them. (a rotation).
785
786 [delta] is the maximal relative difference between the sizes of
787 two trees, it corresponds with the [w] in Adams' paper,
788 or equivalently, [1/delta] corresponds with the $\alpha$
789 in Nievergelt's paper. Adams shows that [delta] should
790 be larger than 3.745 in order to garantee that the
791 rotations can always restore balance.
792
793 [ratio] is the ratio between an outer and inner sibling of the
794 heavier subtree in an unbalanced setting. It determines
795 whether a double or single rotation should be performed
796 to restore balance. It is correspondes with the inverse
797 of $\alpha$ in Adam's article.
798
799 Note that:
800 - [delta] should be larger than 4.646 with a [ratio] of 2.
801 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
802
803 - A lower [delta] leads to a more 'perfectly' balanced tree.
804 - A higher [delta] performs less rebalancing.
805
806 - Balancing is automatic for random data and a balancing
807 scheme is only necessary to avoid pathological worst cases.
808 Almost any choice will do in practice
809
810 - Allthough it seems that a rather large [delta] may perform better
811 than smaller one, measurements have shown that the smallest [delta]
812 of 4 is actually the fastest on a wide range of operations. It
813 especially improves performance on worst-case scenarios like
814 a sequence of ordered insertions.
815
816 Note: in contrast to Adams' paper, we use a ratio of (at least) 2
817 to decide whether a single or double rotation is needed. Allthough
818 he actually proves that this ratio is needed to maintain the
819 invariants, his implementation uses a (invalid) ratio of 1.
820 He is aware of the problem though since he has put a comment in his
821 original source code that he doesn't care about generating a
822 slightly inbalanced tree since it doesn't seem to matter in practice.
823 However (since we use quickcheck :-) we will stick to strictly balanced
824 trees.
825 --------------------------------------------------------------------}
826 delta,ratio :: Int
827 delta = 4
828 ratio = 2
829
830 balance :: a -> Set a -> Set a -> Set a
831 balance x l r
832 | sizeL + sizeR <= 1 = Bin sizeX x l r
833 | sizeR >= delta*sizeL = rotateL x l r
834 | sizeL >= delta*sizeR = rotateR x l r
835 | otherwise = Bin sizeX x l r
836 where
837 sizeL = size l
838 sizeR = size r
839 sizeX = sizeL + sizeR + 1
840
841 -- rotate
842 rotateL :: a -> Set a -> Set a -> Set a
843 rotateL x l r@(Bin _ _ ly ry)
844 | size ly < ratio*size ry = singleL x l r
845 | otherwise = doubleL x l r
846 rotateL _ _ Tip = error "rotateL Tip"
847
848 rotateR :: a -> Set a -> Set a -> Set a
849 rotateR x l@(Bin _ _ ly ry) r
850 | size ry < ratio*size ly = singleR x l r
851 | otherwise = doubleR x l r
852 rotateR _ Tip _ = error "rotateL Tip"
853
854 -- basic rotations
855 singleL, singleR :: a -> Set a -> Set a -> Set a
856 singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
857 singleL _ _ Tip = error "singleL"
858 singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
859 singleR _ Tip _ = error "singleR"
860
861 doubleL, doubleR :: a -> Set a -> Set a -> Set a
862 doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
863 doubleL _ _ _ = error "doubleL"
864 doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
865 doubleR _ _ _ = error "doubleR"
866
867
868 {--------------------------------------------------------------------
869 The bin constructor maintains the size of the tree
870 --------------------------------------------------------------------}
871 bin :: a -> Set a -> Set a -> Set a
872 bin x l r
873 = Bin (size l + size r + 1) x l r
874
875
876 {--------------------------------------------------------------------
877 Utilities
878 --------------------------------------------------------------------}
879 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
880 foldlStrict f = go
881 where
882 go z [] = z
883 go z (x:xs) = z `seq` go (f z x) xs
884 {-# INLINE foldlStrict #-}
885
886 {--------------------------------------------------------------------
887 Debugging
888 --------------------------------------------------------------------}
889 -- | /O(n)/. Show the tree that implements the set. The tree is shown
890 -- in a compressed, hanging format.
891 showTree :: Show a => Set a -> String
892 showTree s
893 = showTreeWith True False s
894
895
896 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
897 the tree that implements the set. If @hang@ is
898 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
899 @wide@ is 'True', an extra wide version is shown.
900
901 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
902 > 4
903 > +--2
904 > | +--1
905 > | +--3
906 > +--5
907 >
908 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
909 > 4
910 > |
911 > +--2
912 > | |
913 > | +--1
914 > | |
915 > | +--3
916 > |
917 > +--5
918 >
919 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
920 > +--5
921 > |
922 > 4
923 > |
924 > | +--3
925 > | |
926 > +--2
927 > |
928 > +--1
929
930 -}
931 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
932 showTreeWith hang wide t
933 | hang = (showsTreeHang wide [] t) ""
934 | otherwise = (showsTree wide [] [] t) ""
935
936 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
937 showsTree wide lbars rbars t
938 = case t of
939 Tip -> showsBars lbars . showString "|\n"
940 Bin _ x Tip Tip
941 -> showsBars lbars . shows x . showString "\n"
942 Bin _ x l r
943 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
944 showWide wide rbars .
945 showsBars lbars . shows x . showString "\n" .
946 showWide wide lbars .
947 showsTree wide (withEmpty lbars) (withBar lbars) l
948
949 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
950 showsTreeHang wide bars t
951 = case t of
952 Tip -> showsBars bars . showString "|\n"
953 Bin _ x Tip Tip
954 -> showsBars bars . shows x . showString "\n"
955 Bin _ x l r
956 -> showsBars bars . shows x . showString "\n" .
957 showWide wide bars .
958 showsTreeHang wide (withBar bars) l .
959 showWide wide bars .
960 showsTreeHang wide (withEmpty bars) r
961
962 showWide :: Bool -> [String] -> String -> String
963 showWide wide bars
964 | wide = showString (concat (reverse bars)) . showString "|\n"
965 | otherwise = id
966
967 showsBars :: [String] -> ShowS
968 showsBars bars
969 = case bars of
970 [] -> id
971 _ -> showString (concat (reverse (tail bars))) . showString node
972
973 node :: String
974 node = "+--"
975
976 withBar, withEmpty :: [String] -> [String]
977 withBar bars = "| ":bars
978 withEmpty bars = " ":bars
979
980 {--------------------------------------------------------------------
981 Assertions
982 --------------------------------------------------------------------}
983 -- | /O(n)/. Test if the internal set structure is valid.
984 valid :: Ord a => Set a -> Bool
985 valid t
986 = balanced t && ordered t && validsize t
987
988 ordered :: Ord a => Set a -> Bool
989 ordered t
990 = bounded (const True) (const True) t
991 where
992 bounded lo hi t'
993 = case t' of
994 Tip -> True
995 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
996
997 balanced :: Set a -> Bool
998 balanced t
999 = case t of
1000 Tip -> True
1001 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1002 balanced l && balanced r
1003
1004 validsize :: Set a -> Bool
1005 validsize t
1006 = (realsize t == Just (size t))
1007 where
1008 realsize t'
1009 = case t' of
1010 Tip -> Just 0
1011 Bin sz _ l r -> case (realsize l,realsize r) of
1012 (Just n,Just m) | n+m+1 == sz -> Just sz
1013 _ -> Nothing