Make the package -Wall clean
[packages/containers.git] / Data / Set.hs
1 -----------------------------------------------------------------------------
2 -- |
3 -- Module : Data.Set
4 -- Copyright : (c) Daan Leijen 2002
5 -- License : BSD-style
6 -- Maintainer : libraries@haskell.org
7 -- Stability : provisional
8 -- Portability : portable
9 --
10 -- An efficient implementation of sets.
11 --
12 -- Since many function names (but not the type name) clash with
13 -- "Prelude" names, this module is usually imported @qualified@, e.g.
14 --
15 -- > import Data.Set (Set)
16 -- > import qualified Data.Set as Set
17 --
18 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
19 -- trees of /bounded balance/) as described by:
20 --
21 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
22 -- Journal of Functional Programming 3(4):553-562, October 1993,
23 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
24 --
25 -- * J. Nievergelt and E.M. Reingold,
26 -- \"/Binary search trees of bounded balance/\",
27 -- SIAM journal of computing 2(1), March 1973.
28 --
29 -- Note that the implementation is /left-biased/ -- the elements of a
30 -- first argument are always preferred to the second, for example in
31 -- 'union' or 'insert'. Of course, left-biasing can only be observed
32 -- when equality is an equivalence relation instead of structural
33 -- equality.
34 -----------------------------------------------------------------------------
35
36 module Data.Set (
37 -- * Set type
38 Set -- instance Eq,Ord,Show,Read,Data,Typeable
39
40 -- * Operators
41 , (\\)
42
43 -- * Query
44 , null
45 , size
46 , member
47 , notMember
48 , isSubsetOf
49 , isProperSubsetOf
50
51 -- * Construction
52 , empty
53 , singleton
54 , insert
55 , delete
56
57 -- * Combine
58 , union, unions
59 , difference
60 , intersection
61
62 -- * Filter
63 , filter
64 , partition
65 , split
66 , splitMember
67
68 -- * Map
69 , map
70 , mapMonotonic
71
72 -- * Fold
73 , fold
74
75 -- * Min\/Max
76 , findMin
77 , findMax
78 , deleteMin
79 , deleteMax
80 , deleteFindMin
81 , deleteFindMax
82 , maxView
83 , minView
84
85 -- * Conversion
86
87 -- ** List
88 , elems
89 , toList
90 , fromList
91
92 -- ** Ordered list
93 , toAscList
94 , fromAscList
95 , fromDistinctAscList
96
97 -- * Debugging
98 , showTree
99 , showTreeWith
100 , valid
101 ) where
102
103 import Prelude hiding (filter,foldr,null,map)
104 import qualified Data.List as List
105 import Data.Monoid (Monoid(..))
106 import Data.Foldable (Foldable(foldMap))
107
108 {-
109 -- just for testing
110 import QuickCheck
111 import List (nub,sort)
112 import qualified List
113 -}
114
115 #if __GLASGOW_HASKELL__
116 import Text.Read
117 import Data.Generics.Basics
118 import Data.Generics.Instances ()
119 #endif
120
121 {--------------------------------------------------------------------
122 Operators
123 --------------------------------------------------------------------}
124 infixl 9 \\ --
125
126 -- | /O(n+m)/. See 'difference'.
127 (\\) :: Ord a => Set a -> Set a -> Set a
128 m1 \\ m2 = difference m1 m2
129
130 {--------------------------------------------------------------------
131 Sets are size balanced trees
132 --------------------------------------------------------------------}
133 -- | A set of values @a@.
134 data Set a = Tip
135 | Bin {-# UNPACK #-} !Size a !(Set a) !(Set a)
136
137 type Size = Int
138
139 instance Ord a => Monoid (Set a) where
140 mempty = empty
141 mappend = union
142 mconcat = unions
143
144 instance Foldable Set where
145 foldMap _ Tip = mempty
146 foldMap f (Bin _s k l r) = foldMap f l `mappend` f k `mappend` foldMap f r
147
148 #if __GLASGOW_HASKELL__
149
150 {--------------------------------------------------------------------
151 A Data instance
152 --------------------------------------------------------------------}
153
154 -- This instance preserves data abstraction at the cost of inefficiency.
155 -- We omit reflection services for the sake of data abstraction.
156
157 instance (Data a, Ord a) => Data (Set a) where
158 gfoldl f z set = z fromList `f` (toList set)
159 toConstr _ = error "toConstr"
160 gunfold _ _ = error "gunfold"
161 dataTypeOf _ = mkNorepType "Data.Set.Set"
162 dataCast1 f = gcast1 f
163
164 #endif
165
166 {--------------------------------------------------------------------
167 Query
168 --------------------------------------------------------------------}
169 -- | /O(1)/. Is this the empty set?
170 null :: Set a -> Bool
171 null t
172 = case t of
173 Tip -> True
174 Bin {} -> False
175
176 -- | /O(1)/. The number of elements in the set.
177 size :: Set a -> Int
178 size t
179 = case t of
180 Tip -> 0
181 Bin sz _ _ _ -> sz
182
183 -- | /O(log n)/. Is the element in the set?
184 member :: Ord a => a -> Set a -> Bool
185 member x t
186 = case t of
187 Tip -> False
188 Bin _ y l r
189 -> case compare x y of
190 LT -> member x l
191 GT -> member x r
192 EQ -> True
193
194 -- | /O(log n)/. Is the element not in the set?
195 notMember :: Ord a => a -> Set a -> Bool
196 notMember x t = not $ member x t
197
198 {--------------------------------------------------------------------
199 Construction
200 --------------------------------------------------------------------}
201 -- | /O(1)/. The empty set.
202 empty :: Set a
203 empty
204 = Tip
205
206 -- | /O(1)/. Create a singleton set.
207 singleton :: a -> Set a
208 singleton x
209 = Bin 1 x Tip Tip
210
211 {--------------------------------------------------------------------
212 Insertion, Deletion
213 --------------------------------------------------------------------}
214 -- | /O(log n)/. Insert an element in a set.
215 -- If the set already contains an element equal to the given value,
216 -- it is replaced with the new value.
217 insert :: Ord a => a -> Set a -> Set a
218 insert x t
219 = case t of
220 Tip -> singleton x
221 Bin sz y l r
222 -> case compare x y of
223 LT -> balance y (insert x l) r
224 GT -> balance y l (insert x r)
225 EQ -> Bin sz x l r
226
227
228 -- | /O(log n)/. Delete an element from a set.
229 delete :: Ord a => a -> Set a -> Set a
230 delete x t
231 = case t of
232 Tip -> Tip
233 Bin _ y l r
234 -> case compare x y of
235 LT -> balance y (delete x l) r
236 GT -> balance y l (delete x r)
237 EQ -> glue l r
238
239 {--------------------------------------------------------------------
240 Subset
241 --------------------------------------------------------------------}
242 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
243 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
244 isProperSubsetOf s1 s2
245 = (size s1 < size s2) && (isSubsetOf s1 s2)
246
247
248 -- | /O(n+m)/. Is this a subset?
249 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
250 isSubsetOf :: Ord a => Set a -> Set a -> Bool
251 isSubsetOf t1 t2
252 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
253
254 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
255 isSubsetOfX Tip _ = True
256 isSubsetOfX _ Tip = False
257 isSubsetOfX (Bin _ x l r) t
258 = found && isSubsetOfX l lt && isSubsetOfX r gt
259 where
260 (lt,found,gt) = splitMember x t
261
262
263 {--------------------------------------------------------------------
264 Minimal, Maximal
265 --------------------------------------------------------------------}
266 -- | /O(log n)/. The minimal element of a set.
267 findMin :: Set a -> a
268 findMin (Bin _ x Tip _) = x
269 findMin (Bin _ _ l _) = findMin l
270 findMin Tip = error "Set.findMin: empty set has no minimal element"
271
272 -- | /O(log n)/. The maximal element of a set.
273 findMax :: Set a -> a
274 findMax (Bin _ x _ Tip) = x
275 findMax (Bin _ _ _ r) = findMax r
276 findMax Tip = error "Set.findMax: empty set has no maximal element"
277
278 -- | /O(log n)/. Delete the minimal element.
279 deleteMin :: Set a -> Set a
280 deleteMin (Bin _ _ Tip r) = r
281 deleteMin (Bin _ x l r) = balance x (deleteMin l) r
282 deleteMin Tip = Tip
283
284 -- | /O(log n)/. Delete the maximal element.
285 deleteMax :: Set a -> Set a
286 deleteMax (Bin _ _ l Tip) = l
287 deleteMax (Bin _ x l r) = balance x l (deleteMax r)
288 deleteMax Tip = Tip
289
290
291 {--------------------------------------------------------------------
292 Union.
293 --------------------------------------------------------------------}
294 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
295 unions :: Ord a => [Set a] -> Set a
296 unions ts
297 = foldlStrict union empty ts
298
299
300 -- | /O(n+m)/. The union of two sets, preferring the first set when
301 -- equal elements are encountered.
302 -- The implementation uses the efficient /hedge-union/ algorithm.
303 -- Hedge-union is more efficient on (bigset `union` smallset).
304 union :: Ord a => Set a -> Set a -> Set a
305 union Tip t2 = t2
306 union t1 Tip = t1
307 union t1 t2 = hedgeUnion (const LT) (const GT) t1 t2
308
309 hedgeUnion :: Ord a
310 => (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a -> Set a
311 hedgeUnion _ _ t1 Tip
312 = t1
313 hedgeUnion cmplo cmphi Tip (Bin _ x l r)
314 = join x (filterGt cmplo l) (filterLt cmphi r)
315 hedgeUnion cmplo cmphi (Bin _ x l r) t2
316 = join x (hedgeUnion cmplo cmpx l (trim cmplo cmpx t2))
317 (hedgeUnion cmpx cmphi r (trim cmpx cmphi t2))
318 where
319 cmpx y = compare x y
320
321 {--------------------------------------------------------------------
322 Difference
323 --------------------------------------------------------------------}
324 -- | /O(n+m)/. Difference of two sets.
325 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
326 difference :: Ord a => Set a -> Set a -> Set a
327 difference Tip _ = Tip
328 difference t1 Tip = t1
329 difference t1 t2 = hedgeDiff (const LT) (const GT) t1 t2
330
331 hedgeDiff :: Ord a
332 => (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a -> Set a
333 hedgeDiff _ _ Tip _
334 = Tip
335 hedgeDiff cmplo cmphi (Bin _ x l r) Tip
336 = join x (filterGt cmplo l) (filterLt cmphi r)
337 hedgeDiff cmplo cmphi t (Bin _ x l r)
338 = merge (hedgeDiff cmplo cmpx (trim cmplo cmpx t) l)
339 (hedgeDiff cmpx cmphi (trim cmpx cmphi t) r)
340 where
341 cmpx y = compare x y
342
343 {--------------------------------------------------------------------
344 Intersection
345 --------------------------------------------------------------------}
346 -- | /O(n+m)/. The intersection of two sets.
347 -- Elements of the result come from the first set, so for example
348 --
349 -- > import qualified Data.Set as S
350 -- > data AB = A | B deriving Show
351 -- > instance Ord AB where compare _ _ = EQ
352 -- > instance Eq AB where _ == _ = True
353 -- > main = print (S.singleton A `S.intersection` S.singleton B,
354 -- > S.singleton B `S.intersection` S.singleton A)
355 --
356 -- prints @(fromList [A],fromList [B])@.
357 intersection :: Ord a => Set a -> Set a -> Set a
358 intersection Tip _ = Tip
359 intersection _ Tip = Tip
360 intersection t1@(Bin s1 x1 l1 r1) t2@(Bin s2 x2 l2 r2) =
361 if s1 >= s2 then
362 let (lt,found,gt) = splitLookup x2 t1
363 tl = intersection lt l2
364 tr = intersection gt r2
365 in case found of
366 Just x -> join x tl tr
367 Nothing -> merge tl tr
368 else let (lt,found,gt) = splitMember x1 t2
369 tl = intersection l1 lt
370 tr = intersection r1 gt
371 in if found then join x1 tl tr
372 else merge tl tr
373
374 {--------------------------------------------------------------------
375 Filter and partition
376 --------------------------------------------------------------------}
377 -- | /O(n)/. Filter all elements that satisfy the predicate.
378 filter :: Ord a => (a -> Bool) -> Set a -> Set a
379 filter _ Tip = Tip
380 filter p (Bin _ x l r)
381 | p x = join x (filter p l) (filter p r)
382 | otherwise = merge (filter p l) (filter p r)
383
384 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
385 -- the predicate and one with all elements that don't satisfy the predicate.
386 -- See also 'split'.
387 partition :: Ord a => (a -> Bool) -> Set a -> (Set a,Set a)
388 partition _ Tip = (Tip,Tip)
389 partition p (Bin _ x l r)
390 | p x = (join x l1 r1,merge l2 r2)
391 | otherwise = (merge l1 r1,join x l2 r2)
392 where
393 (l1,l2) = partition p l
394 (r1,r2) = partition p r
395
396 {----------------------------------------------------------------------
397 Map
398 ----------------------------------------------------------------------}
399
400 -- | /O(n*log n)/.
401 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
402 --
403 -- It's worth noting that the size of the result may be smaller if,
404 -- for some @(x,y)@, @x \/= y && f x == f y@
405
406 map :: (Ord a, Ord b) => (a->b) -> Set a -> Set b
407 map f = fromList . List.map f . toList
408
409 -- | /O(n)/. The
410 --
411 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
412 -- /The precondition is not checked./
413 -- Semi-formally, we have:
414 --
415 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
416 -- > ==> mapMonotonic f s == map f s
417 -- > where ls = toList s
418
419 mapMonotonic :: (a->b) -> Set a -> Set b
420 mapMonotonic _ Tip = Tip
421 mapMonotonic f (Bin sz x l r) =
422 Bin sz (f x) (mapMonotonic f l) (mapMonotonic f r)
423
424
425 {--------------------------------------------------------------------
426 Fold
427 --------------------------------------------------------------------}
428 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
429 fold :: (a -> b -> b) -> b -> Set a -> b
430 fold f z s
431 = foldr f z s
432
433 -- | /O(n)/. Post-order fold.
434 foldr :: (a -> b -> b) -> b -> Set a -> b
435 foldr _ z Tip = z
436 foldr f z (Bin _ x l r) = foldr f (f x (foldr f z r)) l
437
438 {--------------------------------------------------------------------
439 List variations
440 --------------------------------------------------------------------}
441 -- | /O(n)/. The elements of a set.
442 elems :: Set a -> [a]
443 elems s
444 = toList s
445
446 {--------------------------------------------------------------------
447 Lists
448 --------------------------------------------------------------------}
449 -- | /O(n)/. Convert the set to a list of elements.
450 toList :: Set a -> [a]
451 toList s
452 = toAscList s
453
454 -- | /O(n)/. Convert the set to an ascending list of elements.
455 toAscList :: Set a -> [a]
456 toAscList t
457 = foldr (:) [] t
458
459
460 -- | /O(n*log n)/. Create a set from a list of elements.
461 fromList :: Ord a => [a] -> Set a
462 fromList xs
463 = foldlStrict ins empty xs
464 where
465 ins t x = insert x t
466
467 {--------------------------------------------------------------------
468 Building trees from ascending/descending lists can be done in linear time.
469
470 Note that if [xs] is ascending that:
471 fromAscList xs == fromList xs
472 --------------------------------------------------------------------}
473 -- | /O(n)/. Build a set from an ascending list in linear time.
474 -- /The precondition (input list is ascending) is not checked./
475 fromAscList :: Eq a => [a] -> Set a
476 fromAscList xs
477 = fromDistinctAscList (combineEq xs)
478 where
479 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
480 combineEq xs'
481 = case xs' of
482 [] -> []
483 [x] -> [x]
484 (x:xx) -> combineEq' x xx
485
486 combineEq' z [] = [z]
487 combineEq' z (x:xs')
488 | z==x = combineEq' z xs'
489 | otherwise = z:combineEq' x xs'
490
491
492 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
493 -- /The precondition (input list is strictly ascending) is not checked./
494 fromDistinctAscList :: [a] -> Set a
495 fromDistinctAscList xs
496 = build const (length xs) xs
497 where
498 -- 1) use continutations so that we use heap space instead of stack space.
499 -- 2) special case for n==5 to build bushier trees.
500 build c 0 xs' = c Tip xs'
501 build c 5 xs' = case xs' of
502 (x1:x2:x3:x4:x5:xx)
503 -> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
504 _ -> error "fromDistinctAscList build 5"
505 build c n xs' = seq nr $ build (buildR nr c) nl xs'
506 where
507 nl = n `div` 2
508 nr = n - nl - 1
509
510 buildR n c l (x:ys) = build (buildB l x c) n ys
511 buildR _ _ _ [] = error "fromDistinctAscList buildR []"
512 buildB l x c r zs = c (bin x l r) zs
513
514 {--------------------------------------------------------------------
515 Eq converts the set to a list. In a lazy setting, this
516 actually seems one of the faster methods to compare two trees
517 and it is certainly the simplest :-)
518 --------------------------------------------------------------------}
519 instance Eq a => Eq (Set a) where
520 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
521
522 {--------------------------------------------------------------------
523 Ord
524 --------------------------------------------------------------------}
525
526 instance Ord a => Ord (Set a) where
527 compare s1 s2 = compare (toAscList s1) (toAscList s2)
528
529 {--------------------------------------------------------------------
530 Show
531 --------------------------------------------------------------------}
532 instance Show a => Show (Set a) where
533 showsPrec p xs = showParen (p > 10) $
534 showString "fromList " . shows (toList xs)
535
536 {-
537 XXX unused code
538
539 showSet :: (Show a) => [a] -> ShowS
540 showSet []
541 = showString "{}"
542 showSet (x:xs)
543 = showChar '{' . shows x . showTail xs
544 where
545 showTail [] = showChar '}'
546 showTail (x':xs') = showChar ',' . shows x' . showTail xs'
547 -}
548
549 {--------------------------------------------------------------------
550 Read
551 --------------------------------------------------------------------}
552 instance (Read a, Ord a) => Read (Set a) where
553 #ifdef __GLASGOW_HASKELL__
554 readPrec = parens $ prec 10 $ do
555 Ident "fromList" <- lexP
556 xs <- readPrec
557 return (fromList xs)
558
559 readListPrec = readListPrecDefault
560 #else
561 readsPrec p = readParen (p > 10) $ \ r -> do
562 ("fromList",s) <- lex r
563 (xs,t) <- reads s
564 return (fromList xs,t)
565 #endif
566
567 {--------------------------------------------------------------------
568 Typeable/Data
569 --------------------------------------------------------------------}
570
571 #include "Typeable.h"
572 INSTANCE_TYPEABLE1(Set,setTc,"Set")
573
574 {--------------------------------------------------------------------
575 Utility functions that return sub-ranges of the original
576 tree. Some functions take a comparison function as argument to
577 allow comparisons against infinite values. A function [cmplo x]
578 should be read as [compare lo x].
579
580 [trim cmplo cmphi t] A tree that is either empty or where [cmplo x == LT]
581 and [cmphi x == GT] for the value [x] of the root.
582 [filterGt cmp t] A tree where for all values [k]. [cmp k == LT]
583 [filterLt cmp t] A tree where for all values [k]. [cmp k == GT]
584
585 [split k t] Returns two trees [l] and [r] where all values
586 in [l] are <[k] and all keys in [r] are >[k].
587 [splitMember k t] Just like [split] but also returns whether [k]
588 was found in the tree.
589 --------------------------------------------------------------------}
590
591 {--------------------------------------------------------------------
592 [trim lo hi t] trims away all subtrees that surely contain no
593 values between the range [lo] to [hi]. The returned tree is either
594 empty or the key of the root is between @lo@ and @hi@.
595 --------------------------------------------------------------------}
596 trim :: (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a
597 trim _ _ Tip = Tip
598 trim cmplo cmphi t@(Bin _ x l r)
599 = case cmplo x of
600 LT -> case cmphi x of
601 GT -> t
602 _ -> trim cmplo cmphi l
603 _ -> trim cmplo cmphi r
604
605 {-
606 XXX unused code
607
608 trimMemberLo :: Ord a => a -> (a -> Ordering) -> Set a -> (Bool, Set a)
609 trimMemberLo _ _ Tip = (False,Tip)
610 trimMemberLo lo cmphi t@(Bin _ x l r)
611 = case compare lo x of
612 LT -> case cmphi x of
613 GT -> (member lo t, t)
614 _ -> trimMemberLo lo cmphi l
615 GT -> trimMemberLo lo cmphi r
616 EQ -> (True,trim (compare lo) cmphi r)
617 -}
618
619 {--------------------------------------------------------------------
620 [filterGt x t] filter all values >[x] from tree [t]
621 [filterLt x t] filter all values <[x] from tree [t]
622 --------------------------------------------------------------------}
623 filterGt :: (a -> Ordering) -> Set a -> Set a
624 filterGt _ Tip = Tip
625 filterGt cmp (Bin _ x l r)
626 = case cmp x of
627 LT -> join x (filterGt cmp l) r
628 GT -> filterGt cmp r
629 EQ -> r
630
631 filterLt :: (a -> Ordering) -> Set a -> Set a
632 filterLt _ Tip = Tip
633 filterLt cmp (Bin _ x l r)
634 = case cmp x of
635 LT -> filterLt cmp l
636 GT -> join x l (filterLt cmp r)
637 EQ -> l
638
639
640 {--------------------------------------------------------------------
641 Split
642 --------------------------------------------------------------------}
643 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
644 -- where all elements in @set1@ are lower than @x@ and all elements in
645 -- @set2@ larger than @x@. @x@ is not found in neither @set1@ nor @set2@.
646 split :: Ord a => a -> Set a -> (Set a,Set a)
647 split _ Tip = (Tip,Tip)
648 split x (Bin _ y l r)
649 = case compare x y of
650 LT -> let (lt,gt) = split x l in (lt,join y gt r)
651 GT -> let (lt,gt) = split x r in (join y l lt,gt)
652 EQ -> (l,r)
653
654 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
655 -- element was found in the original set.
656 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
657 splitMember x t = let (l,m,r) = splitLookup x t in
658 (l,maybe False (const True) m,r)
659
660 -- | /O(log n)/. Performs a 'split' but also returns the pivot
661 -- element that was found in the original set.
662 splitLookup :: Ord a => a -> Set a -> (Set a,Maybe a,Set a)
663 splitLookup _ Tip = (Tip,Nothing,Tip)
664 splitLookup x (Bin _ y l r)
665 = case compare x y of
666 LT -> let (lt,found,gt) = splitLookup x l in (lt,found,join y gt r)
667 GT -> let (lt,found,gt) = splitLookup x r in (join y l lt,found,gt)
668 EQ -> (l,Just y,r)
669
670 {--------------------------------------------------------------------
671 Utility functions that maintain the balance properties of the tree.
672 All constructors assume that all values in [l] < [x] and all values
673 in [r] > [x], and that [l] and [r] are valid trees.
674
675 In order of sophistication:
676 [Bin sz x l r] The type constructor.
677 [bin x l r] Maintains the correct size, assumes that both [l]
678 and [r] are balanced with respect to each other.
679 [balance x l r] Restores the balance and size.
680 Assumes that the original tree was balanced and
681 that [l] or [r] has changed by at most one element.
682 [join x l r] Restores balance and size.
683
684 Furthermore, we can construct a new tree from two trees. Both operations
685 assume that all values in [l] < all values in [r] and that [l] and [r]
686 are valid:
687 [glue l r] Glues [l] and [r] together. Assumes that [l] and
688 [r] are already balanced with respect to each other.
689 [merge l r] Merges two trees and restores balance.
690
691 Note: in contrast to Adam's paper, we use (<=) comparisons instead
692 of (<) comparisons in [join], [merge] and [balance].
693 Quickcheck (on [difference]) showed that this was necessary in order
694 to maintain the invariants. It is quite unsatisfactory that I haven't
695 been able to find out why this is actually the case! Fortunately, it
696 doesn't hurt to be a bit more conservative.
697 --------------------------------------------------------------------}
698
699 {--------------------------------------------------------------------
700 Join
701 --------------------------------------------------------------------}
702 join :: a -> Set a -> Set a -> Set a
703 join x Tip r = insertMin x r
704 join x l Tip = insertMax x l
705 join x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
706 | delta*sizeL <= sizeR = balance z (join x l lz) rz
707 | delta*sizeR <= sizeL = balance y ly (join x ry r)
708 | otherwise = bin x l r
709
710
711 -- insertMin and insertMax don't perform potentially expensive comparisons.
712 insertMax,insertMin :: a -> Set a -> Set a
713 insertMax x t
714 = case t of
715 Tip -> singleton x
716 Bin _ y l r
717 -> balance y l (insertMax x r)
718
719 insertMin x t
720 = case t of
721 Tip -> singleton x
722 Bin _ y l r
723 -> balance y (insertMin x l) r
724
725 {--------------------------------------------------------------------
726 [merge l r]: merges two trees.
727 --------------------------------------------------------------------}
728 merge :: Set a -> Set a -> Set a
729 merge Tip r = r
730 merge l Tip = l
731 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
732 | delta*sizeL <= sizeR = balance y (merge l ly) ry
733 | delta*sizeR <= sizeL = balance x lx (merge rx r)
734 | otherwise = glue l r
735
736 {--------------------------------------------------------------------
737 [glue l r]: glues two trees together.
738 Assumes that [l] and [r] are already balanced with respect to each other.
739 --------------------------------------------------------------------}
740 glue :: Set a -> Set a -> Set a
741 glue Tip r = r
742 glue l Tip = l
743 glue l r
744 | size l > size r = let (m,l') = deleteFindMax l in balance m l' r
745 | otherwise = let (m,r') = deleteFindMin r in balance m l r'
746
747
748 -- | /O(log n)/. Delete and find the minimal element.
749 --
750 -- > deleteFindMin set = (findMin set, deleteMin set)
751
752 deleteFindMin :: Set a -> (a,Set a)
753 deleteFindMin t
754 = case t of
755 Bin _ x Tip r -> (x,r)
756 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balance x l' r)
757 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
758
759 -- | /O(log n)/. Delete and find the maximal element.
760 --
761 -- > deleteFindMax set = (findMax set, deleteMax set)
762 deleteFindMax :: Set a -> (a,Set a)
763 deleteFindMax t
764 = case t of
765 Bin _ x l Tip -> (x,l)
766 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balance x l r')
767 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
768
769 -- | /O(log n)/. Retrieves the minimal key of the set, and the set stripped from that element
770 -- @fail@s (in the monad) when passed an empty set.
771 minView :: Monad m => Set a -> m (a, Set a)
772 minView Tip = fail "Set.minView: empty set"
773 minView x = return (deleteFindMin x)
774
775 -- | /O(log n)/. Retrieves the maximal key of the set, and the set stripped from that element
776 -- @fail@s (in the monad) when passed an empty set.
777 maxView :: Monad m => Set a -> m (a, Set a)
778 maxView Tip = fail "Set.maxView: empty set"
779 maxView x = return (deleteFindMax x)
780
781
782 {--------------------------------------------------------------------
783 [balance x l r] balances two trees with value x.
784 The sizes of the trees should balance after decreasing the
785 size of one of them. (a rotation).
786
787 [delta] is the maximal relative difference between the sizes of
788 two trees, it corresponds with the [w] in Adams' paper,
789 or equivalently, [1/delta] corresponds with the $\alpha$
790 in Nievergelt's paper. Adams shows that [delta] should
791 be larger than 3.745 in order to garantee that the
792 rotations can always restore balance.
793
794 [ratio] is the ratio between an outer and inner sibling of the
795 heavier subtree in an unbalanced setting. It determines
796 whether a double or single rotation should be performed
797 to restore balance. It is correspondes with the inverse
798 of $\alpha$ in Adam's article.
799
800 Note that:
801 - [delta] should be larger than 4.646 with a [ratio] of 2.
802 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
803
804 - A lower [delta] leads to a more 'perfectly' balanced tree.
805 - A higher [delta] performs less rebalancing.
806
807 - Balancing is automatic for random data and a balancing
808 scheme is only necessary to avoid pathological worst cases.
809 Almost any choice will do in practice
810
811 - Allthough it seems that a rather large [delta] may perform better
812 than smaller one, measurements have shown that the smallest [delta]
813 of 4 is actually the fastest on a wide range of operations. It
814 especially improves performance on worst-case scenarios like
815 a sequence of ordered insertions.
816
817 Note: in contrast to Adams' paper, we use a ratio of (at least) 2
818 to decide whether a single or double rotation is needed. Allthough
819 he actually proves that this ratio is needed to maintain the
820 invariants, his implementation uses a (invalid) ratio of 1.
821 He is aware of the problem though since he has put a comment in his
822 original source code that he doesn't care about generating a
823 slightly inbalanced tree since it doesn't seem to matter in practice.
824 However (since we use quickcheck :-) we will stick to strictly balanced
825 trees.
826 --------------------------------------------------------------------}
827 delta,ratio :: Int
828 delta = 4
829 ratio = 2
830
831 balance :: a -> Set a -> Set a -> Set a
832 balance x l r
833 | sizeL + sizeR <= 1 = Bin sizeX x l r
834 | sizeR >= delta*sizeL = rotateL x l r
835 | sizeL >= delta*sizeR = rotateR x l r
836 | otherwise = Bin sizeX x l r
837 where
838 sizeL = size l
839 sizeR = size r
840 sizeX = sizeL + sizeR + 1
841
842 -- rotate
843 rotateL :: a -> Set a -> Set a -> Set a
844 rotateL x l r@(Bin _ _ ly ry)
845 | size ly < ratio*size ry = singleL x l r
846 | otherwise = doubleL x l r
847 rotateL _ _ Tip = error "rotateL Tip"
848
849 rotateR :: a -> Set a -> Set a -> Set a
850 rotateR x l@(Bin _ _ ly ry) r
851 | size ry < ratio*size ly = singleR x l r
852 | otherwise = doubleR x l r
853 rotateR _ Tip _ = error "rotateL Tip"
854
855 -- basic rotations
856 singleL, singleR :: a -> Set a -> Set a -> Set a
857 singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
858 singleL _ _ Tip = error "singleL"
859 singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
860 singleR _ Tip _ = error "singleR"
861
862 doubleL, doubleR :: a -> Set a -> Set a -> Set a
863 doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
864 doubleL _ _ _ = error "doubleL"
865 doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
866 doubleR _ _ _ = error "doubleR"
867
868
869 {--------------------------------------------------------------------
870 The bin constructor maintains the size of the tree
871 --------------------------------------------------------------------}
872 bin :: a -> Set a -> Set a -> Set a
873 bin x l r
874 = Bin (size l + size r + 1) x l r
875
876
877 {--------------------------------------------------------------------
878 Utilities
879 --------------------------------------------------------------------}
880 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
881 foldlStrict f z xs
882 = case xs of
883 [] -> z
884 (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
885
886
887 {--------------------------------------------------------------------
888 Debugging
889 --------------------------------------------------------------------}
890 -- | /O(n)/. Show the tree that implements the set. The tree is shown
891 -- in a compressed, hanging format.
892 showTree :: Show a => Set a -> String
893 showTree s
894 = showTreeWith True False s
895
896
897 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
898 the tree that implements the set. If @hang@ is
899 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
900 @wide@ is 'True', an extra wide version is shown.
901
902 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
903 > 4
904 > +--2
905 > | +--1
906 > | +--3
907 > +--5
908 >
909 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
910 > 4
911 > |
912 > +--2
913 > | |
914 > | +--1
915 > | |
916 > | +--3
917 > |
918 > +--5
919 >
920 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
921 > +--5
922 > |
923 > 4
924 > |
925 > | +--3
926 > | |
927 > +--2
928 > |
929 > +--1
930
931 -}
932 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
933 showTreeWith hang wide t
934 | hang = (showsTreeHang wide [] t) ""
935 | otherwise = (showsTree wide [] [] t) ""
936
937 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
938 showsTree wide lbars rbars t
939 = case t of
940 Tip -> showsBars lbars . showString "|\n"
941 Bin _ x Tip Tip
942 -> showsBars lbars . shows x . showString "\n"
943 Bin _ x l r
944 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
945 showWide wide rbars .
946 showsBars lbars . shows x . showString "\n" .
947 showWide wide lbars .
948 showsTree wide (withEmpty lbars) (withBar lbars) l
949
950 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
951 showsTreeHang wide bars t
952 = case t of
953 Tip -> showsBars bars . showString "|\n"
954 Bin _ x Tip Tip
955 -> showsBars bars . shows x . showString "\n"
956 Bin _ x l r
957 -> showsBars bars . shows x . showString "\n" .
958 showWide wide bars .
959 showsTreeHang wide (withBar bars) l .
960 showWide wide bars .
961 showsTreeHang wide (withEmpty bars) r
962
963 showWide :: Bool -> [String] -> String -> String
964 showWide wide bars
965 | wide = showString (concat (reverse bars)) . showString "|\n"
966 | otherwise = id
967
968 showsBars :: [String] -> ShowS
969 showsBars bars
970 = case bars of
971 [] -> id
972 _ -> showString (concat (reverse (tail bars))) . showString node
973
974 node :: String
975 node = "+--"
976
977 withBar, withEmpty :: [String] -> [String]
978 withBar bars = "| ":bars
979 withEmpty bars = " ":bars
980
981 {--------------------------------------------------------------------
982 Assertions
983 --------------------------------------------------------------------}
984 -- | /O(n)/. Test if the internal set structure is valid.
985 valid :: Ord a => Set a -> Bool
986 valid t
987 = balanced t && ordered t && validsize t
988
989 ordered :: Ord a => Set a -> Bool
990 ordered t
991 = bounded (const True) (const True) t
992 where
993 bounded lo hi t'
994 = case t' of
995 Tip -> True
996 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
997
998 balanced :: Set a -> Bool
999 balanced t
1000 = case t of
1001 Tip -> True
1002 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1003 balanced l && balanced r
1004
1005 validsize :: Set a -> Bool
1006 validsize t
1007 = (realsize t == Just (size t))
1008 where
1009 realsize t'
1010 = case t' of
1011 Tip -> Just 0
1012 Bin sz _ l r -> case (realsize l,realsize r) of
1013 (Just n,Just m) | n+m+1 == sz -> Just sz
1014 _ -> Nothing
1015
1016 {-
1017 {--------------------------------------------------------------------
1018 Testing
1019 --------------------------------------------------------------------}
1020 testTree :: [Int] -> Set Int
1021 testTree xs = fromList xs
1022 test1 = testTree [1..20]
1023 test2 = testTree [30,29..10]
1024 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
1025
1026 {--------------------------------------------------------------------
1027 QuickCheck
1028 --------------------------------------------------------------------}
1029 qcheck prop
1030 = check config prop
1031 where
1032 config = Config
1033 { configMaxTest = 500
1034 , configMaxFail = 5000
1035 , configSize = \n -> (div n 2 + 3)
1036 , configEvery = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
1037 }
1038
1039
1040 {--------------------------------------------------------------------
1041 Arbitrary, reasonably balanced trees
1042 --------------------------------------------------------------------}
1043 instance (Enum a) => Arbitrary (Set a) where
1044 arbitrary = sized (arbtree 0 maxkey)
1045 where maxkey = 10000
1046
1047 arbtree :: (Enum a) => Int -> Int -> Int -> Gen (Set a)
1048 arbtree lo hi n
1049 | n <= 0 = return Tip
1050 | lo >= hi = return Tip
1051 | otherwise = do{ i <- choose (lo,hi)
1052 ; m <- choose (1,30)
1053 ; let (ml,mr) | m==(1::Int)= (1,2)
1054 | m==2 = (2,1)
1055 | m==3 = (1,1)
1056 | otherwise = (2,2)
1057 ; l <- arbtree lo (i-1) (n `div` ml)
1058 ; r <- arbtree (i+1) hi (n `div` mr)
1059 ; return (bin (toEnum i) l r)
1060 }
1061
1062
1063 {--------------------------------------------------------------------
1064 Valid tree's
1065 --------------------------------------------------------------------}
1066 forValid :: (Enum a,Show a,Testable b) => (Set a -> b) -> Property
1067 forValid f
1068 = forAll arbitrary $ \t ->
1069 -- classify (balanced t) "balanced" $
1070 classify (size t == 0) "empty" $
1071 classify (size t > 0 && size t <= 10) "small" $
1072 classify (size t > 10 && size t <= 64) "medium" $
1073 classify (size t > 64) "large" $
1074 balanced t ==> f t
1075
1076 forValidIntTree :: Testable a => (Set Int -> a) -> Property
1077 forValidIntTree f
1078 = forValid f
1079
1080 forValidUnitTree :: Testable a => (Set Int -> a) -> Property
1081 forValidUnitTree f
1082 = forValid f
1083
1084
1085 prop_Valid
1086 = forValidUnitTree $ \t -> valid t
1087
1088 {--------------------------------------------------------------------
1089 Single, Insert, Delete
1090 --------------------------------------------------------------------}
1091 prop_Single :: Int -> Bool
1092 prop_Single x
1093 = (insert x empty == singleton x)
1094
1095 prop_InsertValid :: Int -> Property
1096 prop_InsertValid k
1097 = forValidUnitTree $ \t -> valid (insert k t)
1098
1099 prop_InsertDelete :: Int -> Set Int -> Property
1100 prop_InsertDelete k t
1101 = not (member k t) ==> delete k (insert k t) == t
1102
1103 prop_DeleteValid :: Int -> Property
1104 prop_DeleteValid k
1105 = forValidUnitTree $ \t ->
1106 valid (delete k (insert k t))
1107
1108 {--------------------------------------------------------------------
1109 Balance
1110 --------------------------------------------------------------------}
1111 prop_Join :: Int -> Property
1112 prop_Join x
1113 = forValidUnitTree $ \t ->
1114 let (l,r) = split x t
1115 in valid (join x l r)
1116
1117 prop_Merge :: Int -> Property
1118 prop_Merge x
1119 = forValidUnitTree $ \t ->
1120 let (l,r) = split x t
1121 in valid (merge l r)
1122
1123
1124 {--------------------------------------------------------------------
1125 Union
1126 --------------------------------------------------------------------}
1127 prop_UnionValid :: Property
1128 prop_UnionValid
1129 = forValidUnitTree $ \t1 ->
1130 forValidUnitTree $ \t2 ->
1131 valid (union t1 t2)
1132
1133 prop_UnionInsert :: Int -> Set Int -> Bool
1134 prop_UnionInsert x t
1135 = union t (singleton x) == insert x t
1136
1137 prop_UnionAssoc :: Set Int -> Set Int -> Set Int -> Bool
1138 prop_UnionAssoc t1 t2 t3
1139 = union t1 (union t2 t3) == union (union t1 t2) t3
1140
1141 prop_UnionComm :: Set Int -> Set Int -> Bool
1142 prop_UnionComm t1 t2
1143 = (union t1 t2 == union t2 t1)
1144
1145
1146 prop_DiffValid
1147 = forValidUnitTree $ \t1 ->
1148 forValidUnitTree $ \t2 ->
1149 valid (difference t1 t2)
1150
1151 prop_Diff :: [Int] -> [Int] -> Bool
1152 prop_Diff xs ys
1153 = toAscList (difference (fromList xs) (fromList ys))
1154 == List.sort ((List.\\) (nub xs) (nub ys))
1155
1156 prop_IntValid
1157 = forValidUnitTree $ \t1 ->
1158 forValidUnitTree $ \t2 ->
1159 valid (intersection t1 t2)
1160
1161 prop_Int :: [Int] -> [Int] -> Bool
1162 prop_Int xs ys
1163 = toAscList (intersection (fromList xs) (fromList ys))
1164 == List.sort (nub ((List.intersect) (xs) (ys)))
1165
1166 {--------------------------------------------------------------------
1167 Lists
1168 --------------------------------------------------------------------}
1169 prop_Ordered
1170 = forAll (choose (5,100)) $ \n ->
1171 let xs = [0..n::Int]
1172 in fromAscList xs == fromList xs
1173
1174 prop_List :: [Int] -> Bool
1175 prop_List xs
1176 = (sort (nub xs) == toList (fromList xs))
1177 -}