Fix #2760: change mkNorepType to mkNoRepType
[packages/containers.git] / Data / Set.hs
1 -----------------------------------------------------------------------------
2 -- |
3 -- Module : Data.Set
4 -- Copyright : (c) Daan Leijen 2002
5 -- License : BSD-style
6 -- Maintainer : libraries@haskell.org
7 -- Stability : provisional
8 -- Portability : portable
9 --
10 -- An efficient implementation of sets.
11 --
12 -- Since many function names (but not the type name) clash with
13 -- "Prelude" names, this module is usually imported @qualified@, e.g.
14 --
15 -- > import Data.Set (Set)
16 -- > import qualified Data.Set as Set
17 --
18 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
19 -- trees of /bounded balance/) as described by:
20 --
21 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
22 -- Journal of Functional Programming 3(4):553-562, October 1993,
23 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
24 --
25 -- * J. Nievergelt and E.M. Reingold,
26 -- \"/Binary search trees of bounded balance/\",
27 -- SIAM journal of computing 2(1), March 1973.
28 --
29 -- Note that the implementation is /left-biased/ -- the elements of a
30 -- first argument are always preferred to the second, for example in
31 -- 'union' or 'insert'. Of course, left-biasing can only be observed
32 -- when equality is an equivalence relation instead of structural
33 -- equality.
34 -----------------------------------------------------------------------------
35
36 module Data.Set (
37 -- * Set type
38 Set -- instance Eq,Ord,Show,Read,Data,Typeable
39
40 -- * Operators
41 , (\\)
42
43 -- * Query
44 , null
45 , size
46 , member
47 , notMember
48 , isSubsetOf
49 , isProperSubsetOf
50
51 -- * Construction
52 , empty
53 , singleton
54 , insert
55 , delete
56
57 -- * Combine
58 , union, unions
59 , difference
60 , intersection
61
62 -- * Filter
63 , filter
64 , partition
65 , split
66 , splitMember
67
68 -- * Map
69 , map
70 , mapMonotonic
71
72 -- * Fold
73 , fold
74
75 -- * Min\/Max
76 , findMin
77 , findMax
78 , deleteMin
79 , deleteMax
80 , deleteFindMin
81 , deleteFindMax
82 , maxView
83 , minView
84
85 -- * Conversion
86
87 -- ** List
88 , elems
89 , toList
90 , fromList
91
92 -- ** Ordered list
93 , toAscList
94 , fromAscList
95 , fromDistinctAscList
96
97 -- * Debugging
98 , showTree
99 , showTreeWith
100 , valid
101 ) where
102
103 import Prelude hiding (filter,foldr,null,map)
104 import qualified Data.List as List
105 import Data.Monoid (Monoid(..))
106 import Data.Foldable (Foldable(foldMap))
107 #ifndef __GLASGOW_HASKELL__
108 import Data.Typeable (Typeable, typeOf, typeOfDefault)
109 #endif
110 import Data.Typeable (Typeable1(..), TyCon, mkTyCon, mkTyConApp)
111
112 {-
113 -- just for testing
114 import QuickCheck
115 import List (nub,sort)
116 import qualified List
117 -}
118
119 #if __GLASGOW_HASKELL__
120 import Text.Read
121 import Data.Data (Data(..), mkNoRepType, gcast1)
122 #endif
123
124 {--------------------------------------------------------------------
125 Operators
126 --------------------------------------------------------------------}
127 infixl 9 \\ --
128
129 -- | /O(n+m)/. See 'difference'.
130 (\\) :: Ord a => Set a -> Set a -> Set a
131 m1 \\ m2 = difference m1 m2
132
133 {--------------------------------------------------------------------
134 Sets are size balanced trees
135 --------------------------------------------------------------------}
136 -- | A set of values @a@.
137 data Set a = Tip
138 | Bin {-# UNPACK #-} !Size a !(Set a) !(Set a)
139
140 type Size = Int
141
142 instance Ord a => Monoid (Set a) where
143 mempty = empty
144 mappend = union
145 mconcat = unions
146
147 instance Foldable Set where
148 foldMap _ Tip = mempty
149 foldMap f (Bin _s k l r) = foldMap f l `mappend` f k `mappend` foldMap f r
150
151 #if __GLASGOW_HASKELL__
152
153 {--------------------------------------------------------------------
154 A Data instance
155 --------------------------------------------------------------------}
156
157 -- This instance preserves data abstraction at the cost of inefficiency.
158 -- We omit reflection services for the sake of data abstraction.
159
160 instance (Data a, Ord a) => Data (Set a) where
161 gfoldl f z set = z fromList `f` (toList set)
162 toConstr _ = error "toConstr"
163 gunfold _ _ = error "gunfold"
164 dataTypeOf _ = mkNoRepType "Data.Set.Set"
165 dataCast1 f = gcast1 f
166
167 #endif
168
169 {--------------------------------------------------------------------
170 Query
171 --------------------------------------------------------------------}
172 -- | /O(1)/. Is this the empty set?
173 null :: Set a -> Bool
174 null t
175 = case t of
176 Tip -> True
177 Bin {} -> False
178
179 -- | /O(1)/. The number of elements in the set.
180 size :: Set a -> Int
181 size t
182 = case t of
183 Tip -> 0
184 Bin sz _ _ _ -> sz
185
186 -- | /O(log n)/. Is the element in the set?
187 member :: Ord a => a -> Set a -> Bool
188 member x t
189 = case t of
190 Tip -> False
191 Bin _ y l r
192 -> case compare x y of
193 LT -> member x l
194 GT -> member x r
195 EQ -> True
196
197 -- | /O(log n)/. Is the element not in the set?
198 notMember :: Ord a => a -> Set a -> Bool
199 notMember x t = not $ member x t
200
201 {--------------------------------------------------------------------
202 Construction
203 --------------------------------------------------------------------}
204 -- | /O(1)/. The empty set.
205 empty :: Set a
206 empty
207 = Tip
208
209 -- | /O(1)/. Create a singleton set.
210 singleton :: a -> Set a
211 singleton x
212 = Bin 1 x Tip Tip
213
214 {--------------------------------------------------------------------
215 Insertion, Deletion
216 --------------------------------------------------------------------}
217 -- | /O(log n)/. Insert an element in a set.
218 -- If the set already contains an element equal to the given value,
219 -- it is replaced with the new value.
220 insert :: Ord a => a -> Set a -> Set a
221 insert x t
222 = case t of
223 Tip -> singleton x
224 Bin sz y l r
225 -> case compare x y of
226 LT -> balance y (insert x l) r
227 GT -> balance y l (insert x r)
228 EQ -> Bin sz x l r
229
230
231 -- | /O(log n)/. Delete an element from a set.
232 delete :: Ord a => a -> Set a -> Set a
233 delete x t
234 = case t of
235 Tip -> Tip
236 Bin _ y l r
237 -> case compare x y of
238 LT -> balance y (delete x l) r
239 GT -> balance y l (delete x r)
240 EQ -> glue l r
241
242 {--------------------------------------------------------------------
243 Subset
244 --------------------------------------------------------------------}
245 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
246 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
247 isProperSubsetOf s1 s2
248 = (size s1 < size s2) && (isSubsetOf s1 s2)
249
250
251 -- | /O(n+m)/. Is this a subset?
252 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
253 isSubsetOf :: Ord a => Set a -> Set a -> Bool
254 isSubsetOf t1 t2
255 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
256
257 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
258 isSubsetOfX Tip _ = True
259 isSubsetOfX _ Tip = False
260 isSubsetOfX (Bin _ x l r) t
261 = found && isSubsetOfX l lt && isSubsetOfX r gt
262 where
263 (lt,found,gt) = splitMember x t
264
265
266 {--------------------------------------------------------------------
267 Minimal, Maximal
268 --------------------------------------------------------------------}
269 -- | /O(log n)/. The minimal element of a set.
270 findMin :: Set a -> a
271 findMin (Bin _ x Tip _) = x
272 findMin (Bin _ _ l _) = findMin l
273 findMin Tip = error "Set.findMin: empty set has no minimal element"
274
275 -- | /O(log n)/. The maximal element of a set.
276 findMax :: Set a -> a
277 findMax (Bin _ x _ Tip) = x
278 findMax (Bin _ _ _ r) = findMax r
279 findMax Tip = error "Set.findMax: empty set has no maximal element"
280
281 -- | /O(log n)/. Delete the minimal element.
282 deleteMin :: Set a -> Set a
283 deleteMin (Bin _ _ Tip r) = r
284 deleteMin (Bin _ x l r) = balance x (deleteMin l) r
285 deleteMin Tip = Tip
286
287 -- | /O(log n)/. Delete the maximal element.
288 deleteMax :: Set a -> Set a
289 deleteMax (Bin _ _ l Tip) = l
290 deleteMax (Bin _ x l r) = balance x l (deleteMax r)
291 deleteMax Tip = Tip
292
293
294 {--------------------------------------------------------------------
295 Union.
296 --------------------------------------------------------------------}
297 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
298 unions :: Ord a => [Set a] -> Set a
299 unions ts
300 = foldlStrict union empty ts
301
302
303 -- | /O(n+m)/. The union of two sets, preferring the first set when
304 -- equal elements are encountered.
305 -- The implementation uses the efficient /hedge-union/ algorithm.
306 -- Hedge-union is more efficient on (bigset `union` smallset).
307 union :: Ord a => Set a -> Set a -> Set a
308 union Tip t2 = t2
309 union t1 Tip = t1
310 union t1 t2 = hedgeUnion (const LT) (const GT) t1 t2
311
312 hedgeUnion :: Ord a
313 => (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a -> Set a
314 hedgeUnion _ _ t1 Tip
315 = t1
316 hedgeUnion cmplo cmphi Tip (Bin _ x l r)
317 = join x (filterGt cmplo l) (filterLt cmphi r)
318 hedgeUnion cmplo cmphi (Bin _ x l r) t2
319 = join x (hedgeUnion cmplo cmpx l (trim cmplo cmpx t2))
320 (hedgeUnion cmpx cmphi r (trim cmpx cmphi t2))
321 where
322 cmpx y = compare x y
323
324 {--------------------------------------------------------------------
325 Difference
326 --------------------------------------------------------------------}
327 -- | /O(n+m)/. Difference of two sets.
328 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
329 difference :: Ord a => Set a -> Set a -> Set a
330 difference Tip _ = Tip
331 difference t1 Tip = t1
332 difference t1 t2 = hedgeDiff (const LT) (const GT) t1 t2
333
334 hedgeDiff :: Ord a
335 => (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a -> Set a
336 hedgeDiff _ _ Tip _
337 = Tip
338 hedgeDiff cmplo cmphi (Bin _ x l r) Tip
339 = join x (filterGt cmplo l) (filterLt cmphi r)
340 hedgeDiff cmplo cmphi t (Bin _ x l r)
341 = merge (hedgeDiff cmplo cmpx (trim cmplo cmpx t) l)
342 (hedgeDiff cmpx cmphi (trim cmpx cmphi t) r)
343 where
344 cmpx y = compare x y
345
346 {--------------------------------------------------------------------
347 Intersection
348 --------------------------------------------------------------------}
349 -- | /O(n+m)/. The intersection of two sets.
350 -- Elements of the result come from the first set, so for example
351 --
352 -- > import qualified Data.Set as S
353 -- > data AB = A | B deriving Show
354 -- > instance Ord AB where compare _ _ = EQ
355 -- > instance Eq AB where _ == _ = True
356 -- > main = print (S.singleton A `S.intersection` S.singleton B,
357 -- > S.singleton B `S.intersection` S.singleton A)
358 --
359 -- prints @(fromList [A],fromList [B])@.
360 intersection :: Ord a => Set a -> Set a -> Set a
361 intersection Tip _ = Tip
362 intersection _ Tip = Tip
363 intersection t1@(Bin s1 x1 l1 r1) t2@(Bin s2 x2 l2 r2) =
364 if s1 >= s2 then
365 let (lt,found,gt) = splitLookup x2 t1
366 tl = intersection lt l2
367 tr = intersection gt r2
368 in case found of
369 Just x -> join x tl tr
370 Nothing -> merge tl tr
371 else let (lt,found,gt) = splitMember x1 t2
372 tl = intersection l1 lt
373 tr = intersection r1 gt
374 in if found then join x1 tl tr
375 else merge tl tr
376
377 {--------------------------------------------------------------------
378 Filter and partition
379 --------------------------------------------------------------------}
380 -- | /O(n)/. Filter all elements that satisfy the predicate.
381 filter :: Ord a => (a -> Bool) -> Set a -> Set a
382 filter _ Tip = Tip
383 filter p (Bin _ x l r)
384 | p x = join x (filter p l) (filter p r)
385 | otherwise = merge (filter p l) (filter p r)
386
387 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
388 -- the predicate and one with all elements that don't satisfy the predicate.
389 -- See also 'split'.
390 partition :: Ord a => (a -> Bool) -> Set a -> (Set a,Set a)
391 partition _ Tip = (Tip,Tip)
392 partition p (Bin _ x l r)
393 | p x = (join x l1 r1,merge l2 r2)
394 | otherwise = (merge l1 r1,join x l2 r2)
395 where
396 (l1,l2) = partition p l
397 (r1,r2) = partition p r
398
399 {----------------------------------------------------------------------
400 Map
401 ----------------------------------------------------------------------}
402
403 -- | /O(n*log n)/.
404 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
405 --
406 -- It's worth noting that the size of the result may be smaller if,
407 -- for some @(x,y)@, @x \/= y && f x == f y@
408
409 map :: (Ord a, Ord b) => (a->b) -> Set a -> Set b
410 map f = fromList . List.map f . toList
411
412 -- | /O(n)/. The
413 --
414 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
415 -- /The precondition is not checked./
416 -- Semi-formally, we have:
417 --
418 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
419 -- > ==> mapMonotonic f s == map f s
420 -- > where ls = toList s
421
422 mapMonotonic :: (a->b) -> Set a -> Set b
423 mapMonotonic _ Tip = Tip
424 mapMonotonic f (Bin sz x l r) =
425 Bin sz (f x) (mapMonotonic f l) (mapMonotonic f r)
426
427
428 {--------------------------------------------------------------------
429 Fold
430 --------------------------------------------------------------------}
431 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
432 fold :: (a -> b -> b) -> b -> Set a -> b
433 fold f z s
434 = foldr f z s
435
436 -- | /O(n)/. Post-order fold.
437 foldr :: (a -> b -> b) -> b -> Set a -> b
438 foldr _ z Tip = z
439 foldr f z (Bin _ x l r) = foldr f (f x (foldr f z r)) l
440
441 {--------------------------------------------------------------------
442 List variations
443 --------------------------------------------------------------------}
444 -- | /O(n)/. The elements of a set.
445 elems :: Set a -> [a]
446 elems s
447 = toList s
448
449 {--------------------------------------------------------------------
450 Lists
451 --------------------------------------------------------------------}
452 -- | /O(n)/. Convert the set to a list of elements.
453 toList :: Set a -> [a]
454 toList s
455 = toAscList s
456
457 -- | /O(n)/. Convert the set to an ascending list of elements.
458 toAscList :: Set a -> [a]
459 toAscList t
460 = foldr (:) [] t
461
462
463 -- | /O(n*log n)/. Create a set from a list of elements.
464 fromList :: Ord a => [a] -> Set a
465 fromList xs
466 = foldlStrict ins empty xs
467 where
468 ins t x = insert x t
469
470 {--------------------------------------------------------------------
471 Building trees from ascending/descending lists can be done in linear time.
472
473 Note that if [xs] is ascending that:
474 fromAscList xs == fromList xs
475 --------------------------------------------------------------------}
476 -- | /O(n)/. Build a set from an ascending list in linear time.
477 -- /The precondition (input list is ascending) is not checked./
478 fromAscList :: Eq a => [a] -> Set a
479 fromAscList xs
480 = fromDistinctAscList (combineEq xs)
481 where
482 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
483 combineEq xs'
484 = case xs' of
485 [] -> []
486 [x] -> [x]
487 (x:xx) -> combineEq' x xx
488
489 combineEq' z [] = [z]
490 combineEq' z (x:xs')
491 | z==x = combineEq' z xs'
492 | otherwise = z:combineEq' x xs'
493
494
495 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
496 -- /The precondition (input list is strictly ascending) is not checked./
497 fromDistinctAscList :: [a] -> Set a
498 fromDistinctAscList xs
499 = build const (length xs) xs
500 where
501 -- 1) use continutations so that we use heap space instead of stack space.
502 -- 2) special case for n==5 to build bushier trees.
503 build c 0 xs' = c Tip xs'
504 build c 5 xs' = case xs' of
505 (x1:x2:x3:x4:x5:xx)
506 -> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
507 _ -> error "fromDistinctAscList build 5"
508 build c n xs' = seq nr $ build (buildR nr c) nl xs'
509 where
510 nl = n `div` 2
511 nr = n - nl - 1
512
513 buildR n c l (x:ys) = build (buildB l x c) n ys
514 buildR _ _ _ [] = error "fromDistinctAscList buildR []"
515 buildB l x c r zs = c (bin x l r) zs
516
517 {--------------------------------------------------------------------
518 Eq converts the set to a list. In a lazy setting, this
519 actually seems one of the faster methods to compare two trees
520 and it is certainly the simplest :-)
521 --------------------------------------------------------------------}
522 instance Eq a => Eq (Set a) where
523 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
524
525 {--------------------------------------------------------------------
526 Ord
527 --------------------------------------------------------------------}
528
529 instance Ord a => Ord (Set a) where
530 compare s1 s2 = compare (toAscList s1) (toAscList s2)
531
532 {--------------------------------------------------------------------
533 Show
534 --------------------------------------------------------------------}
535 instance Show a => Show (Set a) where
536 showsPrec p xs = showParen (p > 10) $
537 showString "fromList " . shows (toList xs)
538
539 {-
540 XXX unused code
541
542 showSet :: (Show a) => [a] -> ShowS
543 showSet []
544 = showString "{}"
545 showSet (x:xs)
546 = showChar '{' . shows x . showTail xs
547 where
548 showTail [] = showChar '}'
549 showTail (x':xs') = showChar ',' . shows x' . showTail xs'
550 -}
551
552 {--------------------------------------------------------------------
553 Read
554 --------------------------------------------------------------------}
555 instance (Read a, Ord a) => Read (Set a) where
556 #ifdef __GLASGOW_HASKELL__
557 readPrec = parens $ prec 10 $ do
558 Ident "fromList" <- lexP
559 xs <- readPrec
560 return (fromList xs)
561
562 readListPrec = readListPrecDefault
563 #else
564 readsPrec p = readParen (p > 10) $ \ r -> do
565 ("fromList",s) <- lex r
566 (xs,t) <- reads s
567 return (fromList xs,t)
568 #endif
569
570 {--------------------------------------------------------------------
571 Typeable/Data
572 --------------------------------------------------------------------}
573
574 #include "Typeable.h"
575 INSTANCE_TYPEABLE1(Set,setTc,"Set")
576
577 {--------------------------------------------------------------------
578 Utility functions that return sub-ranges of the original
579 tree. Some functions take a comparison function as argument to
580 allow comparisons against infinite values. A function [cmplo x]
581 should be read as [compare lo x].
582
583 [trim cmplo cmphi t] A tree that is either empty or where [cmplo x == LT]
584 and [cmphi x == GT] for the value [x] of the root.
585 [filterGt cmp t] A tree where for all values [k]. [cmp k == LT]
586 [filterLt cmp t] A tree where for all values [k]. [cmp k == GT]
587
588 [split k t] Returns two trees [l] and [r] where all values
589 in [l] are <[k] and all keys in [r] are >[k].
590 [splitMember k t] Just like [split] but also returns whether [k]
591 was found in the tree.
592 --------------------------------------------------------------------}
593
594 {--------------------------------------------------------------------
595 [trim lo hi t] trims away all subtrees that surely contain no
596 values between the range [lo] to [hi]. The returned tree is either
597 empty or the key of the root is between @lo@ and @hi@.
598 --------------------------------------------------------------------}
599 trim :: (a -> Ordering) -> (a -> Ordering) -> Set a -> Set a
600 trim _ _ Tip = Tip
601 trim cmplo cmphi t@(Bin _ x l r)
602 = case cmplo x of
603 LT -> case cmphi x of
604 GT -> t
605 _ -> trim cmplo cmphi l
606 _ -> trim cmplo cmphi r
607
608 {-
609 XXX unused code
610
611 trimMemberLo :: Ord a => a -> (a -> Ordering) -> Set a -> (Bool, Set a)
612 trimMemberLo _ _ Tip = (False,Tip)
613 trimMemberLo lo cmphi t@(Bin _ x l r)
614 = case compare lo x of
615 LT -> case cmphi x of
616 GT -> (member lo t, t)
617 _ -> trimMemberLo lo cmphi l
618 GT -> trimMemberLo lo cmphi r
619 EQ -> (True,trim (compare lo) cmphi r)
620 -}
621
622 {--------------------------------------------------------------------
623 [filterGt x t] filter all values >[x] from tree [t]
624 [filterLt x t] filter all values <[x] from tree [t]
625 --------------------------------------------------------------------}
626 filterGt :: (a -> Ordering) -> Set a -> Set a
627 filterGt _ Tip = Tip
628 filterGt cmp (Bin _ x l r)
629 = case cmp x of
630 LT -> join x (filterGt cmp l) r
631 GT -> filterGt cmp r
632 EQ -> r
633
634 filterLt :: (a -> Ordering) -> Set a -> Set a
635 filterLt _ Tip = Tip
636 filterLt cmp (Bin _ x l r)
637 = case cmp x of
638 LT -> filterLt cmp l
639 GT -> join x l (filterLt cmp r)
640 EQ -> l
641
642
643 {--------------------------------------------------------------------
644 Split
645 --------------------------------------------------------------------}
646 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
647 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
648 -- comprises the elements of @set@ greater than @x@.
649 split :: Ord a => a -> Set a -> (Set a,Set a)
650 split _ Tip = (Tip,Tip)
651 split x (Bin _ y l r)
652 = case compare x y of
653 LT -> let (lt,gt) = split x l in (lt,join y gt r)
654 GT -> let (lt,gt) = split x r in (join y l lt,gt)
655 EQ -> (l,r)
656
657 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
658 -- element was found in the original set.
659 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
660 splitMember x t = let (l,m,r) = splitLookup x t in
661 (l,maybe False (const True) m,r)
662
663 -- | /O(log n)/. Performs a 'split' but also returns the pivot
664 -- element that was found in the original set.
665 splitLookup :: Ord a => a -> Set a -> (Set a,Maybe a,Set a)
666 splitLookup _ Tip = (Tip,Nothing,Tip)
667 splitLookup x (Bin _ y l r)
668 = case compare x y of
669 LT -> let (lt,found,gt) = splitLookup x l in (lt,found,join y gt r)
670 GT -> let (lt,found,gt) = splitLookup x r in (join y l lt,found,gt)
671 EQ -> (l,Just y,r)
672
673 {--------------------------------------------------------------------
674 Utility functions that maintain the balance properties of the tree.
675 All constructors assume that all values in [l] < [x] and all values
676 in [r] > [x], and that [l] and [r] are valid trees.
677
678 In order of sophistication:
679 [Bin sz x l r] The type constructor.
680 [bin x l r] Maintains the correct size, assumes that both [l]
681 and [r] are balanced with respect to each other.
682 [balance x l r] Restores the balance and size.
683 Assumes that the original tree was balanced and
684 that [l] or [r] has changed by at most one element.
685 [join x l r] Restores balance and size.
686
687 Furthermore, we can construct a new tree from two trees. Both operations
688 assume that all values in [l] < all values in [r] and that [l] and [r]
689 are valid:
690 [glue l r] Glues [l] and [r] together. Assumes that [l] and
691 [r] are already balanced with respect to each other.
692 [merge l r] Merges two trees and restores balance.
693
694 Note: in contrast to Adam's paper, we use (<=) comparisons instead
695 of (<) comparisons in [join], [merge] and [balance].
696 Quickcheck (on [difference]) showed that this was necessary in order
697 to maintain the invariants. It is quite unsatisfactory that I haven't
698 been able to find out why this is actually the case! Fortunately, it
699 doesn't hurt to be a bit more conservative.
700 --------------------------------------------------------------------}
701
702 {--------------------------------------------------------------------
703 Join
704 --------------------------------------------------------------------}
705 join :: a -> Set a -> Set a -> Set a
706 join x Tip r = insertMin x r
707 join x l Tip = insertMax x l
708 join x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
709 | delta*sizeL <= sizeR = balance z (join x l lz) rz
710 | delta*sizeR <= sizeL = balance y ly (join x ry r)
711 | otherwise = bin x l r
712
713
714 -- insertMin and insertMax don't perform potentially expensive comparisons.
715 insertMax,insertMin :: a -> Set a -> Set a
716 insertMax x t
717 = case t of
718 Tip -> singleton x
719 Bin _ y l r
720 -> balance y l (insertMax x r)
721
722 insertMin x t
723 = case t of
724 Tip -> singleton x
725 Bin _ y l r
726 -> balance y (insertMin x l) r
727
728 {--------------------------------------------------------------------
729 [merge l r]: merges two trees.
730 --------------------------------------------------------------------}
731 merge :: Set a -> Set a -> Set a
732 merge Tip r = r
733 merge l Tip = l
734 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
735 | delta*sizeL <= sizeR = balance y (merge l ly) ry
736 | delta*sizeR <= sizeL = balance x lx (merge rx r)
737 | otherwise = glue l r
738
739 {--------------------------------------------------------------------
740 [glue l r]: glues two trees together.
741 Assumes that [l] and [r] are already balanced with respect to each other.
742 --------------------------------------------------------------------}
743 glue :: Set a -> Set a -> Set a
744 glue Tip r = r
745 glue l Tip = l
746 glue l r
747 | size l > size r = let (m,l') = deleteFindMax l in balance m l' r
748 | otherwise = let (m,r') = deleteFindMin r in balance m l r'
749
750
751 -- | /O(log n)/. Delete and find the minimal element.
752 --
753 -- > deleteFindMin set = (findMin set, deleteMin set)
754
755 deleteFindMin :: Set a -> (a,Set a)
756 deleteFindMin t
757 = case t of
758 Bin _ x Tip r -> (x,r)
759 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balance x l' r)
760 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
761
762 -- | /O(log n)/. Delete and find the maximal element.
763 --
764 -- > deleteFindMax set = (findMax set, deleteMax set)
765 deleteFindMax :: Set a -> (a,Set a)
766 deleteFindMax t
767 = case t of
768 Bin _ x l Tip -> (x,l)
769 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balance x l r')
770 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
771
772 -- | /O(log n)/. Retrieves the minimal key of the set, and the set
773 -- stripped of that element, or 'Nothing' if passed an empty set.
774 minView :: Set a -> Maybe (a, Set a)
775 minView Tip = Nothing
776 minView x = Just (deleteFindMin x)
777
778 -- | /O(log n)/. Retrieves the maximal key of the set, and the set
779 -- stripped of that element, or 'Nothing' if passed an empty set.
780 maxView :: Set a -> Maybe (a, Set a)
781 maxView Tip = Nothing
782 maxView x = Just (deleteFindMax x)
783
784 {--------------------------------------------------------------------
785 [balance x l r] balances two trees with value x.
786 The sizes of the trees should balance after decreasing the
787 size of one of them. (a rotation).
788
789 [delta] is the maximal relative difference between the sizes of
790 two trees, it corresponds with the [w] in Adams' paper,
791 or equivalently, [1/delta] corresponds with the $\alpha$
792 in Nievergelt's paper. Adams shows that [delta] should
793 be larger than 3.745 in order to garantee that the
794 rotations can always restore balance.
795
796 [ratio] is the ratio between an outer and inner sibling of the
797 heavier subtree in an unbalanced setting. It determines
798 whether a double or single rotation should be performed
799 to restore balance. It is correspondes with the inverse
800 of $\alpha$ in Adam's article.
801
802 Note that:
803 - [delta] should be larger than 4.646 with a [ratio] of 2.
804 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
805
806 - A lower [delta] leads to a more 'perfectly' balanced tree.
807 - A higher [delta] performs less rebalancing.
808
809 - Balancing is automatic for random data and a balancing
810 scheme is only necessary to avoid pathological worst cases.
811 Almost any choice will do in practice
812
813 - Allthough it seems that a rather large [delta] may perform better
814 than smaller one, measurements have shown that the smallest [delta]
815 of 4 is actually the fastest on a wide range of operations. It
816 especially improves performance on worst-case scenarios like
817 a sequence of ordered insertions.
818
819 Note: in contrast to Adams' paper, we use a ratio of (at least) 2
820 to decide whether a single or double rotation is needed. Allthough
821 he actually proves that this ratio is needed to maintain the
822 invariants, his implementation uses a (invalid) ratio of 1.
823 He is aware of the problem though since he has put a comment in his
824 original source code that he doesn't care about generating a
825 slightly inbalanced tree since it doesn't seem to matter in practice.
826 However (since we use quickcheck :-) we will stick to strictly balanced
827 trees.
828 --------------------------------------------------------------------}
829 delta,ratio :: Int
830 delta = 4
831 ratio = 2
832
833 balance :: a -> Set a -> Set a -> Set a
834 balance x l r
835 | sizeL + sizeR <= 1 = Bin sizeX x l r
836 | sizeR >= delta*sizeL = rotateL x l r
837 | sizeL >= delta*sizeR = rotateR x l r
838 | otherwise = Bin sizeX x l r
839 where
840 sizeL = size l
841 sizeR = size r
842 sizeX = sizeL + sizeR + 1
843
844 -- rotate
845 rotateL :: a -> Set a -> Set a -> Set a
846 rotateL x l r@(Bin _ _ ly ry)
847 | size ly < ratio*size ry = singleL x l r
848 | otherwise = doubleL x l r
849 rotateL _ _ Tip = error "rotateL Tip"
850
851 rotateR :: a -> Set a -> Set a -> Set a
852 rotateR x l@(Bin _ _ ly ry) r
853 | size ry < ratio*size ly = singleR x l r
854 | otherwise = doubleR x l r
855 rotateR _ Tip _ = error "rotateL Tip"
856
857 -- basic rotations
858 singleL, singleR :: a -> Set a -> Set a -> Set a
859 singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
860 singleL _ _ Tip = error "singleL"
861 singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
862 singleR _ Tip _ = error "singleR"
863
864 doubleL, doubleR :: a -> Set a -> Set a -> Set a
865 doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
866 doubleL _ _ _ = error "doubleL"
867 doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
868 doubleR _ _ _ = error "doubleR"
869
870
871 {--------------------------------------------------------------------
872 The bin constructor maintains the size of the tree
873 --------------------------------------------------------------------}
874 bin :: a -> Set a -> Set a -> Set a
875 bin x l r
876 = Bin (size l + size r + 1) x l r
877
878
879 {--------------------------------------------------------------------
880 Utilities
881 --------------------------------------------------------------------}
882 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
883 foldlStrict f z xs
884 = case xs of
885 [] -> z
886 (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
887
888
889 {--------------------------------------------------------------------
890 Debugging
891 --------------------------------------------------------------------}
892 -- | /O(n)/. Show the tree that implements the set. The tree is shown
893 -- in a compressed, hanging format.
894 showTree :: Show a => Set a -> String
895 showTree s
896 = showTreeWith True False s
897
898
899 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
900 the tree that implements the set. If @hang@ is
901 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
902 @wide@ is 'True', an extra wide version is shown.
903
904 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
905 > 4
906 > +--2
907 > | +--1
908 > | +--3
909 > +--5
910 >
911 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
912 > 4
913 > |
914 > +--2
915 > | |
916 > | +--1
917 > | |
918 > | +--3
919 > |
920 > +--5
921 >
922 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
923 > +--5
924 > |
925 > 4
926 > |
927 > | +--3
928 > | |
929 > +--2
930 > |
931 > +--1
932
933 -}
934 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
935 showTreeWith hang wide t
936 | hang = (showsTreeHang wide [] t) ""
937 | otherwise = (showsTree wide [] [] t) ""
938
939 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
940 showsTree wide lbars rbars t
941 = case t of
942 Tip -> showsBars lbars . showString "|\n"
943 Bin _ x Tip Tip
944 -> showsBars lbars . shows x . showString "\n"
945 Bin _ x l r
946 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
947 showWide wide rbars .
948 showsBars lbars . shows x . showString "\n" .
949 showWide wide lbars .
950 showsTree wide (withEmpty lbars) (withBar lbars) l
951
952 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
953 showsTreeHang wide bars t
954 = case t of
955 Tip -> showsBars bars . showString "|\n"
956 Bin _ x Tip Tip
957 -> showsBars bars . shows x . showString "\n"
958 Bin _ x l r
959 -> showsBars bars . shows x . showString "\n" .
960 showWide wide bars .
961 showsTreeHang wide (withBar bars) l .
962 showWide wide bars .
963 showsTreeHang wide (withEmpty bars) r
964
965 showWide :: Bool -> [String] -> String -> String
966 showWide wide bars
967 | wide = showString (concat (reverse bars)) . showString "|\n"
968 | otherwise = id
969
970 showsBars :: [String] -> ShowS
971 showsBars bars
972 = case bars of
973 [] -> id
974 _ -> showString (concat (reverse (tail bars))) . showString node
975
976 node :: String
977 node = "+--"
978
979 withBar, withEmpty :: [String] -> [String]
980 withBar bars = "| ":bars
981 withEmpty bars = " ":bars
982
983 {--------------------------------------------------------------------
984 Assertions
985 --------------------------------------------------------------------}
986 -- | /O(n)/. Test if the internal set structure is valid.
987 valid :: Ord a => Set a -> Bool
988 valid t
989 = balanced t && ordered t && validsize t
990
991 ordered :: Ord a => Set a -> Bool
992 ordered t
993 = bounded (const True) (const True) t
994 where
995 bounded lo hi t'
996 = case t' of
997 Tip -> True
998 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
999
1000 balanced :: Set a -> Bool
1001 balanced t
1002 = case t of
1003 Tip -> True
1004 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1005 balanced l && balanced r
1006
1007 validsize :: Set a -> Bool
1008 validsize t
1009 = (realsize t == Just (size t))
1010 where
1011 realsize t'
1012 = case t' of
1013 Tip -> Just 0
1014 Bin sz _ l r -> case (realsize l,realsize r) of
1015 (Just n,Just m) | n+m+1 == sz -> Just sz
1016 _ -> Nothing
1017
1018 {-
1019 {--------------------------------------------------------------------
1020 Testing
1021 --------------------------------------------------------------------}
1022 testTree :: [Int] -> Set Int
1023 testTree xs = fromList xs
1024 test1 = testTree [1..20]
1025 test2 = testTree [30,29..10]
1026 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
1027
1028 {--------------------------------------------------------------------
1029 QuickCheck
1030 --------------------------------------------------------------------}
1031 qcheck prop
1032 = check config prop
1033 where
1034 config = Config
1035 { configMaxTest = 500
1036 , configMaxFail = 5000
1037 , configSize = \n -> (div n 2 + 3)
1038 , configEvery = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
1039 }
1040
1041
1042 {--------------------------------------------------------------------
1043 Arbitrary, reasonably balanced trees
1044 --------------------------------------------------------------------}
1045 instance (Enum a) => Arbitrary (Set a) where
1046 arbitrary = sized (arbtree 0 maxkey)
1047 where maxkey = 10000
1048
1049 arbtree :: (Enum a) => Int -> Int -> Int -> Gen (Set a)
1050 arbtree lo hi n
1051 | n <= 0 = return Tip
1052 | lo >= hi = return Tip
1053 | otherwise = do{ i <- choose (lo,hi)
1054 ; m <- choose (1,30)
1055 ; let (ml,mr) | m==(1::Int)= (1,2)
1056 | m==2 = (2,1)
1057 | m==3 = (1,1)
1058 | otherwise = (2,2)
1059 ; l <- arbtree lo (i-1) (n `div` ml)
1060 ; r <- arbtree (i+1) hi (n `div` mr)
1061 ; return (bin (toEnum i) l r)
1062 }
1063
1064
1065 {--------------------------------------------------------------------
1066 Valid tree's
1067 --------------------------------------------------------------------}
1068 forValid :: (Enum a,Show a,Testable b) => (Set a -> b) -> Property
1069 forValid f
1070 = forAll arbitrary $ \t ->
1071 -- classify (balanced t) "balanced" $
1072 classify (size t == 0) "empty" $
1073 classify (size t > 0 && size t <= 10) "small" $
1074 classify (size t > 10 && size t <= 64) "medium" $
1075 classify (size t > 64) "large" $
1076 balanced t ==> f t
1077
1078 forValidIntTree :: Testable a => (Set Int -> a) -> Property
1079 forValidIntTree f
1080 = forValid f
1081
1082 forValidUnitTree :: Testable a => (Set Int -> a) -> Property
1083 forValidUnitTree f
1084 = forValid f
1085
1086
1087 prop_Valid
1088 = forValidUnitTree $ \t -> valid t
1089
1090 {--------------------------------------------------------------------
1091 Single, Insert, Delete
1092 --------------------------------------------------------------------}
1093 prop_Single :: Int -> Bool
1094 prop_Single x
1095 = (insert x empty == singleton x)
1096
1097 prop_InsertValid :: Int -> Property
1098 prop_InsertValid k
1099 = forValidUnitTree $ \t -> valid (insert k t)
1100
1101 prop_InsertDelete :: Int -> Set Int -> Property
1102 prop_InsertDelete k t
1103 = not (member k t) ==> delete k (insert k t) == t
1104
1105 prop_DeleteValid :: Int -> Property
1106 prop_DeleteValid k
1107 = forValidUnitTree $ \t ->
1108 valid (delete k (insert k t))
1109
1110 {--------------------------------------------------------------------
1111 Balance
1112 --------------------------------------------------------------------}
1113 prop_Join :: Int -> Property
1114 prop_Join x
1115 = forValidUnitTree $ \t ->
1116 let (l,r) = split x t
1117 in valid (join x l r)
1118
1119 prop_Merge :: Int -> Property
1120 prop_Merge x
1121 = forValidUnitTree $ \t ->
1122 let (l,r) = split x t
1123 in valid (merge l r)
1124
1125
1126 {--------------------------------------------------------------------
1127 Union
1128 --------------------------------------------------------------------}
1129 prop_UnionValid :: Property
1130 prop_UnionValid
1131 = forValidUnitTree $ \t1 ->
1132 forValidUnitTree $ \t2 ->
1133 valid (union t1 t2)
1134
1135 prop_UnionInsert :: Int -> Set Int -> Bool
1136 prop_UnionInsert x t
1137 = union t (singleton x) == insert x t
1138
1139 prop_UnionAssoc :: Set Int -> Set Int -> Set Int -> Bool
1140 prop_UnionAssoc t1 t2 t3
1141 = union t1 (union t2 t3) == union (union t1 t2) t3
1142
1143 prop_UnionComm :: Set Int -> Set Int -> Bool
1144 prop_UnionComm t1 t2
1145 = (union t1 t2 == union t2 t1)
1146
1147
1148 prop_DiffValid
1149 = forValidUnitTree $ \t1 ->
1150 forValidUnitTree $ \t2 ->
1151 valid (difference t1 t2)
1152
1153 prop_Diff :: [Int] -> [Int] -> Bool
1154 prop_Diff xs ys
1155 = toAscList (difference (fromList xs) (fromList ys))
1156 == List.sort ((List.\\) (nub xs) (nub ys))
1157
1158 prop_IntValid
1159 = forValidUnitTree $ \t1 ->
1160 forValidUnitTree $ \t2 ->
1161 valid (intersection t1 t2)
1162
1163 prop_Int :: [Int] -> [Int] -> Bool
1164 prop_Int xs ys
1165 = toAscList (intersection (fromList xs) (fromList ys))
1166 == List.sort (nub ((List.intersect) (xs) (ys)))
1167
1168 {--------------------------------------------------------------------
1169 Lists
1170 --------------------------------------------------------------------}
1171 prop_Ordered
1172 = forAll (choose (5,100)) $ \n ->
1173 let xs = [0..n::Int]
1174 in fromAscList xs == fromList xs
1175
1176 prop_List :: [Int] -> Bool
1177 prop_List xs
1178 = (sort (nub xs) == toList (fromList xs))
1179 -}