f17971dc57c98c0e5c11dca33d06e25790ef1075
[packages/containers.git] / Data / Set.hs
1 {-# LANGUAGE CPP #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.Set
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of sets.
12 --
13 -- Since many function names (but not the type name) clash with
14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
15 --
16 -- > import Data.Set (Set)
17 -- > import qualified Data.Set as Set
18 --
19 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
20 -- trees of /bounded balance/) as described by:
21 --
22 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
23 -- Journal of Functional Programming 3(4):553-562, October 1993,
24 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
25 --
26 -- * J. Nievergelt and E.M. Reingold,
27 -- \"/Binary search trees of bounded balance/\",
28 -- SIAM journal of computing 2(1), March 1973.
29 --
30 -- Note that the implementation is /left-biased/ -- the elements of a
31 -- first argument are always preferred to the second, for example in
32 -- 'union' or 'insert'. Of course, left-biasing can only be observed
33 -- when equality is an equivalence relation instead of structural
34 -- equality.
35 -----------------------------------------------------------------------------
36
37 module Data.Set (
38 -- * Set type
39 #if !defined(TESTING)
40 Set -- instance Eq,Ord,Show,Read,Data,Typeable
41 #else
42 Set(..)
43 #endif
44
45 -- * Operators
46 , (\\)
47
48 -- * Query
49 , null
50 , size
51 , member
52 , notMember
53 , isSubsetOf
54 , isProperSubsetOf
55
56 -- * Construction
57 , empty
58 , singleton
59 , insert
60 , delete
61
62 -- * Combine
63 , union
64 , unions
65 , difference
66 , intersection
67
68 -- * Filter
69 , filter
70 , partition
71 , split
72 , splitMember
73
74 -- * Map
75 , map
76 , mapMonotonic
77
78 -- * Fold
79 , fold
80
81 -- * Min\/Max
82 , findMin
83 , findMax
84 , deleteMin
85 , deleteMax
86 , deleteFindMin
87 , deleteFindMax
88 , maxView
89 , minView
90
91 -- * Conversion
92
93 -- ** List
94 , elems
95 , toList
96 , fromList
97
98 -- ** Ordered list
99 , toAscList
100 , fromAscList
101 , fromDistinctAscList
102
103 -- * Debugging
104 , showTree
105 , showTreeWith
106 , valid
107
108 #if defined(TESTING)
109 -- Internals (for testing)
110 , bin
111 , balanced
112 , join
113 , merge
114 #endif
115 ) where
116
117 import Prelude hiding (filter,foldr,null,map)
118 import qualified Data.List as List
119 import Data.Monoid (Monoid(..))
120 import Data.Foldable (Foldable(foldMap))
121 #ifndef __GLASGOW_HASKELL__
122 import Data.Typeable (Typeable, typeOf, typeOfDefault)
123 #endif
124 import Data.Typeable (Typeable1(..), TyCon, mkTyCon, mkTyConApp)
125
126 {-
127 -- just for testing
128 import QuickCheck
129 import List (nub,sort)
130 import qualified List
131 -}
132
133 #if __GLASGOW_HASKELL__
134 import Text.Read
135 import Data.Data (Data(..), mkNoRepType, gcast1)
136 #endif
137
138 -- Use macros to define strictness of functions.
139 -- STRICTxy denotes an y-ary function strict in the x-th parameter.
140 #define STRICT12(fn) fn arg _ | arg `seq` False = undefined
141 #define STRICT23(fn) fn _ arg _ | arg `seq` False = undefined
142
143 {--------------------------------------------------------------------
144 Operators
145 --------------------------------------------------------------------}
146 infixl 9 \\ --
147
148 -- | /O(n+m)/. See 'difference'.
149 (\\) :: Ord a => Set a -> Set a -> Set a
150 m1 \\ m2 = difference m1 m2
151 #if __GLASGOW_HASKELL__ >= 700
152 {-# INLINABLE (\\) #-}
153 #endif
154
155 {--------------------------------------------------------------------
156 Sets are size balanced trees
157 --------------------------------------------------------------------}
158 -- | A set of values @a@.
159 data Set a = Tip
160 | Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a)
161
162 type Size = Int
163
164 instance Ord a => Monoid (Set a) where
165 mempty = empty
166 mappend = union
167 mconcat = unions
168
169 instance Foldable Set where
170 foldMap _ Tip = mempty
171 foldMap f (Bin _s k l r) = foldMap f l `mappend` f k `mappend` foldMap f r
172
173 #if __GLASGOW_HASKELL__
174
175 {--------------------------------------------------------------------
176 A Data instance
177 --------------------------------------------------------------------}
178
179 -- This instance preserves data abstraction at the cost of inefficiency.
180 -- We omit reflection services for the sake of data abstraction.
181
182 instance (Data a, Ord a) => Data (Set a) where
183 gfoldl f z set = z fromList `f` (toList set)
184 toConstr _ = error "toConstr"
185 gunfold _ _ = error "gunfold"
186 dataTypeOf _ = mkNoRepType "Data.Set.Set"
187 dataCast1 f = gcast1 f
188
189 #endif
190
191 {--------------------------------------------------------------------
192 Query
193 --------------------------------------------------------------------}
194 -- | /O(1)/. Is this the empty set?
195 null :: Set a -> Bool
196 null Tip = True
197 null (Bin {}) = False
198 #if __GLASGOW_HASKELL__ >= 700
199 {-# INLINABLE null #-}
200 #endif
201
202 -- | /O(1)/. The number of elements in the set.
203 size :: Set a -> Int
204 size = go
205 where
206 go Tip = 0
207 go (Bin sz _ _ _) = sz
208 #if __GLASGOW_HASKELL__ >= 700
209 {-# INLINABLE size #-}
210 #endif
211
212 -- | /O(log n)/. Is the element in the set?
213 member :: Ord a => a -> Set a -> Bool
214 member = go
215 where
216 STRICT12(go)
217 go x Tip = False
218 go x (Bin _ y l r) = case compare x y of
219 LT -> go x l
220 GT -> go x r
221 EQ -> True
222 {-# INLINE member #-}
223
224 -- | /O(log n)/. Is the element not in the set?
225 notMember :: Ord a => a -> Set a -> Bool
226 notMember a t = not $ member a t
227 {-# INLINE notMember #-}
228
229 {--------------------------------------------------------------------
230 Construction
231 --------------------------------------------------------------------}
232 -- | /O(1)/. The empty set.
233 empty :: Set a
234 empty = Tip
235 {-# INLINE empty #-}
236
237 -- | /O(1)/. Create a singleton set.
238 singleton :: a -> Set a
239 singleton x = Bin 1 x Tip Tip
240 {-# INLINE singleton #-}
241
242 {--------------------------------------------------------------------
243 Insertion, Deletion
244 --------------------------------------------------------------------}
245 -- | /O(log n)/. Insert an element in a set.
246 -- If the set already contains an element equal to the given value,
247 -- it is replaced with the new value.
248 insert :: Ord a => a -> Set a -> Set a
249 insert = go
250 where
251 STRICT12(go)
252 go x Tip = singleton x
253 go x (Bin sz y l r) = case compare x y of
254 LT -> balanceL y (go x l) r
255 GT -> balanceR y l (go x r)
256 EQ -> Bin sz x l r
257 #if __GLASGOW_HASKELL__ >= 700
258 {-# INLINEABLE insert #-}
259 #else
260 {-# INLINE insert #-}
261 #endif
262
263 -- Insert an element to the set only if it is not in the set. Used by
264 -- `union`.
265 insertR :: Ord a => a -> Set a -> Set a
266 insertR = go
267 where
268 STRICT12(go)
269 go x Tip = singleton x
270 go x t@(Bin _ y l r) = case compare x y of
271 LT -> balanceL y (go x l) r
272 GT -> balanceR y l (go x r)
273 EQ -> t
274 #if __GLASGOW_HASKELL__ >= 700
275 {-# INLINEABLE insertR #-}
276 #else
277 {-# INLINE insertR #-}
278 #endif
279
280 -- | /O(log n)/. Delete an element from a set.
281 delete :: Ord a => a -> Set a -> Set a
282 delete = go
283 where
284 STRICT12(go)
285 go x Tip = Tip
286 go x (Bin _ y l r) = case compare x y of
287 LT -> balanceR y (go x l) r
288 GT -> balanceL y l (go x r)
289 EQ -> glue l r
290 #if __GLASGOW_HASKELL__ >= 700
291 {-# INLINEABLE delete #-}
292 #else
293 {-# INLINE delete #-}
294 #endif
295
296 {--------------------------------------------------------------------
297 Subset
298 --------------------------------------------------------------------}
299 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
300 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
301 isProperSubsetOf s1 s2
302 = (size s1 < size s2) && (isSubsetOf s1 s2)
303 #if __GLASGOW_HASKELL__ >= 700
304 {-# INLINABLE isProperSubsetOf #-}
305 #endif
306
307
308 -- | /O(n+m)/. Is this a subset?
309 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
310 isSubsetOf :: Ord a => Set a -> Set a -> Bool
311 isSubsetOf t1 t2
312 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
313 #if __GLASGOW_HASKELL__ >= 700
314 {-# INLINABLE isSubsetOf #-}
315 #endif
316
317 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
318 isSubsetOfX Tip _ = True
319 isSubsetOfX _ Tip = False
320 isSubsetOfX (Bin _ x l r) t
321 = found && isSubsetOfX l lt && isSubsetOfX r gt
322 where
323 (lt,found,gt) = splitMember x t
324 #if __GLASGOW_HASKELL__ >= 700
325 {-# INLINABLE isSubsetOfX #-}
326 #endif
327
328
329 {--------------------------------------------------------------------
330 Minimal, Maximal
331 --------------------------------------------------------------------}
332 -- | /O(log n)/. The minimal element of a set.
333 findMin :: Set a -> a
334 findMin (Bin _ x Tip _) = x
335 findMin (Bin _ _ l _) = findMin l
336 findMin Tip = error "Set.findMin: empty set has no minimal element"
337 #if __GLASGOW_HASKELL__ >= 700
338 {-# INLINABLE findMin #-}
339 #endif
340
341 -- | /O(log n)/. The maximal element of a set.
342 findMax :: Set a -> a
343 findMax (Bin _ x _ Tip) = x
344 findMax (Bin _ _ _ r) = findMax r
345 findMax Tip = error "Set.findMax: empty set has no maximal element"
346 #if __GLASGOW_HASKELL__ >= 700
347 {-# INLINABLE findMax #-}
348 #endif
349
350 -- | /O(log n)/. Delete the minimal element.
351 deleteMin :: Set a -> Set a
352 deleteMin (Bin _ _ Tip r) = r
353 deleteMin (Bin _ x l r) = balanceR x (deleteMin l) r
354 deleteMin Tip = Tip
355 #if __GLASGOW_HASKELL__ >= 700
356 {-# INLINABLE deleteMin #-}
357 #endif
358
359 -- | /O(log n)/. Delete the maximal element.
360 deleteMax :: Set a -> Set a
361 deleteMax (Bin _ _ l Tip) = l
362 deleteMax (Bin _ x l r) = balanceL x l (deleteMax r)
363 deleteMax Tip = Tip
364 #if __GLASGOW_HASKELL__ >= 700
365 {-# INLINABLE deleteMax #-}
366 #endif
367
368 {--------------------------------------------------------------------
369 Union.
370 --------------------------------------------------------------------}
371 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
372 unions :: Ord a => [Set a] -> Set a
373 unions = foldlStrict union empty
374 #if __GLASGOW_HASKELL__ >= 700
375 {-# INLINABLE unions #-}
376 #endif
377
378 -- | /O(n+m)/. The union of two sets, preferring the first set when
379 -- equal elements are encountered.
380 -- The implementation uses the efficient /hedge-union/ algorithm.
381 -- Hedge-union is more efficient on (bigset `union` smallset).
382 union :: Ord a => Set a -> Set a -> Set a
383 union Tip t2 = t2
384 union t1 Tip = t1
385 union (Bin _ x Tip Tip) t = insert x t
386 union t (Bin _ x Tip Tip) = insertR x t
387 union t1 t2 = hedgeUnion NothingS NothingS t1 t2
388 #if __GLASGOW_HASKELL__ >= 700
389 {-# INLINABLE union #-}
390 #endif
391
392 hedgeUnion :: Ord a
393 => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
394 hedgeUnion _ _ t1 Tip
395 = t1
396 hedgeUnion blo bhi Tip (Bin _ x l r)
397 = join x (filterGt blo l) (filterLt bhi r)
398 hedgeUnion blo bhi (Bin _ x l r) t2
399 = join x (hedgeUnion blo bmi l (trim blo bmi t2))
400 (hedgeUnion bmi bhi r (trim bmi bhi t2))
401 where
402 bmi = JustS x
403 #if __GLASGOW_HASKELL__ >= 700
404 {-# INLINABLE hedgeUnion #-}
405 #endif
406
407 {--------------------------------------------------------------------
408 Difference
409 --------------------------------------------------------------------}
410 -- | /O(n+m)/. Difference of two sets.
411 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
412 difference :: Ord a => Set a -> Set a -> Set a
413 difference Tip _ = Tip
414 difference t1 Tip = t1
415 difference t1 t2 = hedgeDiff NothingS NothingS t1 t2
416 #if __GLASGOW_HASKELL__ >= 700
417 {-# INLINABLE difference #-}
418 #endif
419
420 hedgeDiff :: Ord a
421 => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
422 hedgeDiff _ _ Tip _
423 = Tip
424 hedgeDiff blo bhi (Bin _ x l r) Tip
425 = join x (filterGt blo l) (filterLt bhi r)
426 hedgeDiff blo bhi t (Bin _ x l r)
427 = merge (hedgeDiff blo bmi (trim blo bmi t) l)
428 (hedgeDiff bmi bhi (trim bmi bhi t) r)
429 where
430 bmi = JustS x
431 #if __GLASGOW_HASKELL__ >= 700
432 {-# INLINABLE hedgeDiff #-}
433 #endif
434
435 {--------------------------------------------------------------------
436 Intersection
437 --------------------------------------------------------------------}
438 -- | /O(n+m)/. The intersection of two sets.
439 -- Elements of the result come from the first set, so for example
440 --
441 -- > import qualified Data.Set as S
442 -- > data AB = A | B deriving Show
443 -- > instance Ord AB where compare _ _ = EQ
444 -- > instance Eq AB where _ == _ = True
445 -- > main = print (S.singleton A `S.intersection` S.singleton B,
446 -- > S.singleton B `S.intersection` S.singleton A)
447 --
448 -- prints @(fromList [A],fromList [B])@.
449 intersection :: Ord a => Set a -> Set a -> Set a
450 intersection Tip _ = Tip
451 intersection _ Tip = Tip
452 intersection t1@(Bin s1 x1 l1 r1) t2@(Bin s2 x2 l2 r2) =
453 if s1 >= s2 then
454 let (lt,found,gt) = splitLookup x2 t1
455 tl = intersection lt l2
456 tr = intersection gt r2
457 in case found of
458 Just x -> join x tl tr
459 Nothing -> merge tl tr
460 else let (lt,found,gt) = splitMember x1 t2
461 tl = intersection l1 lt
462 tr = intersection r1 gt
463 in if found then join x1 tl tr
464 else merge tl tr
465 #if __GLASGOW_HASKELL__ >= 700
466 {-# INLINABLE intersection #-}
467 #endif
468
469 {--------------------------------------------------------------------
470 Filter and partition
471 --------------------------------------------------------------------}
472 -- | /O(n)/. Filter all elements that satisfy the predicate.
473 filter :: Ord a => (a -> Bool) -> Set a -> Set a
474 filter p = go
475 where
476 go Tip = Tip
477 go (Bin _ x l r)
478 | p x = join x (go l) (go r)
479 | otherwise = merge (go l) (go r)
480 #if __GLASGOW_HASKELL__ >= 700
481 {-# INLINABLE filter #-}
482 #endif
483
484 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
485 -- the predicate and one with all elements that don't satisfy the predicate.
486 -- See also 'split'.
487 partition :: Ord a => (a -> Bool) -> Set a -> (Set a,Set a)
488 partition p = go
489 where
490 go Tip = (Tip, Tip)
491 go (Bin _ x l r) = case (go l, go r) of
492 ((l1, l2), (r1, r2))
493 | p x -> (join x l1 r1, merge l2 r2)
494 | otherwise -> (merge l1 r1, join x l2 r2)
495 #if __GLASGOW_HASKELL__ >= 700
496 {-# INLINABLE partition #-}
497 #endif
498
499 {----------------------------------------------------------------------
500 Map
501 ----------------------------------------------------------------------}
502
503 -- | /O(n*log n)/.
504 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
505 --
506 -- It's worth noting that the size of the result may be smaller if,
507 -- for some @(x,y)@, @x \/= y && f x == f y@
508
509 map :: (Ord a, Ord b) => (a->b) -> Set a -> Set b
510 map f = fromList . List.map f . toList
511 #if __GLASGOW_HASKELL__ >= 700
512 {-# INLINABLE map #-}
513 #endif
514
515 -- | /O(n)/. The
516 --
517 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
518 -- /The precondition is not checked./
519 -- Semi-formally, we have:
520 --
521 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
522 -- > ==> mapMonotonic f s == map f s
523 -- > where ls = toList s
524
525 mapMonotonic :: (a->b) -> Set a -> Set b
526 mapMonotonic f = go
527 where
528 go Tip = Tip
529 go (Bin sz x l r) = Bin sz (f x) (go l) (go r)
530 #if __GLASGOW_HASKELL__ >= 700
531 {-# INLINABLE mapMonotonic #-}
532 #endif
533
534 {--------------------------------------------------------------------
535 Fold
536 --------------------------------------------------------------------}
537 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
538 fold :: (a -> b -> b) -> b -> Set a -> b
539 fold = foldr
540 {-# INLINE fold #-}
541
542 -- | /O(n)/. Post-order fold.
543 foldr :: (a -> b -> b) -> b -> Set a -> b
544 foldr f = go
545 where
546 go z Tip = z
547 go z (Bin _ x l r) = go (f x (go z r)) l
548 {-# INLINE foldr #-}
549
550 {--------------------------------------------------------------------
551 List variations
552 --------------------------------------------------------------------}
553 -- | /O(n)/. The elements of a set.
554 elems :: Set a -> [a]
555 elems = toList
556 #if __GLASGOW_HASKELL__ >= 700
557 {-# INLINABLE elems #-}
558 #endif
559
560 {--------------------------------------------------------------------
561 Lists
562 --------------------------------------------------------------------}
563 -- | /O(n)/. Convert the set to a list of elements.
564 toList :: Set a -> [a]
565 toList = toAscList
566 #if __GLASGOW_HASKELL__ >= 700
567 {-# INLINABLE toList #-}
568 #endif
569
570 -- | /O(n)/. Convert the set to an ascending list of elements.
571 toAscList :: Set a -> [a]
572 toAscList = foldr (:) []
573 #if __GLASGOW_HASKELL__ >= 700
574 {-# INLINABLE toAscList #-}
575 #endif
576
577 -- | /O(n*log n)/. Create a set from a list of elements.
578 fromList :: Ord a => [a] -> Set a
579 fromList = foldlStrict ins empty
580 where
581 ins t x = insert x t
582 #if __GLASGOW_HASKELL__ >= 700
583 {-# INLINABLE fromList #-}
584 #endif
585
586 {--------------------------------------------------------------------
587 Building trees from ascending/descending lists can be done in linear time.
588
589 Note that if [xs] is ascending that:
590 fromAscList xs == fromList xs
591 --------------------------------------------------------------------}
592 -- | /O(n)/. Build a set from an ascending list in linear time.
593 -- /The precondition (input list is ascending) is not checked./
594 fromAscList :: Eq a => [a] -> Set a
595 fromAscList xs
596 = fromDistinctAscList (combineEq xs)
597 where
598 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
599 combineEq xs'
600 = case xs' of
601 [] -> []
602 [x] -> [x]
603 (x:xx) -> combineEq' x xx
604
605 combineEq' z [] = [z]
606 combineEq' z (x:xs')
607 | z==x = combineEq' z xs'
608 | otherwise = z:combineEq' x xs'
609 #if __GLASGOW_HASKELL__ >= 700
610 {-# INLINABLE fromAscList #-}
611 #endif
612
613
614 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
615 -- /The precondition (input list is strictly ascending) is not checked./
616 fromDistinctAscList :: [a] -> Set a
617 fromDistinctAscList xs
618 = build const (length xs) xs
619 where
620 -- 1) use continutations so that we use heap space instead of stack space.
621 -- 2) special case for n==5 to build bushier trees.
622 build c 0 xs' = c Tip xs'
623 build c 5 xs' = case xs' of
624 (x1:x2:x3:x4:x5:xx)
625 -> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
626 _ -> error "fromDistinctAscList build 5"
627 build c n xs' = seq nr $ build (buildR nr c) nl xs'
628 where
629 nl = n `div` 2
630 nr = n - nl - 1
631
632 buildR n c l (x:ys) = build (buildB l x c) n ys
633 buildR _ _ _ [] = error "fromDistinctAscList buildR []"
634 buildB l x c r zs = c (bin x l r) zs
635 #if __GLASGOW_HASKELL__ >= 700
636 {-# INLINABLE fromDistinctAscList #-}
637 #endif
638
639 {--------------------------------------------------------------------
640 Eq converts the set to a list. In a lazy setting, this
641 actually seems one of the faster methods to compare two trees
642 and it is certainly the simplest :-)
643 --------------------------------------------------------------------}
644 instance Eq a => Eq (Set a) where
645 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
646
647 {--------------------------------------------------------------------
648 Ord
649 --------------------------------------------------------------------}
650
651 instance Ord a => Ord (Set a) where
652 compare s1 s2 = compare (toAscList s1) (toAscList s2)
653
654 {--------------------------------------------------------------------
655 Show
656 --------------------------------------------------------------------}
657 instance Show a => Show (Set a) where
658 showsPrec p xs = showParen (p > 10) $
659 showString "fromList " . shows (toList xs)
660
661 {--------------------------------------------------------------------
662 Read
663 --------------------------------------------------------------------}
664 instance (Read a, Ord a) => Read (Set a) where
665 #ifdef __GLASGOW_HASKELL__
666 readPrec = parens $ prec 10 $ do
667 Ident "fromList" <- lexP
668 xs <- readPrec
669 return (fromList xs)
670
671 readListPrec = readListPrecDefault
672 #else
673 readsPrec p = readParen (p > 10) $ \ r -> do
674 ("fromList",s) <- lex r
675 (xs,t) <- reads s
676 return (fromList xs,t)
677 #endif
678
679 {--------------------------------------------------------------------
680 Typeable/Data
681 --------------------------------------------------------------------}
682
683 #include "Typeable.h"
684 INSTANCE_TYPEABLE1(Set,setTc,"Set")
685
686 {--------------------------------------------------------------------
687 Utility functions that return sub-ranges of the original
688 tree. Some functions take a `Maybe value` as an argument to
689 allow comparisons against infinite values. These are called `blow`
690 (Nothing is -\infty) and `bhigh` (here Nothing is +\infty).
691 We use MaybeS value, which is a Maybe strict in the Just case.
692
693 [trim blow bhigh t] A tree that is either empty or where [x > blow]
694 and [x < bhigh] for the value [x] of the root.
695 [filterGt blow t] A tree where for all values [k]. [k > blow]
696 [filterLt bhigh t] A tree where for all values [k]. [k < bhigh]
697
698 [split k t] Returns two trees [l] and [r] where all values
699 in [l] are <[k] and all keys in [r] are >[k].
700 [splitMember k t] Just like [split] but also returns whether [k]
701 was found in the tree.
702 --------------------------------------------------------------------}
703
704 data MaybeS a = NothingS | JustS !a
705
706 {--------------------------------------------------------------------
707 [trim blo bhi t] trims away all subtrees that surely contain no
708 values between the range [blo] to [bhi]. The returned tree is either
709 empty or the key of the root is between @blo@ and @bhi@.
710 --------------------------------------------------------------------}
711 trim :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a
712 trim NothingS NothingS t = t
713 trim (JustS lx) NothingS t = greater lx t where greater lx (Bin _ x _ r) | x <= lx = greater lx r
714 greater _ t' = t'
715 trim NothingS (JustS hx) t = lesser hx t where lesser hx (Bin _ x l _) | x >= hx = lesser hx l
716 lesser _ t' = t'
717 trim (JustS lx) (JustS hx) t = middle lx hx t where middle lx hx (Bin _ x _ r) | x <= lx = middle lx hx r
718 middle lx hx (Bin _ x l _) | x >= hx = middle lx hx l
719 middle _ _ t' = t'
720 #if __GLASGOW_HASKELL__ >= 700
721 {-# INLINABLE trim #-}
722 #endif
723
724 {--------------------------------------------------------------------
725 [filterGt b t] filter all values >[b] from tree [t]
726 [filterLt b t] filter all values <[b] from tree [t]
727 --------------------------------------------------------------------}
728 filterGt :: Ord a => MaybeS a -> Set a -> Set a
729 filterGt NothingS t = t
730 filterGt (JustS b) t = filter' b t
731 where filter' b Tip = Tip
732 filter' b (Bin _ x l r) = case compare b x of LT -> join x (filter' b l) r
733 EQ -> r
734 GT -> filter' b r
735 #if __GLASGOW_HASKELL__ >= 700
736 {-# INLINABLE filterGt #-}
737 #endif
738
739 filterLt :: Ord a => MaybeS a -> Set a -> Set a
740 filterLt NothingS t = t
741 filterLt (JustS b) t = filter' b t
742 where filter' b Tip = Tip
743 filter' b (Bin _ x l r) = case compare x b of LT -> join x l (filter' b r)
744 EQ -> l
745 GT -> filter' b l
746 #if __GLASGOW_HASKELL__ >= 700
747 {-# INLINABLE filterLt #-}
748 #endif
749
750 {--------------------------------------------------------------------
751 Split
752 --------------------------------------------------------------------}
753 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
754 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
755 -- comprises the elements of @set@ greater than @x@.
756 split :: Ord a => a -> Set a -> (Set a,Set a)
757 split _ Tip = (Tip,Tip)
758 split x (Bin _ y l r)
759 = case compare x y of
760 LT -> let (lt,gt) = split x l in (lt,join y gt r)
761 GT -> let (lt,gt) = split x r in (join y l lt,gt)
762 EQ -> (l,r)
763 #if __GLASGOW_HASKELL__ >= 700
764 {-# INLINABLE split #-}
765 #endif
766
767 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
768 -- element was found in the original set.
769 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
770 splitMember x t = let (l,m,r) = splitLookup x t in
771 (l,maybe False (const True) m,r)
772 #if __GLASGOW_HASKELL__ >= 700
773 {-# INLINABLE splitMember #-}
774 #endif
775
776 -- | /O(log n)/. Performs a 'split' but also returns the pivot
777 -- element that was found in the original set.
778 splitLookup :: Ord a => a -> Set a -> (Set a,Maybe a,Set a)
779 splitLookup _ Tip = (Tip,Nothing,Tip)
780 splitLookup x (Bin _ y l r)
781 = case compare x y of
782 LT -> let (lt,found,gt) = splitLookup x l in (lt,found,join y gt r)
783 GT -> let (lt,found,gt) = splitLookup x r in (join y l lt,found,gt)
784 EQ -> (l,Just y,r)
785 #if __GLASGOW_HASKELL__ >= 700
786 {-# INLINABLE splitLookup #-}
787 #endif
788
789 {--------------------------------------------------------------------
790 Utility functions that maintain the balance properties of the tree.
791 All constructors assume that all values in [l] < [x] and all values
792 in [r] > [x], and that [l] and [r] are valid trees.
793
794 In order of sophistication:
795 [Bin sz x l r] The type constructor.
796 [bin x l r] Maintains the correct size, assumes that both [l]
797 and [r] are balanced with respect to each other.
798 [balance x l r] Restores the balance and size.
799 Assumes that the original tree was balanced and
800 that [l] or [r] has changed by at most one element.
801 [join x l r] Restores balance and size.
802
803 Furthermore, we can construct a new tree from two trees. Both operations
804 assume that all values in [l] < all values in [r] and that [l] and [r]
805 are valid:
806 [glue l r] Glues [l] and [r] together. Assumes that [l] and
807 [r] are already balanced with respect to each other.
808 [merge l r] Merges two trees and restores balance.
809
810 Note: in contrast to Adam's paper, we use (<=) comparisons instead
811 of (<) comparisons in [join], [merge] and [balance].
812 Quickcheck (on [difference]) showed that this was necessary in order
813 to maintain the invariants. It is quite unsatisfactory that I haven't
814 been able to find out why this is actually the case! Fortunately, it
815 doesn't hurt to be a bit more conservative.
816 --------------------------------------------------------------------}
817
818 {--------------------------------------------------------------------
819 Join
820 --------------------------------------------------------------------}
821 join :: a -> Set a -> Set a -> Set a
822 join x Tip r = insertMin x r
823 join x l Tip = insertMax x l
824 join x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
825 | delta*sizeL < sizeR = balanceL z (join x l lz) rz
826 | delta*sizeR < sizeL = balanceR y ly (join x ry r)
827 | otherwise = bin x l r
828 #if __GLASGOW_HASKELL__ >= 700
829 {-# INLINABLE join #-}
830 #endif
831
832
833 -- insertMin and insertMax don't perform potentially expensive comparisons.
834 insertMax,insertMin :: a -> Set a -> Set a
835 insertMax x t
836 = case t of
837 Tip -> singleton x
838 Bin _ y l r
839 -> balanceR y l (insertMax x r)
840 #if __GLASGOW_HASKELL__ >= 700
841 {-# INLINABLE insertMax #-}
842 #endif
843
844 insertMin x t
845 = case t of
846 Tip -> singleton x
847 Bin _ y l r
848 -> balanceL y (insertMin x l) r
849 #if __GLASGOW_HASKELL__ >= 700
850 {-# INLINABLE insertMin #-}
851 #endif
852
853 {--------------------------------------------------------------------
854 [merge l r]: merges two trees.
855 --------------------------------------------------------------------}
856 merge :: Set a -> Set a -> Set a
857 merge Tip r = r
858 merge l Tip = l
859 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
860 | delta*sizeL < sizeR = balanceL y (merge l ly) ry
861 | delta*sizeR < sizeL = balanceR x lx (merge rx r)
862 | otherwise = glue l r
863 #if __GLASGOW_HASKELL__ >= 700
864 {-# INLINABLE merge #-}
865 #endif
866
867 {--------------------------------------------------------------------
868 [glue l r]: glues two trees together.
869 Assumes that [l] and [r] are already balanced with respect to each other.
870 --------------------------------------------------------------------}
871 glue :: Set a -> Set a -> Set a
872 glue Tip r = r
873 glue l Tip = l
874 glue l r
875 | size l > size r = let (m,l') = deleteFindMax l in balanceR m l' r
876 | otherwise = let (m,r') = deleteFindMin r in balanceL m l r'
877 #if __GLASGOW_HASKELL__ >= 700
878 {-# INLINABLE glue #-}
879 #endif
880
881
882 -- | /O(log n)/. Delete and find the minimal element.
883 --
884 -- > deleteFindMin set = (findMin set, deleteMin set)
885
886 deleteFindMin :: Set a -> (a,Set a)
887 deleteFindMin t
888 = case t of
889 Bin _ x Tip r -> (x,r)
890 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balanceR x l' r)
891 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
892 #if __GLASGOW_HASKELL__ >= 700
893 {-# INLINABLE deleteFindMin #-}
894 #endif
895
896 -- | /O(log n)/. Delete and find the maximal element.
897 --
898 -- > deleteFindMax set = (findMax set, deleteMax set)
899 deleteFindMax :: Set a -> (a,Set a)
900 deleteFindMax t
901 = case t of
902 Bin _ x l Tip -> (x,l)
903 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balanceL x l r')
904 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
905 #if __GLASGOW_HASKELL__ >= 700
906 {-# INLINABLE deleteFindMax #-}
907 #endif
908
909 -- | /O(log n)/. Retrieves the minimal key of the set, and the set
910 -- stripped of that element, or 'Nothing' if passed an empty set.
911 minView :: Set a -> Maybe (a, Set a)
912 minView Tip = Nothing
913 minView x = Just (deleteFindMin x)
914 #if __GLASGOW_HASKELL__ >= 700
915 {-# INLINABLE minView #-}
916 #endif
917
918 -- | /O(log n)/. Retrieves the maximal key of the set, and the set
919 -- stripped of that element, or 'Nothing' if passed an empty set.
920 maxView :: Set a -> Maybe (a, Set a)
921 maxView Tip = Nothing
922 maxView x = Just (deleteFindMax x)
923 #if __GLASGOW_HASKELL__ >= 700
924 {-# INLINABLE maxView #-}
925 #endif
926
927 {--------------------------------------------------------------------
928 [balance x l r] balances two trees with value x.
929 The sizes of the trees should balance after decreasing the
930 size of one of them. (a rotation).
931
932 [delta] is the maximal relative difference between the sizes of
933 two trees, it corresponds with the [w] in Adams' paper.
934 [ratio] is the ratio between an outer and inner sibling of the
935 heavier subtree in an unbalanced setting. It determines
936 whether a double or single rotation should be performed
937 to restore balance. It is correspondes with the inverse
938 of $\alpha$ in Adam's article.
939
940 Note that according to the Adam's paper:
941 - [delta] should be larger than 4.646 with a [ratio] of 2.
942 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
943
944 But the Adam's paper is errorneous:
945 - it can be proved that for delta=2 and delta>=5 there does
946 not exist any ratio that would work
947 - delta=4.5 and ratio=2 does not work
948
949 That leaves two reasonable variants, delta=3 and delta=4,
950 both with ratio=2.
951
952 - A lower [delta] leads to a more 'perfectly' balanced tree.
953 - A higher [delta] performs less rebalancing.
954
955 In the benchmarks, delta=3 is faster on insert operations,
956 and delta=4 has slightly better deletes. As the insert speedup
957 is larger, we currently use delta=3.
958
959 --------------------------------------------------------------------}
960 delta,ratio :: Int
961 delta = 3
962 ratio = 2
963
964 -- The balance function is equivalent to the following:
965 --
966 -- balance :: a -> Set a -> Set a -> Set a
967 -- balance x l r
968 -- | sizeL + sizeR <= 1 = Bin sizeX x l r
969 -- | sizeR > delta*sizeL = rotateL x l r
970 -- | sizeL > delta*sizeR = rotateR x l r
971 -- | otherwise = Bin sizeX x l r
972 -- where
973 -- sizeL = size l
974 -- sizeR = size r
975 -- sizeX = sizeL + sizeR + 1
976 --
977 -- rotateL :: a -> Set a -> Set a -> Set a
978 -- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
979 -- | otherwise = doubleL x l r
980 -- rotateR :: a -> Set a -> Set a -> Set a
981 -- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
982 -- | otherwise = doubleR x l r
983 --
984 -- singleL, singleR :: a -> Set a -> Set a -> Set a
985 -- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
986 -- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
987 --
988 -- doubleL, doubleR :: a -> Set a -> Set a -> Set a
989 -- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
990 -- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
991 --
992 -- It is only written in such a way that every node is pattern-matched only once.
993 --
994 -- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
995 -- In case it is needed, it can be found in Data.Map.
996
997 -- Functions balanceL and balanceR are specialised versions of balance.
998 -- balanceL only checks whether the left subtree is too big,
999 -- balanceR only checks whether the right subtree is too big.
1000
1001 -- balanceL is called when left subtree might have been inserted to or when
1002 -- right subtree might have been deleted from.
1003 balanceL :: a -> Set a -> Set a -> Set a
1004 balanceL x l r = case r of
1005 Tip -> case l of
1006 Tip -> Bin 1 x Tip Tip
1007 (Bin _ _ Tip Tip) -> Bin 2 x l Tip
1008 (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
1009 (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
1010 (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
1011 | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
1012 | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)
1013
1014 (Bin rs _ _ _) -> case l of
1015 Tip -> Bin (1+rs) x Tip r
1016
1017 (Bin ls lx ll lr)
1018 | ls > delta*rs -> case (ll, lr) of
1019 (Bin lls _ _ _, Bin lrs lrx lrl lrr)
1020 | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
1021 | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
1022 (_, _) -> error "Failure in Data.Map.balanceL"
1023 | otherwise -> Bin (1+ls+rs) x l r
1024 {-# NOINLINE balanceL #-}
1025
1026 -- balanceR is called when right subtree might have been inserted to or when
1027 -- left subtree might have been deleted from.
1028 balanceR :: a -> Set a -> Set a -> Set a
1029 balanceR x l r = case l of
1030 Tip -> case r of
1031 Tip -> Bin 1 x Tip Tip
1032 (Bin _ _ Tip Tip) -> Bin 2 x Tip r
1033 (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
1034 (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
1035 (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
1036 | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
1037 | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)
1038
1039 (Bin ls _ _ _) -> case r of
1040 Tip -> Bin (1+ls) x l Tip
1041
1042 (Bin rs rx rl rr)
1043 | rs > delta*ls -> case (rl, rr) of
1044 (Bin rls rlx rll rlr, Bin rrs _ _ _)
1045 | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
1046 | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
1047 (_, _) -> error "Failure in Data.Map.balanceR"
1048 | otherwise -> Bin (1+ls+rs) x l r
1049 {-# NOINLINE balanceR #-}
1050
1051 {--------------------------------------------------------------------
1052 The bin constructor maintains the size of the tree
1053 --------------------------------------------------------------------}
1054 bin :: a -> Set a -> Set a -> Set a
1055 bin x l r
1056 = Bin (size l + size r + 1) x l r
1057 {-# INLINE bin #-}
1058
1059
1060 {--------------------------------------------------------------------
1061 Utilities
1062 --------------------------------------------------------------------}
1063 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1064 foldlStrict f = go
1065 where
1066 go z [] = z
1067 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1068 {-# INLINE foldlStrict #-}
1069
1070 {--------------------------------------------------------------------
1071 Debugging
1072 --------------------------------------------------------------------}
1073 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1074 -- in a compressed, hanging format.
1075 showTree :: Show a => Set a -> String
1076 showTree s
1077 = showTreeWith True False s
1078
1079
1080 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
1081 the tree that implements the set. If @hang@ is
1082 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
1083 @wide@ is 'True', an extra wide version is shown.
1084
1085 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
1086 > 4
1087 > +--2
1088 > | +--1
1089 > | +--3
1090 > +--5
1091 >
1092 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
1093 > 4
1094 > |
1095 > +--2
1096 > | |
1097 > | +--1
1098 > | |
1099 > | +--3
1100 > |
1101 > +--5
1102 >
1103 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
1104 > +--5
1105 > |
1106 > 4
1107 > |
1108 > | +--3
1109 > | |
1110 > +--2
1111 > |
1112 > +--1
1113
1114 -}
1115 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
1116 showTreeWith hang wide t
1117 | hang = (showsTreeHang wide [] t) ""
1118 | otherwise = (showsTree wide [] [] t) ""
1119
1120 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
1121 showsTree wide lbars rbars t
1122 = case t of
1123 Tip -> showsBars lbars . showString "|\n"
1124 Bin _ x Tip Tip
1125 -> showsBars lbars . shows x . showString "\n"
1126 Bin _ x l r
1127 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1128 showWide wide rbars .
1129 showsBars lbars . shows x . showString "\n" .
1130 showWide wide lbars .
1131 showsTree wide (withEmpty lbars) (withBar lbars) l
1132
1133 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
1134 showsTreeHang wide bars t
1135 = case t of
1136 Tip -> showsBars bars . showString "|\n"
1137 Bin _ x Tip Tip
1138 -> showsBars bars . shows x . showString "\n"
1139 Bin _ x l r
1140 -> showsBars bars . shows x . showString "\n" .
1141 showWide wide bars .
1142 showsTreeHang wide (withBar bars) l .
1143 showWide wide bars .
1144 showsTreeHang wide (withEmpty bars) r
1145
1146 showWide :: Bool -> [String] -> String -> String
1147 showWide wide bars
1148 | wide = showString (concat (reverse bars)) . showString "|\n"
1149 | otherwise = id
1150
1151 showsBars :: [String] -> ShowS
1152 showsBars bars
1153 = case bars of
1154 [] -> id
1155 _ -> showString (concat (reverse (tail bars))) . showString node
1156
1157 node :: String
1158 node = "+--"
1159
1160 withBar, withEmpty :: [String] -> [String]
1161 withBar bars = "| ":bars
1162 withEmpty bars = " ":bars
1163
1164 {--------------------------------------------------------------------
1165 Assertions
1166 --------------------------------------------------------------------}
1167 -- | /O(n)/. Test if the internal set structure is valid.
1168 valid :: Ord a => Set a -> Bool
1169 valid t
1170 = balanced t && ordered t && validsize t
1171
1172 ordered :: Ord a => Set a -> Bool
1173 ordered t
1174 = bounded (const True) (const True) t
1175 where
1176 bounded lo hi t'
1177 = case t' of
1178 Tip -> True
1179 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
1180
1181 balanced :: Set a -> Bool
1182 balanced t
1183 = case t of
1184 Tip -> True
1185 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1186 balanced l && balanced r
1187
1188 validsize :: Set a -> Bool
1189 validsize t
1190 = (realsize t == Just (size t))
1191 where
1192 realsize t'
1193 = case t' of
1194 Tip -> Just 0
1195 Bin sz _ l r -> case (realsize l,realsize r) of
1196 (Just n,Just m) | n+m+1 == sz -> Just sz
1197 _ -> Nothing