Fix warnings.
[packages/containers.git] / Data / Set.hs
1 {-# LANGUAGE CPP #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.Set
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of sets.
12 --
13 -- Since many function names (but not the type name) clash with
14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
15 --
16 -- > import Data.Set (Set)
17 -- > import qualified Data.Set as Set
18 --
19 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
20 -- trees of /bounded balance/) as described by:
21 --
22 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
23 -- Journal of Functional Programming 3(4):553-562, October 1993,
24 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
25 --
26 -- * J. Nievergelt and E.M. Reingold,
27 -- \"/Binary search trees of bounded balance/\",
28 -- SIAM journal of computing 2(1), March 1973.
29 --
30 -- Note that the implementation is /left-biased/ -- the elements of a
31 -- first argument are always preferred to the second, for example in
32 -- 'union' or 'insert'. Of course, left-biasing can only be observed
33 -- when equality is an equivalence relation instead of structural
34 -- equality.
35 -----------------------------------------------------------------------------
36
37 -- It is crucial to the performance that the functions specialize on the Ord
38 -- type when possible. GHC 7.0 and higher does this by itself when it sees th
39 -- unfolding of a function -- that is why all public functions are marked
40 -- INLINABLE (that exposes the unfolding).
41 --
42 -- For other compilers and GHC pre 7.0, we mark some of the functions INLINE.
43 -- We mark the functions that just navigate down the tree (lookup, insert,
44 -- delete and similar). That navigation code gets inlined and thus specialized
45 -- when possible. There is a price to pay -- code growth. The code INLINED is
46 -- therefore only the tree navigation, all the real work (rebalancing) is not
47 -- INLINED by using a NOINLINE.
48 --
49 -- All methods that can be INLINE are not recursive -- a 'go' function doing
50 -- the real work is provided.
51
52 module Data.Set (
53 -- * Set type
54 #if !defined(TESTING)
55 Set -- instance Eq,Ord,Show,Read,Data,Typeable
56 #else
57 Set(..)
58 #endif
59
60 -- * Operators
61 , (\\)
62
63 -- * Query
64 , null
65 , size
66 , member
67 , notMember
68 , isSubsetOf
69 , isProperSubsetOf
70
71 -- * Construction
72 , empty
73 , singleton
74 , insert
75 , delete
76
77 -- * Combine
78 , union
79 , unions
80 , difference
81 , intersection
82
83 -- * Filter
84 , filter
85 , partition
86 , split
87 , splitMember
88
89 -- * Map
90 , map
91 , mapMonotonic
92
93 -- * Fold
94 , fold
95
96 -- * Min\/Max
97 , findMin
98 , findMax
99 , deleteMin
100 , deleteMax
101 , deleteFindMin
102 , deleteFindMax
103 , maxView
104 , minView
105
106 -- * Conversion
107
108 -- ** List
109 , elems
110 , toList
111 , fromList
112
113 -- ** Ordered list
114 , toAscList
115 , fromAscList
116 , fromDistinctAscList
117
118 -- * Debugging
119 , showTree
120 , showTreeWith
121 , valid
122
123 #if defined(TESTING)
124 -- Internals (for testing)
125 , bin
126 , balanced
127 , join
128 , merge
129 #endif
130 ) where
131
132 import Prelude hiding (filter,foldr,null,map)
133 import qualified Data.List as List
134 import Data.Monoid (Monoid(..))
135 import Data.Foldable (Foldable(foldMap))
136 #ifndef __GLASGOW_HASKELL__
137 import Data.Typeable (Typeable, typeOf, typeOfDefault)
138 #endif
139 import Data.Typeable (Typeable1(..), TyCon, mkTyCon, mkTyConApp)
140
141 {-
142 -- just for testing
143 import QuickCheck
144 import List (nub,sort)
145 import qualified List
146 -}
147
148 #if __GLASGOW_HASKELL__
149 import Text.Read
150 import Data.Data (Data(..), mkNoRepType, gcast1)
151 #endif
152
153 -- Use macros to define strictness of functions.
154 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
155 -- We do not use BangPatterns, because they are not in any standard and we
156 -- want the compilers to be compiled by as many compilers as possible.
157 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
158
159 {--------------------------------------------------------------------
160 Operators
161 --------------------------------------------------------------------}
162 infixl 9 \\ --
163
164 -- | /O(n+m)/. See 'difference'.
165 (\\) :: Ord a => Set a -> Set a -> Set a
166 m1 \\ m2 = difference m1 m2
167 #if __GLASGOW_HASKELL__ >= 700
168 {-# INLINABLE (\\) #-}
169 #endif
170
171 {--------------------------------------------------------------------
172 Sets are size balanced trees
173 --------------------------------------------------------------------}
174 -- | A set of values @a@.
175 data Set a = Tip
176 | Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a)
177
178 type Size = Int
179
180 instance Ord a => Monoid (Set a) where
181 mempty = empty
182 mappend = union
183 mconcat = unions
184
185 instance Foldable Set where
186 foldMap _ Tip = mempty
187 foldMap f (Bin _s k l r) = foldMap f l `mappend` f k `mappend` foldMap f r
188
189 #if __GLASGOW_HASKELL__
190
191 {--------------------------------------------------------------------
192 A Data instance
193 --------------------------------------------------------------------}
194
195 -- This instance preserves data abstraction at the cost of inefficiency.
196 -- We omit reflection services for the sake of data abstraction.
197
198 instance (Data a, Ord a) => Data (Set a) where
199 gfoldl f z set = z fromList `f` (toList set)
200 toConstr _ = error "toConstr"
201 gunfold _ _ = error "gunfold"
202 dataTypeOf _ = mkNoRepType "Data.Set.Set"
203 dataCast1 f = gcast1 f
204
205 #endif
206
207 {--------------------------------------------------------------------
208 Query
209 --------------------------------------------------------------------}
210 -- | /O(1)/. Is this the empty set?
211 null :: Set a -> Bool
212 null Tip = True
213 null (Bin {}) = False
214 #if __GLASGOW_HASKELL__ >= 700
215 {-# INLINABLE null #-}
216 #endif
217
218 -- | /O(1)/. The number of elements in the set.
219 size :: Set a -> Int
220 size Tip = 0
221 size (Bin sz _ _ _) = sz
222 #if __GLASGOW_HASKELL__ >= 700
223 {-# INLINABLE size #-}
224 #endif
225
226 -- | /O(log n)/. Is the element in the set?
227 member :: Ord a => a -> Set a -> Bool
228 member = go
229 where
230 STRICT_1_OF_2(go)
231 go _ Tip = False
232 go x (Bin _ y l r) = case compare x y of
233 LT -> go x l
234 GT -> go x r
235 EQ -> True
236 #if __GLASGOW_HASKELL__ >= 700
237 {-# INLINABLE member #-}
238 #else
239 {-# INLINE member #-}
240 #endif
241
242 -- | /O(log n)/. Is the element not in the set?
243 notMember :: Ord a => a -> Set a -> Bool
244 notMember a t = not $ member a t
245 {-# INLINE notMember #-}
246
247 {--------------------------------------------------------------------
248 Construction
249 --------------------------------------------------------------------}
250 -- | /O(1)/. The empty set.
251 empty :: Set a
252 empty = Tip
253
254 -- | /O(1)/. Create a singleton set.
255 singleton :: a -> Set a
256 singleton x = Bin 1 x Tip Tip
257
258 {--------------------------------------------------------------------
259 Insertion, Deletion
260 --------------------------------------------------------------------}
261 -- | /O(log n)/. Insert an element in a set.
262 -- If the set already contains an element equal to the given value,
263 -- it is replaced with the new value.
264 insert :: Ord a => a -> Set a -> Set a
265 insert = go
266 where
267 STRICT_1_OF_2(go)
268 go x Tip = singleton x
269 go x (Bin sz y l r) = case compare x y of
270 LT -> balanceL y (go x l) r
271 GT -> balanceR y l (go x r)
272 EQ -> Bin sz x l r
273 #if __GLASGOW_HASKELL__ >= 700
274 {-# INLINEABLE insert #-}
275 #else
276 {-# INLINE insert #-}
277 #endif
278
279 -- Insert an element to the set only if it is not in the set. Used by
280 -- `union`.
281 insertR :: Ord a => a -> Set a -> Set a
282 insertR = go
283 where
284 STRICT_1_OF_2(go)
285 go x Tip = singleton x
286 go x t@(Bin _ y l r) = case compare x y of
287 LT -> balanceL y (go x l) r
288 GT -> balanceR y l (go x r)
289 EQ -> t
290 #if __GLASGOW_HASKELL__ >= 700
291 {-# INLINEABLE insertR #-}
292 #else
293 {-# INLINE insertR #-}
294 #endif
295
296 -- | /O(log n)/. Delete an element from a set.
297 delete :: Ord a => a -> Set a -> Set a
298 delete = go
299 where
300 STRICT_1_OF_2(go)
301 go _ Tip = Tip
302 go x (Bin _ y l r) = case compare x y of
303 LT -> balanceR y (go x l) r
304 GT -> balanceL y l (go x r)
305 EQ -> glue l r
306 #if __GLASGOW_HASKELL__ >= 700
307 {-# INLINEABLE delete #-}
308 #else
309 {-# INLINE delete #-}
310 #endif
311
312 {--------------------------------------------------------------------
313 Subset
314 --------------------------------------------------------------------}
315 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
316 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
317 isProperSubsetOf s1 s2
318 = (size s1 < size s2) && (isSubsetOf s1 s2)
319 #if __GLASGOW_HASKELL__ >= 700
320 {-# INLINABLE isProperSubsetOf #-}
321 #endif
322
323
324 -- | /O(n+m)/. Is this a subset?
325 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
326 isSubsetOf :: Ord a => Set a -> Set a -> Bool
327 isSubsetOf t1 t2
328 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
329 #if __GLASGOW_HASKELL__ >= 700
330 {-# INLINABLE isSubsetOf #-}
331 #endif
332
333 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
334 isSubsetOfX Tip _ = True
335 isSubsetOfX _ Tip = False
336 isSubsetOfX (Bin _ x l r) t
337 = found && isSubsetOfX l lt && isSubsetOfX r gt
338 where
339 (lt,found,gt) = splitMember x t
340 #if __GLASGOW_HASKELL__ >= 700
341 {-# INLINABLE isSubsetOfX #-}
342 #endif
343
344
345 {--------------------------------------------------------------------
346 Minimal, Maximal
347 --------------------------------------------------------------------}
348 -- | /O(log n)/. The minimal element of a set.
349 findMin :: Set a -> a
350 findMin (Bin _ x Tip _) = x
351 findMin (Bin _ _ l _) = findMin l
352 findMin Tip = error "Set.findMin: empty set has no minimal element"
353 #if __GLASGOW_HASKELL__ >= 700
354 {-# INLINABLE findMin #-}
355 #endif
356
357 -- | /O(log n)/. The maximal element of a set.
358 findMax :: Set a -> a
359 findMax (Bin _ x _ Tip) = x
360 findMax (Bin _ _ _ r) = findMax r
361 findMax Tip = error "Set.findMax: empty set has no maximal element"
362 #if __GLASGOW_HASKELL__ >= 700
363 {-# INLINABLE findMax #-}
364 #endif
365
366 -- | /O(log n)/. Delete the minimal element.
367 deleteMin :: Set a -> Set a
368 deleteMin (Bin _ _ Tip r) = r
369 deleteMin (Bin _ x l r) = balanceR x (deleteMin l) r
370 deleteMin Tip = Tip
371 #if __GLASGOW_HASKELL__ >= 700
372 {-# INLINABLE deleteMin #-}
373 #endif
374
375 -- | /O(log n)/. Delete the maximal element.
376 deleteMax :: Set a -> Set a
377 deleteMax (Bin _ _ l Tip) = l
378 deleteMax (Bin _ x l r) = balanceL x l (deleteMax r)
379 deleteMax Tip = Tip
380 #if __GLASGOW_HASKELL__ >= 700
381 {-# INLINABLE deleteMax #-}
382 #endif
383
384 {--------------------------------------------------------------------
385 Union.
386 --------------------------------------------------------------------}
387 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
388 unions :: Ord a => [Set a] -> Set a
389 unions = foldlStrict union empty
390 #if __GLASGOW_HASKELL__ >= 700
391 {-# INLINABLE unions #-}
392 #endif
393
394 -- | /O(n+m)/. The union of two sets, preferring the first set when
395 -- equal elements are encountered.
396 -- The implementation uses the efficient /hedge-union/ algorithm.
397 -- Hedge-union is more efficient on (bigset `union` smallset).
398 union :: Ord a => Set a -> Set a -> Set a
399 union Tip t2 = t2
400 union t1 Tip = t1
401 union (Bin _ x Tip Tip) t = insert x t
402 union t (Bin _ x Tip Tip) = insertR x t
403 union t1 t2 = hedgeUnion NothingS NothingS t1 t2
404 #if __GLASGOW_HASKELL__ >= 700
405 {-# INLINABLE union #-}
406 #endif
407
408 hedgeUnion :: Ord a
409 => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
410 hedgeUnion _ _ t1 Tip
411 = t1
412 hedgeUnion blo bhi Tip (Bin _ x l r)
413 = join x (filterGt blo l) (filterLt bhi r)
414 hedgeUnion blo bhi (Bin _ x l r) t2
415 = join x (hedgeUnion blo bmi l (trim blo bmi t2))
416 (hedgeUnion bmi bhi r (trim bmi bhi t2))
417 where
418 bmi = JustS x
419 #if __GLASGOW_HASKELL__ >= 700
420 {-# INLINABLE hedgeUnion #-}
421 #endif
422
423 {--------------------------------------------------------------------
424 Difference
425 --------------------------------------------------------------------}
426 -- | /O(n+m)/. Difference of two sets.
427 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
428 difference :: Ord a => Set a -> Set a -> Set a
429 difference Tip _ = Tip
430 difference t1 Tip = t1
431 difference t1 t2 = hedgeDiff NothingS NothingS t1 t2
432 #if __GLASGOW_HASKELL__ >= 700
433 {-# INLINABLE difference #-}
434 #endif
435
436 hedgeDiff :: Ord a
437 => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
438 hedgeDiff _ _ Tip _
439 = Tip
440 hedgeDiff blo bhi (Bin _ x l r) Tip
441 = join x (filterGt blo l) (filterLt bhi r)
442 hedgeDiff blo bhi t (Bin _ x l r)
443 = merge (hedgeDiff blo bmi (trim blo bmi t) l)
444 (hedgeDiff bmi bhi (trim bmi bhi t) r)
445 where
446 bmi = JustS x
447 #if __GLASGOW_HASKELL__ >= 700
448 {-# INLINABLE hedgeDiff #-}
449 #endif
450
451 {--------------------------------------------------------------------
452 Intersection
453 --------------------------------------------------------------------}
454 -- | /O(n+m)/. The intersection of two sets.
455 -- Elements of the result come from the first set, so for example
456 --
457 -- > import qualified Data.Set as S
458 -- > data AB = A | B deriving Show
459 -- > instance Ord AB where compare _ _ = EQ
460 -- > instance Eq AB where _ == _ = True
461 -- > main = print (S.singleton A `S.intersection` S.singleton B,
462 -- > S.singleton B `S.intersection` S.singleton A)
463 --
464 -- prints @(fromList [A],fromList [B])@.
465 intersection :: Ord a => Set a -> Set a -> Set a
466 intersection Tip _ = Tip
467 intersection _ Tip = Tip
468 intersection t1@(Bin s1 x1 l1 r1) t2@(Bin s2 x2 l2 r2) =
469 if s1 >= s2 then
470 let (lt,found,gt) = splitLookup x2 t1
471 tl = intersection lt l2
472 tr = intersection gt r2
473 in case found of
474 Just x -> join x tl tr
475 Nothing -> merge tl tr
476 else let (lt,found,gt) = splitMember x1 t2
477 tl = intersection l1 lt
478 tr = intersection r1 gt
479 in if found then join x1 tl tr
480 else merge tl tr
481 #if __GLASGOW_HASKELL__ >= 700
482 {-# INLINABLE intersection #-}
483 #endif
484
485 {--------------------------------------------------------------------
486 Filter and partition
487 --------------------------------------------------------------------}
488 -- | /O(n)/. Filter all elements that satisfy the predicate.
489 filter :: Ord a => (a -> Bool) -> Set a -> Set a
490 filter _ Tip = Tip
491 filter p (Bin _ x l r)
492 | p x = join x (filter p l) (filter p r)
493 | otherwise = merge (filter p l) (filter p r)
494 #if __GLASGOW_HASKELL__ >= 700
495 {-# INLINABLE filter #-}
496 #endif
497
498 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
499 -- the predicate and one with all elements that don't satisfy the predicate.
500 -- See also 'split'.
501 partition :: Ord a => (a -> Bool) -> Set a -> (Set a,Set a)
502 partition _ Tip = (Tip, Tip)
503 partition p (Bin _ x l r) = case (partition p l, partition p r) of
504 ((l1, l2), (r1, r2))
505 | p x -> (join x l1 r1, merge l2 r2)
506 | otherwise -> (merge l1 r1, join x l2 r2)
507 #if __GLASGOW_HASKELL__ >= 700
508 {-# INLINABLE partition #-}
509 #endif
510
511 {----------------------------------------------------------------------
512 Map
513 ----------------------------------------------------------------------}
514
515 -- | /O(n*log n)/.
516 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
517 --
518 -- It's worth noting that the size of the result may be smaller if,
519 -- for some @(x,y)@, @x \/= y && f x == f y@
520
521 map :: (Ord a, Ord b) => (a->b) -> Set a -> Set b
522 map f = fromList . List.map f . toList
523 #if __GLASGOW_HASKELL__ >= 700
524 {-# INLINABLE map #-}
525 #endif
526
527 -- | /O(n)/. The
528 --
529 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
530 -- /The precondition is not checked./
531 -- Semi-formally, we have:
532 --
533 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
534 -- > ==> mapMonotonic f s == map f s
535 -- > where ls = toList s
536
537 mapMonotonic :: (a->b) -> Set a -> Set b
538 mapMonotonic _ Tip = Tip
539 mapMonotonic f (Bin sz x l r) = Bin sz (f x) (mapMonotonic f l) (mapMonotonic f r)
540 #if __GLASGOW_HASKELL__ >= 700
541 {-# INLINABLE mapMonotonic #-}
542 #endif
543
544 {--------------------------------------------------------------------
545 Fold
546 --------------------------------------------------------------------}
547 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
548 fold :: (a -> b -> b) -> b -> Set a -> b
549 fold = foldr
550 {-# INLINE fold #-}
551
552 -- | /O(n)/. Post-order fold.
553 foldr :: (a -> b -> b) -> b -> Set a -> b
554 foldr f = go
555 where
556 go z Tip = z
557 go z (Bin _ x l r) = go (f x (go z r)) l
558 {-# INLINE foldr #-}
559
560 {--------------------------------------------------------------------
561 List variations
562 --------------------------------------------------------------------}
563 -- | /O(n)/. The elements of a set.
564 elems :: Set a -> [a]
565 elems = toList
566 #if __GLASGOW_HASKELL__ >= 700
567 {-# INLINABLE elems #-}
568 #endif
569
570 {--------------------------------------------------------------------
571 Lists
572 --------------------------------------------------------------------}
573 -- | /O(n)/. Convert the set to a list of elements.
574 toList :: Set a -> [a]
575 toList = toAscList
576 #if __GLASGOW_HASKELL__ >= 700
577 {-# INLINABLE toList #-}
578 #endif
579
580 -- | /O(n)/. Convert the set to an ascending list of elements.
581 toAscList :: Set a -> [a]
582 toAscList = foldr (:) []
583 #if __GLASGOW_HASKELL__ >= 700
584 {-# INLINABLE toAscList #-}
585 #endif
586
587 -- | /O(n*log n)/. Create a set from a list of elements.
588 fromList :: Ord a => [a] -> Set a
589 fromList = foldlStrict ins empty
590 where
591 ins t x = insert x t
592 #if __GLASGOW_HASKELL__ >= 700
593 {-# INLINABLE fromList #-}
594 #endif
595
596 {--------------------------------------------------------------------
597 Building trees from ascending/descending lists can be done in linear time.
598
599 Note that if [xs] is ascending that:
600 fromAscList xs == fromList xs
601 --------------------------------------------------------------------}
602 -- | /O(n)/. Build a set from an ascending list in linear time.
603 -- /The precondition (input list is ascending) is not checked./
604 fromAscList :: Eq a => [a] -> Set a
605 fromAscList xs
606 = fromDistinctAscList (combineEq xs)
607 where
608 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
609 combineEq xs'
610 = case xs' of
611 [] -> []
612 [x] -> [x]
613 (x:xx) -> combineEq' x xx
614
615 combineEq' z [] = [z]
616 combineEq' z (x:xs')
617 | z==x = combineEq' z xs'
618 | otherwise = z:combineEq' x xs'
619 #if __GLASGOW_HASKELL__ >= 700
620 {-# INLINABLE fromAscList #-}
621 #endif
622
623
624 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
625 -- /The precondition (input list is strictly ascending) is not checked./
626 fromDistinctAscList :: [a] -> Set a
627 fromDistinctAscList xs
628 = build const (length xs) xs
629 where
630 -- 1) use continutations so that we use heap space instead of stack space.
631 -- 2) special case for n==5 to build bushier trees.
632 build c 0 xs' = c Tip xs'
633 build c 5 xs' = case xs' of
634 (x1:x2:x3:x4:x5:xx)
635 -> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
636 _ -> error "fromDistinctAscList build 5"
637 build c n xs' = seq nr $ build (buildR nr c) nl xs'
638 where
639 nl = n `div` 2
640 nr = n - nl - 1
641
642 buildR n c l (x:ys) = build (buildB l x c) n ys
643 buildR _ _ _ [] = error "fromDistinctAscList buildR []"
644 buildB l x c r zs = c (bin x l r) zs
645 #if __GLASGOW_HASKELL__ >= 700
646 {-# INLINABLE fromDistinctAscList #-}
647 #endif
648
649 {--------------------------------------------------------------------
650 Eq converts the set to a list. In a lazy setting, this
651 actually seems one of the faster methods to compare two trees
652 and it is certainly the simplest :-)
653 --------------------------------------------------------------------}
654 instance Eq a => Eq (Set a) where
655 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
656
657 {--------------------------------------------------------------------
658 Ord
659 --------------------------------------------------------------------}
660
661 instance Ord a => Ord (Set a) where
662 compare s1 s2 = compare (toAscList s1) (toAscList s2)
663
664 {--------------------------------------------------------------------
665 Show
666 --------------------------------------------------------------------}
667 instance Show a => Show (Set a) where
668 showsPrec p xs = showParen (p > 10) $
669 showString "fromList " . shows (toList xs)
670
671 {--------------------------------------------------------------------
672 Read
673 --------------------------------------------------------------------}
674 instance (Read a, Ord a) => Read (Set a) where
675 #ifdef __GLASGOW_HASKELL__
676 readPrec = parens $ prec 10 $ do
677 Ident "fromList" <- lexP
678 xs <- readPrec
679 return (fromList xs)
680
681 readListPrec = readListPrecDefault
682 #else
683 readsPrec p = readParen (p > 10) $ \ r -> do
684 ("fromList",s) <- lex r
685 (xs,t) <- reads s
686 return (fromList xs,t)
687 #endif
688
689 {--------------------------------------------------------------------
690 Typeable/Data
691 --------------------------------------------------------------------}
692
693 #include "Typeable.h"
694 INSTANCE_TYPEABLE1(Set,setTc,"Set")
695
696 {--------------------------------------------------------------------
697 Utility functions that return sub-ranges of the original
698 tree. Some functions take a `Maybe value` as an argument to
699 allow comparisons against infinite values. These are called `blow`
700 (Nothing is -\infty) and `bhigh` (here Nothing is +\infty).
701 We use MaybeS value, which is a Maybe strict in the Just case.
702
703 [trim blow bhigh t] A tree that is either empty or where [x > blow]
704 and [x < bhigh] for the value [x] of the root.
705 [filterGt blow t] A tree where for all values [k]. [k > blow]
706 [filterLt bhigh t] A tree where for all values [k]. [k < bhigh]
707
708 [split k t] Returns two trees [l] and [r] where all values
709 in [l] are <[k] and all keys in [r] are >[k].
710 [splitMember k t] Just like [split] but also returns whether [k]
711 was found in the tree.
712 --------------------------------------------------------------------}
713
714 data MaybeS a = NothingS | JustS !a
715
716 {--------------------------------------------------------------------
717 [trim blo bhi t] trims away all subtrees that surely contain no
718 values between the range [blo] to [bhi]. The returned tree is either
719 empty or the key of the root is between @blo@ and @bhi@.
720 --------------------------------------------------------------------}
721 trim :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a
722 trim NothingS NothingS t = t
723 trim (JustS lx) NothingS t = greater lx t where greater lo (Bin _ x _ r) | x <= lo = greater lo r
724 greater _ t' = t'
725 trim NothingS (JustS hx) t = lesser hx t where lesser hi (Bin _ x l _) | x >= hi = lesser hi l
726 lesser _ t' = t'
727 trim (JustS lx) (JustS hx) t = middle lx hx t where middle lo hi (Bin _ x _ r) | x <= lo = middle lo hi r
728 middle lo hi (Bin _ x l _) | x >= hi = middle lo hi l
729 middle _ _ t' = t'
730 #if __GLASGOW_HASKELL__ >= 700
731 {-# INLINABLE trim #-}
732 #endif
733
734 {--------------------------------------------------------------------
735 [filterGt b t] filter all values >[b] from tree [t]
736 [filterLt b t] filter all values <[b] from tree [t]
737 --------------------------------------------------------------------}
738 filterGt :: Ord a => MaybeS a -> Set a -> Set a
739 filterGt NothingS t = t
740 filterGt (JustS b) t = filter' b t
741 where filter' _ Tip = Tip
742 filter' b' (Bin _ x l r) =
743 case compare b' x of LT -> join x (filter' b' l) r
744 EQ -> r
745 GT -> filter' b' r
746 #if __GLASGOW_HASKELL__ >= 700
747 {-# INLINABLE filterGt #-}
748 #endif
749
750 filterLt :: Ord a => MaybeS a -> Set a -> Set a
751 filterLt NothingS t = t
752 filterLt (JustS b) t = filter' b t
753 where filter' _ Tip = Tip
754 filter' b' (Bin _ x l r) =
755 case compare x b' of LT -> join x l (filter' b' r)
756 EQ -> l
757 GT -> filter' b' l
758 #if __GLASGOW_HASKELL__ >= 700
759 {-# INLINABLE filterLt #-}
760 #endif
761
762 {--------------------------------------------------------------------
763 Split
764 --------------------------------------------------------------------}
765 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
766 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
767 -- comprises the elements of @set@ greater than @x@.
768 split :: Ord a => a -> Set a -> (Set a,Set a)
769 split _ Tip = (Tip,Tip)
770 split x (Bin _ y l r)
771 = case compare x y of
772 LT -> let (lt,gt) = split x l in (lt,join y gt r)
773 GT -> let (lt,gt) = split x r in (join y l lt,gt)
774 EQ -> (l,r)
775 #if __GLASGOW_HASKELL__ >= 700
776 {-# INLINABLE split #-}
777 #endif
778
779 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
780 -- element was found in the original set.
781 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
782 splitMember x t = let (l,m,r) = splitLookup x t in
783 (l,maybe False (const True) m,r)
784 #if __GLASGOW_HASKELL__ >= 700
785 {-# INLINABLE splitMember #-}
786 #endif
787
788 -- | /O(log n)/. Performs a 'split' but also returns the pivot
789 -- element that was found in the original set.
790 splitLookup :: Ord a => a -> Set a -> (Set a,Maybe a,Set a)
791 splitLookup _ Tip = (Tip,Nothing,Tip)
792 splitLookup x (Bin _ y l r)
793 = case compare x y of
794 LT -> let (lt,found,gt) = splitLookup x l in (lt,found,join y gt r)
795 GT -> let (lt,found,gt) = splitLookup x r in (join y l lt,found,gt)
796 EQ -> (l,Just y,r)
797 #if __GLASGOW_HASKELL__ >= 700
798 {-# INLINABLE splitLookup #-}
799 #endif
800
801 {--------------------------------------------------------------------
802 Utility functions that maintain the balance properties of the tree.
803 All constructors assume that all values in [l] < [x] and all values
804 in [r] > [x], and that [l] and [r] are valid trees.
805
806 In order of sophistication:
807 [Bin sz x l r] The type constructor.
808 [bin x l r] Maintains the correct size, assumes that both [l]
809 and [r] are balanced with respect to each other.
810 [balance x l r] Restores the balance and size.
811 Assumes that the original tree was balanced and
812 that [l] or [r] has changed by at most one element.
813 [join x l r] Restores balance and size.
814
815 Furthermore, we can construct a new tree from two trees. Both operations
816 assume that all values in [l] < all values in [r] and that [l] and [r]
817 are valid:
818 [glue l r] Glues [l] and [r] together. Assumes that [l] and
819 [r] are already balanced with respect to each other.
820 [merge l r] Merges two trees and restores balance.
821
822 Note: in contrast to Adam's paper, we use (<=) comparisons instead
823 of (<) comparisons in [join], [merge] and [balance].
824 Quickcheck (on [difference]) showed that this was necessary in order
825 to maintain the invariants. It is quite unsatisfactory that I haven't
826 been able to find out why this is actually the case! Fortunately, it
827 doesn't hurt to be a bit more conservative.
828 --------------------------------------------------------------------}
829
830 {--------------------------------------------------------------------
831 Join
832 --------------------------------------------------------------------}
833 join :: a -> Set a -> Set a -> Set a
834 join x Tip r = insertMin x r
835 join x l Tip = insertMax x l
836 join x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
837 | delta*sizeL < sizeR = balanceL z (join x l lz) rz
838 | delta*sizeR < sizeL = balanceR y ly (join x ry r)
839 | otherwise = bin x l r
840 #if __GLASGOW_HASKELL__ >= 700
841 {-# INLINABLE join #-}
842 #endif
843
844
845 -- insertMin and insertMax don't perform potentially expensive comparisons.
846 insertMax,insertMin :: a -> Set a -> Set a
847 insertMax x t
848 = case t of
849 Tip -> singleton x
850 Bin _ y l r
851 -> balanceR y l (insertMax x r)
852 #if __GLASGOW_HASKELL__ >= 700
853 {-# INLINABLE insertMax #-}
854 #endif
855
856 insertMin x t
857 = case t of
858 Tip -> singleton x
859 Bin _ y l r
860 -> balanceL y (insertMin x l) r
861 #if __GLASGOW_HASKELL__ >= 700
862 {-# INLINABLE insertMin #-}
863 #endif
864
865 {--------------------------------------------------------------------
866 [merge l r]: merges two trees.
867 --------------------------------------------------------------------}
868 merge :: Set a -> Set a -> Set a
869 merge Tip r = r
870 merge l Tip = l
871 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
872 | delta*sizeL < sizeR = balanceL y (merge l ly) ry
873 | delta*sizeR < sizeL = balanceR x lx (merge rx r)
874 | otherwise = glue l r
875 #if __GLASGOW_HASKELL__ >= 700
876 {-# INLINABLE merge #-}
877 #endif
878
879 {--------------------------------------------------------------------
880 [glue l r]: glues two trees together.
881 Assumes that [l] and [r] are already balanced with respect to each other.
882 --------------------------------------------------------------------}
883 glue :: Set a -> Set a -> Set a
884 glue Tip r = r
885 glue l Tip = l
886 glue l r
887 | size l > size r = let (m,l') = deleteFindMax l in balanceR m l' r
888 | otherwise = let (m,r') = deleteFindMin r in balanceL m l r'
889 #if __GLASGOW_HASKELL__ >= 700
890 {-# INLINABLE glue #-}
891 #endif
892
893
894 -- | /O(log n)/. Delete and find the minimal element.
895 --
896 -- > deleteFindMin set = (findMin set, deleteMin set)
897
898 deleteFindMin :: Set a -> (a,Set a)
899 deleteFindMin t
900 = case t of
901 Bin _ x Tip r -> (x,r)
902 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balanceR x l' r)
903 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
904 #if __GLASGOW_HASKELL__ >= 700
905 {-# INLINABLE deleteFindMin #-}
906 #endif
907
908 -- | /O(log n)/. Delete and find the maximal element.
909 --
910 -- > deleteFindMax set = (findMax set, deleteMax set)
911 deleteFindMax :: Set a -> (a,Set a)
912 deleteFindMax t
913 = case t of
914 Bin _ x l Tip -> (x,l)
915 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balanceL x l r')
916 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
917 #if __GLASGOW_HASKELL__ >= 700
918 {-# INLINABLE deleteFindMax #-}
919 #endif
920
921 -- | /O(log n)/. Retrieves the minimal key of the set, and the set
922 -- stripped of that element, or 'Nothing' if passed an empty set.
923 minView :: Set a -> Maybe (a, Set a)
924 minView Tip = Nothing
925 minView x = Just (deleteFindMin x)
926 #if __GLASGOW_HASKELL__ >= 700
927 {-# INLINABLE minView #-}
928 #endif
929
930 -- | /O(log n)/. Retrieves the maximal key of the set, and the set
931 -- stripped of that element, or 'Nothing' if passed an empty set.
932 maxView :: Set a -> Maybe (a, Set a)
933 maxView Tip = Nothing
934 maxView x = Just (deleteFindMax x)
935 #if __GLASGOW_HASKELL__ >= 700
936 {-# INLINABLE maxView #-}
937 #endif
938
939 {--------------------------------------------------------------------
940 [balance x l r] balances two trees with value x.
941 The sizes of the trees should balance after decreasing the
942 size of one of them. (a rotation).
943
944 [delta] is the maximal relative difference between the sizes of
945 two trees, it corresponds with the [w] in Adams' paper.
946 [ratio] is the ratio between an outer and inner sibling of the
947 heavier subtree in an unbalanced setting. It determines
948 whether a double or single rotation should be performed
949 to restore balance. It is correspondes with the inverse
950 of $\alpha$ in Adam's article.
951
952 Note that according to the Adam's paper:
953 - [delta] should be larger than 4.646 with a [ratio] of 2.
954 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
955
956 But the Adam's paper is errorneous:
957 - it can be proved that for delta=2 and delta>=5 there does
958 not exist any ratio that would work
959 - delta=4.5 and ratio=2 does not work
960
961 That leaves two reasonable variants, delta=3 and delta=4,
962 both with ratio=2.
963
964 - A lower [delta] leads to a more 'perfectly' balanced tree.
965 - A higher [delta] performs less rebalancing.
966
967 In the benchmarks, delta=3 is faster on insert operations,
968 and delta=4 has slightly better deletes. As the insert speedup
969 is larger, we currently use delta=3.
970
971 --------------------------------------------------------------------}
972 delta,ratio :: Int
973 delta = 3
974 ratio = 2
975
976 -- The balance function is equivalent to the following:
977 --
978 -- balance :: a -> Set a -> Set a -> Set a
979 -- balance x l r
980 -- | sizeL + sizeR <= 1 = Bin sizeX x l r
981 -- | sizeR > delta*sizeL = rotateL x l r
982 -- | sizeL > delta*sizeR = rotateR x l r
983 -- | otherwise = Bin sizeX x l r
984 -- where
985 -- sizeL = size l
986 -- sizeR = size r
987 -- sizeX = sizeL + sizeR + 1
988 --
989 -- rotateL :: a -> Set a -> Set a -> Set a
990 -- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
991 -- | otherwise = doubleL x l r
992 -- rotateR :: a -> Set a -> Set a -> Set a
993 -- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
994 -- | otherwise = doubleR x l r
995 --
996 -- singleL, singleR :: a -> Set a -> Set a -> Set a
997 -- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
998 -- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
999 --
1000 -- doubleL, doubleR :: a -> Set a -> Set a -> Set a
1001 -- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
1002 -- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
1003 --
1004 -- It is only written in such a way that every node is pattern-matched only once.
1005 --
1006 -- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
1007 -- In case it is needed, it can be found in Data.Map.
1008
1009 -- Functions balanceL and balanceR are specialised versions of balance.
1010 -- balanceL only checks whether the left subtree is too big,
1011 -- balanceR only checks whether the right subtree is too big.
1012
1013 -- balanceL is called when left subtree might have been inserted to or when
1014 -- right subtree might have been deleted from.
1015 balanceL :: a -> Set a -> Set a -> Set a
1016 balanceL x l r = case r of
1017 Tip -> case l of
1018 Tip -> Bin 1 x Tip Tip
1019 (Bin _ _ Tip Tip) -> Bin 2 x l Tip
1020 (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
1021 (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
1022 (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
1023 | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
1024 | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)
1025
1026 (Bin rs _ _ _) -> case l of
1027 Tip -> Bin (1+rs) x Tip r
1028
1029 (Bin ls lx ll lr)
1030 | ls > delta*rs -> case (ll, lr) of
1031 (Bin lls _ _ _, Bin lrs lrx lrl lrr)
1032 | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
1033 | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
1034 (_, _) -> error "Failure in Data.Map.balanceL"
1035 | otherwise -> Bin (1+ls+rs) x l r
1036 {-# NOINLINE balanceL #-}
1037
1038 -- balanceR is called when right subtree might have been inserted to or when
1039 -- left subtree might have been deleted from.
1040 balanceR :: a -> Set a -> Set a -> Set a
1041 balanceR x l r = case l of
1042 Tip -> case r of
1043 Tip -> Bin 1 x Tip Tip
1044 (Bin _ _ Tip Tip) -> Bin 2 x Tip r
1045 (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
1046 (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
1047 (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
1048 | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
1049 | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)
1050
1051 (Bin ls _ _ _) -> case r of
1052 Tip -> Bin (1+ls) x l Tip
1053
1054 (Bin rs rx rl rr)
1055 | rs > delta*ls -> case (rl, rr) of
1056 (Bin rls rlx rll rlr, Bin rrs _ _ _)
1057 | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
1058 | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
1059 (_, _) -> error "Failure in Data.Map.balanceR"
1060 | otherwise -> Bin (1+ls+rs) x l r
1061 {-# NOINLINE balanceR #-}
1062
1063 {--------------------------------------------------------------------
1064 The bin constructor maintains the size of the tree
1065 --------------------------------------------------------------------}
1066 bin :: a -> Set a -> Set a -> Set a
1067 bin x l r
1068 = Bin (size l + size r + 1) x l r
1069 {-# INLINE bin #-}
1070
1071
1072 {--------------------------------------------------------------------
1073 Utilities
1074 --------------------------------------------------------------------}
1075 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1076 foldlStrict f = go
1077 where
1078 go z [] = z
1079 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1080 {-# INLINE foldlStrict #-}
1081
1082 {--------------------------------------------------------------------
1083 Debugging
1084 --------------------------------------------------------------------}
1085 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1086 -- in a compressed, hanging format.
1087 showTree :: Show a => Set a -> String
1088 showTree s
1089 = showTreeWith True False s
1090
1091
1092 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
1093 the tree that implements the set. If @hang@ is
1094 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
1095 @wide@ is 'True', an extra wide version is shown.
1096
1097 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
1098 > 4
1099 > +--2
1100 > | +--1
1101 > | +--3
1102 > +--5
1103 >
1104 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
1105 > 4
1106 > |
1107 > +--2
1108 > | |
1109 > | +--1
1110 > | |
1111 > | +--3
1112 > |
1113 > +--5
1114 >
1115 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
1116 > +--5
1117 > |
1118 > 4
1119 > |
1120 > | +--3
1121 > | |
1122 > +--2
1123 > |
1124 > +--1
1125
1126 -}
1127 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
1128 showTreeWith hang wide t
1129 | hang = (showsTreeHang wide [] t) ""
1130 | otherwise = (showsTree wide [] [] t) ""
1131
1132 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
1133 showsTree wide lbars rbars t
1134 = case t of
1135 Tip -> showsBars lbars . showString "|\n"
1136 Bin _ x Tip Tip
1137 -> showsBars lbars . shows x . showString "\n"
1138 Bin _ x l r
1139 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1140 showWide wide rbars .
1141 showsBars lbars . shows x . showString "\n" .
1142 showWide wide lbars .
1143 showsTree wide (withEmpty lbars) (withBar lbars) l
1144
1145 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
1146 showsTreeHang wide bars t
1147 = case t of
1148 Tip -> showsBars bars . showString "|\n"
1149 Bin _ x Tip Tip
1150 -> showsBars bars . shows x . showString "\n"
1151 Bin _ x l r
1152 -> showsBars bars . shows x . showString "\n" .
1153 showWide wide bars .
1154 showsTreeHang wide (withBar bars) l .
1155 showWide wide bars .
1156 showsTreeHang wide (withEmpty bars) r
1157
1158 showWide :: Bool -> [String] -> String -> String
1159 showWide wide bars
1160 | wide = showString (concat (reverse bars)) . showString "|\n"
1161 | otherwise = id
1162
1163 showsBars :: [String] -> ShowS
1164 showsBars bars
1165 = case bars of
1166 [] -> id
1167 _ -> showString (concat (reverse (tail bars))) . showString node
1168
1169 node :: String
1170 node = "+--"
1171
1172 withBar, withEmpty :: [String] -> [String]
1173 withBar bars = "| ":bars
1174 withEmpty bars = " ":bars
1175
1176 {--------------------------------------------------------------------
1177 Assertions
1178 --------------------------------------------------------------------}
1179 -- | /O(n)/. Test if the internal set structure is valid.
1180 valid :: Ord a => Set a -> Bool
1181 valid t
1182 = balanced t && ordered t && validsize t
1183
1184 ordered :: Ord a => Set a -> Bool
1185 ordered t
1186 = bounded (const True) (const True) t
1187 where
1188 bounded lo hi t'
1189 = case t' of
1190 Tip -> True
1191 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
1192
1193 balanced :: Set a -> Bool
1194 balanced t
1195 = case t of
1196 Tip -> True
1197 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1198 balanced l && balanced r
1199
1200 validsize :: Set a -> Bool
1201 validsize t
1202 = (realsize t == Just (size t))
1203 where
1204 realsize t'
1205 = case t' of
1206 Tip -> Just 0
1207 Bin sz _ l r -> case (realsize l,realsize r) of
1208 (Just n,Just m) | n+m+1 == sz -> Just sz
1209 _ -> Nothing