For GHC, implement the Typeable.hs macros using standalone deriving
[packages/containers.git] / Data / Set.hs
1 {-# LANGUAGE CPP #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.Set
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of sets.
12 --
13 -- Since many function names (but not the type name) clash with
14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
15 --
16 -- > import Data.Set (Set)
17 -- > import qualified Data.Set as Set
18 --
19 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
20 -- trees of /bounded balance/) as described by:
21 --
22 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
23 -- Journal of Functional Programming 3(4):553-562, October 1993,
24 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
25 --
26 -- * J. Nievergelt and E.M. Reingold,
27 -- \"/Binary search trees of bounded balance/\",
28 -- SIAM journal of computing 2(1), March 1973.
29 --
30 -- Note that the implementation is /left-biased/ -- the elements of a
31 -- first argument are always preferred to the second, for example in
32 -- 'union' or 'insert'. Of course, left-biasing can only be observed
33 -- when equality is an equivalence relation instead of structural
34 -- equality.
35 -----------------------------------------------------------------------------
36
37 -- It is crucial to the performance that the functions specialize on the Ord
38 -- type when possible. GHC 7.0 and higher does this by itself when it sees th
39 -- unfolding of a function -- that is why all public functions are marked
40 -- INLINABLE (that exposes the unfolding).
41 --
42 -- For other compilers and GHC pre 7.0, we mark some of the functions INLINE.
43 -- We mark the functions that just navigate down the tree (lookup, insert,
44 -- delete and similar). That navigation code gets inlined and thus specialized
45 -- when possible. There is a price to pay -- code growth. The code INLINED is
46 -- therefore only the tree navigation, all the real work (rebalancing) is not
47 -- INLINED by using a NOINLINE.
48 --
49 -- All methods that can be INLINE are not recursive -- a 'go' function doing
50 -- the real work is provided.
51
52 module Data.Set (
53 -- * Set type
54 #if !defined(TESTING)
55 Set -- instance Eq,Ord,Show,Read,Data,Typeable
56 #else
57 Set(..)
58 #endif
59
60 -- * Operators
61 , (\\)
62
63 -- * Query
64 , null
65 , size
66 , member
67 , notMember
68 , isSubsetOf
69 , isProperSubsetOf
70
71 -- * Construction
72 , empty
73 , singleton
74 , insert
75 , delete
76
77 -- * Combine
78 , union
79 , unions
80 , difference
81 , intersection
82
83 -- * Filter
84 , filter
85 , partition
86 , split
87 , splitMember
88
89 -- * Map
90 , map
91 , mapMonotonic
92
93 -- * Fold
94 , fold
95
96 -- * Min\/Max
97 , findMin
98 , findMax
99 , deleteMin
100 , deleteMax
101 , deleteFindMin
102 , deleteFindMax
103 , maxView
104 , minView
105
106 -- * Conversion
107
108 -- ** List
109 , elems
110 , toList
111 , fromList
112
113 -- ** Ordered list
114 , toAscList
115 , fromAscList
116 , fromDistinctAscList
117
118 -- * Debugging
119 , showTree
120 , showTreeWith
121 , valid
122
123 #if defined(TESTING)
124 -- Internals (for testing)
125 , bin
126 , balanced
127 , join
128 , merge
129 #endif
130 ) where
131
132 import Prelude hiding (filter,foldr,null,map)
133 import qualified Data.List as List
134 import Data.Monoid (Monoid(..))
135 import Data.Foldable (Foldable(foldMap))
136 import Data.Typeable
137
138 {-
139 -- just for testing
140 import QuickCheck
141 import List (nub,sort)
142 import qualified List
143 -}
144
145 #if __GLASGOW_HASKELL__
146 import Text.Read
147 import Data.Data
148 #endif
149
150 -- Use macros to define strictness of functions.
151 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
152 -- We do not use BangPatterns, because they are not in any standard and we
153 -- want the compilers to be compiled by as many compilers as possible.
154 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
155
156 {--------------------------------------------------------------------
157 Operators
158 --------------------------------------------------------------------}
159 infixl 9 \\ --
160
161 -- | /O(n+m)/. See 'difference'.
162 (\\) :: Ord a => Set a -> Set a -> Set a
163 m1 \\ m2 = difference m1 m2
164 #if __GLASGOW_HASKELL__ >= 700
165 {-# INLINABLE (\\) #-}
166 #endif
167
168 {--------------------------------------------------------------------
169 Sets are size balanced trees
170 --------------------------------------------------------------------}
171 -- | A set of values @a@.
172 data Set a = Tip
173 | Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a)
174
175 type Size = Int
176
177 instance Ord a => Monoid (Set a) where
178 mempty = empty
179 mappend = union
180 mconcat = unions
181
182 instance Foldable Set where
183 foldMap _ Tip = mempty
184 foldMap f (Bin _s k l r) = foldMap f l `mappend` f k `mappend` foldMap f r
185
186 #if __GLASGOW_HASKELL__
187
188 {--------------------------------------------------------------------
189 A Data instance
190 --------------------------------------------------------------------}
191
192 -- This instance preserves data abstraction at the cost of inefficiency.
193 -- We omit reflection services for the sake of data abstraction.
194
195 instance (Data a, Ord a) => Data (Set a) where
196 gfoldl f z set = z fromList `f` (toList set)
197 toConstr _ = error "toConstr"
198 gunfold _ _ = error "gunfold"
199 dataTypeOf _ = mkNoRepType "Data.Set.Set"
200 dataCast1 f = gcast1 f
201
202 #endif
203
204 {--------------------------------------------------------------------
205 Query
206 --------------------------------------------------------------------}
207 -- | /O(1)/. Is this the empty set?
208 null :: Set a -> Bool
209 null Tip = True
210 null (Bin {}) = False
211 #if __GLASGOW_HASKELL__ >= 700
212 {-# INLINABLE null #-}
213 #endif
214
215 -- | /O(1)/. The number of elements in the set.
216 size :: Set a -> Int
217 size Tip = 0
218 size (Bin sz _ _ _) = sz
219 #if __GLASGOW_HASKELL__ >= 700
220 {-# INLINABLE size #-}
221 #endif
222
223 -- | /O(log n)/. Is the element in the set?
224 member :: Ord a => a -> Set a -> Bool
225 member = go
226 where
227 STRICT_1_OF_2(go)
228 go _ Tip = False
229 go x (Bin _ y l r) = case compare x y of
230 LT -> go x l
231 GT -> go x r
232 EQ -> True
233 #if __GLASGOW_HASKELL__ >= 700
234 {-# INLINABLE member #-}
235 #else
236 {-# INLINE member #-}
237 #endif
238
239 -- | /O(log n)/. Is the element not in the set?
240 notMember :: Ord a => a -> Set a -> Bool
241 notMember a t = not $ member a t
242 {-# INLINE notMember #-}
243
244 {--------------------------------------------------------------------
245 Construction
246 --------------------------------------------------------------------}
247 -- | /O(1)/. The empty set.
248 empty :: Set a
249 empty = Tip
250
251 -- | /O(1)/. Create a singleton set.
252 singleton :: a -> Set a
253 singleton x = Bin 1 x Tip Tip
254
255 {--------------------------------------------------------------------
256 Insertion, Deletion
257 --------------------------------------------------------------------}
258 -- | /O(log n)/. Insert an element in a set.
259 -- If the set already contains an element equal to the given value,
260 -- it is replaced with the new value.
261 insert :: Ord a => a -> Set a -> Set a
262 insert = go
263 where
264 STRICT_1_OF_2(go)
265 go x Tip = singleton x
266 go x (Bin sz y l r) = case compare x y of
267 LT -> balanceL y (go x l) r
268 GT -> balanceR y l (go x r)
269 EQ -> Bin sz x l r
270 #if __GLASGOW_HASKELL__ >= 700
271 {-# INLINEABLE insert #-}
272 #else
273 {-# INLINE insert #-}
274 #endif
275
276 -- Insert an element to the set only if it is not in the set. Used by
277 -- `union`.
278 insertR :: Ord a => a -> Set a -> Set a
279 insertR = go
280 where
281 STRICT_1_OF_2(go)
282 go x Tip = singleton x
283 go x t@(Bin _ y l r) = case compare x y of
284 LT -> balanceL y (go x l) r
285 GT -> balanceR y l (go x r)
286 EQ -> t
287 #if __GLASGOW_HASKELL__ >= 700
288 {-# INLINEABLE insertR #-}
289 #else
290 {-# INLINE insertR #-}
291 #endif
292
293 -- | /O(log n)/. Delete an element from a set.
294 delete :: Ord a => a -> Set a -> Set a
295 delete = go
296 where
297 STRICT_1_OF_2(go)
298 go _ Tip = Tip
299 go x (Bin _ y l r) = case compare x y of
300 LT -> balanceR y (go x l) r
301 GT -> balanceL y l (go x r)
302 EQ -> glue l r
303 #if __GLASGOW_HASKELL__ >= 700
304 {-# INLINEABLE delete #-}
305 #else
306 {-# INLINE delete #-}
307 #endif
308
309 {--------------------------------------------------------------------
310 Subset
311 --------------------------------------------------------------------}
312 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
313 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
314 isProperSubsetOf s1 s2
315 = (size s1 < size s2) && (isSubsetOf s1 s2)
316 #if __GLASGOW_HASKELL__ >= 700
317 {-# INLINABLE isProperSubsetOf #-}
318 #endif
319
320
321 -- | /O(n+m)/. Is this a subset?
322 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
323 isSubsetOf :: Ord a => Set a -> Set a -> Bool
324 isSubsetOf t1 t2
325 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
326 #if __GLASGOW_HASKELL__ >= 700
327 {-# INLINABLE isSubsetOf #-}
328 #endif
329
330 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
331 isSubsetOfX Tip _ = True
332 isSubsetOfX _ Tip = False
333 isSubsetOfX (Bin _ x l r) t
334 = found && isSubsetOfX l lt && isSubsetOfX r gt
335 where
336 (lt,found,gt) = splitMember x t
337 #if __GLASGOW_HASKELL__ >= 700
338 {-# INLINABLE isSubsetOfX #-}
339 #endif
340
341
342 {--------------------------------------------------------------------
343 Minimal, Maximal
344 --------------------------------------------------------------------}
345 -- | /O(log n)/. The minimal element of a set.
346 findMin :: Set a -> a
347 findMin (Bin _ x Tip _) = x
348 findMin (Bin _ _ l _) = findMin l
349 findMin Tip = error "Set.findMin: empty set has no minimal element"
350 #if __GLASGOW_HASKELL__ >= 700
351 {-# INLINABLE findMin #-}
352 #endif
353
354 -- | /O(log n)/. The maximal element of a set.
355 findMax :: Set a -> a
356 findMax (Bin _ x _ Tip) = x
357 findMax (Bin _ _ _ r) = findMax r
358 findMax Tip = error "Set.findMax: empty set has no maximal element"
359 #if __GLASGOW_HASKELL__ >= 700
360 {-# INLINABLE findMax #-}
361 #endif
362
363 -- | /O(log n)/. Delete the minimal element.
364 deleteMin :: Set a -> Set a
365 deleteMin (Bin _ _ Tip r) = r
366 deleteMin (Bin _ x l r) = balanceR x (deleteMin l) r
367 deleteMin Tip = Tip
368 #if __GLASGOW_HASKELL__ >= 700
369 {-# INLINABLE deleteMin #-}
370 #endif
371
372 -- | /O(log n)/. Delete the maximal element.
373 deleteMax :: Set a -> Set a
374 deleteMax (Bin _ _ l Tip) = l
375 deleteMax (Bin _ x l r) = balanceL x l (deleteMax r)
376 deleteMax Tip = Tip
377 #if __GLASGOW_HASKELL__ >= 700
378 {-# INLINABLE deleteMax #-}
379 #endif
380
381 {--------------------------------------------------------------------
382 Union.
383 --------------------------------------------------------------------}
384 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
385 unions :: Ord a => [Set a] -> Set a
386 unions = foldlStrict union empty
387 #if __GLASGOW_HASKELL__ >= 700
388 {-# INLINABLE unions #-}
389 #endif
390
391 -- | /O(n+m)/. The union of two sets, preferring the first set when
392 -- equal elements are encountered.
393 -- The implementation uses the efficient /hedge-union/ algorithm.
394 -- Hedge-union is more efficient on (bigset `union` smallset).
395 union :: Ord a => Set a -> Set a -> Set a
396 union Tip t2 = t2
397 union t1 Tip = t1
398 union (Bin _ x Tip Tip) t = insert x t
399 union t (Bin _ x Tip Tip) = insertR x t
400 union t1 t2 = hedgeUnion NothingS NothingS t1 t2
401 #if __GLASGOW_HASKELL__ >= 700
402 {-# INLINABLE union #-}
403 #endif
404
405 hedgeUnion :: Ord a
406 => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
407 hedgeUnion _ _ t1 Tip
408 = t1
409 hedgeUnion blo bhi Tip (Bin _ x l r)
410 = join x (filterGt blo l) (filterLt bhi r)
411 hedgeUnion blo bhi (Bin _ x l r) t2
412 = join x (hedgeUnion blo bmi l (trim blo bmi t2))
413 (hedgeUnion bmi bhi r (trim bmi bhi t2))
414 where
415 bmi = JustS x
416 #if __GLASGOW_HASKELL__ >= 700
417 {-# INLINABLE hedgeUnion #-}
418 #endif
419
420 {--------------------------------------------------------------------
421 Difference
422 --------------------------------------------------------------------}
423 -- | /O(n+m)/. Difference of two sets.
424 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
425 difference :: Ord a => Set a -> Set a -> Set a
426 difference Tip _ = Tip
427 difference t1 Tip = t1
428 difference t1 t2 = hedgeDiff NothingS NothingS t1 t2
429 #if __GLASGOW_HASKELL__ >= 700
430 {-# INLINABLE difference #-}
431 #endif
432
433 hedgeDiff :: Ord a
434 => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
435 hedgeDiff _ _ Tip _
436 = Tip
437 hedgeDiff blo bhi (Bin _ x l r) Tip
438 = join x (filterGt blo l) (filterLt bhi r)
439 hedgeDiff blo bhi t (Bin _ x l r)
440 = merge (hedgeDiff blo bmi (trim blo bmi t) l)
441 (hedgeDiff bmi bhi (trim bmi bhi t) r)
442 where
443 bmi = JustS x
444 #if __GLASGOW_HASKELL__ >= 700
445 {-# INLINABLE hedgeDiff #-}
446 #endif
447
448 {--------------------------------------------------------------------
449 Intersection
450 --------------------------------------------------------------------}
451 -- | /O(n+m)/. The intersection of two sets.
452 -- Elements of the result come from the first set, so for example
453 --
454 -- > import qualified Data.Set as S
455 -- > data AB = A | B deriving Show
456 -- > instance Ord AB where compare _ _ = EQ
457 -- > instance Eq AB where _ == _ = True
458 -- > main = print (S.singleton A `S.intersection` S.singleton B,
459 -- > S.singleton B `S.intersection` S.singleton A)
460 --
461 -- prints @(fromList [A],fromList [B])@.
462 intersection :: Ord a => Set a -> Set a -> Set a
463 intersection Tip _ = Tip
464 intersection _ Tip = Tip
465 intersection t1@(Bin s1 x1 l1 r1) t2@(Bin s2 x2 l2 r2) =
466 if s1 >= s2 then
467 let (lt,found,gt) = splitLookup x2 t1
468 tl = intersection lt l2
469 tr = intersection gt r2
470 in case found of
471 Just x -> join x tl tr
472 Nothing -> merge tl tr
473 else let (lt,found,gt) = splitMember x1 t2
474 tl = intersection l1 lt
475 tr = intersection r1 gt
476 in if found then join x1 tl tr
477 else merge tl tr
478 #if __GLASGOW_HASKELL__ >= 700
479 {-# INLINABLE intersection #-}
480 #endif
481
482 {--------------------------------------------------------------------
483 Filter and partition
484 --------------------------------------------------------------------}
485 -- | /O(n)/. Filter all elements that satisfy the predicate.
486 filter :: Ord a => (a -> Bool) -> Set a -> Set a
487 filter _ Tip = Tip
488 filter p (Bin _ x l r)
489 | p x = join x (filter p l) (filter p r)
490 | otherwise = merge (filter p l) (filter p r)
491 #if __GLASGOW_HASKELL__ >= 700
492 {-# INLINABLE filter #-}
493 #endif
494
495 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
496 -- the predicate and one with all elements that don't satisfy the predicate.
497 -- See also 'split'.
498 partition :: Ord a => (a -> Bool) -> Set a -> (Set a,Set a)
499 partition _ Tip = (Tip, Tip)
500 partition p (Bin _ x l r) = case (partition p l, partition p r) of
501 ((l1, l2), (r1, r2))
502 | p x -> (join x l1 r1, merge l2 r2)
503 | otherwise -> (merge l1 r1, join x l2 r2)
504 #if __GLASGOW_HASKELL__ >= 700
505 {-# INLINABLE partition #-}
506 #endif
507
508 {----------------------------------------------------------------------
509 Map
510 ----------------------------------------------------------------------}
511
512 -- | /O(n*log n)/.
513 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
514 --
515 -- It's worth noting that the size of the result may be smaller if,
516 -- for some @(x,y)@, @x \/= y && f x == f y@
517
518 map :: (Ord a, Ord b) => (a->b) -> Set a -> Set b
519 map f = fromList . List.map f . toList
520 #if __GLASGOW_HASKELL__ >= 700
521 {-# INLINABLE map #-}
522 #endif
523
524 -- | /O(n)/. The
525 --
526 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
527 -- /The precondition is not checked./
528 -- Semi-formally, we have:
529 --
530 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
531 -- > ==> mapMonotonic f s == map f s
532 -- > where ls = toList s
533
534 mapMonotonic :: (a->b) -> Set a -> Set b
535 mapMonotonic _ Tip = Tip
536 mapMonotonic f (Bin sz x l r) = Bin sz (f x) (mapMonotonic f l) (mapMonotonic f r)
537 #if __GLASGOW_HASKELL__ >= 700
538 {-# INLINABLE mapMonotonic #-}
539 #endif
540
541 {--------------------------------------------------------------------
542 Fold
543 --------------------------------------------------------------------}
544 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
545 fold :: (a -> b -> b) -> b -> Set a -> b
546 fold = foldr
547 {-# INLINE fold #-}
548
549 -- | /O(n)/. Post-order fold.
550 foldr :: (a -> b -> b) -> b -> Set a -> b
551 foldr f = go
552 where
553 go z Tip = z
554 go z (Bin _ x l r) = go (f x (go z r)) l
555 {-# INLINE foldr #-}
556
557 {--------------------------------------------------------------------
558 List variations
559 --------------------------------------------------------------------}
560 -- | /O(n)/. The elements of a set.
561 elems :: Set a -> [a]
562 elems = toList
563 #if __GLASGOW_HASKELL__ >= 700
564 {-# INLINABLE elems #-}
565 #endif
566
567 {--------------------------------------------------------------------
568 Lists
569 --------------------------------------------------------------------}
570 -- | /O(n)/. Convert the set to a list of elements.
571 toList :: Set a -> [a]
572 toList = toAscList
573 #if __GLASGOW_HASKELL__ >= 700
574 {-# INLINABLE toList #-}
575 #endif
576
577 -- | /O(n)/. Convert the set to an ascending list of elements.
578 toAscList :: Set a -> [a]
579 toAscList = foldr (:) []
580 #if __GLASGOW_HASKELL__ >= 700
581 {-# INLINABLE toAscList #-}
582 #endif
583
584 -- | /O(n*log n)/. Create a set from a list of elements.
585 fromList :: Ord a => [a] -> Set a
586 fromList = foldlStrict ins empty
587 where
588 ins t x = insert x t
589 #if __GLASGOW_HASKELL__ >= 700
590 {-# INLINABLE fromList #-}
591 #endif
592
593 {--------------------------------------------------------------------
594 Building trees from ascending/descending lists can be done in linear time.
595
596 Note that if [xs] is ascending that:
597 fromAscList xs == fromList xs
598 --------------------------------------------------------------------}
599 -- | /O(n)/. Build a set from an ascending list in linear time.
600 -- /The precondition (input list is ascending) is not checked./
601 fromAscList :: Eq a => [a] -> Set a
602 fromAscList xs
603 = fromDistinctAscList (combineEq xs)
604 where
605 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
606 combineEq xs'
607 = case xs' of
608 [] -> []
609 [x] -> [x]
610 (x:xx) -> combineEq' x xx
611
612 combineEq' z [] = [z]
613 combineEq' z (x:xs')
614 | z==x = combineEq' z xs'
615 | otherwise = z:combineEq' x xs'
616 #if __GLASGOW_HASKELL__ >= 700
617 {-# INLINABLE fromAscList #-}
618 #endif
619
620
621 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
622 -- /The precondition (input list is strictly ascending) is not checked./
623 fromDistinctAscList :: [a] -> Set a
624 fromDistinctAscList xs
625 = build const (length xs) xs
626 where
627 -- 1) use continutations so that we use heap space instead of stack space.
628 -- 2) special case for n==5 to build bushier trees.
629 build c 0 xs' = c Tip xs'
630 build c 5 xs' = case xs' of
631 (x1:x2:x3:x4:x5:xx)
632 -> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
633 _ -> error "fromDistinctAscList build 5"
634 build c n xs' = seq nr $ build (buildR nr c) nl xs'
635 where
636 nl = n `div` 2
637 nr = n - nl - 1
638
639 buildR n c l (x:ys) = build (buildB l x c) n ys
640 buildR _ _ _ [] = error "fromDistinctAscList buildR []"
641 buildB l x c r zs = c (bin x l r) zs
642 #if __GLASGOW_HASKELL__ >= 700
643 {-# INLINABLE fromDistinctAscList #-}
644 #endif
645
646 {--------------------------------------------------------------------
647 Eq converts the set to a list. In a lazy setting, this
648 actually seems one of the faster methods to compare two trees
649 and it is certainly the simplest :-)
650 --------------------------------------------------------------------}
651 instance Eq a => Eq (Set a) where
652 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
653
654 {--------------------------------------------------------------------
655 Ord
656 --------------------------------------------------------------------}
657
658 instance Ord a => Ord (Set a) where
659 compare s1 s2 = compare (toAscList s1) (toAscList s2)
660
661 {--------------------------------------------------------------------
662 Show
663 --------------------------------------------------------------------}
664 instance Show a => Show (Set a) where
665 showsPrec p xs = showParen (p > 10) $
666 showString "fromList " . shows (toList xs)
667
668 {--------------------------------------------------------------------
669 Read
670 --------------------------------------------------------------------}
671 instance (Read a, Ord a) => Read (Set a) where
672 #ifdef __GLASGOW_HASKELL__
673 readPrec = parens $ prec 10 $ do
674 Ident "fromList" <- lexP
675 xs <- readPrec
676 return (fromList xs)
677
678 readListPrec = readListPrecDefault
679 #else
680 readsPrec p = readParen (p > 10) $ \ r -> do
681 ("fromList",s) <- lex r
682 (xs,t) <- reads s
683 return (fromList xs,t)
684 #endif
685
686 {--------------------------------------------------------------------
687 Typeable/Data
688 --------------------------------------------------------------------}
689
690 #include "Typeable.h"
691 INSTANCE_TYPEABLE1(Set,setTc,"Set")
692
693 {--------------------------------------------------------------------
694 Utility functions that return sub-ranges of the original
695 tree. Some functions take a `Maybe value` as an argument to
696 allow comparisons against infinite values. These are called `blow`
697 (Nothing is -\infty) and `bhigh` (here Nothing is +\infty).
698 We use MaybeS value, which is a Maybe strict in the Just case.
699
700 [trim blow bhigh t] A tree that is either empty or where [x > blow]
701 and [x < bhigh] for the value [x] of the root.
702 [filterGt blow t] A tree where for all values [k]. [k > blow]
703 [filterLt bhigh t] A tree where for all values [k]. [k < bhigh]
704
705 [split k t] Returns two trees [l] and [r] where all values
706 in [l] are <[k] and all keys in [r] are >[k].
707 [splitMember k t] Just like [split] but also returns whether [k]
708 was found in the tree.
709 --------------------------------------------------------------------}
710
711 data MaybeS a = NothingS | JustS !a
712
713 {--------------------------------------------------------------------
714 [trim blo bhi t] trims away all subtrees that surely contain no
715 values between the range [blo] to [bhi]. The returned tree is either
716 empty or the key of the root is between @blo@ and @bhi@.
717 --------------------------------------------------------------------}
718 trim :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a
719 trim NothingS NothingS t = t
720 trim (JustS lx) NothingS t = greater lx t where greater lo (Bin _ x _ r) | x <= lo = greater lo r
721 greater _ t' = t'
722 trim NothingS (JustS hx) t = lesser hx t where lesser hi (Bin _ x l _) | x >= hi = lesser hi l
723 lesser _ t' = t'
724 trim (JustS lx) (JustS hx) t = middle lx hx t where middle lo hi (Bin _ x _ r) | x <= lo = middle lo hi r
725 middle lo hi (Bin _ x l _) | x >= hi = middle lo hi l
726 middle _ _ t' = t'
727 #if __GLASGOW_HASKELL__ >= 700
728 {-# INLINABLE trim #-}
729 #endif
730
731 {--------------------------------------------------------------------
732 [filterGt b t] filter all values >[b] from tree [t]
733 [filterLt b t] filter all values <[b] from tree [t]
734 --------------------------------------------------------------------}
735 filterGt :: Ord a => MaybeS a -> Set a -> Set a
736 filterGt NothingS t = t
737 filterGt (JustS b) t = filter' b t
738 where filter' _ Tip = Tip
739 filter' b' (Bin _ x l r) =
740 case compare b' x of LT -> join x (filter' b' l) r
741 EQ -> r
742 GT -> filter' b' r
743 #if __GLASGOW_HASKELL__ >= 700
744 {-# INLINABLE filterGt #-}
745 #endif
746
747 filterLt :: Ord a => MaybeS a -> Set a -> Set a
748 filterLt NothingS t = t
749 filterLt (JustS b) t = filter' b t
750 where filter' _ Tip = Tip
751 filter' b' (Bin _ x l r) =
752 case compare x b' of LT -> join x l (filter' b' r)
753 EQ -> l
754 GT -> filter' b' l
755 #if __GLASGOW_HASKELL__ >= 700
756 {-# INLINABLE filterLt #-}
757 #endif
758
759 {--------------------------------------------------------------------
760 Split
761 --------------------------------------------------------------------}
762 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
763 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
764 -- comprises the elements of @set@ greater than @x@.
765 split :: Ord a => a -> Set a -> (Set a,Set a)
766 split _ Tip = (Tip,Tip)
767 split x (Bin _ y l r)
768 = case compare x y of
769 LT -> let (lt,gt) = split x l in (lt,join y gt r)
770 GT -> let (lt,gt) = split x r in (join y l lt,gt)
771 EQ -> (l,r)
772 #if __GLASGOW_HASKELL__ >= 700
773 {-# INLINABLE split #-}
774 #endif
775
776 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
777 -- element was found in the original set.
778 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
779 splitMember x t = let (l,m,r) = splitLookup x t in
780 (l,maybe False (const True) m,r)
781 #if __GLASGOW_HASKELL__ >= 700
782 {-# INLINABLE splitMember #-}
783 #endif
784
785 -- | /O(log n)/. Performs a 'split' but also returns the pivot
786 -- element that was found in the original set.
787 splitLookup :: Ord a => a -> Set a -> (Set a,Maybe a,Set a)
788 splitLookup _ Tip = (Tip,Nothing,Tip)
789 splitLookup x (Bin _ y l r)
790 = case compare x y of
791 LT -> let (lt,found,gt) = splitLookup x l in (lt,found,join y gt r)
792 GT -> let (lt,found,gt) = splitLookup x r in (join y l lt,found,gt)
793 EQ -> (l,Just y,r)
794 #if __GLASGOW_HASKELL__ >= 700
795 {-# INLINABLE splitLookup #-}
796 #endif
797
798 {--------------------------------------------------------------------
799 Utility functions that maintain the balance properties of the tree.
800 All constructors assume that all values in [l] < [x] and all values
801 in [r] > [x], and that [l] and [r] are valid trees.
802
803 In order of sophistication:
804 [Bin sz x l r] The type constructor.
805 [bin x l r] Maintains the correct size, assumes that both [l]
806 and [r] are balanced with respect to each other.
807 [balance x l r] Restores the balance and size.
808 Assumes that the original tree was balanced and
809 that [l] or [r] has changed by at most one element.
810 [join x l r] Restores balance and size.
811
812 Furthermore, we can construct a new tree from two trees. Both operations
813 assume that all values in [l] < all values in [r] and that [l] and [r]
814 are valid:
815 [glue l r] Glues [l] and [r] together. Assumes that [l] and
816 [r] are already balanced with respect to each other.
817 [merge l r] Merges two trees and restores balance.
818
819 Note: in contrast to Adam's paper, we use (<=) comparisons instead
820 of (<) comparisons in [join], [merge] and [balance].
821 Quickcheck (on [difference]) showed that this was necessary in order
822 to maintain the invariants. It is quite unsatisfactory that I haven't
823 been able to find out why this is actually the case! Fortunately, it
824 doesn't hurt to be a bit more conservative.
825 --------------------------------------------------------------------}
826
827 {--------------------------------------------------------------------
828 Join
829 --------------------------------------------------------------------}
830 join :: a -> Set a -> Set a -> Set a
831 join x Tip r = insertMin x r
832 join x l Tip = insertMax x l
833 join x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
834 | delta*sizeL < sizeR = balanceL z (join x l lz) rz
835 | delta*sizeR < sizeL = balanceR y ly (join x ry r)
836 | otherwise = bin x l r
837 #if __GLASGOW_HASKELL__ >= 700
838 {-# INLINABLE join #-}
839 #endif
840
841
842 -- insertMin and insertMax don't perform potentially expensive comparisons.
843 insertMax,insertMin :: a -> Set a -> Set a
844 insertMax x t
845 = case t of
846 Tip -> singleton x
847 Bin _ y l r
848 -> balanceR y l (insertMax x r)
849 #if __GLASGOW_HASKELL__ >= 700
850 {-# INLINABLE insertMax #-}
851 #endif
852
853 insertMin x t
854 = case t of
855 Tip -> singleton x
856 Bin _ y l r
857 -> balanceL y (insertMin x l) r
858 #if __GLASGOW_HASKELL__ >= 700
859 {-# INLINABLE insertMin #-}
860 #endif
861
862 {--------------------------------------------------------------------
863 [merge l r]: merges two trees.
864 --------------------------------------------------------------------}
865 merge :: Set a -> Set a -> Set a
866 merge Tip r = r
867 merge l Tip = l
868 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
869 | delta*sizeL < sizeR = balanceL y (merge l ly) ry
870 | delta*sizeR < sizeL = balanceR x lx (merge rx r)
871 | otherwise = glue l r
872 #if __GLASGOW_HASKELL__ >= 700
873 {-# INLINABLE merge #-}
874 #endif
875
876 {--------------------------------------------------------------------
877 [glue l r]: glues two trees together.
878 Assumes that [l] and [r] are already balanced with respect to each other.
879 --------------------------------------------------------------------}
880 glue :: Set a -> Set a -> Set a
881 glue Tip r = r
882 glue l Tip = l
883 glue l r
884 | size l > size r = let (m,l') = deleteFindMax l in balanceR m l' r
885 | otherwise = let (m,r') = deleteFindMin r in balanceL m l r'
886 #if __GLASGOW_HASKELL__ >= 700
887 {-# INLINABLE glue #-}
888 #endif
889
890
891 -- | /O(log n)/. Delete and find the minimal element.
892 --
893 -- > deleteFindMin set = (findMin set, deleteMin set)
894
895 deleteFindMin :: Set a -> (a,Set a)
896 deleteFindMin t
897 = case t of
898 Bin _ x Tip r -> (x,r)
899 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balanceR x l' r)
900 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
901 #if __GLASGOW_HASKELL__ >= 700
902 {-# INLINABLE deleteFindMin #-}
903 #endif
904
905 -- | /O(log n)/. Delete and find the maximal element.
906 --
907 -- > deleteFindMax set = (findMax set, deleteMax set)
908 deleteFindMax :: Set a -> (a,Set a)
909 deleteFindMax t
910 = case t of
911 Bin _ x l Tip -> (x,l)
912 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balanceL x l r')
913 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
914 #if __GLASGOW_HASKELL__ >= 700
915 {-# INLINABLE deleteFindMax #-}
916 #endif
917
918 -- | /O(log n)/. Retrieves the minimal key of the set, and the set
919 -- stripped of that element, or 'Nothing' if passed an empty set.
920 minView :: Set a -> Maybe (a, Set a)
921 minView Tip = Nothing
922 minView x = Just (deleteFindMin x)
923 #if __GLASGOW_HASKELL__ >= 700
924 {-# INLINABLE minView #-}
925 #endif
926
927 -- | /O(log n)/. Retrieves the maximal key of the set, and the set
928 -- stripped of that element, or 'Nothing' if passed an empty set.
929 maxView :: Set a -> Maybe (a, Set a)
930 maxView Tip = Nothing
931 maxView x = Just (deleteFindMax x)
932 #if __GLASGOW_HASKELL__ >= 700
933 {-# INLINABLE maxView #-}
934 #endif
935
936 {--------------------------------------------------------------------
937 [balance x l r] balances two trees with value x.
938 The sizes of the trees should balance after decreasing the
939 size of one of them. (a rotation).
940
941 [delta] is the maximal relative difference between the sizes of
942 two trees, it corresponds with the [w] in Adams' paper.
943 [ratio] is the ratio between an outer and inner sibling of the
944 heavier subtree in an unbalanced setting. It determines
945 whether a double or single rotation should be performed
946 to restore balance. It is correspondes with the inverse
947 of $\alpha$ in Adam's article.
948
949 Note that according to the Adam's paper:
950 - [delta] should be larger than 4.646 with a [ratio] of 2.
951 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
952
953 But the Adam's paper is errorneous:
954 - it can be proved that for delta=2 and delta>=5 there does
955 not exist any ratio that would work
956 - delta=4.5 and ratio=2 does not work
957
958 That leaves two reasonable variants, delta=3 and delta=4,
959 both with ratio=2.
960
961 - A lower [delta] leads to a more 'perfectly' balanced tree.
962 - A higher [delta] performs less rebalancing.
963
964 In the benchmarks, delta=3 is faster on insert operations,
965 and delta=4 has slightly better deletes. As the insert speedup
966 is larger, we currently use delta=3.
967
968 --------------------------------------------------------------------}
969 delta,ratio :: Int
970 delta = 3
971 ratio = 2
972
973 -- The balance function is equivalent to the following:
974 --
975 -- balance :: a -> Set a -> Set a -> Set a
976 -- balance x l r
977 -- | sizeL + sizeR <= 1 = Bin sizeX x l r
978 -- | sizeR > delta*sizeL = rotateL x l r
979 -- | sizeL > delta*sizeR = rotateR x l r
980 -- | otherwise = Bin sizeX x l r
981 -- where
982 -- sizeL = size l
983 -- sizeR = size r
984 -- sizeX = sizeL + sizeR + 1
985 --
986 -- rotateL :: a -> Set a -> Set a -> Set a
987 -- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
988 -- | otherwise = doubleL x l r
989 -- rotateR :: a -> Set a -> Set a -> Set a
990 -- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
991 -- | otherwise = doubleR x l r
992 --
993 -- singleL, singleR :: a -> Set a -> Set a -> Set a
994 -- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
995 -- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
996 --
997 -- doubleL, doubleR :: a -> Set a -> Set a -> Set a
998 -- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
999 -- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
1000 --
1001 -- It is only written in such a way that every node is pattern-matched only once.
1002 --
1003 -- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
1004 -- In case it is needed, it can be found in Data.Map.
1005
1006 -- Functions balanceL and balanceR are specialised versions of balance.
1007 -- balanceL only checks whether the left subtree is too big,
1008 -- balanceR only checks whether the right subtree is too big.
1009
1010 -- balanceL is called when left subtree might have been inserted to or when
1011 -- right subtree might have been deleted from.
1012 balanceL :: a -> Set a -> Set a -> Set a
1013 balanceL x l r = case r of
1014 Tip -> case l of
1015 Tip -> Bin 1 x Tip Tip
1016 (Bin _ _ Tip Tip) -> Bin 2 x l Tip
1017 (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
1018 (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
1019 (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
1020 | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
1021 | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)
1022
1023 (Bin rs _ _ _) -> case l of
1024 Tip -> Bin (1+rs) x Tip r
1025
1026 (Bin ls lx ll lr)
1027 | ls > delta*rs -> case (ll, lr) of
1028 (Bin lls _ _ _, Bin lrs lrx lrl lrr)
1029 | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
1030 | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
1031 (_, _) -> error "Failure in Data.Map.balanceL"
1032 | otherwise -> Bin (1+ls+rs) x l r
1033 {-# NOINLINE balanceL #-}
1034
1035 -- balanceR is called when right subtree might have been inserted to or when
1036 -- left subtree might have been deleted from.
1037 balanceR :: a -> Set a -> Set a -> Set a
1038 balanceR x l r = case l of
1039 Tip -> case r of
1040 Tip -> Bin 1 x Tip Tip
1041 (Bin _ _ Tip Tip) -> Bin 2 x Tip r
1042 (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
1043 (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
1044 (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
1045 | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
1046 | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)
1047
1048 (Bin ls _ _ _) -> case r of
1049 Tip -> Bin (1+ls) x l Tip
1050
1051 (Bin rs rx rl rr)
1052 | rs > delta*ls -> case (rl, rr) of
1053 (Bin rls rlx rll rlr, Bin rrs _ _ _)
1054 | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
1055 | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
1056 (_, _) -> error "Failure in Data.Map.balanceR"
1057 | otherwise -> Bin (1+ls+rs) x l r
1058 {-# NOINLINE balanceR #-}
1059
1060 {--------------------------------------------------------------------
1061 The bin constructor maintains the size of the tree
1062 --------------------------------------------------------------------}
1063 bin :: a -> Set a -> Set a -> Set a
1064 bin x l r
1065 = Bin (size l + size r + 1) x l r
1066 {-# INLINE bin #-}
1067
1068
1069 {--------------------------------------------------------------------
1070 Utilities
1071 --------------------------------------------------------------------}
1072 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1073 foldlStrict f = go
1074 where
1075 go z [] = z
1076 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1077 {-# INLINE foldlStrict #-}
1078
1079 {--------------------------------------------------------------------
1080 Debugging
1081 --------------------------------------------------------------------}
1082 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1083 -- in a compressed, hanging format.
1084 showTree :: Show a => Set a -> String
1085 showTree s
1086 = showTreeWith True False s
1087
1088
1089 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
1090 the tree that implements the set. If @hang@ is
1091 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
1092 @wide@ is 'True', an extra wide version is shown.
1093
1094 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
1095 > 4
1096 > +--2
1097 > | +--1
1098 > | +--3
1099 > +--5
1100 >
1101 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
1102 > 4
1103 > |
1104 > +--2
1105 > | |
1106 > | +--1
1107 > | |
1108 > | +--3
1109 > |
1110 > +--5
1111 >
1112 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
1113 > +--5
1114 > |
1115 > 4
1116 > |
1117 > | +--3
1118 > | |
1119 > +--2
1120 > |
1121 > +--1
1122
1123 -}
1124 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
1125 showTreeWith hang wide t
1126 | hang = (showsTreeHang wide [] t) ""
1127 | otherwise = (showsTree wide [] [] t) ""
1128
1129 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
1130 showsTree wide lbars rbars t
1131 = case t of
1132 Tip -> showsBars lbars . showString "|\n"
1133 Bin _ x Tip Tip
1134 -> showsBars lbars . shows x . showString "\n"
1135 Bin _ x l r
1136 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1137 showWide wide rbars .
1138 showsBars lbars . shows x . showString "\n" .
1139 showWide wide lbars .
1140 showsTree wide (withEmpty lbars) (withBar lbars) l
1141
1142 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
1143 showsTreeHang wide bars t
1144 = case t of
1145 Tip -> showsBars bars . showString "|\n"
1146 Bin _ x Tip Tip
1147 -> showsBars bars . shows x . showString "\n"
1148 Bin _ x l r
1149 -> showsBars bars . shows x . showString "\n" .
1150 showWide wide bars .
1151 showsTreeHang wide (withBar bars) l .
1152 showWide wide bars .
1153 showsTreeHang wide (withEmpty bars) r
1154
1155 showWide :: Bool -> [String] -> String -> String
1156 showWide wide bars
1157 | wide = showString (concat (reverse bars)) . showString "|\n"
1158 | otherwise = id
1159
1160 showsBars :: [String] -> ShowS
1161 showsBars bars
1162 = case bars of
1163 [] -> id
1164 _ -> showString (concat (reverse (tail bars))) . showString node
1165
1166 node :: String
1167 node = "+--"
1168
1169 withBar, withEmpty :: [String] -> [String]
1170 withBar bars = "| ":bars
1171 withEmpty bars = " ":bars
1172
1173 {--------------------------------------------------------------------
1174 Assertions
1175 --------------------------------------------------------------------}
1176 -- | /O(n)/. Test if the internal set structure is valid.
1177 valid :: Ord a => Set a -> Bool
1178 valid t
1179 = balanced t && ordered t && validsize t
1180
1181 ordered :: Ord a => Set a -> Bool
1182 ordered t
1183 = bounded (const True) (const True) t
1184 where
1185 bounded lo hi t'
1186 = case t' of
1187 Tip -> True
1188 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
1189
1190 balanced :: Set a -> Bool
1191 balanced t
1192 = case t of
1193 Tip -> True
1194 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1195 balanced l && balanced r
1196
1197 validsize :: Set a -> Bool
1198 validsize t
1199 = (realsize t == Just (size t))
1200 where
1201 realsize t'
1202 = case t' of
1203 Tip -> Just 0
1204 Bin sz _ l r -> case (realsize l,realsize r) of
1205 (Just n,Just m) | n+m+1 == sz -> Just sz
1206 _ -> Nothing