Settle performance issues in Map and Set.
[packages/containers.git] / Data / Set.hs
1 {-# LANGUAGE CPP #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.Set
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of sets.
12 --
13 -- Since many function names (but not the type name) clash with
14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
15 --
16 -- > import Data.Set (Set)
17 -- > import qualified Data.Set as Set
18 --
19 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
20 -- trees of /bounded balance/) as described by:
21 --
22 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
23 -- Journal of Functional Programming 3(4):553-562, October 1993,
24 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
25 --
26 -- * J. Nievergelt and E.M. Reingold,
27 -- \"/Binary search trees of bounded balance/\",
28 -- SIAM journal of computing 2(1), March 1973.
29 --
30 -- Note that the implementation is /left-biased/ -- the elements of a
31 -- first argument are always preferred to the second, for example in
32 -- 'union' or 'insert'. Of course, left-biasing can only be observed
33 -- when equality is an equivalence relation instead of structural
34 -- equality.
35 -----------------------------------------------------------------------------
36
-- It is crucial to the performance that the functions specialize on the Ord
-- type when possible. GHC 7.0 and higher does this by itself when it sees the
-- unfolding of a function -- that is why all public functions are marked
-- INLINABLE (that exposes the unfolding).
41 --
42 -- For other compilers and GHC pre 7.0, we mark some of the functions INLINE.
43 -- We mark the functions that just navigate down the tree (lookup, insert,
44 -- delete and similar). That navigation code gets inlined and thus specialized
45 -- when possible. There is a price to pay -- code growth. The code INLINED is
46 -- therefore only the tree navigation, all the real work (rebalancing) is not
47 -- INLINED by using a NOINLINE.
48 --
49 -- All methods that can be INLINE are not recursive -- a 'go' function doing
50 -- the real work is provided.
51
52 module Data.Set (
53 -- * Set type
54 #if !defined(TESTING)
55 Set -- instance Eq,Ord,Show,Read,Data,Typeable
56 #else
57 Set(..)
58 #endif
59
60 -- * Operators
61 , (\\)
62
63 -- * Query
64 , null
65 , size
66 , member
67 , notMember
68 , isSubsetOf
69 , isProperSubsetOf
70
71 -- * Construction
72 , empty
73 , singleton
74 , insert
75 , delete
76
77 -- * Combine
78 , union
79 , unions
80 , difference
81 , intersection
82
83 -- * Filter
84 , filter
85 , partition
86 , split
87 , splitMember
88
89 -- * Map
90 , map
91 , mapMonotonic
92
93 -- * Fold
94 , fold
95
96 -- * Min\/Max
97 , findMin
98 , findMax
99 , deleteMin
100 , deleteMax
101 , deleteFindMin
102 , deleteFindMax
103 , maxView
104 , minView
105
106 -- * Conversion
107
108 -- ** List
109 , elems
110 , toList
111 , fromList
112
113 -- ** Ordered list
114 , toAscList
115 , fromAscList
116 , fromDistinctAscList
117
118 -- * Debugging
119 , showTree
120 , showTreeWith
121 , valid
122
123 #if defined(TESTING)
124 -- Internals (for testing)
125 , bin
126 , balanced
127 , join
128 , merge
129 #endif
130 ) where
131
132 import Prelude hiding (filter,foldr,null,map)
133 import qualified Data.List as List
134 import Data.Monoid (Monoid(..))
135 import Data.Foldable (Foldable(foldMap))
136 #ifndef __GLASGOW_HASKELL__
137 import Data.Typeable (Typeable, typeOf, typeOfDefault)
138 #endif
139 import Data.Typeable (Typeable1(..), TyCon, mkTyCon, mkTyConApp)
140
141 {-
142 -- just for testing
143 import QuickCheck
144 import List (nub,sort)
145 import qualified List
146 -}
147
148 #if __GLASGOW_HASKELL__
149 import Text.Read
150 import Data.Data (Data(..), mkNoRepType, gcast1)
151 #endif
152
-- Use macros to define strictness of functions.
-- STRICTxy denotes a y-ary function strict in the x-th parameter.
155 #define STRICT12(fn) fn arg _ | arg `seq` False = undefined
156
157 {--------------------------------------------------------------------
158 Operators
159 --------------------------------------------------------------------}
infixl 9 \\ -- keep a trailing comment here: a line ending in a backslash is a CPP line continuation

-- | /O(n+m)/. Infix form of 'difference': the elements of the left set
-- that do not occur in the right set.
(\\) :: Ord a => Set a -> Set a -> Set a
xs \\ ys = xs `difference` ys
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE (\\) #-}
#endif
168
169 {--------------------------------------------------------------------
170 Sets are size balanced trees
171 --------------------------------------------------------------------}
-- | A set of values @a@.
--
-- Represented as a binary search tree in which every 'Bin' node caches the
-- number of elements of the subtree rooted at it.
data Set a
  = Tip
  | Bin {-# UNPACK #-} !Size  -- number of elements in this subtree
        !a                    -- element stored at this node
        !(Set a)              -- left subtree
        !(Set a)              -- right subtree

-- Element count of a (sub)tree.
type Size = Int
177
-- Sets combine with 'union'; the neutral element is 'empty'.
instance Ord a => Monoid (Set a) where
    mempty      = empty
    mappend a b = union a b
    mconcat ss  = unions ss
182
-- The fold visits the left subtree, then the element of the node, then the
-- right subtree.
instance Foldable Set where
    foldMap f = go
      where
        go Tip           = mempty
        go (Bin _ x l r) = (go l `mappend` f x) `mappend` go r
186
187 #if __GLASGOW_HASKELL__
188
189 {--------------------------------------------------------------------
190 A Data instance
191 --------------------------------------------------------------------}
192
193 -- This instance preserves data abstraction at the cost of inefficiency.
194 -- We omit reflection services for the sake of data abstraction.
195
instance (Data a, Ord a) => Data (Set a) where
  -- The set is presented as 'fromList' applied to its list of elements.
  gfoldl k z s = k (z fromList) (toList s)
  -- Constructor reflection is deliberately not provided.
  toConstr _   = error "toConstr"
  gunfold _ _  = error "gunfold"
  dataTypeOf _ = mkNoRepType "Data.Set.Set"
  dataCast1 c  = gcast1 c
202
203 #endif
204
205 {--------------------------------------------------------------------
206 Query
207 --------------------------------------------------------------------}
-- | /O(1)/. 'True' exactly when the set contains no elements.
null :: Set a -> Bool
null t = case t of
  Tip    -> True
  Bin {} -> False
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE null #-}
#endif
215
-- | /O(1)/. How many elements the set holds; read from the cached size of
-- the root node.
size :: Set a -> Int
size t = case t of
  Tip         -> 0
  Bin n _ _ _ -> n
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE size #-}
#endif
223
-- | /O(log n)/. Does the set contain the given element?
member :: Ord a => a -> Set a -> Bool
member = search
  where
    STRICT12(search)
    search _ Tip = False
    search e (Bin _ pivot lo hi) =
      case compare e pivot of
        EQ -> True
        LT -> search e lo
        GT -> search e hi
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE member #-}
#else
{-# INLINE member #-}
#endif
239
-- | /O(log n)/. The negation of 'member'.
notMember :: Ord a => a -> Set a -> Bool
notMember a t = not (member a t)
{-# INLINE notMember #-}
244
245 {--------------------------------------------------------------------
246 Construction
247 --------------------------------------------------------------------}
-- | /O(1)/. The set without any elements.
empty :: Set a
empty = Tip
251
-- | /O(1)/. The set holding exactly the given element: a single node of
-- size 1 with two empty subtrees.
singleton :: a -> Set a
singleton x = Bin 1 x Tip Tip
255
256 {--------------------------------------------------------------------
257 Insertion, Deletion
258 --------------------------------------------------------------------}
-- | /O(log n)/. Insert an element in a set.
-- If the set already contains an element equal to the given value,
-- it is replaced with the new value.
insert :: Ord a => a -> Set a -> Set a
insert = go
  where
    STRICT12(go)
    go x Tip = singleton x
    go x (Bin sz y l r) = case compare x y of
        LT -> balanceL y (go x l) r   -- the left subtree may have grown
        GT -> balanceR y l (go x r)   -- the right subtree may have grown
        EQ -> Bin sz x l r            -- same shape, but store the new value
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE insert #-}
#else
{-# INLINE insert #-}
#endif
276
-- Insert an element to the set only if it is not in the set: when an equal
-- element is already present the set is returned unchanged, so the existing
-- element is kept. Used by `union`.
insertR :: Ord a => a -> Set a -> Set a
insertR = go
  where
    STRICT12(go)
    go x Tip = singleton x
    go x t@(Bin _ y l r) = case compare x y of
        LT -> balanceL y (go x l) r   -- the left subtree may have grown
        GT -> balanceR y l (go x r)   -- the right subtree may have grown
        EQ -> t                       -- already present: keep the old element
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE insertR #-}
#else
{-# INLINE insertR #-}
#endif
293
-- | /O(log n)/. Delete an element from a set. A set that does not contain
-- the element is returned with the same elements.
delete :: Ord a => a -> Set a -> Set a
delete = go
  where
    STRICT12(go)
    go _ Tip = Tip
    go x (Bin _ y l r) = case compare x y of
        LT -> balanceR y (go x l) r   -- the left subtree may have shrunk
        GT -> balanceL y l (go x r)   -- the right subtree may have shrunk
        EQ -> glue l r                -- drop this node, join its subtrees
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE delete #-}
#else
{-# INLINE delete #-}
#endif
309
310 {--------------------------------------------------------------------
311 Subset
312 --------------------------------------------------------------------}
-- | /O(n+m)/. Is the first set a proper subset of the second, i.e. a subset
-- that is strictly smaller?
isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
isProperSubsetOf s1 s2
  | size s1 < size s2 = isSubsetOf s1 s2
  | otherwise         = False
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE isProperSubsetOf #-}
#endif
320
321
-- | /O(n+m)/. Is this a subset?
-- @(s1 `isSubsetOf` s2)@ tells whether every element of @s1@ is also in
-- @s2@. The sizes are compared first, so a larger @s1@ is rejected at once.
isSubsetOf :: Ord a => Set a -> Set a -> Bool
isSubsetOf t1 t2
  | size t1 <= size t2 = isSubsetOfX t1 t2
  | otherwise          = False
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE isSubsetOf #-}
#endif
330
-- Subset test without the size shortcut: the second set is split around the
-- root of the first, and both halves are checked recursively.
isSubsetOfX :: Ord a => Set a -> Set a -> Bool
isSubsetOfX Tip _ = True
isSubsetOfX _ Tip = False
isSubsetOfX (Bin _ x l r) t =
  case splitMember x t of
    (below, present, above) ->
      present && isSubsetOfX l below && isSubsetOfX r above
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE isSubsetOfX #-}
#endif
341
342
343 {--------------------------------------------------------------------
344 Minimal, Maximal
345 --------------------------------------------------------------------}
-- | /O(log n)/. The smallest element of a set, found by following left
-- subtrees. Calls 'error' on the empty set.
findMin :: Set a -> a
findMin Tip = error "Set.findMin: empty set has no minimal element"
findMin (Bin _ x l _) = case l of
  Tip -> x
  _   -> findMin l
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE findMin #-}
#endif
354
-- | /O(log n)/. The largest element of a set, found by following right
-- subtrees. Calls 'error' on the empty set.
findMax :: Set a -> a
findMax Tip = error "Set.findMax: empty set has no maximal element"
findMax (Bin _ x _ r) = case r of
  Tip -> x
  _   -> findMax r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE findMax #-}
#endif
363
-- | /O(log n)/. Remove the smallest element. The empty set is returned
-- unchanged.
deleteMin :: Set a -> Set a
deleteMin Tip = Tip
deleteMin (Bin _ x l r) = case l of
  Tip -> r
  _   -> balanceR x (deleteMin l) r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE deleteMin #-}
#endif
372
-- | /O(log n)/. Remove the largest element. The empty set is returned
-- unchanged.
deleteMax :: Set a -> Set a
deleteMax Tip = Tip
deleteMax (Bin _ x l r) = case r of
  Tip -> l
  _   -> balanceL x l (deleteMax r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE deleteMax #-}
#endif
381
382 {--------------------------------------------------------------------
383 Union.
384 --------------------------------------------------------------------}
-- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
-- The sets are combined from left to right with a strict left fold.
unions :: Ord a => [Set a] -> Set a
unions = foldlStrict union empty
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE unions #-}
#endif
391
-- | /O(n+m)/. The union of two sets, preferring the first set when
-- equal elements are encountered.
-- The implementation uses the efficient /hedge-union/ algorithm.
-- Hedge-union is more efficient on (bigset `union` smallset).
union :: Ord a => Set a -> Set a -> Set a
union t1 t2 = case (t1, t2) of
  (Tip, _)             -> t2
  (_, Tip)             -> t1
  -- A one-element operand is handled as a plain insertion; 'insert' lets
  -- the left element win, 'insertR' keeps the element already in the left.
  (Bin _ x Tip Tip, _) -> insert x t2
  (_, Bin _ x Tip Tip) -> insertR x t1
  _                    -> hedgeUnion NothingS NothingS t1 t2
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE union #-}
#endif
405
-- Union of two trees restricted to the open interval given by the two
-- bounds ('NothingS' means unbounded on that side). The first tree is walked
-- node by node; the second is trimmed to the interval of each half.
hedgeUnion :: Ord a
           => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
hedgeUnion _ _ t1 Tip = t1
hedgeUnion lo hi Tip (Bin _ x l r) =
  join x (filterGt lo l) (filterLt hi r)
hedgeUnion lo hi (Bin _ x l r) t2 =
  let mid   = JustS x
      lower = hedgeUnion lo mid l (trim lo mid t2)
      upper = hedgeUnion mid hi r (trim mid hi t2)
  in join x lower upper
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE hedgeUnion #-}
#endif
420
421 {--------------------------------------------------------------------
422 Difference
423 --------------------------------------------------------------------}
-- | /O(n+m)/. Difference of two sets: the elements of the first set that
-- are not in the second.
-- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
difference :: Ord a => Set a -> Set a -> Set a
difference t1 t2 = case (t1, t2) of
  (Tip, _) -> Tip
  (_, Tip) -> t1
  _        -> hedgeDiff NothingS NothingS t1 t2
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE difference #-}
#endif
433
-- Difference of two trees restricted to the open interval given by the two
-- bounds. The second tree is walked node by node; each of its elements is
-- left out of the result, and the first tree is trimmed to each half.
hedgeDiff :: Ord a
          => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
hedgeDiff _ _ Tip _ = Tip
hedgeDiff lo hi (Bin _ x l r) Tip =
  join x (filterGt lo l) (filterLt hi r)
hedgeDiff lo hi t (Bin _ x l r) =
  let mid   = JustS x
      lower = hedgeDiff lo mid (trim lo mid t) l
      upper = hedgeDiff mid hi (trim mid hi t) r
  in merge lower upper
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE hedgeDiff #-}
#endif
448
449 {--------------------------------------------------------------------
450 Intersection
451 --------------------------------------------------------------------}
-- | /O(n+m)/. The intersection of two sets.
-- Elements of the result come from the first set, so for example
--
-- > import qualified Data.Set as S
-- > data AB = A | B deriving Show
-- > instance Ord AB where compare _ _ = EQ
-- > instance Eq AB where _ == _ = True
-- > main = print (S.singleton A `S.intersection` S.singleton B,
-- >               S.singleton B `S.intersection` S.singleton A)
--
-- prints @(fromList [A],fromList [B])@.
intersection :: Ord a => Set a -> Set a -> Set a
intersection Tip _ = Tip
intersection _ Tip = Tip
intersection t1@(Bin s1 x1 l1 r1) t2@(Bin s2 x2 l2 r2)
  | s1 >= s2 =
      -- Split the first set around the root of the second; on a hit the
      -- element handed back by splitLookup is the copy stored in t1.
      let (below, hit, above) = splitLookup x2 t1
          lower = intersection below l2
          upper = intersection above r2
      in maybe (merge lower upper) (\x -> join x lower upper) hit
  | otherwise =
      -- Split the second set around the root of the first.
      let (below, present, above) = splitMember x1 t2
          lower = intersection l1 below
          upper = intersection r1 above
      in if present then join x1 lower upper
                    else merge lower upper
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE intersection #-}
#endif
482
483 {--------------------------------------------------------------------
484 Filter and partition
485 --------------------------------------------------------------------}
-- | /O(n)/. Filter all elements that satisfy the predicate.
--
-- No 'Ord' instance is required: the relative order of the retained
-- elements is unchanged, and the tree is rebuilt with 'join' and 'merge',
-- which never compare elements.
filter :: (a -> Bool) -> Set a -> Set a
filter _ Tip = Tip
filter p (Bin _ x l r)
  | p x       = join x (filter p l) (filter p r)   -- keep x between the halves
  | otherwise = merge (filter p l) (filter p r)    -- drop x, fuse the halves
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE filter #-}
#endif
495
-- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
-- the predicate and one with all elements that don't satisfy the predicate.
-- See also 'split'.
--
-- No 'Ord' instance is required: both results are rebuilt with 'join' and
-- 'merge', which never compare elements.
partition :: (a -> Bool) -> Set a -> (Set a,Set a)
partition _ Tip = (Tip, Tip)
partition p (Bin _ x l r) = case (partition p l, partition p r) of
  ((l1, l2), (r1, r2))
    | p x       -> (join x l1 r1, merge l2 r2)   -- x goes to the first set
    | otherwise -> (merge l1 r1, join x l2 r2)   -- x goes to the second set
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE partition #-}
#endif
508
509 {----------------------------------------------------------------------
510 Map
511 ----------------------------------------------------------------------}
512
-- | /O(n*log n)/.
-- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
--
-- It's worth noting that the size of the result may be smaller if,
-- for some @(x,y)@, @x \/= y && f x == f y@
map :: (Ord a, Ord b) => (a->b) -> Set a -> Set b
map f = fromList . List.map f . toList
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE map #-}
#endif
524
-- | /O(n)/.
-- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
-- /The precondition is not checked./
-- Semi-formally, we have:
--
-- > and [x < y ==> f x < f y | x <- ls, y <- ls]
-- >                     ==> mapMonotonic f s == map f s
-- >     where ls = toList s
--
-- The shape of the tree is kept; only the stored elements are replaced.
mapMonotonic :: (a->b) -> Set a -> Set b
mapMonotonic f = go
  where
    go Tip           = Tip
    go (Bin n x l r) = Bin n (f x) (go l) (go r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE mapMonotonic #-}
#endif
541
542 {--------------------------------------------------------------------
543 Fold
544 --------------------------------------------------------------------}
-- | /O(n)/. Fold over the elements of a set in an unspecified order.
-- Currently a synonym for the module's own right fold.
fold :: (a -> b -> b) -> b -> Set a -> b
fold = foldr
{-# INLINE fold #-}
549
-- | /O(n)/. Right fold over the tree: at every node the right subtree is
-- folded first, the node's element is combined next, and the left subtree
-- last, so @foldr (:) []@ lists the elements in tree order.
foldr :: (a -> b -> b) -> b -> Set a -> b
foldr f = walk
  where
    walk acc t = case t of
      Tip         -> acc
      Bin _ x l r -> walk (f x (walk acc r)) l
{-# INLINE foldr #-}
557
558 {--------------------------------------------------------------------
559 List variations
560 --------------------------------------------------------------------}
-- | /O(n)/. The elements of a set, as a list. Synonym for 'toList'.
elems :: Set a -> [a]
elems = toList
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE elems #-}
#endif
567
568 {--------------------------------------------------------------------
569 Lists
570 --------------------------------------------------------------------}
-- | /O(n)/. Convert the set to a list of elements. Synonym for
-- 'toAscList'.
toList :: Set a -> [a]
toList = toAscList
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE toList #-}
#endif
577
-- | /O(n)/. Convert the set to an ascending list of elements, by
-- right-folding the tree with the list constructor.
toAscList :: Set a -> [a]
toAscList = foldr (:) []
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE toAscList #-}
#endif
584
-- | /O(n*log n)/. Create a set from a list of elements, inserting them one
-- by one from left to right with a strict left fold.
fromList :: Ord a => [a] -> Set a
fromList = foldlStrict (\acc x -> insert x acc) empty
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE fromList #-}
#endif
593
594 {--------------------------------------------------------------------
595 Building trees from ascending/descending lists can be done in linear time.
596
597 Note that if [xs] is ascending that:
598 fromAscList xs == fromList xs
599 --------------------------------------------------------------------}
-- | /O(n)/. Build a set from an ascending list in linear time.
-- /The precondition (input list is ascending) is not checked./
--
-- Runs of adjacent equal elements are collapsed to their first element
-- before the tree is built.
fromAscList :: Eq a => [a] -> Set a
fromAscList xs
  = fromDistinctAscList (dedup xs)
  where
    -- [dedup ys] drops every element equal to its predecessor.
    dedup []       = []
    dedup (y : ys) = squash y ys

    -- [squash cur ys]: cur is the pending representative of the current run.
    squash cur [] = [cur]
    squash cur (y : ys)
      | cur == y  = squash cur ys
      | otherwise = cur : squash y ys
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE fromAscList #-}
#endif
620
621
-- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
-- /The precondition (input list is strictly ascending) is not checked./
fromDistinctAscList :: [a] -> Set a
fromDistinctAscList xs
  = build const (length xs) xs
  where
    -- 1) use continuations so that we use heap space instead of stack space.
    -- 2) special case for n==5 to build bushier trees.
    --
    -- [build k n ys] turns the first n elements of ys into a tree and hands
    -- that tree, together with the unconsumed rest of the list, to k.
    build k 0 ys = k Tip ys
    build k 5 ys = case ys of
      (a:b:c:d:e:rest)
        -> k (bin d (bin b (singleton a) (singleton c)) (singleton e)) rest
      _ -> error "fromDistinctAscList build 5"
    build k n ys = nr `seq` build (buildR nr k) nl ys
      where
        nl = n `div` 2
        nr = n - nl - 1

    -- [buildR n k l ys]: the left subtree l is finished; the next list
    -- element becomes the root, then a right subtree of n elements is built
    -- and the assembled node is passed on to k.
    buildR _ _ _ []     = error "fromDistinctAscList buildR []"
    buildR n k l (y:ys) = build (\r zs -> k (bin y l r) zs) n ys
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE fromDistinctAscList #-}
#endif
646
647 {--------------------------------------------------------------------
648 Eq converts the set to a list. In a lazy setting, this
649 actually seems one of the faster methods to compare two trees
650 and it is certainly the simplest :-)
651 --------------------------------------------------------------------}
-- Two sets are equal when they have the same size and the same ascending
-- element list; the cheap size test runs first.
instance Eq a => Eq (Set a) where
  s == t = size s == size t && toAscList s == toAscList t
654
655 {--------------------------------------------------------------------
656 Ord
657 --------------------------------------------------------------------}
658
-- Sets are ordered like their ascending element lists.
instance Ord a => Ord (Set a) where
    compare a b = toAscList a `compare` toAscList b
661
662 {--------------------------------------------------------------------
663 Show
664 --------------------------------------------------------------------}
-- A set is shown as @fromList@ applied to its element list, parenthesised
-- when the surrounding precedence is above 10.
instance Show a => Show (Set a) where
  showsPrec d s =
    showParen (d > 10) (showString "fromList " . shows (toList s))
668
669 {--------------------------------------------------------------------
670 Read
671 --------------------------------------------------------------------}
-- Reads the format produced by the Show instance: the word @fromList@
-- followed by a list of elements.
instance (Read a, Ord a) => Read (Set a) where
#ifdef __GLASGOW_HASKELL__
  readPrec = parens . prec 10 $ do
    Ident "fromList" <- lexP
    elts <- readPrec
    return (fromList elts)

  readListPrec = readListPrecDefault
#else
  readsPrec d = readParen (d > 10) $ \ input -> do
    ("fromList", rest) <- lex input
    (elts, leftover) <- reads rest
    return (fromList elts, leftover)
#endif
686
687 {--------------------------------------------------------------------
688 Typeable/Data
689 --------------------------------------------------------------------}
690
691 #include "Typeable.h"
692 INSTANCE_TYPEABLE1(Set,setTc,"Set")
693
694 {--------------------------------------------------------------------
695 Utility functions that return sub-ranges of the original
696 tree. Some functions take a `Maybe value` as an argument to
697 allow comparisons against infinite values. These are called `blow`
698 (Nothing is -\infty) and `bhigh` (here Nothing is +\infty).
699 We use MaybeS value, which is a Maybe strict in the Just case.
700
701 [trim blow bhigh t] A tree that is either empty or where [x > blow]
702 and [x < bhigh] for the value [x] of the root.
703 [filterGt blow t] A tree where for all values [k]. [k > blow]
704 [filterLt bhigh t] A tree where for all values [k]. [k < bhigh]
705
706 [split k t] Returns two trees [l] and [r] where all values
707 in [l] are <[k] and all keys in [r] are >[k].
708 [splitMember k t] Just like [split] but also returns whether [k]
709 was found in the tree.
710 --------------------------------------------------------------------}
711
-- A 'Maybe' look-alike whose 'JustS' payload is strict; it represents the
-- optional lower and upper bounds of the hedge algorithms.
data MaybeS a
  = NothingS
  | JustS !a
713
714 {--------------------------------------------------------------------
715 [trim blo bhi t] trims away all subtrees that surely contain no
716 values between the range [blo] to [bhi]. The returned tree is either
717 empty or the key of the root is between @blo@ and @bhi@.
718 --------------------------------------------------------------------}
-- Descend from the root until a node is found whose value lies strictly
-- between the two bounds ('NothingS' means no bound on that side), and
-- return the subtree rooted there; the result may also be 'Tip'.
trim :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a
trim lo hi t = case (lo, hi) of
  (NothingS, NothingS) -> t
  (JustS lx, NothingS) -> above lx t
  (NothingS, JustS hx) -> below hx t
  (JustS lx, JustS hx) -> between lx hx t
  where
    -- root too small: continue in the right subtree
    above lx (Bin _ x _ r) | x <= lx = above lx r
    above _ t' = t'

    -- root too large: continue in the left subtree
    below hx (Bin _ x l _) | x >= hx = below hx l
    below _ t' = t'

    between lx hx (Bin _ x _ r) | x <= lx = between lx hx r
    between lx hx (Bin _ x l _) | x >= hx = between lx hx l
    between _ _ t' = t'
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE trim #-}
#endif
731
732 {--------------------------------------------------------------------
  [filterGt b t] keeps only the values >[b] of tree [t]
  [filterLt b t] keeps only the values <[b] of tree [t]
735 --------------------------------------------------------------------}
-- Keep only the elements strictly greater than the bound; with 'NothingS'
-- there is no bound and the tree is returned as is.
filterGt :: Ord a => MaybeS a -> Set a -> Set a
filterGt NothingS t = t
filterGt (JustS b) t = go t
  where
    go Tip = Tip
    go (Bin _ x l r) = case compare b x of
      LT -> join x (go l) r   -- x and everything right of it stay
      EQ -> r                 -- only the right subtree is greater
      GT -> go r              -- x and its left subtree are too small
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE filterGt #-}
#endif
746
-- Keep only the elements strictly smaller than the bound; with 'NothingS'
-- there is no bound and the tree is returned as is.
filterLt :: Ord a => MaybeS a -> Set a -> Set a
filterLt NothingS t = t
filterLt (JustS b) t = go t
  where
    go Tip = Tip
    go (Bin _ x l r) = case compare x b of
      LT -> join x l (go r)   -- x and everything left of it stay
      EQ -> l                 -- only the left subtree is smaller
      GT -> go l              -- x and its right subtree are too large
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE filterLt #-}
#endif
757
758 {--------------------------------------------------------------------
759 Split
760 --------------------------------------------------------------------}
-- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
-- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
-- comprises the elements of @set@ greater than @x@. An element equal to
-- @x@ appears in neither part.
split :: Ord a => a -> Set a -> (Set a,Set a)
split _ Tip = (Tip,Tip)
split x (Bin _ y l r) =
  case compare x y of
    EQ -> (l, r)
    LT -> let (smaller, larger) = split x l
          in (smaller, join y larger r)
    GT -> let (smaller, larger) = split x r
          in (join y l smaller, larger)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE split #-}
#endif
774
-- | /O(log n)/. Performs a 'split' but also returns whether the pivot
-- element was found in the original set.
splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
splitMember x t = (smaller, present, larger)
  where
    (smaller, hit, larger) = splitLookup x t
    present = case hit of
      Nothing -> False
      Just _  -> True
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE splitMember #-}
#endif
783
-- | /O(log n)/. Performs a 'split' but also returns the pivot
-- element that was found in the original set; the returned element is the
-- one stored in the set, not the argument.
splitLookup :: Ord a => a -> Set a -> (Set a,Maybe a,Set a)
splitLookup _ Tip = (Tip,Nothing,Tip)
splitLookup x (Bin _ y l r) =
  case compare x y of
    EQ -> (l, Just y, r)
    LT -> let (smaller, hit, larger) = splitLookup x l
          in (smaller, hit, join y larger r)
    GT -> let (smaller, hit, larger) = splitLookup x r
          in (join y l smaller, hit, larger)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE splitLookup #-}
#endif
796
797 {--------------------------------------------------------------------
798 Utility functions that maintain the balance properties of the tree.
799 All constructors assume that all values in [l] < [x] and all values
800 in [r] > [x], and that [l] and [r] are valid trees.
801
802 In order of sophistication:
803 [Bin sz x l r] The type constructor.
804 [bin x l r] Maintains the correct size, assumes that both [l]
805 and [r] are balanced with respect to each other.
806 [balance x l r] Restores the balance and size.
807 Assumes that the original tree was balanced and
808 that [l] or [r] has changed by at most one element.
809 [join x l r] Restores balance and size.
810
811 Furthermore, we can construct a new tree from two trees. Both operations
812 assume that all values in [l] < all values in [r] and that [l] and [r]
813 are valid:
814 [glue l r] Glues [l] and [r] together. Assumes that [l] and
815 [r] are already balanced with respect to each other.
816 [merge l r] Merges two trees and restores balance.
817
  Note: in contrast to Adams' paper, we use (<=) comparisons instead
  of (<) comparisons in [join], [merge] and [balance].
820 Quickcheck (on [difference]) showed that this was necessary in order
821 to maintain the invariants. It is quite unsatisfactory that I haven't
822 been able to find out why this is actually the case! Fortunately, it
823 doesn't hurt to be a bit more conservative.
824 --------------------------------------------------------------------}
825
826 {--------------------------------------------------------------------
827 Join
828 --------------------------------------------------------------------}
-- [join x l r] builds a balanced tree from the element x and two trees of
-- arbitrary relative size, assuming all of l < x < all of r. When one side
-- is more than delta times larger, x is pushed down into that side.
join :: a -> Set a -> Set a -> Set a
join x left right = case (left, right) of
  (Tip, _) -> insertMin x right
  (_, Tip) -> insertMax x left
  (Bin sizeL y ly ry, Bin sizeR z lz rz)
    | delta*sizeL < sizeR -> balanceL z (join x left lz) rz
    | delta*sizeR < sizeL -> balanceR y ly (join x ry right)
    | otherwise           -> bin x left right
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE join #-}
#endif
839
840
-- insertMin and insertMax don't perform potentially expensive comparisons:
-- the new element is placed at the far left (insertMin) or far right
-- (insertMax) of the tree, so the caller must know that it belongs there.
insertMax,insertMin :: a -> Set a -> Set a
insertMax x Tip           = singleton x
insertMax x (Bin _ y l r) = balanceR y l (insertMax x r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE insertMax #-}
#endif

insertMin x Tip           = singleton x
insertMin x (Bin _ y l r) = balanceL y (insertMin x l) r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE insertMin #-}
#endif
860
861 {--------------------------------------------------------------------
862 [merge l r]: merges two trees.
863 --------------------------------------------------------------------}
-- [merge l r] combines two trees of arbitrary relative size into one
-- balanced tree, assuming all of l < all of r. While one side is more than
-- delta times larger, the smaller tree is pushed down into it.
merge :: Set a -> Set a -> Set a
merge left right = case (left, right) of
  (Tip, _) -> right
  (_, Tip) -> left
  (Bin sizeL x lx rx, Bin sizeR y ly ry)
    | delta*sizeL < sizeR -> balanceL y (merge left ly) ry
    | delta*sizeR < sizeL -> balanceR x lx (merge rx right)
    | otherwise           -> glue left right
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE merge #-}
#endif
874
875 {--------------------------------------------------------------------
876 [glue l r]: glues two trees together.
877 Assumes that [l] and [r] are already balanced with respect to each other.
878 --------------------------------------------------------------------}
-- [glue l r] joins two trees that are already balanced with respect to
-- each other. The new root is taken from the larger side: the maximum of l
-- or the minimum of r.
glue :: Set a -> Set a -> Set a
glue Tip r = r
glue l Tip = l
glue l r
  | size l > size r = balanceR maxL restL r
  | otherwise       = balanceL minR l restR
  where
    (maxL, restL) = deleteFindMax l
    (minR, restR) = deleteFindMin r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE glue #-}
#endif
888
889
-- | /O(log n)/. Delete and find the minimal element.
--
-- > deleteFindMin set = (findMin set, deleteMin set)
--
-- For the empty set the first component of the result is an 'error' value
-- and the second is the empty set.
deleteFindMin :: Set a -> (a,Set a)
deleteFindMin (Bin _ x Tip r) = (x, r)
deleteFindMin (Bin _ x l r)   = (least, balanceR x rest r)
  where
    (least, rest) = deleteFindMin l
deleteFindMin Tip = (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE deleteFindMin #-}
#endif
903
-- | /O(log n)/. Delete and find the maximal element.
--
-- > deleteFindMax set = (findMax set, deleteMax set)
--
-- For the empty set the first component of the result is an 'error' value
-- and the second is the empty set.
deleteFindMax :: Set a -> (a,Set a)
deleteFindMax (Bin _ x l Tip) = (x, l)
deleteFindMax (Bin _ x l r)   = (greatest, balanceL x l rest)
  where
    (greatest, rest) = deleteFindMax r
deleteFindMax Tip = (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE deleteFindMax #-}
#endif
916
-- | /O(log n)/. Retrieves the minimal element of the set, and the set
-- stripped of that element, or 'Nothing' if passed an empty set.
minView :: Set a -> Maybe (a, Set a)
minView t = case t of
  Tip -> Nothing
  _   -> Just (deleteFindMin t)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE minView #-}
#endif
925
-- | /O(log n)/. Retrieves the maximal element of the set, and the set
-- stripped of that element, or 'Nothing' if passed an empty set.
maxView :: Set a -> Maybe (a, Set a)
maxView t = case t of
  Tip -> Nothing
  _   -> Just (deleteFindMax t)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE maxView #-}
#endif
934
935 {--------------------------------------------------------------------
936 [balance x l r] balances two trees with value x.
937 The sizes of the trees should balance after decreasing the
938 size of one of them. (a rotation).
939
940 [delta] is the maximal relative difference between the sizes of
          two trees; it corresponds to the [w] in Adams' paper.
942 [ratio] is the ratio between an outer and inner sibling of the
943 heavier subtree in an unbalanced setting. It determines
944 whether a double or single rotation should be performed
          to restore balance. It corresponds to the inverse
          of $\alpha$ in Adams' article.
947
  Note that according to Adams' paper:
949 - [delta] should be larger than 4.646 with a [ratio] of 2.
950 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
951
  But Adams' paper is erroneous:
953 - it can be proved that for delta=2 and delta>=5 there does
954 not exist any ratio that would work
955 - delta=4.5 and ratio=2 does not work
956
957 That leaves two reasonable variants, delta=3 and delta=4,
958 both with ratio=2.
959
960 - A lower [delta] leads to a more 'perfectly' balanced tree.
961 - A higher [delta] performs less rebalancing.
962
963 In the benchmarks, delta=3 is faster on insert operations,
964 and delta=4 has slightly better deletes. As the insert speedup
965 is larger, we currently use delta=3.
966
967 --------------------------------------------------------------------}
-- Maximal allowed relative size difference between two sibling subtrees.
delta :: Int
delta = 3

-- Threshold that selects between a single and a double rotation when
-- rebalancing.
ratio :: Int
ratio = 2
971
972 -- The balance function is equivalent to the following:
973 --
974 -- balance :: a -> Set a -> Set a -> Set a
975 -- balance x l r
976 -- | sizeL + sizeR <= 1 = Bin sizeX x l r
977 -- | sizeR > delta*sizeL = rotateL x l r
978 -- | sizeL > delta*sizeR = rotateR x l r
979 -- | otherwise = Bin sizeX x l r
980 -- where
981 -- sizeL = size l
982 -- sizeR = size r
983 -- sizeX = sizeL + sizeR + 1
984 --
985 -- rotateL :: a -> Set a -> Set a -> Set a
986 -- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
987 -- | otherwise = doubleL x l r
988 -- rotateR :: a -> Set a -> Set a -> Set a
989 -- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
990 -- | otherwise = doubleR x l r
991 --
992 -- singleL, singleR :: a -> Set a -> Set a -> Set a
993 -- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
994 -- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
995 --
996 -- doubleL, doubleR :: a -> Set a -> Set a -> Set a
997 -- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
998 -- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
999 --
1000 -- It is only written in such a way that every node is pattern-matched only once.
1001 --
1002 -- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
1003 -- In case it is needed, it can be found in Data.Map.
1004
1005 -- Functions balanceL and balanceR are specialised versions of balance.
1006 -- balanceL only checks whether the left subtree is too big,
1007 -- balanceR only checks whether the right subtree is too big.
1008
-- balanceL is called when left subtree might have been inserted to or when
-- right subtree might have been deleted from.
--
-- It rebuilds the node holding x over the subtrees l and r, checking only
-- whether the left subtree has become too big. If it has, a single or a
-- double rotation (chosen via ratio) moves elements to the right. The sizes
-- stored in the rebuilt nodes are derived from the sizes already stored in
-- the subtrees.
balanceL :: a -> Set a -> Set a -> Set a
balanceL x l r = case r of
  -- Right subtree is empty: the possible shapes of l are enumerated by hand.
  Tip -> case l of
    Tip -> Bin 1 x Tip Tip
    (Bin _ _ Tip Tip) -> Bin 2 x l Tip
    -- The children of the inner node are dropped here, so this case
    -- presumably relies on l being balanced (inner node is a singleton).
    (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
    -- Result size is fixed to 3, i.e. ll is taken to be a singleton.
    (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
    (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
      -- single rotation: lx becomes the root
      | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
      -- double rotation: lrx becomes the root
      | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)

  (Bin rs _ _ _) -> case l of
    Tip -> Bin (1+rs) x Tip r

    (Bin ls lx ll lr)
      -- Left side is more than delta times the right side: rotate.
      | ls > delta*rs -> case (ll, lr) of
          (Bin lls _ _ _, Bin lrs lrx lrl lrr)
            | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
            | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
          -- Presumably unreachable when l itself satisfies the balance
          -- invariant; the message names this module so that a violation is
          -- reported against Data.Set rather than Data.Map.
          (_, _) -> error "Failure in Data.Set.balanceL"
      | otherwise -> Bin (1+ls+rs) x l r
{-# NOINLINE balanceL #-}
1033
-- balanceR is called when right subtree might have been inserted to or when
-- left subtree might have been deleted from.
--
-- It rebuilds the node holding x over the subtrees l and r, checking only
-- whether the right subtree has become too big. If it has, a single or a
-- double rotation (chosen via ratio) moves elements to the left. The sizes
-- stored in the rebuilt nodes are derived from the sizes already stored in
-- the subtrees.
balanceR :: a -> Set a -> Set a -> Set a
balanceR x l r = case l of
  -- Left subtree is empty: the possible shapes of r are enumerated by hand.
  Tip -> case r of
    Tip -> Bin 1 x Tip Tip
    (Bin _ _ Tip Tip) -> Bin 2 x Tip r
    -- Result size is fixed to 3, i.e. rr is taken to be a singleton.
    (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
    -- The children of the inner node are dropped here, so this case
    -- presumably relies on r being balanced (inner node is a singleton).
    (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
    (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
      -- single rotation: rx becomes the root
      | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
      -- double rotation: rlx becomes the root
      | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)

  (Bin ls _ _ _) -> case r of
    Tip -> Bin (1+ls) x l Tip

    (Bin rs rx rl rr)
      -- Right side is more than delta times the left side: rotate.
      | rs > delta*ls -> case (rl, rr) of
          (Bin rls rlx rll rlr, Bin rrs _ _ _)
            | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
            | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
          -- Presumably unreachable when r itself satisfies the balance
          -- invariant; the message names this module so that a violation is
          -- reported against Data.Set rather than Data.Map.
          (_, _) -> error "Failure in Data.Set.balanceR"
      | otherwise -> Bin (1+ls+rs) x l r
{-# NOINLINE balanceR #-}
1058
1059 {--------------------------------------------------------------------
1060 The bin constructor maintains the size of the tree
1061 --------------------------------------------------------------------}
-- Build a node from a value and two subtrees, computing the stored size
-- from the sizes of the subtrees. No rebalancing is performed.
bin :: a -> Set a -> Set a -> Set a
bin x l r = Bin total x l r
  where
    total = size l + size r + 1
{-# INLINE bin #-}
1066
1067
1068 {--------------------------------------------------------------------
1069 Utilities
1070 --------------------------------------------------------------------}
-- Left fold over a list that forces each new accumulator (to weak head
-- normal form) before moving on to the next element.
foldlStrict :: (a -> b -> a) -> a -> [b] -> a
foldlStrict f = loop
  where
    loop acc list = case list of
      []       -> acc
      (y : ys) -> let acc' = f acc y in acc' `seq` loop acc' ys
{-# INLINE foldlStrict #-}
1077
1078 {--------------------------------------------------------------------
1079 Debugging
1080 --------------------------------------------------------------------}
-- | /O(n)/. Show the tree that implements the set. The tree is shown
-- in a compressed, hanging format (that is, @'showTreeWith' True False@).
showTree :: Show a => Set a -> String
showTree = showTreeWith True False
1086
1087
{- | /O(n)/. The expression (@showTreeWith hang wide set@) shows
 the tree that implements the set. If @hang@ is
 @True@, a /hanging/ tree is shown; otherwise a rotated tree is shown. If
 @wide@ is 'True', an extra wide version is shown.
1092
1093 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
1094 > 4
1095 > +--2
1096 > | +--1
1097 > | +--3
1098 > +--5
1099 >
1100 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
1101 > 4
1102 > |
1103 > +--2
1104 > | |
1105 > | +--1
1106 > | |
1107 > | +--3
1108 > |
1109 > +--5
1110 >
1111 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
1112 > +--5
1113 > |
1114 > 4
1115 > |
1116 > | +--3
1117 > | |
1118 > +--2
1119 > |
1120 > +--1
1121
1122 -}
showTreeWith :: Show a => Bool -> Bool -> Set a -> String
showTreeWith hang wide t =
  -- Both renderers produce a ShowS; applying it to "" yields the final text.
  if hang
    then showsTreeHang wide [] t ""
    else showsTree wide [] [] t ""
1127
-- Render a tree in the rotated layout: the right subtree is emitted first,
-- then the node itself, then the left subtree. lbars and rbars are the bar
-- prefixes used for the lines of the node and of its subtrees.
showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
showsTree _ lbars _ Tip = showsBars lbars . showString "|\n"
showsTree _ lbars _ (Bin _ x Tip Tip) = showsBars lbars . shows x . showString "\n"
showsTree wide lbars rbars (Bin _ x l r) =
  upper . showWide wide rbars . here . showWide wide lbars . lower
  where
    upper = showsTree wide (withBar rbars) (withEmpty rbars) r
    here  = showsBars lbars . shows x . showString "\n"
    lower = showsTree wide (withEmpty lbars) (withBar lbars) l
1140
-- Render a tree in the hanging layout: the node is emitted first, followed
-- by its left subtree and then its right subtree, each under the bar
-- prefixes accumulated in bars.
showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
showsTreeHang _ bars Tip = showsBars bars . showString "|\n"
showsTreeHang _ bars (Bin _ x Tip Tip) = showsBars bars . shows x . showString "\n"
showsTreeHang wide bars (Bin _ x l r) =
  here . spacer . leftPart . spacer . rightPart
  where
    here      = showsBars bars . shows x . showString "\n"
    spacer    = showWide wide bars
    leftPart  = showsTreeHang wide (withBar bars) l
    rightPart = showsTreeHang wide (withEmpty bars) r
1153
-- In wide mode, prepend a spacer line made of the bars (outermost first)
-- followed by "|" and a newline; otherwise leave the output untouched.
showWide :: Bool -> [String] -> String -> String
showWide wide bars = if wide then separator else id
  where
    separator = showString (concat (reverse bars)) . showString "|\n"
1158
-- Emit the prefix for a node line: every bar except the most recently added
-- one (outermost first), followed by the node connector. With no bars at
-- all nothing is emitted.
showsBars :: [String] -> ShowS
showsBars [] = id
showsBars (_ : outer) = showString (concat (reverse outer)) . showString node

-- Connector drawn in front of every non-root node.
node :: String
node = "+--"
1167
-- Push one more level of indentation onto the list of bars.
-- NOTE(review): whitespace in this copy of the file appears to have been
-- collapsed; confirm the exact width of these padding strings upstream.

-- Level with a vertical bar (a sibling subtree still follows below).
withBar :: [String] -> [String]
withBar = ("| " :)

-- Level without a bar.
withEmpty :: [String] -> [String]
withEmpty = (" " :)
1171
1172 {--------------------------------------------------------------------
1173 Assertions
1174 --------------------------------------------------------------------}
-- | /O(n)/. Test if the internal set structure is valid: the tree must be
-- balanced, ordered, and carry correct size annotations.
valid :: Ord a => Set a -> Bool
valid t = and [balanced t, ordered t, validsize t]
1179
-- Check the search-tree ordering: every element must be strictly greater
-- than everything in its left subtree and strictly smaller than everything
-- in its right subtree.
ordered :: Ord a => Set a -> Bool
ordered = within (const True) (const True)
  where
    -- lo and hi are the predicates an element of this subtree must satisfy.
    within _ _ Tip = True
    within lo hi (Bin _ x l r) =
      lo x && hi x && within lo (< x) l && within (> x) hi r
1188
-- Check the balance invariant at every node: either the two subtrees hold
-- at most one element in total, or neither is more than delta times the
-- size of the other.
balanced :: Set a -> Bool
balanced Tip = True
balanced (Bin _ _ l r) = localOk && balanced l && balanced r
  where
    sl = size l
    sr = size r
    localOk = sl + sr <= 1 || (sl <= delta * sr && sr <= delta * sl)
1195
-- Check that the size stored in every node equals the number of elements
-- actually present below it.
validsize :: Set a -> Bool
validsize t = realsize t == Just (size t)
  where
    -- Just the verified size of a subtree, or Nothing as soon as some node
    -- carries a size that disagrees with its children.
    realsize Tip = Just 0
    realsize (Bin sz _ l r) = do
      n <- realsize l
      m <- realsize r
      if n + m + 1 == sz then Just sz else Nothing