Rename to splitRoot. Add the same for Sets and expose it.
[packages/containers.git] / Data / Set / Base.hs
1 {-# LANGUAGE CPP #-}
2 #if __GLASGOW_HASKELL__
3 {-# LANGUAGE DeriveDataTypeable, StandaloneDeriving #-}
4 #endif
5 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
6 {-# LANGUAGE Trustworthy #-}
7 #endif
8 -----------------------------------------------------------------------------
9 -- |
10 -- Module : Data.Set.Base
11 -- Copyright : (c) Daan Leijen 2002
12 -- License : BSD-style
13 -- Maintainer : libraries@haskell.org
14 -- Stability : provisional
15 -- Portability : portable
16 --
17 -- An efficient implementation of sets.
18 --
19 -- These modules are intended to be imported qualified, to avoid name
20 -- clashes with Prelude functions, e.g.
21 --
22 -- > import Data.Set (Set)
23 -- > import qualified Data.Set as Set
24 --
25 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
26 -- trees of /bounded balance/) as described by:
27 --
28 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
29 -- Journal of Functional Programming 3(4):553-562, October 1993,
30 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
31 --
32 -- * J. Nievergelt and E.M. Reingold,
33 -- \"/Binary search trees of bounded balance/\",
34 -- SIAM journal of computing 2(1), March 1973.
35 --
36 -- Note that the implementation is /left-biased/ -- the elements of a
37 -- first argument are always preferred to the second, for example in
38 -- 'union' or 'insert'. Of course, left-biasing can only be observed
39 -- when equality is an equivalence relation instead of structural
40 -- equality.
41 -----------------------------------------------------------------------------
42
43 -- [Note: Using INLINABLE]
44 -- ~~~~~~~~~~~~~~~~~~~~~~~
45 -- It is crucial to the performance that the functions specialize on the Ord
46 -- type when possible. GHC 7.0 and higher does this by itself when it sees th
47 -- unfolding of a function -- that is why all public functions are marked
48 -- INLINABLE (that exposes the unfolding).
49
50
51 -- [Note: Using INLINE]
52 -- ~~~~~~~~~~~~~~~~~~~~
53 -- For other compilers and GHC pre 7.0, we mark some of the functions INLINE.
54 -- We mark the functions that just navigate down the tree (lookup, insert,
55 -- delete and similar). That navigation code gets inlined and thus specialized
56 -- when possible. There is a price to pay -- code growth. The code INLINED is
57 -- therefore only the tree navigation, all the real work (rebalancing) is not
58 -- INLINED by using a NOINLINE.
59 --
60 -- All methods marked INLINE have to be nonrecursive -- a 'go' function doing
61 -- the real work is provided.
62
63
64 -- [Note: Type of local 'go' function]
65 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
66 -- If the local 'go' function uses an Ord class, it sometimes heap-allocates
67 -- the Ord dictionary when the 'go' function does not have explicit type.
68 -- In that case we give 'go' explicit type. But this slightly decrease
69 -- performance, as the resulting 'go' function can float out to top level.
70
71
72 -- [Note: Local 'go' functions and capturing]
73 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
74 -- As opposed to IntSet, when 'go' function captures an argument, increased
75 -- heap-allocation can occur: sometimes in a polymorphic function, the 'go'
76 -- floats out of its enclosing function and then it heap-allocates the
77 -- dictionary and the argument. Maybe it floats out too late and strictness
78 -- analyzer cannot see that these could be passed on stack.
79 --
80 -- For example, change 'member' so that its local 'go' function is not passing
81 -- argument x and then look at the resulting code for hedgeInt.
82
83
84 -- [Note: Order of constructors]
85 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
86 -- The order of constructors of Set matters when considering performance.
87 -- Currently in GHC 7.0, when type has 2 constructors, a forward conditional
88 -- jump is made when successfully matching second constructor. Successful match
89 -- of first constructor results in the forward jump not taken.
90 -- On GHC 7.0, reordering constructors from Tip | Bin to Bin | Tip
91 -- improves the benchmark by up to 10% on x86.
92
93 module Data.Set.Base (
94 -- * Set type
95 Set(..) -- instance Eq,Ord,Show,Read,Data,Typeable
96
97 -- * Operators
98 , (\\)
99
100 -- * Query
101 , null
102 , size
103 , member
104 , notMember
105 , lookupLT
106 , lookupGT
107 , lookupLE
108 , lookupGE
109 , isSubsetOf
110 , isProperSubsetOf
111
112 -- * Construction
113 , empty
114 , singleton
115 , insert
116 , delete
117
118 -- * Combine
119 , union
120 , unions
121 , difference
122 , intersection
123
124 -- * Filter
125 , filter
126 , partition
127 , split
128 , splitMember
129 , splitRoot
130
131 -- * Indexed
132 , lookupIndex
133 , findIndex
134 , elemAt
135 , deleteAt
136
137 -- * Map
138 , map
139 , mapMonotonic
140
141 -- * Folds
142 , foldr
143 , foldl
144 -- ** Strict folds
145 , foldr'
146 , foldl'
147 -- ** Legacy folds
148 , fold
149
150 -- * Min\/Max
151 , findMin
152 , findMax
153 , deleteMin
154 , deleteMax
155 , deleteFindMin
156 , deleteFindMax
157 , maxView
158 , minView
159
160 -- * Conversion
161
162 -- ** List
163 , elems
164 , toList
165 , fromList
166
167 -- ** Ordered list
168 , toAscList
169 , toDescList
170 , fromAscList
171 , fromDistinctAscList
172
173 -- * Debugging
174 , showTree
175 , showTreeWith
176 , valid
177
178 -- Internals (for testing)
179 , bin
180 , balanced
181 , link
182 , merge
183 ) where
184
185 import Prelude hiding (filter,foldl,foldr,null,map)
186 import qualified Data.List as List
187 import Data.Bits (shiftL, shiftR)
188 import Data.Monoid (Monoid(..))
189 import qualified Data.Foldable as Foldable
190 import Data.Typeable
191 import Control.DeepSeq (NFData(rnf))
192
193 import Data.StrictPair
194
195 #if __GLASGOW_HASKELL__
196 import GHC.Exts ( build )
197 import Text.Read
198 import Data.Data
199 #endif
200
201 -- Use macros to define strictness of functions.
202 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
203 -- We do not use BangPatterns, because they are not in any standard and we
204 -- want the compilers to be compiled by as many compilers as possible.
205 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
206 #define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
207 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
208
209 {--------------------------------------------------------------------
210 Operators
211 --------------------------------------------------------------------}
212 infixl 9 \\ --
213
214 -- | /O(n+m)/. See 'difference'.
215 (\\) :: Ord a => Set a -> Set a -> Set a
216 m1 \\ m2 = difference m1 m2
217 #if __GLASGOW_HASKELL__ >= 700
218 {-# INLINABLE (\\) #-}
219 #endif
220
221 {--------------------------------------------------------------------
222 Sets are size balanced trees
223 --------------------------------------------------------------------}
224 -- | A set of values @a@.
225
226 -- See Note: Order of constructors
227 data Set a = Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a)
228 | Tip
229
230 type Size = Int
231
232 instance Ord a => Monoid (Set a) where
233 mempty = empty
234 mappend = union
235 mconcat = unions
236
237 instance Foldable.Foldable Set where
238 fold t = go t
239 where go Tip = mempty
240 go (Bin 1 k _ _) = k
241 go (Bin _ k l r) = go l `mappend` (k `mappend` go r)
242 {-# INLINABLE fold #-}
243 foldr = foldr
244 {-# INLINE foldr #-}
245 foldl = foldl
246 {-# INLINE foldl #-}
247 foldMap f t = go t
248 where go Tip = mempty
249 go (Bin 1 k _ _) = f k
250 go (Bin _ k l r) = go l `mappend` (f k `mappend` go r)
251 {-# INLINE foldMap #-}
252
253 #if __GLASGOW_HASKELL__
254
255 {--------------------------------------------------------------------
256 A Data instance
257 --------------------------------------------------------------------}
258
259 -- This instance preserves data abstraction at the cost of inefficiency.
260 -- We provide limited reflection services for the sake of data abstraction.
261
262 instance (Data a, Ord a) => Data (Set a) where
263 gfoldl f z set = z fromList `f` (toList set)
264 toConstr _ = fromListConstr
265 gunfold k z c = case constrIndex c of
266 1 -> k (z fromList)
267 _ -> error "gunfold"
268 dataTypeOf _ = setDataType
269 dataCast1 f = gcast1 f
270
271 fromListConstr :: Constr
272 fromListConstr = mkConstr setDataType "fromList" [] Prefix
273
274 setDataType :: DataType
275 setDataType = mkDataType "Data.Set.Base.Set" [fromListConstr]
276
277 #endif
278
279 {--------------------------------------------------------------------
280 Query
281 --------------------------------------------------------------------}
282 -- | /O(1)/. Is this the empty set?
283 null :: Set a -> Bool
284 null Tip = True
285 null (Bin {}) = False
286 {-# INLINE null #-}
287
288 -- | /O(1)/. The number of elements in the set.
289 size :: Set a -> Int
290 size Tip = 0
291 size (Bin sz _ _ _) = sz
292 {-# INLINE size #-}
293
294 -- | /O(log n)/. Is the element in the set?
295 member :: Ord a => a -> Set a -> Bool
296 member = go
297 where
298 STRICT_1_OF_2(go)
299 go _ Tip = False
300 go x (Bin _ y l r) = case compare x y of
301 LT -> go x l
302 GT -> go x r
303 EQ -> True
304 #if __GLASGOW_HASKELL__ >= 700
305 {-# INLINABLE member #-}
306 #else
307 {-# INLINE member #-}
308 #endif
309
310 -- | /O(log n)/. Is the element not in the set?
311 notMember :: Ord a => a -> Set a -> Bool
312 notMember a t = not $ member a t
313 #if __GLASGOW_HASKELL__ >= 700
314 {-# INLINABLE notMember #-}
315 #else
316 {-# INLINE notMember #-}
317 #endif
318
319 -- | /O(log n)/. Find largest element smaller than the given one.
320 --
321 -- > lookupLT 3 (fromList [3, 5]) == Nothing
322 -- > lookupLT 5 (fromList [3, 5]) == Just 3
323 lookupLT :: Ord a => a -> Set a -> Maybe a
324 lookupLT = goNothing
325 where
326 STRICT_1_OF_2(goNothing)
327 goNothing _ Tip = Nothing
328 goNothing x (Bin _ y l r) | x <= y = goNothing x l
329 | otherwise = goJust x y r
330
331 STRICT_1_OF_3(goJust)
332 goJust _ best Tip = Just best
333 goJust x best (Bin _ y l r) | x <= y = goJust x best l
334 | otherwise = goJust x y r
335 #if __GLASGOW_HASKELL__ >= 700
336 {-# INLINABLE lookupLT #-}
337 #else
338 {-# INLINE lookupLT #-}
339 #endif
340
341 -- | /O(log n)/. Find smallest element greater than the given one.
342 --
343 -- > lookupGT 4 (fromList [3, 5]) == Just 5
344 -- > lookupGT 5 (fromList [3, 5]) == Nothing
345 lookupGT :: Ord a => a -> Set a -> Maybe a
346 lookupGT = goNothing
347 where
348 STRICT_1_OF_2(goNothing)
349 goNothing _ Tip = Nothing
350 goNothing x (Bin _ y l r) | x < y = goJust x y l
351 | otherwise = goNothing x r
352
353 STRICT_1_OF_3(goJust)
354 goJust _ best Tip = Just best
355 goJust x best (Bin _ y l r) | x < y = goJust x y l
356 | otherwise = goJust x best r
357 #if __GLASGOW_HASKELL__ >= 700
358 {-# INLINABLE lookupGT #-}
359 #else
360 {-# INLINE lookupGT #-}
361 #endif
362
363 -- | /O(log n)/. Find largest element smaller or equal to the given one.
364 --
365 -- > lookupLE 2 (fromList [3, 5]) == Nothing
366 -- > lookupLE 4 (fromList [3, 5]) == Just 3
367 -- > lookupLE 5 (fromList [3, 5]) == Just 5
368 lookupLE :: Ord a => a -> Set a -> Maybe a
369 lookupLE = goNothing
370 where
371 STRICT_1_OF_2(goNothing)
372 goNothing _ Tip = Nothing
373 goNothing x (Bin _ y l r) = case compare x y of LT -> goNothing x l
374 EQ -> Just y
375 GT -> goJust x y r
376
377 STRICT_1_OF_3(goJust)
378 goJust _ best Tip = Just best
379 goJust x best (Bin _ y l r) = case compare x y of LT -> goJust x best l
380 EQ -> Just y
381 GT -> goJust x y r
382 #if __GLASGOW_HASKELL__ >= 700
383 {-# INLINABLE lookupLE #-}
384 #else
385 {-# INLINE lookupLE #-}
386 #endif
387
388 -- | /O(log n)/. Find smallest element greater or equal to the given one.
389 --
390 -- > lookupGE 3 (fromList [3, 5]) == Just 3
391 -- > lookupGE 4 (fromList [3, 5]) == Just 5
392 -- > lookupGE 6 (fromList [3, 5]) == Nothing
393 lookupGE :: Ord a => a -> Set a -> Maybe a
394 lookupGE = goNothing
395 where
396 STRICT_1_OF_2(goNothing)
397 goNothing _ Tip = Nothing
398 goNothing x (Bin _ y l r) = case compare x y of LT -> goJust x y l
399 EQ -> Just y
400 GT -> goNothing x r
401
402 STRICT_1_OF_3(goJust)
403 goJust _ best Tip = Just best
404 goJust x best (Bin _ y l r) = case compare x y of LT -> goJust x y l
405 EQ -> Just y
406 GT -> goJust x best r
407 #if __GLASGOW_HASKELL__ >= 700
408 {-# INLINABLE lookupGE #-}
409 #else
410 {-# INLINE lookupGE #-}
411 #endif
412
413 {--------------------------------------------------------------------
414 Construction
415 --------------------------------------------------------------------}
416 -- | /O(1)/. The empty set.
417 empty :: Set a
418 empty = Tip
419 {-# INLINE empty #-}
420
421 -- | /O(1)/. Create a singleton set.
422 singleton :: a -> Set a
423 singleton x = Bin 1 x Tip Tip
424 {-# INLINE singleton #-}
425
426 {--------------------------------------------------------------------
427 Insertion, Deletion
428 --------------------------------------------------------------------}
429 -- | /O(log n)/. Insert an element in a set.
430 -- If the set already contains an element equal to the given value,
431 -- it is replaced with the new value.
432
433 -- See Note: Type of local 'go' function
434 insert :: Ord a => a -> Set a -> Set a
435 insert = go
436 where
437 go :: Ord a => a -> Set a -> Set a
438 STRICT_1_OF_2(go)
439 go x Tip = singleton x
440 go x (Bin sz y l r) = case compare x y of
441 LT -> balanceL y (go x l) r
442 GT -> balanceR y l (go x r)
443 EQ -> Bin sz x l r
444 #if __GLASGOW_HASKELL__ >= 700
445 {-# INLINABLE insert #-}
446 #else
447 {-# INLINE insert #-}
448 #endif
449
450 -- Insert an element to the set only if it is not in the set.
451 -- Used by `union`.
452
453 -- See Note: Type of local 'go' function
454 insertR :: Ord a => a -> Set a -> Set a
455 insertR = go
456 where
457 go :: Ord a => a -> Set a -> Set a
458 STRICT_1_OF_2(go)
459 go x Tip = singleton x
460 go x t@(Bin _ y l r) = case compare x y of
461 LT -> balanceL y (go x l) r
462 GT -> balanceR y l (go x r)
463 EQ -> t
464 #if __GLASGOW_HASKELL__ >= 700
465 {-# INLINABLE insertR #-}
466 #else
467 {-# INLINE insertR #-}
468 #endif
469
470 -- | /O(log n)/. Delete an element from a set.
471
472 -- See Note: Type of local 'go' function
473 delete :: Ord a => a -> Set a -> Set a
474 delete = go
475 where
476 go :: Ord a => a -> Set a -> Set a
477 STRICT_1_OF_2(go)
478 go _ Tip = Tip
479 go x (Bin _ y l r) = case compare x y of
480 LT -> balanceR y (go x l) r
481 GT -> balanceL y l (go x r)
482 EQ -> glue l r
483 #if __GLASGOW_HASKELL__ >= 700
484 {-# INLINABLE delete #-}
485 #else
486 {-# INLINE delete #-}
487 #endif
488
489 {--------------------------------------------------------------------
490 Subset
491 --------------------------------------------------------------------}
492 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
493 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
494 isProperSubsetOf s1 s2
495 = (size s1 < size s2) && (isSubsetOf s1 s2)
496 #if __GLASGOW_HASKELL__ >= 700
497 {-# INLINABLE isProperSubsetOf #-}
498 #endif
499
500
501 -- | /O(n+m)/. Is this a subset?
502 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
503 isSubsetOf :: Ord a => Set a -> Set a -> Bool
504 isSubsetOf t1 t2
505 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
506 #if __GLASGOW_HASKELL__ >= 700
507 {-# INLINABLE isSubsetOf #-}
508 #endif
509
510 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
511 isSubsetOfX Tip _ = True
512 isSubsetOfX _ Tip = False
513 isSubsetOfX (Bin _ x l r) t
514 = found && isSubsetOfX l lt && isSubsetOfX r gt
515 where
516 (lt,found,gt) = splitMember x t
517 #if __GLASGOW_HASKELL__ >= 700
518 {-# INLINABLE isSubsetOfX #-}
519 #endif
520
521
522 {--------------------------------------------------------------------
523 Minimal, Maximal
524 --------------------------------------------------------------------}
525 -- | /O(log n)/. The minimal element of a set.
526 findMin :: Set a -> a
527 findMin (Bin _ x Tip _) = x
528 findMin (Bin _ _ l _) = findMin l
529 findMin Tip = error "Set.findMin: empty set has no minimal element"
530
531 -- | /O(log n)/. The maximal element of a set.
532 findMax :: Set a -> a
533 findMax (Bin _ x _ Tip) = x
534 findMax (Bin _ _ _ r) = findMax r
535 findMax Tip = error "Set.findMax: empty set has no maximal element"
536
537 -- | /O(log n)/. Delete the minimal element. Returns an empty set if the set is empty.
538 deleteMin :: Set a -> Set a
539 deleteMin (Bin _ _ Tip r) = r
540 deleteMin (Bin _ x l r) = balanceR x (deleteMin l) r
541 deleteMin Tip = Tip
542
543 -- | /O(log n)/. Delete the maximal element. Returns an empty set if the set is empty.
544 deleteMax :: Set a -> Set a
545 deleteMax (Bin _ _ l Tip) = l
546 deleteMax (Bin _ x l r) = balanceL x l (deleteMax r)
547 deleteMax Tip = Tip
548
549 {--------------------------------------------------------------------
550 Union.
551 --------------------------------------------------------------------}
552 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
553 unions :: Ord a => [Set a] -> Set a
554 unions = foldlStrict union empty
555 #if __GLASGOW_HASKELL__ >= 700
556 {-# INLINABLE unions #-}
557 #endif
558
559 -- | /O(n+m)/. The union of two sets, preferring the first set when
560 -- equal elements are encountered.
561 -- The implementation uses the efficient /hedge-union/ algorithm.
562 union :: Ord a => Set a -> Set a -> Set a
563 union Tip t2 = t2
564 union t1 Tip = t1
565 union t1 t2 = hedgeUnion NothingS NothingS t1 t2
566 #if __GLASGOW_HASKELL__ >= 700
567 {-# INLINABLE union #-}
568 #endif
569
570 hedgeUnion :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
571 hedgeUnion _ _ t1 Tip = t1
572 hedgeUnion blo bhi Tip (Bin _ x l r) = link x (filterGt blo l) (filterLt bhi r)
573 hedgeUnion _ _ t1 (Bin _ x Tip Tip) = insertR x t1 -- According to benchmarks, this special case increases
574 -- performance up to 30%. It does not help in difference or intersection.
575 hedgeUnion blo bhi (Bin _ x l r) t2 = link x (hedgeUnion blo bmi l (trim blo bmi t2))
576 (hedgeUnion bmi bhi r (trim bmi bhi t2))
577 where bmi = JustS x
578 #if __GLASGOW_HASKELL__ >= 700
579 {-# INLINABLE hedgeUnion #-}
580 #endif
581
582 {--------------------------------------------------------------------
583 Difference
584 --------------------------------------------------------------------}
585 -- | /O(n+m)/. Difference of two sets.
586 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
587 difference :: Ord a => Set a -> Set a -> Set a
588 difference Tip _ = Tip
589 difference t1 Tip = t1
590 difference t1 t2 = hedgeDiff NothingS NothingS t1 t2
591 #if __GLASGOW_HASKELL__ >= 700
592 {-# INLINABLE difference #-}
593 #endif
594
595 hedgeDiff :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
596 hedgeDiff _ _ Tip _ = Tip
597 hedgeDiff blo bhi (Bin _ x l r) Tip = link x (filterGt blo l) (filterLt bhi r)
598 hedgeDiff blo bhi t (Bin _ x l r) = merge (hedgeDiff blo bmi (trim blo bmi t) l)
599 (hedgeDiff bmi bhi (trim bmi bhi t) r)
600 where bmi = JustS x
601 #if __GLASGOW_HASKELL__ >= 700
602 {-# INLINABLE hedgeDiff #-}
603 #endif
604
605 {--------------------------------------------------------------------
606 Intersection
607 --------------------------------------------------------------------}
608 -- | /O(n+m)/. The intersection of two sets. The implementation uses an
609 -- efficient /hedge/ algorithm comparable with /hedge-union/. Elements of the
610 -- result come from the first set, so for example
611 --
612 -- > import qualified Data.Set as S
613 -- > data AB = A | B deriving Show
614 -- > instance Ord AB where compare _ _ = EQ
615 -- > instance Eq AB where _ == _ = True
616 -- > main = print (S.singleton A `S.intersection` S.singleton B,
617 -- > S.singleton B `S.intersection` S.singleton A)
618 --
619 -- prints @(fromList [A],fromList [B])@.
620 intersection :: Ord a => Set a -> Set a -> Set a
621 intersection Tip _ = Tip
622 intersection _ Tip = Tip
623 intersection t1 t2 = hedgeInt NothingS NothingS t1 t2
624 #if __GLASGOW_HASKELL__ >= 700
625 {-# INLINABLE intersection #-}
626 #endif
627
628 hedgeInt :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
629 hedgeInt _ _ _ Tip = Tip
630 hedgeInt _ _ Tip _ = Tip
631 hedgeInt blo bhi (Bin _ x l r) t2 = let l' = hedgeInt blo bmi l (trim blo bmi t2)
632 r' = hedgeInt bmi bhi r (trim bmi bhi t2)
633 in if x `member` t2 then link x l' r' else merge l' r'
634 where bmi = JustS x
635 #if __GLASGOW_HASKELL__ >= 700
636 {-# INLINABLE hedgeInt #-}
637 #endif
638
639 {--------------------------------------------------------------------
640 Filter and partition
641 --------------------------------------------------------------------}
642 -- | /O(n)/. Filter all elements that satisfy the predicate.
643 filter :: (a -> Bool) -> Set a -> Set a
644 filter _ Tip = Tip
645 filter p (Bin _ x l r)
646 | p x = link x (filter p l) (filter p r)
647 | otherwise = merge (filter p l) (filter p r)
648
649 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
650 -- the predicate and one with all elements that don't satisfy the predicate.
651 -- See also 'split'.
652 partition :: (a -> Bool) -> Set a -> (Set a,Set a)
653 partition p0 t0 = toPair $ go p0 t0
654 where
655 go _ Tip = (Tip :*: Tip)
656 go p (Bin _ x l r) = case (go p l, go p r) of
657 ((l1 :*: l2), (r1 :*: r2))
658 | p x -> link x l1 r1 :*: merge l2 r2
659 | otherwise -> merge l1 r1 :*: link x l2 r2
660
661 {----------------------------------------------------------------------
662 Map
663 ----------------------------------------------------------------------}
664
665 -- | /O(n*log n)/.
666 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
667 --
668 -- It's worth noting that the size of the result may be smaller if,
669 -- for some @(x,y)@, @x \/= y && f x == f y@
670
671 map :: Ord b => (a->b) -> Set a -> Set b
672 map f = fromList . List.map f . toList
673 #if __GLASGOW_HASKELL__ >= 700
674 {-# INLINABLE map #-}
675 #endif
676
677 -- | /O(n)/. The
678 --
679 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
680 -- /The precondition is not checked./
681 -- Semi-formally, we have:
682 --
683 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
684 -- > ==> mapMonotonic f s == map f s
685 -- > where ls = toList s
686
687 mapMonotonic :: (a->b) -> Set a -> Set b
688 mapMonotonic _ Tip = Tip
689 mapMonotonic f (Bin sz x l r) = Bin sz (f x) (mapMonotonic f l) (mapMonotonic f r)
690
691 {--------------------------------------------------------------------
692 Fold
693 --------------------------------------------------------------------}
694 -- | /O(n)/. Fold the elements in the set using the given right-associative
695 -- binary operator. This function is an equivalent of 'foldr' and is present
696 -- for compatibility only.
697 --
698 -- /Please note that fold will be deprecated in the future and removed./
699 fold :: (a -> b -> b) -> b -> Set a -> b
700 fold = foldr
701 {-# INLINE fold #-}
702
703 -- | /O(n)/. Fold the elements in the set using the given right-associative
704 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
705 --
706 -- For example,
707 --
708 -- > toAscList set = foldr (:) [] set
709 foldr :: (a -> b -> b) -> b -> Set a -> b
710 foldr f z = go z
711 where
712 go z' Tip = z'
713 go z' (Bin _ x l r) = go (f x (go z' r)) l
714 {-# INLINE foldr #-}
715
716 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
717 -- evaluated before using the result in the next application. This
718 -- function is strict in the starting value.
719 foldr' :: (a -> b -> b) -> b -> Set a -> b
720 foldr' f z = go z
721 where
722 STRICT_1_OF_2(go)
723 go z' Tip = z'
724 go z' (Bin _ x l r) = go (f x (go z' r)) l
725 {-# INLINE foldr' #-}
726
727 -- | /O(n)/. Fold the elements in the set using the given left-associative
728 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
729 --
730 -- For example,
731 --
732 -- > toDescList set = foldl (flip (:)) [] set
733 foldl :: (a -> b -> a) -> a -> Set b -> a
734 foldl f z = go z
735 where
736 go z' Tip = z'
737 go z' (Bin _ x l r) = go (f (go z' l) x) r
738 {-# INLINE foldl #-}
739
740 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
741 -- evaluated before using the result in the next application. This
742 -- function is strict in the starting value.
743 foldl' :: (a -> b -> a) -> a -> Set b -> a
744 foldl' f z = go z
745 where
746 STRICT_1_OF_2(go)
747 go z' Tip = z'
748 go z' (Bin _ x l r) = go (f (go z' l) x) r
749 {-# INLINE foldl' #-}
750
751 {--------------------------------------------------------------------
752 List variations
753 --------------------------------------------------------------------}
754 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
755 -- Subject to list fusion.
756 elems :: Set a -> [a]
757 elems = toAscList
758
759 {--------------------------------------------------------------------
760 Lists
761 --------------------------------------------------------------------}
762 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
763 toList :: Set a -> [a]
764 toList = toAscList
765
766 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list fusion.
767 toAscList :: Set a -> [a]
768 toAscList = foldr (:) []
769
770 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
771 -- fusion.
772 toDescList :: Set a -> [a]
773 toDescList = foldl (flip (:)) []
774
775 -- List fusion for the list generating functions.
776 #if __GLASGOW_HASKELL__
777 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
778 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
779 foldrFB :: (a -> b -> b) -> b -> Set a -> b
780 foldrFB = foldr
781 {-# INLINE[0] foldrFB #-}
782 foldlFB :: (a -> b -> a) -> a -> Set b -> a
783 foldlFB = foldl
784 {-# INLINE[0] foldlFB #-}
785
786 -- Inline elems and toList, so that we need to fuse only toAscList.
787 {-# INLINE elems #-}
788 {-# INLINE toList #-}
789
790 -- The fusion is enabled up to phase 2 included. If it does not succeed,
791 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
792 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
793 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
794 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
795 -- before phase 0, otherwise the fusion rules would not fire at all.
796 {-# NOINLINE[0] toAscList #-}
797 {-# NOINLINE[0] toDescList #-}
798 {-# RULES "Set.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
799 {-# RULES "Set.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
800 {-# RULES "Set.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
801 {-# RULES "Set.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
802 #endif
803
804 -- | /O(n*log n)/. Create a set from a list of elements.
805 --
806 -- If the elemens are ordered, linear-time implementation is used,
807 -- with the performance equal to 'fromDistinctAscList'.
808
809 -- For some reason, when 'singleton' is used in fromList or in
810 -- create, it is not inlined, so we inline it manually.
811 fromList :: Ord a => [a] -> Set a
812 fromList [] = Tip
813 fromList [x] = Bin 1 x Tip Tip
814 fromList (x0 : xs0) | not_ordered x0 xs0 = fromList' (Bin 1 x0 Tip Tip) xs0
815 | otherwise = go (1::Int) (Bin 1 x0 Tip Tip) xs0
816 where
817 not_ordered _ [] = False
818 not_ordered x (y : _) = x >= y
819 {-# INLINE not_ordered #-}
820
821 fromList' t0 xs = foldlStrict ins t0 xs
822 where ins t x = insert x t
823
824 STRICT_1_OF_3(go)
825 go _ t [] = t
826 go _ t [x] = insertMax x t
827 go s l xs@(x : xss) | not_ordered x xss = fromList' l xs
828 | otherwise = case create s xss of
829 (r, ys, []) -> go (s `shiftL` 1) (link x l r) ys
830 (r, _, ys) -> fromList' (link x l r) ys
831
832 -- The create is returning a triple (tree, xs, ys). Both xs and ys
833 -- represent not yet processed elements and only one of them can be nonempty.
834 -- If ys is nonempty, the keys in ys are not ordered with respect to tree
835 -- and must be inserted using fromList'. Otherwise the keys have been
836 -- ordered so far.
837 STRICT_1_OF_2(create)
838 create _ [] = (Tip, [], [])
839 create s xs@(x : xss)
840 | s == 1 = if not_ordered x xss then (Bin 1 x Tip Tip, [], xss)
841 else (Bin 1 x Tip Tip, xss, [])
842 | otherwise = case create (s `shiftR` 1) xs of
843 res@(_, [], _) -> res
844 (l, [y], zs) -> (insertMax y l, [], zs)
845 (l, ys@(y:yss), _) | not_ordered y yss -> (l, [], ys)
846 | otherwise -> case create (s `shiftR` 1) yss of
847 (r, zs, ws) -> (link y l r, zs, ws)
848 #if __GLASGOW_HASKELL__ >= 700
849 {-# INLINABLE fromList #-}
850 #endif
851
852 {--------------------------------------------------------------------
853 Building trees from ascending/descending lists can be done in linear time.
854
855 Note that if [xs] is ascending that:
856 fromAscList xs == fromList xs
857 --------------------------------------------------------------------}
858 -- | /O(n)/. Build a set from an ascending list in linear time.
859 -- /The precondition (input list is ascending) is not checked./
860 fromAscList :: Eq a => [a] -> Set a
861 fromAscList xs
862 = fromDistinctAscList (combineEq xs)
863 where
864 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
865 combineEq xs'
866 = case xs' of
867 [] -> []
868 [x] -> [x]
869 (x:xx) -> combineEq' x xx
870
871 combineEq' z [] = [z]
872 combineEq' z (x:xs')
873 | z==x = combineEq' z xs'
874 | otherwise = z:combineEq' x xs'
875 #if __GLASGOW_HASKELL__ >= 700
876 {-# INLINABLE fromAscList #-}
877 #endif
878
879
880 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
881 -- /The precondition (input list is strictly ascending) is not checked./
882
883 -- For some reason, when 'singleton' is used in fromDistinctAscList or in
884 -- create, it is not inlined, so we inline it manually.
885 fromDistinctAscList :: [a] -> Set a
886 fromDistinctAscList [] = Tip
887 fromDistinctAscList (x0 : xs0) = go (1::Int) (Bin 1 x0 Tip Tip) xs0
888 where
889 STRICT_1_OF_3(go)
890 go _ t [] = t
891 go s l (x : xs) = case create s xs of
892 (r, ys) -> go (s `shiftL` 1) (link x l r) ys
893
894 STRICT_1_OF_2(create)
895 create _ [] = (Tip, [])
896 create s xs@(x : xs')
897 | s == 1 = (Bin 1 x Tip Tip, xs')
898 | otherwise = case create (s `shiftR` 1) xs of
899 res@(_, []) -> res
900 (l, y:ys) -> case create (s `shiftR` 1) ys of
901 (r, zs) -> (link y l r, zs)
902
903 {--------------------------------------------------------------------
904 Eq converts the set to a list. In a lazy setting, this
905 actually seems one of the faster methods to compare two trees
906 and it is certainly the simplest :-)
907 --------------------------------------------------------------------}
908 instance Eq a => Eq (Set a) where
909 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
910
911 {--------------------------------------------------------------------
912 Ord
913 --------------------------------------------------------------------}
914
915 instance Ord a => Ord (Set a) where
916 compare s1 s2 = compare (toAscList s1) (toAscList s2)
917
918 {--------------------------------------------------------------------
919 Show
920 --------------------------------------------------------------------}
921 instance Show a => Show (Set a) where
922 showsPrec p xs = showParen (p > 10) $
923 showString "fromList " . shows (toList xs)
924
925 {--------------------------------------------------------------------
926 Read
927 --------------------------------------------------------------------}
928 instance (Read a, Ord a) => Read (Set a) where
929 #ifdef __GLASGOW_HASKELL__
930 readPrec = parens $ prec 10 $ do
931 Ident "fromList" <- lexP
932 xs <- readPrec
933 return (fromList xs)
934
935 readListPrec = readListPrecDefault
936 #else
937 readsPrec p = readParen (p > 10) $ \ r -> do
938 ("fromList",s) <- lex r
939 (xs,t) <- reads s
940 return (fromList xs,t)
941 #endif
942
943 {--------------------------------------------------------------------
944 Typeable/Data
945 --------------------------------------------------------------------}
946
947 #include "Typeable.h"
948 INSTANCE_TYPEABLE1(Set,setTc,"Set")
949
950 {--------------------------------------------------------------------
951 NFData
952 --------------------------------------------------------------------}
953
954 instance NFData a => NFData (Set a) where
955 rnf Tip = ()
956 rnf (Bin _ y l r) = rnf y `seq` rnf l `seq` rnf r
957
958 {--------------------------------------------------------------------
959 Utility functions that return sub-ranges of the original
960 tree. Some functions take a `Maybe value` as an argument to
961 allow comparisons against infinite values. These are called `blow`
962 (Nothing is -\infty) and `bhigh` (here Nothing is +\infty).
963 We use MaybeS value, which is a Maybe strict in the Just case.
964
965 [trim blow bhigh t] A tree that is either empty or where [x > blow]
966 and [x < bhigh] for the value [x] of the root.
967 [filterGt blow t] A tree where for all values [k]. [k > blow]
968 [filterLt bhigh t] A tree where for all values [k]. [k < bhigh]
969
970 [split k t] Returns two trees [l] and [r] where all values
971 in [l] are <[k] and all keys in [r] are >[k].
972 [splitMember k t] Just like [split] but also returns whether [k]
973 was found in the tree.
974 --------------------------------------------------------------------}
975
976 data MaybeS a = NothingS | JustS !a
977
978 {--------------------------------------------------------------------
979 [trim blo bhi t] trims away all subtrees that surely contain no
980 values between the range [blo] to [bhi]. The returned tree is either
981 empty or the key of the root is between @blo@ and @bhi@.
982 --------------------------------------------------------------------}
983 trim :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a
984 trim NothingS NothingS t = t
985 trim (JustS lx) NothingS t = greater lx t where greater lo (Bin _ x _ r) | x <= lo = greater lo r
986 greater _ t' = t'
987 trim NothingS (JustS hx) t = lesser hx t where lesser hi (Bin _ x l _) | x >= hi = lesser hi l
988 lesser _ t' = t'
989 trim (JustS lx) (JustS hx) t = middle lx hx t where middle lo hi (Bin _ x _ r) | x <= lo = middle lo hi r
990 middle lo hi (Bin _ x l _) | x >= hi = middle lo hi l
991 middle _ _ t' = t'
992 #if __GLASGOW_HASKELL__ >= 700
993 {-# INLINABLE trim #-}
994 #endif
995
996 {--------------------------------------------------------------------
997 [filterGt b t] filter all values >[b] from tree [t]
998 [filterLt b t] filter all values <[b] from tree [t]
999 --------------------------------------------------------------------}
1000 filterGt :: Ord a => MaybeS a -> Set a -> Set a
1001 filterGt NothingS t = t
1002 filterGt (JustS b) t = filter' b t
1003 where filter' _ Tip = Tip
1004 filter' b' (Bin _ x l r) =
1005 case compare b' x of LT -> link x (filter' b' l) r
1006 EQ -> r
1007 GT -> filter' b' r
1008 #if __GLASGOW_HASKELL__ >= 700
1009 {-# INLINABLE filterGt #-}
1010 #endif
1011
1012 filterLt :: Ord a => MaybeS a -> Set a -> Set a
1013 filterLt NothingS t = t
1014 filterLt (JustS b) t = filter' b t
1015 where filter' _ Tip = Tip
1016 filter' b' (Bin _ x l r) =
1017 case compare x b' of LT -> link x l (filter' b' r)
1018 EQ -> l
1019 GT -> filter' b' l
1020 #if __GLASGOW_HASKELL__ >= 700
1021 {-# INLINABLE filterLt #-}
1022 #endif
1023
1024 {--------------------------------------------------------------------
1025 Split
1026 --------------------------------------------------------------------}
1027 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
1028 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
1029 -- comprises the elements of @set@ greater than @x@.
1030 split :: Ord a => a -> Set a -> (Set a,Set a)
1031 split x0 t0 = toPair $ go x0 t0
1032 where
1033 go _ Tip = (Tip :*: Tip)
1034 go x (Bin _ y l r)
1035 = case compare x y of
1036 LT -> let (lt :*: gt) = go x l in (lt :*: link y gt r)
1037 GT -> let (lt :*: gt) = go x r in (link y l lt :*: gt)
1038 EQ -> (l :*: r)
1039 #if __GLASGOW_HASKELL__ >= 700
1040 {-# INLINABLE split #-}
1041 #endif
1042
1043 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
1044 -- element was found in the original set.
1045 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
1046 splitMember _ Tip = (Tip, False, Tip)
1047 splitMember x (Bin _ y l r)
1048 = case compare x y of
1049 LT -> let (lt, found, gt) = splitMember x l
1050 gt' = link y gt r
1051 in gt' `seq` (lt, found, gt')
1052 GT -> let (lt, found, gt) = splitMember x r
1053 lt' = link y l lt
1054 in lt' `seq` (lt', found, gt)
1055 EQ -> (l, True, r)
1056 #if __GLASGOW_HASKELL__ >= 700
1057 {-# INLINABLE splitMember #-}
1058 #endif
1059
1060 {--------------------------------------------------------------------
1061 Indexing
1062 --------------------------------------------------------------------}
1063
1064 -- | /O(log n)/. Return the /index/ of an element, which is its zero-based
1065 -- index in the sorted sequence of elements. The index is a number from /0/ up
1066 -- to, but not including, the 'size' of the set. Calls 'error' when the element
1067 -- is not a 'member' of the set.
1068 --
1069 -- > findIndex 2 (fromList [5,3]) Error: element is not in the set
1070 -- > findIndex 3 (fromList [5,3]) == 0
1071 -- > findIndex 5 (fromList [5,3]) == 1
1072 -- > findIndex 6 (fromList [5,3]) Error: element is not in the set
1073
1074 -- See Note: Type of local 'go' function
1075 findIndex :: Ord a => a -> Set a -> Int
1076 findIndex = go 0
1077 where
1078 go :: Ord a => Int -> a -> Set a -> Int
1079 STRICT_1_OF_3(go)
1080 STRICT_2_OF_3(go)
1081 go _ _ Tip = error "Set.findIndex: element is not in the set"
1082 go idx x (Bin _ kx l r) = case compare x kx of
1083 LT -> go idx x l
1084 GT -> go (idx + size l + 1) x r
1085 EQ -> idx + size l
1086 #if __GLASGOW_HASKELL__ >= 700
1087 {-# INLINABLE findIndex #-}
1088 #endif
1089
1090 -- | /O(log n)/. Lookup the /index/ of an element, which is its zero-based index in
1091 -- the sorted sequence of elements. The index is a number from /0/ up to, but not
1092 -- including, the 'size' of the set.
1093 --
1094 -- > isJust (lookupIndex 2 (fromList [5,3])) == False
1095 -- > fromJust (lookupIndex 3 (fromList [5,3])) == 0
1096 -- > fromJust (lookupIndex 5 (fromList [5,3])) == 1
1097 -- > isJust (lookupIndex 6 (fromList [5,3])) == False
1098
1099 -- See Note: Type of local 'go' function
1100 lookupIndex :: Ord a => a -> Set a -> Maybe Int
1101 lookupIndex = go 0
1102 where
1103 go :: Ord a => Int -> a -> Set a -> Maybe Int
1104 STRICT_1_OF_3(go)
1105 STRICT_2_OF_3(go)
1106 go _ _ Tip = Nothing
1107 go idx x (Bin _ kx l r) = case compare x kx of
1108 LT -> go idx x l
1109 GT -> go (idx + size l + 1) x r
1110 EQ -> Just $! idx + size l
1111 #if __GLASGOW_HASKELL__ >= 700
1112 {-# INLINABLE lookupIndex #-}
1113 #endif
1114
1115 -- | /O(log n)/. Retrieve an element by its /index/, i.e. by its zero-based
1116 -- index in the sorted sequence of elements. If the /index/ is out of range (less
1117 -- than zero, greater or equal to 'size' of the set), 'error' is called.
1118 --
1119 -- > elemAt 0 (fromList [5,3]) == 3
1120 -- > elemAt 1 (fromList [5,3]) == 5
1121 -- > elemAt 2 (fromList [5,3]) Error: index out of range
1122
1123 elemAt :: Int -> Set a -> a
1124 STRICT_1_OF_2(elemAt)
1125 elemAt _ Tip = error "Set.elemAt: index out of range"
1126 elemAt i (Bin _ x l r)
1127 = case compare i sizeL of
1128 LT -> elemAt i l
1129 GT -> elemAt (i-sizeL-1) r
1130 EQ -> x
1131 where
1132 sizeL = size l
1133
1134 -- | /O(log n)/. Delete the element at /index/, i.e. by its zero-based index in
1135 -- the sorted sequence of elements. If the /index/ is out of range (less than zero,
1136 -- greater or equal to 'size' of the set), 'error' is called.
1137 --
1138 -- > deleteAt 0 (fromList [5,3]) == singleton 5
1139 -- > deleteAt 1 (fromList [5,3]) == singleton 3
1140 -- > deleteAt 2 (fromList [5,3]) Error: index out of range
1141 -- > deleteAt (-1) (fromList [5,3]) Error: index out of range
1142
1143 deleteAt :: Int -> Set a -> Set a
1144 deleteAt i t = i `seq`
1145 case t of
1146 Tip -> error "Set.deleteAt: index out of range"
1147 Bin _ x l r -> case compare i sizeL of
1148 LT -> balanceR x (deleteAt i l) r
1149 GT -> balanceL x l (deleteAt (i-sizeL-1) r)
1150 EQ -> glue l r
1151 where
1152 sizeL = size l
1153
1154
1155 {--------------------------------------------------------------------
1156 Utility functions that maintain the balance properties of the tree.
1157 All constructors assume that all values in [l] < [x] and all values
1158 in [r] > [x], and that [l] and [r] are valid trees.
1159
1160 In order of sophistication:
1161 [Bin sz x l r] The type constructor.
1162 [bin x l r] Maintains the correct size, assumes that both [l]
1163 and [r] are balanced with respect to each other.
1164 [balance x l r] Restores the balance and size.
1165 Assumes that the original tree was balanced and
1166 that [l] or [r] has changed by at most one element.
1167 [link x l r] Restores balance and size.
1168
1169 Furthermore, we can construct a new tree from two trees. Both operations
1170 assume that all values in [l] < all values in [r] and that [l] and [r]
1171 are valid:
1172 [glue l r] Glues [l] and [r] together. Assumes that [l] and
1173 [r] are already balanced with respect to each other.
1174 [merge l r] Merges two trees and restores balance.
1175
1176 Note: in contrast to Adam's paper, we use (<=) comparisons instead
1177 of (<) comparisons in [link], [merge] and [balance].
1178 Quickcheck (on [difference]) showed that this was necessary in order
1179 to maintain the invariants. It is quite unsatisfactory that I haven't
1180 been able to find out why this is actually the case! Fortunately, it
1181 doesn't hurt to be a bit more conservative.
1182 --------------------------------------------------------------------}
1183
1184 {--------------------------------------------------------------------
1185 Link
1186 --------------------------------------------------------------------}
1187 link :: a -> Set a -> Set a -> Set a
1188 link x Tip r = insertMin x r
1189 link x l Tip = insertMax x l
1190 link x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
1191 | delta*sizeL < sizeR = balanceL z (link x l lz) rz
1192 | delta*sizeR < sizeL = balanceR y ly (link x ry r)
1193 | otherwise = bin x l r
1194
1195
1196 -- insertMin and insertMax don't perform potentially expensive comparisons.
1197 insertMax,insertMin :: a -> Set a -> Set a
1198 insertMax x t
1199 = case t of
1200 Tip -> singleton x
1201 Bin _ y l r
1202 -> balanceR y l (insertMax x r)
1203
1204 insertMin x t
1205 = case t of
1206 Tip -> singleton x
1207 Bin _ y l r
1208 -> balanceL y (insertMin x l) r
1209
1210 {--------------------------------------------------------------------
1211 [merge l r]: merges two trees.
1212 --------------------------------------------------------------------}
1213 merge :: Set a -> Set a -> Set a
1214 merge Tip r = r
1215 merge l Tip = l
1216 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
1217 | delta*sizeL < sizeR = balanceL y (merge l ly) ry
1218 | delta*sizeR < sizeL = balanceR x lx (merge rx r)
1219 | otherwise = glue l r
1220
1221 {--------------------------------------------------------------------
1222 [glue l r]: glues two trees together.
1223 Assumes that [l] and [r] are already balanced with respect to each other.
1224 --------------------------------------------------------------------}
1225 glue :: Set a -> Set a -> Set a
1226 glue Tip r = r
1227 glue l Tip = l
1228 glue l r
1229 | size l > size r = let (m,l') = deleteFindMax l in balanceR m l' r
1230 | otherwise = let (m,r') = deleteFindMin r in balanceL m l r'
1231
1232 -- | /O(log n)/. Delete and find the minimal element.
1233 --
1234 -- > deleteFindMin set = (findMin set, deleteMin set)
1235
1236 deleteFindMin :: Set a -> (a,Set a)
1237 deleteFindMin t
1238 = case t of
1239 Bin _ x Tip r -> (x,r)
1240 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balanceR x l' r)
1241 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
1242
1243 -- | /O(log n)/. Delete and find the maximal element.
1244 --
1245 -- > deleteFindMax set = (findMax set, deleteMax set)
1246 deleteFindMax :: Set a -> (a,Set a)
1247 deleteFindMax t
1248 = case t of
1249 Bin _ x l Tip -> (x,l)
1250 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balanceL x l r')
1251 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
1252
1253 -- | /O(log n)/. Retrieves the minimal key of the set, and the set
1254 -- stripped of that element, or 'Nothing' if passed an empty set.
1255 minView :: Set a -> Maybe (a, Set a)
1256 minView Tip = Nothing
1257 minView x = Just (deleteFindMin x)
1258
1259 -- | /O(log n)/. Retrieves the maximal key of the set, and the set
1260 -- stripped of that element, or 'Nothing' if passed an empty set.
1261 maxView :: Set a -> Maybe (a, Set a)
1262 maxView Tip = Nothing
1263 maxView x = Just (deleteFindMax x)
1264
1265 {--------------------------------------------------------------------
1266 [balance x l r] balances two trees with value x.
1267 The sizes of the trees should balance after decreasing the
1268 size of one of them. (a rotation).
1269
1270 [delta] is the maximal relative difference between the sizes of
1271 two trees, it corresponds with the [w] in Adams' paper.
1272 [ratio] is the ratio between an outer and inner sibling of the
1273 heavier subtree in an unbalanced setting. It determines
1274 whether a double or single rotation should be performed
1275 to restore balance. It is correspondes with the inverse
1276 of $\alpha$ in Adam's article.
1277
1278 Note that according to the Adam's paper:
1279 - [delta] should be larger than 4.646 with a [ratio] of 2.
1280 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
1281
1282 But the Adam's paper is errorneous:
1283 - it can be proved that for delta=2 and delta>=5 there does
1284 not exist any ratio that would work
1285 - delta=4.5 and ratio=2 does not work
1286
1287 That leaves two reasonable variants, delta=3 and delta=4,
1288 both with ratio=2.
1289
1290 - A lower [delta] leads to a more 'perfectly' balanced tree.
1291 - A higher [delta] performs less rebalancing.
1292
1293 In the benchmarks, delta=3 is faster on insert operations,
1294 and delta=4 has slightly better deletes. As the insert speedup
1295 is larger, we currently use delta=3.
1296
1297 --------------------------------------------------------------------}
1298 delta,ratio :: Int
1299 delta = 3
1300 ratio = 2
1301
1302 -- The balance function is equivalent to the following:
1303 --
1304 -- balance :: a -> Set a -> Set a -> Set a
1305 -- balance x l r
1306 -- | sizeL + sizeR <= 1 = Bin sizeX x l r
1307 -- | sizeR > delta*sizeL = rotateL x l r
1308 -- | sizeL > delta*sizeR = rotateR x l r
1309 -- | otherwise = Bin sizeX x l r
1310 -- where
1311 -- sizeL = size l
1312 -- sizeR = size r
1313 -- sizeX = sizeL + sizeR + 1
1314 --
1315 -- rotateL :: a -> Set a -> Set a -> Set a
1316 -- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
1317 -- | otherwise = doubleL x l r
1318 -- rotateR :: a -> Set a -> Set a -> Set a
1319 -- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
1320 -- | otherwise = doubleR x l r
1321 --
1322 -- singleL, singleR :: a -> Set a -> Set a -> Set a
1323 -- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
1324 -- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
1325 --
1326 -- doubleL, doubleR :: a -> Set a -> Set a -> Set a
1327 -- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
1328 -- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
1329 --
1330 -- It is only written in such a way that every node is pattern-matched only once.
1331 --
1332 -- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
1333 -- In case it is needed, it can be found in Data.Map.
1334
1335 -- Functions balanceL and balanceR are specialised versions of balance.
1336 -- balanceL only checks whether the left subtree is too big,
1337 -- balanceR only checks whether the right subtree is too big.
1338
1339 -- balanceL is called when left subtree might have been inserted to or when
1340 -- right subtree might have been deleted from.
1341 balanceL :: a -> Set a -> Set a -> Set a
1342 balanceL x l r = case r of
1343 Tip -> case l of
1344 Tip -> Bin 1 x Tip Tip
1345 (Bin _ _ Tip Tip) -> Bin 2 x l Tip
1346 (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
1347 (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
1348 (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
1349 | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
1350 | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)
1351
1352 (Bin rs _ _ _) -> case l of
1353 Tip -> Bin (1+rs) x Tip r
1354
1355 (Bin ls lx ll lr)
1356 | ls > delta*rs -> case (ll, lr) of
1357 (Bin lls _ _ _, Bin lrs lrx lrl lrr)
1358 | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
1359 | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
1360 (_, _) -> error "Failure in Data.Map.balanceL"
1361 | otherwise -> Bin (1+ls+rs) x l r
1362 {-# NOINLINE balanceL #-}
1363
1364 -- balanceR is called when right subtree might have been inserted to or when
1365 -- left subtree might have been deleted from.
1366 balanceR :: a -> Set a -> Set a -> Set a
1367 balanceR x l r = case l of
1368 Tip -> case r of
1369 Tip -> Bin 1 x Tip Tip
1370 (Bin _ _ Tip Tip) -> Bin 2 x Tip r
1371 (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
1372 (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
1373 (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
1374 | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
1375 | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)
1376
1377 (Bin ls _ _ _) -> case r of
1378 Tip -> Bin (1+ls) x l Tip
1379
1380 (Bin rs rx rl rr)
1381 | rs > delta*ls -> case (rl, rr) of
1382 (Bin rls rlx rll rlr, Bin rrs _ _ _)
1383 | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
1384 | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
1385 (_, _) -> error "Failure in Data.Map.balanceR"
1386 | otherwise -> Bin (1+ls+rs) x l r
1387 {-# NOINLINE balanceR #-}
1388
1389 {--------------------------------------------------------------------
1390 The bin constructor maintains the size of the tree
1391 --------------------------------------------------------------------}
1392 bin :: a -> Set a -> Set a -> Set a
1393 bin x l r
1394 = Bin (size l + size r + 1) x l r
1395 {-# INLINE bin #-}
1396
1397
1398 {--------------------------------------------------------------------
1399 Utilities
1400 --------------------------------------------------------------------}
1401 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1402 foldlStrict f = go
1403 where
1404 go z [] = z
1405 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1406 {-# INLINE foldlStrict #-}
1407
1408 -- | /O(1)/. Decompose a set into pieces, based on the structure of the underlying
1409 -- tree. No guarantee is made as to the sizes of the pieces; an internal, but
1410 -- deterministic process determines this. This is most useful for consuming a set in
1411 -- parallel.
1412 splitRoot :: Set a -> [Set a]
1413 splitRoot orig =
1414 case orig of
1415 Tip -> []
1416 Bin _ v l r -> [singleton v, l, r]
1417 {-# INLINE splitRoot #-}
1418
1419
1420 {--------------------------------------------------------------------
1421 Debugging
1422 --------------------------------------------------------------------}
1423 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1424 -- in a compressed, hanging format.
1425 showTree :: Show a => Set a -> String
1426 showTree s
1427 = showTreeWith True False s
1428
1429
1430 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
1431 the tree that implements the set. If @hang@ is
1432 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
1433 @wide@ is 'True', an extra wide version is shown.
1434
1435 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
1436 > 4
1437 > +--2
1438 > | +--1
1439 > | +--3
1440 > +--5
1441 >
1442 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
1443 > 4
1444 > |
1445 > +--2
1446 > | |
1447 > | +--1
1448 > | |
1449 > | +--3
1450 > |
1451 > +--5
1452 >
1453 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
1454 > +--5
1455 > |
1456 > 4
1457 > |
1458 > | +--3
1459 > | |
1460 > +--2
1461 > |
1462 > +--1
1463
1464 -}
1465 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
1466 showTreeWith hang wide t
1467 | hang = (showsTreeHang wide [] t) ""
1468 | otherwise = (showsTree wide [] [] t) ""
1469
1470 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
1471 showsTree wide lbars rbars t
1472 = case t of
1473 Tip -> showsBars lbars . showString "|\n"
1474 Bin _ x Tip Tip
1475 -> showsBars lbars . shows x . showString "\n"
1476 Bin _ x l r
1477 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1478 showWide wide rbars .
1479 showsBars lbars . shows x . showString "\n" .
1480 showWide wide lbars .
1481 showsTree wide (withEmpty lbars) (withBar lbars) l
1482
1483 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
1484 showsTreeHang wide bars t
1485 = case t of
1486 Tip -> showsBars bars . showString "|\n"
1487 Bin _ x Tip Tip
1488 -> showsBars bars . shows x . showString "\n"
1489 Bin _ x l r
1490 -> showsBars bars . shows x . showString "\n" .
1491 showWide wide bars .
1492 showsTreeHang wide (withBar bars) l .
1493 showWide wide bars .
1494 showsTreeHang wide (withEmpty bars) r
1495
1496 showWide :: Bool -> [String] -> String -> String
1497 showWide wide bars
1498 | wide = showString (concat (reverse bars)) . showString "|\n"
1499 | otherwise = id
1500
1501 showsBars :: [String] -> ShowS
1502 showsBars bars
1503 = case bars of
1504 [] -> id
1505 _ -> showString (concat (reverse (tail bars))) . showString node
1506
1507 node :: String
1508 node = "+--"
1509
1510 withBar, withEmpty :: [String] -> [String]
1511 withBar bars = "| ":bars
1512 withEmpty bars = " ":bars
1513
1514 {--------------------------------------------------------------------
1515 Assertions
1516 --------------------------------------------------------------------}
1517 -- | /O(n)/. Test if the internal set structure is valid.
1518 valid :: Ord a => Set a -> Bool
1519 valid t
1520 = balanced t && ordered t && validsize t
1521
1522 ordered :: Ord a => Set a -> Bool
1523 ordered t
1524 = bounded (const True) (const True) t
1525 where
1526 bounded lo hi t'
1527 = case t' of
1528 Tip -> True
1529 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
1530
1531 balanced :: Set a -> Bool
1532 balanced t
1533 = case t of
1534 Tip -> True
1535 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1536 balanced l && balanced r
1537
1538 validsize :: Set a -> Bool
1539 validsize t
1540 = (realsize t == Just (size t))
1541 where
1542 realsize t'
1543 = case t' of
1544 Tip -> Just 0
1545 Bin sz _ l r -> case (realsize l,realsize r) of
1546 (Just n,Just m) | n+m+1 == sz -> Just sz
1547 _ -> Nothing