d9023109eb6752779980d452e37f66732862f98e
[packages/containers.git] / Data / Set / Base.hs
1 {-# LANGUAGE CPP #-}
2 #if __GLASGOW_HASKELL__
3 {-# LANGUAGE DeriveDataTypeable, StandaloneDeriving #-}
4 #endif
5 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
6 {-# LANGUAGE Trustworthy #-}
7 #endif
8 -----------------------------------------------------------------------------
9 -- |
10 -- Module : Data.Set.Base
11 -- Copyright : (c) Daan Leijen 2002
12 -- License : BSD-style
13 -- Maintainer : libraries@haskell.org
14 -- Stability : provisional
15 -- Portability : portable
16 --
17 -- An efficient implementation of sets.
18 --
19 -- These modules are intended to be imported qualified, to avoid name
20 -- clashes with Prelude functions, e.g.
21 --
22 -- > import Data.Set (Set)
23 -- > import qualified Data.Set as Set
24 --
25 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
26 -- trees of /bounded balance/) as described by:
27 --
28 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
29 -- Journal of Functional Programming 3(4):553-562, October 1993,
30 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
31 --
32 -- * J. Nievergelt and E.M. Reingold,
33 -- \"/Binary search trees of bounded balance/\",
34 -- SIAM journal of computing 2(1), March 1973.
35 --
36 -- Note that the implementation is /left-biased/ -- the elements of a
37 -- first argument are always preferred to the second, for example in
38 -- 'union' or 'insert'. Of course, left-biasing can only be observed
39 -- when equality is an equivalence relation instead of structural
40 -- equality.
41 -----------------------------------------------------------------------------
42
43 -- [Note: Using INLINABLE]
44 -- ~~~~~~~~~~~~~~~~~~~~~~~
45 -- It is crucial to the performance that the functions specialize on the Ord
46 -- type when possible. GHC 7.0 and higher does this by itself when it sees th
47 -- unfolding of a function -- that is why all public functions are marked
48 -- INLINABLE (that exposes the unfolding).
49
50
51 -- [Note: Using INLINE]
52 -- ~~~~~~~~~~~~~~~~~~~~
53 -- For other compilers and GHC pre 7.0, we mark some of the functions INLINE.
54 -- We mark the functions that just navigate down the tree (lookup, insert,
55 -- delete and similar). That navigation code gets inlined and thus specialized
56 -- when possible. There is a price to pay -- code growth. The code INLINED is
57 -- therefore only the tree navigation, all the real work (rebalancing) is not
58 -- INLINED by using a NOINLINE.
59 --
60 -- All methods marked INLINE have to be nonrecursive -- a 'go' function doing
61 -- the real work is provided.
62
63
64 -- [Note: Type of local 'go' function]
65 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
66 -- If the local 'go' function uses an Ord class, it sometimes heap-allocates
67 -- the Ord dictionary when the 'go' function does not have explicit type.
68 -- In that case we give 'go' explicit type. But this slightly decrease
69 -- performance, as the resulting 'go' function can float out to top level.
70
71
72 -- [Note: Local 'go' functions and capturing]
73 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
74 -- As opposed to IntSet, when 'go' function captures an argument, increased
75 -- heap-allocation can occur: sometimes in a polymorphic function, the 'go'
76 -- floats out of its enclosing function and then it heap-allocates the
77 -- dictionary and the argument. Maybe it floats out too late and strictness
78 -- analyzer cannot see that these could be passed on stack.
79 --
80 -- For example, change 'member' so that its local 'go' function is not passing
81 -- argument x and then look at the resulting code for hedgeInt.
82
83
84 -- [Note: Order of constructors]
85 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
86 -- The order of constructors of Set matters when considering performance.
87 -- Currently in GHC 7.0, when type has 2 constructors, a forward conditional
88 -- jump is made when successfully matching second constructor. Successful match
89 -- of first constructor results in the forward jump not taken.
90 -- On GHC 7.0, reordering constructors from Tip | Bin to Bin | Tip
91 -- improves the benchmark by up to 10% on x86.
92
93 module Data.Set.Base (
94 -- * Set type
95 Set(..) -- instance Eq,Ord,Show,Read,Data,Typeable
96
97 -- * Operators
98 , (\\)
99
100 -- * Query
101 , null
102 , size
103 , member
104 , notMember
105 , lookupLT
106 , lookupGT
107 , lookupLE
108 , lookupGE
109 , isSubsetOf
110 , isProperSubsetOf
111
112 -- * Construction
113 , empty
114 , singleton
115 , insert
116 , delete
117
118 -- * Combine
119 , union
120 , unions
121 , difference
122 , intersection
123
124 -- * Filter
125 , filter
126 , partition
127 , split
128 , splitMember
129
130 -- * Indexed
131 , lookupIndex
132 , findIndex
133 , elemAt
134 , deleteAt
135
136 -- * Map
137 , map
138 , mapMonotonic
139
140 -- * Folds
141 , foldr
142 , foldl
143 -- ** Strict folds
144 , foldr'
145 , foldl'
146 -- ** Legacy folds
147 , fold
148
149 -- * Min\/Max
150 , findMin
151 , findMax
152 , deleteMin
153 , deleteMax
154 , deleteFindMin
155 , deleteFindMax
156 , maxView
157 , minView
158
159 -- * Conversion
160
161 -- ** List
162 , elems
163 , toList
164 , fromList
165
166 -- ** Ordered list
167 , toAscList
168 , toDescList
169 , fromAscList
170 , fromDistinctAscList
171
172 -- * Debugging
173 , showTree
174 , showTreeWith
175 , valid
176
177 -- Internals (for testing)
178 , bin
179 , balanced
180 , link
181 , merge
182 ) where
183
184 import Prelude hiding (filter,foldl,foldr,null,map)
185 import qualified Data.List as List
186 import Data.Bits (shiftL, shiftR)
187 import Data.Monoid (Monoid(..))
188 import qualified Data.Foldable as Foldable
189 import Data.Typeable
190 import Control.DeepSeq (NFData(rnf))
191
192 import Data.StrictPair
193
194 #if __GLASGOW_HASKELL__
195 import GHC.Exts ( build )
196 import Text.Read
197 import Data.Data
198 #endif
199
200 -- Use macros to define strictness of functions.
201 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
202 -- We do not use BangPatterns, because they are not in any standard and we
203 -- want the compilers to be compiled by as many compilers as possible.
204 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
205 #define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
206 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
207
208 {--------------------------------------------------------------------
209 Operators
210 --------------------------------------------------------------------}
211 infixl 9 \\ --
212
213 -- | /O(n+m)/. See 'difference'.
214 (\\) :: Ord a => Set a -> Set a -> Set a
215 m1 \\ m2 = difference m1 m2
216 #if __GLASGOW_HASKELL__ >= 700
217 {-# INLINABLE (\\) #-}
218 #endif
219
220 {--------------------------------------------------------------------
221 Sets are size balanced trees
222 --------------------------------------------------------------------}
223 -- | A set of values @a@.
224
225 -- See Note: Order of constructors
226 data Set a = Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a)
227 | Tip
228
229 type Size = Int
230
231 instance Ord a => Monoid (Set a) where
232 mempty = empty
233 mappend = union
234 mconcat = unions
235
236 instance Foldable.Foldable Set where
237 fold t = go t
238 where go Tip = mempty
239 go (Bin 1 k _ _) = k
240 go (Bin _ k l r) = go l `mappend` (k `mappend` go r)
241 {-# INLINABLE fold #-}
242 foldr = foldr
243 {-# INLINE foldr #-}
244 foldl = foldl
245 {-# INLINE foldl #-}
246 foldMap f t = go t
247 where go Tip = mempty
248 go (Bin 1 k _ _) = f k
249 go (Bin _ k l r) = go l `mappend` (f k `mappend` go r)
250 {-# INLINE foldMap #-}
251
252 #if __GLASGOW_HASKELL__
253
254 {--------------------------------------------------------------------
255 A Data instance
256 --------------------------------------------------------------------}
257
258 -- This instance preserves data abstraction at the cost of inefficiency.
259 -- We provide limited reflection services for the sake of data abstraction.
260
261 instance (Data a, Ord a) => Data (Set a) where
262 gfoldl f z set = z fromList `f` (toList set)
263 toConstr _ = fromListConstr
264 gunfold k z c = case constrIndex c of
265 1 -> k (z fromList)
266 _ -> error "gunfold"
267 dataTypeOf _ = setDataType
268 dataCast1 f = gcast1 f
269
270 fromListConstr :: Constr
271 fromListConstr = mkConstr setDataType "fromList" [] Prefix
272
273 setDataType :: DataType
274 setDataType = mkDataType "Data.Set.Base.Set" [fromListConstr]
275
276 #endif
277
278 {--------------------------------------------------------------------
279 Query
280 --------------------------------------------------------------------}
281 -- | /O(1)/. Is this the empty set?
282 null :: Set a -> Bool
283 null Tip = True
284 null (Bin {}) = False
285 {-# INLINE null #-}
286
287 -- | /O(1)/. The number of elements in the set.
288 size :: Set a -> Int
289 size Tip = 0
290 size (Bin sz _ _ _) = sz
291 {-# INLINE size #-}
292
293 -- | /O(log n)/. Is the element in the set?
294 member :: Ord a => a -> Set a -> Bool
295 member = go
296 where
297 STRICT_1_OF_2(go)
298 go _ Tip = False
299 go x (Bin _ y l r) = case compare x y of
300 LT -> go x l
301 GT -> go x r
302 EQ -> True
303 #if __GLASGOW_HASKELL__ >= 700
304 {-# INLINABLE member #-}
305 #else
306 {-# INLINE member #-}
307 #endif
308
309 -- | /O(log n)/. Is the element not in the set?
310 notMember :: Ord a => a -> Set a -> Bool
311 notMember a t = not $ member a t
312 #if __GLASGOW_HASKELL__ >= 700
313 {-# INLINABLE notMember #-}
314 #else
315 {-# INLINE notMember #-}
316 #endif
317
318 -- | /O(log n)/. Find largest element smaller than the given one.
319 --
320 -- > lookupLT 3 (fromList [3, 5]) == Nothing
321 -- > lookupLT 5 (fromList [3, 5]) == Just 3
322 lookupLT :: Ord a => a -> Set a -> Maybe a
323 lookupLT = goNothing
324 where
325 STRICT_1_OF_2(goNothing)
326 goNothing _ Tip = Nothing
327 goNothing x (Bin _ y l r) | x <= y = goNothing x l
328 | otherwise = goJust x y r
329
330 STRICT_1_OF_3(goJust)
331 goJust _ best Tip = Just best
332 goJust x best (Bin _ y l r) | x <= y = goJust x best l
333 | otherwise = goJust x y r
334 #if __GLASGOW_HASKELL__ >= 700
335 {-# INLINABLE lookupLT #-}
336 #else
337 {-# INLINE lookupLT #-}
338 #endif
339
340 -- | /O(log n)/. Find smallest element greater than the given one.
341 --
342 -- > lookupGT 4 (fromList [3, 5]) == Just 5
343 -- > lookupGT 5 (fromList [3, 5]) == Nothing
344 lookupGT :: Ord a => a -> Set a -> Maybe a
345 lookupGT = goNothing
346 where
347 STRICT_1_OF_2(goNothing)
348 goNothing _ Tip = Nothing
349 goNothing x (Bin _ y l r) | x < y = goJust x y l
350 | otherwise = goNothing x r
351
352 STRICT_1_OF_3(goJust)
353 goJust _ best Tip = Just best
354 goJust x best (Bin _ y l r) | x < y = goJust x y l
355 | otherwise = goJust x best r
356 #if __GLASGOW_HASKELL__ >= 700
357 {-# INLINABLE lookupGT #-}
358 #else
359 {-# INLINE lookupGT #-}
360 #endif
361
362 -- | /O(log n)/. Find largest element smaller or equal to the given one.
363 --
364 -- > lookupLE 2 (fromList [3, 5]) == Nothing
365 -- > lookupLE 4 (fromList [3, 5]) == Just 3
366 -- > lookupLE 5 (fromList [3, 5]) == Just 5
367 lookupLE :: Ord a => a -> Set a -> Maybe a
368 lookupLE = goNothing
369 where
370 STRICT_1_OF_2(goNothing)
371 goNothing _ Tip = Nothing
372 goNothing x (Bin _ y l r) = case compare x y of LT -> goNothing x l
373 EQ -> Just y
374 GT -> goJust x y r
375
376 STRICT_1_OF_3(goJust)
377 goJust _ best Tip = Just best
378 goJust x best (Bin _ y l r) = case compare x y of LT -> goJust x best l
379 EQ -> Just y
380 GT -> goJust x y r
381 #if __GLASGOW_HASKELL__ >= 700
382 {-# INLINABLE lookupLE #-}
383 #else
384 {-# INLINE lookupLE #-}
385 #endif
386
387 -- | /O(log n)/. Find smallest element greater or equal to the given one.
388 --
389 -- > lookupGE 3 (fromList [3, 5]) == Just 3
390 -- > lookupGE 4 (fromList [3, 5]) == Just 5
391 -- > lookupGE 6 (fromList [3, 5]) == Nothing
392 lookupGE :: Ord a => a -> Set a -> Maybe a
393 lookupGE = goNothing
394 where
395 STRICT_1_OF_2(goNothing)
396 goNothing _ Tip = Nothing
397 goNothing x (Bin _ y l r) = case compare x y of LT -> goJust x y l
398 EQ -> Just y
399 GT -> goNothing x r
400
401 STRICT_1_OF_3(goJust)
402 goJust _ best Tip = Just best
403 goJust x best (Bin _ y l r) = case compare x y of LT -> goJust x y l
404 EQ -> Just y
405 GT -> goJust x best r
406 #if __GLASGOW_HASKELL__ >= 700
407 {-# INLINABLE lookupGE #-}
408 #else
409 {-# INLINE lookupGE #-}
410 #endif
411
412 {--------------------------------------------------------------------
413 Construction
414 --------------------------------------------------------------------}
415 -- | /O(1)/. The empty set.
416 empty :: Set a
417 empty = Tip
418 {-# INLINE empty #-}
419
420 -- | /O(1)/. Create a singleton set.
421 singleton :: a -> Set a
422 singleton x = Bin 1 x Tip Tip
423 {-# INLINE singleton #-}
424
425 {--------------------------------------------------------------------
426 Insertion, Deletion
427 --------------------------------------------------------------------}
428 -- | /O(log n)/. Insert an element in a set.
429 -- If the set already contains an element equal to the given value,
430 -- it is replaced with the new value.
431
432 -- See Note: Type of local 'go' function
433 insert :: Ord a => a -> Set a -> Set a
434 insert = go
435 where
436 go :: Ord a => a -> Set a -> Set a
437 STRICT_1_OF_2(go)
438 go x Tip = singleton x
439 go x (Bin sz y l r) = case compare x y of
440 LT -> balanceL y (go x l) r
441 GT -> balanceR y l (go x r)
442 EQ -> Bin sz x l r
443 #if __GLASGOW_HASKELL__ >= 700
444 {-# INLINABLE insert #-}
445 #else
446 {-# INLINE insert #-}
447 #endif
448
449 -- Insert an element to the set only if it is not in the set.
450 -- Used by `union`.
451
452 -- See Note: Type of local 'go' function
453 insertR :: Ord a => a -> Set a -> Set a
454 insertR = go
455 where
456 go :: Ord a => a -> Set a -> Set a
457 STRICT_1_OF_2(go)
458 go x Tip = singleton x
459 go x t@(Bin _ y l r) = case compare x y of
460 LT -> balanceL y (go x l) r
461 GT -> balanceR y l (go x r)
462 EQ -> t
463 #if __GLASGOW_HASKELL__ >= 700
464 {-# INLINABLE insertR #-}
465 #else
466 {-# INLINE insertR #-}
467 #endif
468
469 -- | /O(log n)/. Delete an element from a set.
470
471 -- See Note: Type of local 'go' function
472 delete :: Ord a => a -> Set a -> Set a
473 delete = go
474 where
475 go :: Ord a => a -> Set a -> Set a
476 STRICT_1_OF_2(go)
477 go _ Tip = Tip
478 go x (Bin _ y l r) = case compare x y of
479 LT -> balanceR y (go x l) r
480 GT -> balanceL y l (go x r)
481 EQ -> glue l r
482 #if __GLASGOW_HASKELL__ >= 700
483 {-# INLINABLE delete #-}
484 #else
485 {-# INLINE delete #-}
486 #endif
487
488 {--------------------------------------------------------------------
489 Subset
490 --------------------------------------------------------------------}
491 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
492 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
493 isProperSubsetOf s1 s2
494 = (size s1 < size s2) && (isSubsetOf s1 s2)
495 #if __GLASGOW_HASKELL__ >= 700
496 {-# INLINABLE isProperSubsetOf #-}
497 #endif
498
499
500 -- | /O(n+m)/. Is this a subset?
501 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
502 isSubsetOf :: Ord a => Set a -> Set a -> Bool
503 isSubsetOf t1 t2
504 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
505 #if __GLASGOW_HASKELL__ >= 700
506 {-# INLINABLE isSubsetOf #-}
507 #endif
508
509 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
510 isSubsetOfX Tip _ = True
511 isSubsetOfX _ Tip = False
512 isSubsetOfX (Bin _ x l r) t
513 = found && isSubsetOfX l lt && isSubsetOfX r gt
514 where
515 (lt,found,gt) = splitMember x t
516 #if __GLASGOW_HASKELL__ >= 700
517 {-# INLINABLE isSubsetOfX #-}
518 #endif
519
520
521 {--------------------------------------------------------------------
522 Minimal, Maximal
523 --------------------------------------------------------------------}
524 -- | /O(log n)/. The minimal element of a set.
525 findMin :: Set a -> a
526 findMin (Bin _ x Tip _) = x
527 findMin (Bin _ _ l _) = findMin l
528 findMin Tip = error "Set.findMin: empty set has no minimal element"
529
530 -- | /O(log n)/. The maximal element of a set.
531 findMax :: Set a -> a
532 findMax (Bin _ x _ Tip) = x
533 findMax (Bin _ _ _ r) = findMax r
534 findMax Tip = error "Set.findMax: empty set has no maximal element"
535
536 -- | /O(log n)/. Delete the minimal element. Returns an empty set if the set is empty.
537 deleteMin :: Set a -> Set a
538 deleteMin (Bin _ _ Tip r) = r
539 deleteMin (Bin _ x l r) = balanceR x (deleteMin l) r
540 deleteMin Tip = Tip
541
542 -- | /O(log n)/. Delete the maximal element. Returns an empty set if the set is empty.
543 deleteMax :: Set a -> Set a
544 deleteMax (Bin _ _ l Tip) = l
545 deleteMax (Bin _ x l r) = balanceL x l (deleteMax r)
546 deleteMax Tip = Tip
547
548 {--------------------------------------------------------------------
549 Union.
550 --------------------------------------------------------------------}
551 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
552 unions :: Ord a => [Set a] -> Set a
553 unions = foldlStrict union empty
554 #if __GLASGOW_HASKELL__ >= 700
555 {-# INLINABLE unions #-}
556 #endif
557
558 -- | /O(n+m)/. The union of two sets, preferring the first set when
559 -- equal elements are encountered.
560 -- The implementation uses the efficient /hedge-union/ algorithm.
561 union :: Ord a => Set a -> Set a -> Set a
562 union Tip t2 = t2
563 union t1 Tip = t1
564 union t1 t2 = hedgeUnion NothingS NothingS t1 t2
565 #if __GLASGOW_HASKELL__ >= 700
566 {-# INLINABLE union #-}
567 #endif
568
569 hedgeUnion :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
570 hedgeUnion _ _ t1 Tip = t1
571 hedgeUnion blo bhi Tip (Bin _ x l r) = link x (filterGt blo l) (filterLt bhi r)
572 hedgeUnion _ _ t1 (Bin _ x Tip Tip) = insertR x t1 -- According to benchmarks, this special case increases
573 -- performance up to 30%. It does not help in difference or intersection.
574 hedgeUnion blo bhi (Bin _ x l r) t2 = link x (hedgeUnion blo bmi l (trim blo bmi t2))
575 (hedgeUnion bmi bhi r (trim bmi bhi t2))
576 where bmi = JustS x
577 #if __GLASGOW_HASKELL__ >= 700
578 {-# INLINABLE hedgeUnion #-}
579 #endif
580
581 {--------------------------------------------------------------------
582 Difference
583 --------------------------------------------------------------------}
584 -- | /O(n+m)/. Difference of two sets.
585 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
586 difference :: Ord a => Set a -> Set a -> Set a
587 difference Tip _ = Tip
588 difference t1 Tip = t1
589 difference t1 t2 = hedgeDiff NothingS NothingS t1 t2
590 #if __GLASGOW_HASKELL__ >= 700
591 {-# INLINABLE difference #-}
592 #endif
593
594 hedgeDiff :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
595 hedgeDiff _ _ Tip _ = Tip
596 hedgeDiff blo bhi (Bin _ x l r) Tip = link x (filterGt blo l) (filterLt bhi r)
597 hedgeDiff blo bhi t (Bin _ x l r) = merge (hedgeDiff blo bmi (trim blo bmi t) l)
598 (hedgeDiff bmi bhi (trim bmi bhi t) r)
599 where bmi = JustS x
600 #if __GLASGOW_HASKELL__ >= 700
601 {-# INLINABLE hedgeDiff #-}
602 #endif
603
604 {--------------------------------------------------------------------
605 Intersection
606 --------------------------------------------------------------------}
607 -- | /O(n+m)/. The intersection of two sets. The implementation uses an
608 -- efficient /hedge/ algorithm comparable with /hedge-union/. Elements of the
609 -- result come from the first set, so for example
610 --
611 -- > import qualified Data.Set as S
612 -- > data AB = A | B deriving Show
613 -- > instance Ord AB where compare _ _ = EQ
614 -- > instance Eq AB where _ == _ = True
615 -- > main = print (S.singleton A `S.intersection` S.singleton B,
616 -- > S.singleton B `S.intersection` S.singleton A)
617 --
618 -- prints @(fromList [A],fromList [B])@.
619 intersection :: Ord a => Set a -> Set a -> Set a
620 intersection Tip _ = Tip
621 intersection _ Tip = Tip
622 intersection t1 t2 = hedgeInt NothingS NothingS t1 t2
623 #if __GLASGOW_HASKELL__ >= 700
624 {-# INLINABLE intersection #-}
625 #endif
626
627 hedgeInt :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
628 hedgeInt _ _ _ Tip = Tip
629 hedgeInt _ _ Tip _ = Tip
630 hedgeInt blo bhi (Bin _ x l r) t2 = let l' = hedgeInt blo bmi l (trim blo bmi t2)
631 r' = hedgeInt bmi bhi r (trim bmi bhi t2)
632 in if x `member` t2 then link x l' r' else merge l' r'
633 where bmi = JustS x
634 #if __GLASGOW_HASKELL__ >= 700
635 {-# INLINABLE hedgeInt #-}
636 #endif
637
638 {--------------------------------------------------------------------
639 Filter and partition
640 --------------------------------------------------------------------}
641 -- | /O(n)/. Filter all elements that satisfy the predicate.
642 filter :: (a -> Bool) -> Set a -> Set a
643 filter _ Tip = Tip
644 filter p (Bin _ x l r)
645 | p x = link x (filter p l) (filter p r)
646 | otherwise = merge (filter p l) (filter p r)
647
648 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
649 -- the predicate and one with all elements that don't satisfy the predicate.
650 -- See also 'split'.
651 partition :: (a -> Bool) -> Set a -> (Set a,Set a)
652 partition p0 t0 = toPair $ go p0 t0
653 where
654 go _ Tip = (Tip :*: Tip)
655 go p (Bin _ x l r) = case (go p l, go p r) of
656 ((l1 :*: l2), (r1 :*: r2))
657 | p x -> link x l1 r1 :*: merge l2 r2
658 | otherwise -> merge l1 r1 :*: link x l2 r2
659
660 {----------------------------------------------------------------------
661 Map
662 ----------------------------------------------------------------------}
663
664 -- | /O(n*log n)/.
665 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
666 --
667 -- It's worth noting that the size of the result may be smaller if,
668 -- for some @(x,y)@, @x \/= y && f x == f y@
669
670 map :: Ord b => (a->b) -> Set a -> Set b
671 map f = fromList . List.map f . toList
672 #if __GLASGOW_HASKELL__ >= 700
673 {-# INLINABLE map #-}
674 #endif
675
676 -- | /O(n)/. The
677 --
678 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
679 -- /The precondition is not checked./
680 -- Semi-formally, we have:
681 --
682 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
683 -- > ==> mapMonotonic f s == map f s
684 -- > where ls = toList s
685
686 mapMonotonic :: (a->b) -> Set a -> Set b
687 mapMonotonic _ Tip = Tip
688 mapMonotonic f (Bin sz x l r) = Bin sz (f x) (mapMonotonic f l) (mapMonotonic f r)
689
690 {--------------------------------------------------------------------
691 Fold
692 --------------------------------------------------------------------}
693 -- | /O(n)/. Fold the elements in the set using the given right-associative
694 -- binary operator. This function is an equivalent of 'foldr' and is present
695 -- for compatibility only.
696 --
697 -- /Please note that fold will be deprecated in the future and removed./
698 fold :: (a -> b -> b) -> b -> Set a -> b
699 fold = foldr
700 {-# INLINE fold #-}
701
702 -- | /O(n)/. Fold the elements in the set using the given right-associative
703 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
704 --
705 -- For example,
706 --
707 -- > toAscList set = foldr (:) [] set
708 foldr :: (a -> b -> b) -> b -> Set a -> b
709 foldr f z = go z
710 where
711 go z' Tip = z'
712 go z' (Bin _ x l r) = go (f x (go z' r)) l
713 {-# INLINE foldr #-}
714
715 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
716 -- evaluated before using the result in the next application. This
717 -- function is strict in the starting value.
718 foldr' :: (a -> b -> b) -> b -> Set a -> b
719 foldr' f z = go z
720 where
721 STRICT_1_OF_2(go)
722 go z' Tip = z'
723 go z' (Bin _ x l r) = go (f x (go z' r)) l
724 {-# INLINE foldr' #-}
725
726 -- | /O(n)/. Fold the elements in the set using the given left-associative
727 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
728 --
729 -- For example,
730 --
731 -- > toDescList set = foldl (flip (:)) [] set
732 foldl :: (a -> b -> a) -> a -> Set b -> a
733 foldl f z = go z
734 where
735 go z' Tip = z'
736 go z' (Bin _ x l r) = go (f (go z' l) x) r
737 {-# INLINE foldl #-}
738
739 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
740 -- evaluated before using the result in the next application. This
741 -- function is strict in the starting value.
742 foldl' :: (a -> b -> a) -> a -> Set b -> a
743 foldl' f z = go z
744 where
745 STRICT_1_OF_2(go)
746 go z' Tip = z'
747 go z' (Bin _ x l r) = go (f (go z' l) x) r
748 {-# INLINE foldl' #-}
749
750 {--------------------------------------------------------------------
751 List variations
752 --------------------------------------------------------------------}
753 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
754 -- Subject to list fusion.
755 elems :: Set a -> [a]
756 elems = toAscList
757
758 {--------------------------------------------------------------------
759 Lists
760 --------------------------------------------------------------------}
761 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
762 toList :: Set a -> [a]
763 toList = toAscList
764
765 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list fusion.
766 toAscList :: Set a -> [a]
767 toAscList = foldr (:) []
768
769 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
770 -- fusion.
771 toDescList :: Set a -> [a]
772 toDescList = foldl (flip (:)) []
773
774 -- List fusion for the list generating functions.
775 #if __GLASGOW_HASKELL__
776 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
777 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
778 foldrFB :: (a -> b -> b) -> b -> Set a -> b
779 foldrFB = foldr
780 {-# INLINE[0] foldrFB #-}
781 foldlFB :: (a -> b -> a) -> a -> Set b -> a
782 foldlFB = foldl
783 {-# INLINE[0] foldlFB #-}
784
785 -- Inline elems and toList, so that we need to fuse only toAscList.
786 {-# INLINE elems #-}
787 {-# INLINE toList #-}
788
789 -- The fusion is enabled up to phase 2 included. If it does not succeed,
790 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
791 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
792 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
793 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
794 -- before phase 0, otherwise the fusion rules would not fire at all.
795 {-# NOINLINE[0] toAscList #-}
796 {-# NOINLINE[0] toDescList #-}
797 {-# RULES "Set.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
798 {-# RULES "Set.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
799 {-# RULES "Set.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
800 {-# RULES "Set.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
801 #endif
802
803 -- | /O(n*log n)/. Create a set from a list of elements.
804 --
805 -- If the elemens are ordered, linear-time implementation is used,
806 -- with the performance equal to 'fromDistinctAscList'.
807
808 -- For some reason, when 'singleton' is used in fromList or in
809 -- create, it is not inlined, so we inline it manually.
810 fromList :: Ord a => [a] -> Set a
811 fromList [] = Tip
812 fromList [x] = Bin 1 x Tip Tip
813 fromList (x0 : xs0) | not_ordered x0 xs0 = fromList' (Bin 1 x0 Tip Tip) xs0
814 | otherwise = go (1::Int) (Bin 1 x0 Tip Tip) xs0
815 where
816 not_ordered _ [] = False
817 not_ordered x (y : _) = x >= y
818 {-# INLINE not_ordered #-}
819
820 fromList' t0 xs = foldlStrict ins t0 xs
821 where ins t x = insert x t
822
823 STRICT_1_OF_3(go)
824 go _ t [] = t
825 go _ t [x] = insertMax x t
826 go s l xs@(x : xss) | not_ordered x xss = fromList' l xs
827 | otherwise = case create s xss of
828 (r, ys, []) -> go (s `shiftL` 1) (link x l r) ys
829 (r, _, ys) -> fromList' (link x l r) ys
830
831 -- The create is returning a triple (tree, xs, ys). Both xs and ys
832 -- represent not yet processed elements and only one of them can be nonempty.
833 -- If ys is nonempty, the keys in ys are not ordered with respect to tree
834 -- and must be inserted using fromList'. Otherwise the keys have been
835 -- ordered so far.
836 STRICT_1_OF_2(create)
837 create _ [] = (Tip, [], [])
838 create s xs@(x : xss)
839 | s == 1 = if not_ordered x xss then (Bin 1 x Tip Tip, [], xss)
840 else (Bin 1 x Tip Tip, xss, [])
841 | otherwise = case create (s `shiftR` 1) xs of
842 res@(_, [], _) -> res
843 (l, [y], zs) -> (insertMax y l, [], zs)
844 (l, ys@(y:yss), _) | not_ordered y yss -> (l, [], ys)
845 | otherwise -> case create (s `shiftR` 1) yss of
846 (r, zs, ws) -> (link y l r, zs, ws)
847 #if __GLASGOW_HASKELL__ >= 700
848 {-# INLINABLE fromList #-}
849 #endif
850
851 {--------------------------------------------------------------------
852 Building trees from ascending/descending lists can be done in linear time.
853
854 Note that if [xs] is ascending that:
855 fromAscList xs == fromList xs
856 --------------------------------------------------------------------}
857 -- | /O(n)/. Build a set from an ascending list in linear time.
858 -- /The precondition (input list is ascending) is not checked./
859 fromAscList :: Eq a => [a] -> Set a
860 fromAscList xs
861 = fromDistinctAscList (combineEq xs)
862 where
863 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
864 combineEq xs'
865 = case xs' of
866 [] -> []
867 [x] -> [x]
868 (x:xx) -> combineEq' x xx
869
870 combineEq' z [] = [z]
871 combineEq' z (x:xs')
872 | z==x = combineEq' z xs'
873 | otherwise = z:combineEq' x xs'
874 #if __GLASGOW_HASKELL__ >= 700
875 {-# INLINABLE fromAscList #-}
876 #endif
877
878
879 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
880 -- /The precondition (input list is strictly ascending) is not checked./
881
882 -- For some reason, when 'singleton' is used in fromDistinctAscList or in
883 -- create, it is not inlined, so we inline it manually.
884 fromDistinctAscList :: [a] -> Set a
885 fromDistinctAscList [] = Tip
886 fromDistinctAscList (x0 : xs0) = go (1::Int) (Bin 1 x0 Tip Tip) xs0
887 where
888 STRICT_1_OF_3(go)
889 go _ t [] = t
890 go s l (x : xs) = case create s xs of
891 (r, ys) -> go (s `shiftL` 1) (link x l r) ys
892
893 STRICT_1_OF_2(create)
894 create _ [] = (Tip, [])
895 create s xs@(x : xs')
896 | s == 1 = (Bin 1 x Tip Tip, xs')
897 | otherwise = case create (s `shiftR` 1) xs of
898 res@(_, []) -> res
899 (l, y:ys) -> case create (s `shiftR` 1) ys of
900 (r, zs) -> (link y l r, zs)
901
902 {--------------------------------------------------------------------
903 Eq converts the set to a list. In a lazy setting, this
904 actually seems one of the faster methods to compare two trees
905 and it is certainly the simplest :-)
906 --------------------------------------------------------------------}
907 instance Eq a => Eq (Set a) where
908 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
909
910 {--------------------------------------------------------------------
911 Ord
912 --------------------------------------------------------------------}
913
914 instance Ord a => Ord (Set a) where
915 compare s1 s2 = compare (toAscList s1) (toAscList s2)
916
917 {--------------------------------------------------------------------
918 Show
919 --------------------------------------------------------------------}
920 instance Show a => Show (Set a) where
921 showsPrec p xs = showParen (p > 10) $
922 showString "fromList " . shows (toList xs)
923
924 {--------------------------------------------------------------------
925 Read
926 --------------------------------------------------------------------}
927 instance (Read a, Ord a) => Read (Set a) where
928 #ifdef __GLASGOW_HASKELL__
929 readPrec = parens $ prec 10 $ do
930 Ident "fromList" <- lexP
931 xs <- readPrec
932 return (fromList xs)
933
934 readListPrec = readListPrecDefault
935 #else
936 readsPrec p = readParen (p > 10) $ \ r -> do
937 ("fromList",s) <- lex r
938 (xs,t) <- reads s
939 return (fromList xs,t)
940 #endif
941
942 {--------------------------------------------------------------------
943 Typeable/Data
944 --------------------------------------------------------------------}
945
946 #include "Typeable.h"
947 INSTANCE_TYPEABLE1(Set,setTc,"Set")
948
949 {--------------------------------------------------------------------
950 NFData
951 --------------------------------------------------------------------}
952
953 instance NFData a => NFData (Set a) where
954 rnf Tip = ()
955 rnf (Bin _ y l r) = rnf y `seq` rnf l `seq` rnf r
956
957 {--------------------------------------------------------------------
958 Utility functions that return sub-ranges of the original
959 tree. Some functions take a `Maybe value` as an argument to
960 allow comparisons against infinite values. These are called `blow`
961 (Nothing is -\infty) and `bhigh` (here Nothing is +\infty).
962 We use MaybeS value, which is a Maybe strict in the Just case.
963
964 [trim blow bhigh t] A tree that is either empty or where [x > blow]
965 and [x < bhigh] for the value [x] of the root.
966 [filterGt blow t] A tree where for all values [k]. [k > blow]
967 [filterLt bhigh t] A tree where for all values [k]. [k < bhigh]
968
969 [split k t] Returns two trees [l] and [r] where all values
970 in [l] are <[k] and all keys in [r] are >[k].
971 [splitMember k t] Just like [split] but also returns whether [k]
972 was found in the tree.
973 --------------------------------------------------------------------}
974
975 data MaybeS a = NothingS | JustS !a
976
977 {--------------------------------------------------------------------
978 [trim blo bhi t] trims away all subtrees that surely contain no
979 values between the range [blo] to [bhi]. The returned tree is either
980 empty or the key of the root is between @blo@ and @bhi@.
981 --------------------------------------------------------------------}
982 trim :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a
983 trim NothingS NothingS t = t
984 trim (JustS lx) NothingS t = greater lx t where greater lo (Bin _ x _ r) | x <= lo = greater lo r
985 greater _ t' = t'
986 trim NothingS (JustS hx) t = lesser hx t where lesser hi (Bin _ x l _) | x >= hi = lesser hi l
987 lesser _ t' = t'
988 trim (JustS lx) (JustS hx) t = middle lx hx t where middle lo hi (Bin _ x _ r) | x <= lo = middle lo hi r
989 middle lo hi (Bin _ x l _) | x >= hi = middle lo hi l
990 middle _ _ t' = t'
991 #if __GLASGOW_HASKELL__ >= 700
992 {-# INLINABLE trim #-}
993 #endif
994
995 {--------------------------------------------------------------------
996 [filterGt b t] filter all values >[b] from tree [t]
997 [filterLt b t] filter all values <[b] from tree [t]
998 --------------------------------------------------------------------}
999 filterGt :: Ord a => MaybeS a -> Set a -> Set a
1000 filterGt NothingS t = t
1001 filterGt (JustS b) t = filter' b t
1002 where filter' _ Tip = Tip
1003 filter' b' (Bin _ x l r) =
1004 case compare b' x of LT -> link x (filter' b' l) r
1005 EQ -> r
1006 GT -> filter' b' r
1007 #if __GLASGOW_HASKELL__ >= 700
1008 {-# INLINABLE filterGt #-}
1009 #endif
1010
1011 filterLt :: Ord a => MaybeS a -> Set a -> Set a
1012 filterLt NothingS t = t
1013 filterLt (JustS b) t = filter' b t
1014 where filter' _ Tip = Tip
1015 filter' b' (Bin _ x l r) =
1016 case compare x b' of LT -> link x l (filter' b' r)
1017 EQ -> l
1018 GT -> filter' b' l
1019 #if __GLASGOW_HASKELL__ >= 700
1020 {-# INLINABLE filterLt #-}
1021 #endif
1022
1023 {--------------------------------------------------------------------
1024 Split
1025 --------------------------------------------------------------------}
1026 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
1027 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
1028 -- comprises the elements of @set@ greater than @x@.
1029 split :: Ord a => a -> Set a -> (Set a,Set a)
1030 split x0 t0 = toPair $ go x0 t0
1031 where
1032 go _ Tip = (Tip :*: Tip)
1033 go x (Bin _ y l r)
1034 = case compare x y of
1035 LT -> let (lt :*: gt) = go x l in (lt :*: link y gt r)
1036 GT -> let (lt :*: gt) = go x r in (link y l lt :*: gt)
1037 EQ -> (l :*: r)
1038 #if __GLASGOW_HASKELL__ >= 700
1039 {-# INLINABLE split #-}
1040 #endif
1041
1042 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
1043 -- element was found in the original set.
1044 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
1045 splitMember _ Tip = (Tip, False, Tip)
1046 splitMember x (Bin _ y l r)
1047 = case compare x y of
1048 LT -> let (lt, found, gt) = splitMember x l
1049 gt' = link y gt r
1050 in gt' `seq` (lt, found, gt')
1051 GT -> let (lt, found, gt) = splitMember x r
1052 lt' = link y l lt
1053 in lt' `seq` (lt', found, gt)
1054 EQ -> (l, True, r)
1055 #if __GLASGOW_HASKELL__ >= 700
1056 {-# INLINABLE splitMember #-}
1057 #endif
1058
1059 {--------------------------------------------------------------------
1060 Indexing
1061 --------------------------------------------------------------------}
1062
1063 -- | /O(log n)/. Return the /index/ of an element, which is its zero-based
1064 -- index in the sorted sequence of elements. The index is a number from /0/ up
1065 -- to, but not including, the 'size' of the set. Calls 'error' when the element
1066 -- is not a 'member' of the set.
1067 --
1068 -- > findIndex 2 (fromList [5,3]) Error: element is not in the set
1069 -- > findIndex 3 (fromList [5,3]) == 0
1070 -- > findIndex 5 (fromList [5,3]) == 1
1071 -- > findIndex 6 (fromList [5,3]) Error: element is not in the set
1072
1073 -- See Note: Type of local 'go' function
1074 findIndex :: Ord a => a -> Set a -> Int
1075 findIndex = go 0
1076 where
1077 go :: Ord a => Int -> a -> Set a -> Int
1078 STRICT_1_OF_3(go)
1079 STRICT_2_OF_3(go)
1080 go _ _ Tip = error "Set.findIndex: element is not in the set"
1081 go idx x (Bin _ kx l r) = case compare x kx of
1082 LT -> go idx x l
1083 GT -> go (idx + size l + 1) x r
1084 EQ -> idx + size l
1085 #if __GLASGOW_HASKELL__ >= 700
1086 {-# INLINABLE findIndex #-}
1087 #endif
1088
1089 -- | /O(log n)/. Lookup the /index/ of an element, which is its zero-based index in
1090 -- the sorted sequence of elements. The index is a number from /0/ up to, but not
1091 -- including, the 'size' of the set.
1092 --
1093 -- > isJust (lookupIndex 2 (fromList [5,3])) == False
1094 -- > fromJust (lookupIndex 3 (fromList [5,3])) == 0
1095 -- > fromJust (lookupIndex 5 (fromList [5,3])) == 1
1096 -- > isJust (lookupIndex 6 (fromList [5,3])) == False
1097
1098 -- See Note: Type of local 'go' function
1099 lookupIndex :: Ord a => a -> Set a -> Maybe Int
1100 lookupIndex = go 0
1101 where
1102 go :: Ord a => Int -> a -> Set a -> Maybe Int
1103 STRICT_1_OF_3(go)
1104 STRICT_2_OF_3(go)
1105 go _ _ Tip = Nothing
1106 go idx x (Bin _ kx l r) = case compare x kx of
1107 LT -> go idx x l
1108 GT -> go (idx + size l + 1) x r
1109 EQ -> Just $! idx + size l
1110 #if __GLASGOW_HASKELL__ >= 700
1111 {-# INLINABLE lookupIndex #-}
1112 #endif
1113
1114 -- | /O(log n)/. Retrieve an element by its /index/, i.e. by its zero-based
1115 -- index in the sorted sequence of elements. If the /index/ is out of range (less
1116 -- than zero, greater or equal to 'size' of the set), 'error' is called.
1117 --
1118 -- > elemAt 0 (fromList [5,3]) == 3
1119 -- > elemAt 1 (fromList [5,3]) == 5
1120 -- > elemAt 2 (fromList [5,3]) Error: index out of range
1121
1122 elemAt :: Int -> Set a -> a
1123 STRICT_1_OF_2(elemAt)
1124 elemAt _ Tip = error "Set.elemAt: index out of range"
1125 elemAt i (Bin _ x l r)
1126 = case compare i sizeL of
1127 LT -> elemAt i l
1128 GT -> elemAt (i-sizeL-1) r
1129 EQ -> x
1130 where
1131 sizeL = size l
1132
1133 -- | /O(log n)/. Delete the element at /index/, i.e. by its zero-based index in
1134 -- the sorted sequence of elements. If the /index/ is out of range (less than zero,
1135 -- greater or equal to 'size' of the set), 'error' is called.
1136 --
1137 -- > deleteAt 0 (fromList [5,3]) == singleton 5
1138 -- > deleteAt 1 (fromList [5,3]) == singleton 3
1139 -- > deleteAt 2 (fromList [5,3]) Error: index out of range
1140 -- > deleteAt (-1) (fromList [5,3]) Error: index out of range
1141
1142 deleteAt :: Int -> Set a -> Set a
1143 deleteAt i t = i `seq`
1144 case t of
1145 Tip -> error "Set.deleteAt: index out of range"
1146 Bin _ x l r -> case compare i sizeL of
1147 LT -> balanceR x (deleteAt i l) r
1148 GT -> balanceL x l (deleteAt (i-sizeL-1) r)
1149 EQ -> glue l r
1150 where
1151 sizeL = size l
1152
1153
1154 {--------------------------------------------------------------------
1155 Utility functions that maintain the balance properties of the tree.
1156 All constructors assume that all values in [l] < [x] and all values
1157 in [r] > [x], and that [l] and [r] are valid trees.
1158
1159 In order of sophistication:
1160 [Bin sz x l r] The type constructor.
1161 [bin x l r] Maintains the correct size, assumes that both [l]
1162 and [r] are balanced with respect to each other.
1163 [balance x l r] Restores the balance and size.
1164 Assumes that the original tree was balanced and
1165 that [l] or [r] has changed by at most one element.
1166 [link x l r] Restores balance and size.
1167
1168 Furthermore, we can construct a new tree from two trees. Both operations
1169 assume that all values in [l] < all values in [r] and that [l] and [r]
1170 are valid:
1171 [glue l r] Glues [l] and [r] together. Assumes that [l] and
1172 [r] are already balanced with respect to each other.
1173 [merge l r] Merges two trees and restores balance.
1174
1175 Note: in contrast to Adam's paper, we use (<=) comparisons instead
1176 of (<) comparisons in [link], [merge] and [balance].
1177 Quickcheck (on [difference]) showed that this was necessary in order
1178 to maintain the invariants. It is quite unsatisfactory that I haven't
1179 been able to find out why this is actually the case! Fortunately, it
1180 doesn't hurt to be a bit more conservative.
1181 --------------------------------------------------------------------}
1182
1183 {--------------------------------------------------------------------
1184 Link
1185 --------------------------------------------------------------------}
1186 link :: a -> Set a -> Set a -> Set a
1187 link x Tip r = insertMin x r
1188 link x l Tip = insertMax x l
1189 link x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
1190 | delta*sizeL < sizeR = balanceL z (link x l lz) rz
1191 | delta*sizeR < sizeL = balanceR y ly (link x ry r)
1192 | otherwise = bin x l r
1193
1194
1195 -- insertMin and insertMax don't perform potentially expensive comparisons.
1196 insertMax,insertMin :: a -> Set a -> Set a
1197 insertMax x t
1198 = case t of
1199 Tip -> singleton x
1200 Bin _ y l r
1201 -> balanceR y l (insertMax x r)
1202
1203 insertMin x t
1204 = case t of
1205 Tip -> singleton x
1206 Bin _ y l r
1207 -> balanceL y (insertMin x l) r
1208
1209 {--------------------------------------------------------------------
1210 [merge l r]: merges two trees.
1211 --------------------------------------------------------------------}
1212 merge :: Set a -> Set a -> Set a
1213 merge Tip r = r
1214 merge l Tip = l
1215 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
1216 | delta*sizeL < sizeR = balanceL y (merge l ly) ry
1217 | delta*sizeR < sizeL = balanceR x lx (merge rx r)
1218 | otherwise = glue l r
1219
1220 {--------------------------------------------------------------------
1221 [glue l r]: glues two trees together.
1222 Assumes that [l] and [r] are already balanced with respect to each other.
1223 --------------------------------------------------------------------}
1224 glue :: Set a -> Set a -> Set a
1225 glue Tip r = r
1226 glue l Tip = l
1227 glue l r
1228 | size l > size r = let (m,l') = deleteFindMax l in balanceR m l' r
1229 | otherwise = let (m,r') = deleteFindMin r in balanceL m l r'
1230
1231 -- | /O(log n)/. Delete and find the minimal element.
1232 --
1233 -- > deleteFindMin set = (findMin set, deleteMin set)
1234
1235 deleteFindMin :: Set a -> (a,Set a)
1236 deleteFindMin t
1237 = case t of
1238 Bin _ x Tip r -> (x,r)
1239 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balanceR x l' r)
1240 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
1241
1242 -- | /O(log n)/. Delete and find the maximal element.
1243 --
1244 -- > deleteFindMax set = (findMax set, deleteMax set)
1245 deleteFindMax :: Set a -> (a,Set a)
1246 deleteFindMax t
1247 = case t of
1248 Bin _ x l Tip -> (x,l)
1249 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balanceL x l r')
1250 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
1251
1252 -- | /O(log n)/. Retrieves the minimal key of the set, and the set
1253 -- stripped of that element, or 'Nothing' if passed an empty set.
1254 minView :: Set a -> Maybe (a, Set a)
1255 minView Tip = Nothing
1256 minView x = Just (deleteFindMin x)
1257
1258 -- | /O(log n)/. Retrieves the maximal key of the set, and the set
1259 -- stripped of that element, or 'Nothing' if passed an empty set.
1260 maxView :: Set a -> Maybe (a, Set a)
1261 maxView Tip = Nothing
1262 maxView x = Just (deleteFindMax x)
1263
1264 {--------------------------------------------------------------------
1265 [balance x l r] balances two trees with value x.
1266 The sizes of the trees should balance after decreasing the
1267 size of one of them. (a rotation).
1268
1269 [delta] is the maximal relative difference between the sizes of
1270 two trees, it corresponds with the [w] in Adams' paper.
1271 [ratio] is the ratio between an outer and inner sibling of the
1272 heavier subtree in an unbalanced setting. It determines
1273 whether a double or single rotation should be performed
1274 to restore balance. It is correspondes with the inverse
1275 of $\alpha$ in Adam's article.
1276
1277 Note that according to the Adam's paper:
1278 - [delta] should be larger than 4.646 with a [ratio] of 2.
1279 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
1280
1281 But the Adam's paper is errorneous:
1282 - it can be proved that for delta=2 and delta>=5 there does
1283 not exist any ratio that would work
1284 - delta=4.5 and ratio=2 does not work
1285
1286 That leaves two reasonable variants, delta=3 and delta=4,
1287 both with ratio=2.
1288
1289 - A lower [delta] leads to a more 'perfectly' balanced tree.
1290 - A higher [delta] performs less rebalancing.
1291
1292 In the benchmarks, delta=3 is faster on insert operations,
1293 and delta=4 has slightly better deletes. As the insert speedup
1294 is larger, we currently use delta=3.
1295
1296 --------------------------------------------------------------------}
1297 delta,ratio :: Int
1298 delta = 3
1299 ratio = 2
1300
1301 -- The balance function is equivalent to the following:
1302 --
1303 -- balance :: a -> Set a -> Set a -> Set a
1304 -- balance x l r
1305 -- | sizeL + sizeR <= 1 = Bin sizeX x l r
1306 -- | sizeR > delta*sizeL = rotateL x l r
1307 -- | sizeL > delta*sizeR = rotateR x l r
1308 -- | otherwise = Bin sizeX x l r
1309 -- where
1310 -- sizeL = size l
1311 -- sizeR = size r
1312 -- sizeX = sizeL + sizeR + 1
1313 --
1314 -- rotateL :: a -> Set a -> Set a -> Set a
1315 -- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
1316 -- | otherwise = doubleL x l r
1317 -- rotateR :: a -> Set a -> Set a -> Set a
1318 -- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
1319 -- | otherwise = doubleR x l r
1320 --
1321 -- singleL, singleR :: a -> Set a -> Set a -> Set a
1322 -- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
1323 -- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
1324 --
1325 -- doubleL, doubleR :: a -> Set a -> Set a -> Set a
1326 -- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
1327 -- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
1328 --
1329 -- It is only written in such a way that every node is pattern-matched only once.
1330 --
1331 -- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
1332 -- In case it is needed, it can be found in Data.Map.
1333
1334 -- Functions balanceL and balanceR are specialised versions of balance.
1335 -- balanceL only checks whether the left subtree is too big,
1336 -- balanceR only checks whether the right subtree is too big.
1337
1338 -- balanceL is called when left subtree might have been inserted to or when
1339 -- right subtree might have been deleted from.
1340 balanceL :: a -> Set a -> Set a -> Set a
1341 balanceL x l r = case r of
1342 Tip -> case l of
1343 Tip -> Bin 1 x Tip Tip
1344 (Bin _ _ Tip Tip) -> Bin 2 x l Tip
1345 (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
1346 (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
1347 (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
1348 | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
1349 | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)
1350
1351 (Bin rs _ _ _) -> case l of
1352 Tip -> Bin (1+rs) x Tip r
1353
1354 (Bin ls lx ll lr)
1355 | ls > delta*rs -> case (ll, lr) of
1356 (Bin lls _ _ _, Bin lrs lrx lrl lrr)
1357 | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
1358 | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
1359 (_, _) -> error "Failure in Data.Map.balanceL"
1360 | otherwise -> Bin (1+ls+rs) x l r
1361 {-# NOINLINE balanceL #-}
1362
1363 -- balanceR is called when right subtree might have been inserted to or when
1364 -- left subtree might have been deleted from.
1365 balanceR :: a -> Set a -> Set a -> Set a
1366 balanceR x l r = case l of
1367 Tip -> case r of
1368 Tip -> Bin 1 x Tip Tip
1369 (Bin _ _ Tip Tip) -> Bin 2 x Tip r
1370 (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
1371 (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
1372 (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
1373 | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
1374 | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)
1375
1376 (Bin ls _ _ _) -> case r of
1377 Tip -> Bin (1+ls) x l Tip
1378
1379 (Bin rs rx rl rr)
1380 | rs > delta*ls -> case (rl, rr) of
1381 (Bin rls rlx rll rlr, Bin rrs _ _ _)
1382 | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
1383 | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
1384 (_, _) -> error "Failure in Data.Map.balanceR"
1385 | otherwise -> Bin (1+ls+rs) x l r
1386 {-# NOINLINE balanceR #-}
1387
1388 {--------------------------------------------------------------------
1389 The bin constructor maintains the size of the tree
1390 --------------------------------------------------------------------}
1391 bin :: a -> Set a -> Set a -> Set a
1392 bin x l r
1393 = Bin (size l + size r + 1) x l r
1394 {-# INLINE bin #-}
1395
1396
1397 {--------------------------------------------------------------------
1398 Utilities
1399 --------------------------------------------------------------------}
1400 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1401 foldlStrict f = go
1402 where
1403 go z [] = z
1404 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1405 {-# INLINE foldlStrict #-}
1406
1407 {--------------------------------------------------------------------
1408 Debugging
1409 --------------------------------------------------------------------}
1410 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1411 -- in a compressed, hanging format.
1412 showTree :: Show a => Set a -> String
1413 showTree s
1414 = showTreeWith True False s
1415
1416
1417 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
1418 the tree that implements the set. If @hang@ is
1419 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
1420 @wide@ is 'True', an extra wide version is shown.
1421
1422 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
1423 > 4
1424 > +--2
1425 > | +--1
1426 > | +--3
1427 > +--5
1428 >
1429 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
1430 > 4
1431 > |
1432 > +--2
1433 > | |
1434 > | +--1
1435 > | |
1436 > | +--3
1437 > |
1438 > +--5
1439 >
1440 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
1441 > +--5
1442 > |
1443 > 4
1444 > |
1445 > | +--3
1446 > | |
1447 > +--2
1448 > |
1449 > +--1
1450
1451 -}
1452 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
1453 showTreeWith hang wide t
1454 | hang = (showsTreeHang wide [] t) ""
1455 | otherwise = (showsTree wide [] [] t) ""
1456
1457 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
1458 showsTree wide lbars rbars t
1459 = case t of
1460 Tip -> showsBars lbars . showString "|\n"
1461 Bin _ x Tip Tip
1462 -> showsBars lbars . shows x . showString "\n"
1463 Bin _ x l r
1464 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1465 showWide wide rbars .
1466 showsBars lbars . shows x . showString "\n" .
1467 showWide wide lbars .
1468 showsTree wide (withEmpty lbars) (withBar lbars) l
1469
1470 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
1471 showsTreeHang wide bars t
1472 = case t of
1473 Tip -> showsBars bars . showString "|\n"
1474 Bin _ x Tip Tip
1475 -> showsBars bars . shows x . showString "\n"
1476 Bin _ x l r
1477 -> showsBars bars . shows x . showString "\n" .
1478 showWide wide bars .
1479 showsTreeHang wide (withBar bars) l .
1480 showWide wide bars .
1481 showsTreeHang wide (withEmpty bars) r
1482
1483 showWide :: Bool -> [String] -> String -> String
1484 showWide wide bars
1485 | wide = showString (concat (reverse bars)) . showString "|\n"
1486 | otherwise = id
1487
1488 showsBars :: [String] -> ShowS
1489 showsBars bars
1490 = case bars of
1491 [] -> id
1492 _ -> showString (concat (reverse (tail bars))) . showString node
1493
1494 node :: String
1495 node = "+--"
1496
1497 withBar, withEmpty :: [String] -> [String]
1498 withBar bars = "| ":bars
1499 withEmpty bars = " ":bars
1500
1501 {--------------------------------------------------------------------
1502 Assertions
1503 --------------------------------------------------------------------}
1504 -- | /O(n)/. Test if the internal set structure is valid.
1505 valid :: Ord a => Set a -> Bool
1506 valid t
1507 = balanced t && ordered t && validsize t
1508
1509 ordered :: Ord a => Set a -> Bool
1510 ordered t
1511 = bounded (const True) (const True) t
1512 where
1513 bounded lo hi t'
1514 = case t' of
1515 Tip -> True
1516 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
1517
1518 balanced :: Set a -> Bool
1519 balanced t
1520 = case t of
1521 Tip -> True
1522 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1523 balanced l && balanced r
1524
1525 validsize :: Set a -> Bool
1526 validsize t
1527 = (realsize t == Just (size t))
1528 where
1529 realsize t'
1530 = case t' of
1531 Tip -> Just 0
1532 Bin sz _ l r -> case (realsize l,realsize r) of
1533 (Just n,Just m) | n+m+1 == sz -> Just sz
1534 _ -> Nothing