ffcdfd0c2c19ee7d30a54a692fd2fc2e3a0a7638
[packages/containers.git] / Data / Set / Base.hs
1 {-# LANGUAGE CPP #-}
2 #if __GLASGOW_HASKELL__
3 {-# LANGUAGE DeriveDataTypeable, StandaloneDeriving #-}
4 #endif
5 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
6 {-# LANGUAGE Trustworthy #-}
7 #endif
8 #if __GLASGOW_HASKELL__ >= 708
9 {-# LANGUAGE RoleAnnotations #-}
10 {-# LANGUAGE TypeFamilies #-}
11 #endif
12 -----------------------------------------------------------------------------
13 -- |
14 -- Module : Data.Set.Base
15 -- Copyright : (c) Daan Leijen 2002
16 -- License : BSD-style
17 -- Maintainer : libraries@haskell.org
18 -- Stability : provisional
19 -- Portability : portable
20 --
21 -- An efficient implementation of sets.
22 --
23 -- These modules are intended to be imported qualified, to avoid name
24 -- clashes with Prelude functions, e.g.
25 --
26 -- > import Data.Set (Set)
27 -- > import qualified Data.Set as Set
28 --
29 -- The implementation of 'Set' is based on /size balanced/ binary trees (or
30 -- trees of /bounded balance/) as described by:
31 --
32 -- * Stephen Adams, \"/Efficient sets: a balancing act/\",
33 -- Journal of Functional Programming 3(4):553-562, October 1993,
34 -- <http://www.swiss.ai.mit.edu/~adams/BB/>.
35 --
36 -- * J. Nievergelt and E.M. Reingold,
37 -- \"/Binary search trees of bounded balance/\",
38 -- SIAM journal of computing 2(1), March 1973.
39 --
40 -- Note that the implementation is /left-biased/ -- the elements of a
41 -- first argument are always preferred to the second, for example in
42 -- 'union' or 'insert'. Of course, left-biasing can only be observed
43 -- when equality is an equivalence relation instead of structural
44 -- equality.
45 -----------------------------------------------------------------------------
46
47 -- [Note: Using INLINABLE]
48 -- ~~~~~~~~~~~~~~~~~~~~~~~
49 -- It is crucial to the performance that the functions specialize on the Ord
50 -- type when possible. GHC 7.0 and higher does this by itself when it sees th
51 -- unfolding of a function -- that is why all public functions are marked
52 -- INLINABLE (that exposes the unfolding).
53
54
55 -- [Note: Using INLINE]
56 -- ~~~~~~~~~~~~~~~~~~~~
57 -- For other compilers and GHC pre 7.0, we mark some of the functions INLINE.
58 -- We mark the functions that just navigate down the tree (lookup, insert,
59 -- delete and similar). That navigation code gets inlined and thus specialized
60 -- when possible. There is a price to pay -- code growth. The code INLINED is
61 -- therefore only the tree navigation, all the real work (rebalancing) is not
62 -- INLINED by using a NOINLINE.
63 --
64 -- All methods marked INLINE have to be nonrecursive -- a 'go' function doing
65 -- the real work is provided.
66
67
68 -- [Note: Type of local 'go' function]
69 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
70 -- If the local 'go' function uses an Ord class, it sometimes heap-allocates
71 -- the Ord dictionary when the 'go' function does not have explicit type.
72 -- In that case we give 'go' explicit type. But this slightly decrease
73 -- performance, as the resulting 'go' function can float out to top level.
74
75
76 -- [Note: Local 'go' functions and capturing]
77 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
78 -- As opposed to IntSet, when 'go' function captures an argument, increased
79 -- heap-allocation can occur: sometimes in a polymorphic function, the 'go'
80 -- floats out of its enclosing function and then it heap-allocates the
81 -- dictionary and the argument. Maybe it floats out too late and strictness
82 -- analyzer cannot see that these could be passed on stack.
83 --
84 -- For example, change 'member' so that its local 'go' function is not passing
85 -- argument x and then look at the resulting code for hedgeInt.
86
87
88 -- [Note: Order of constructors]
89 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
90 -- The order of constructors of Set matters when considering performance.
91 -- Currently in GHC 7.0, when type has 2 constructors, a forward conditional
92 -- jump is made when successfully matching second constructor. Successful match
93 -- of first constructor results in the forward jump not taken.
94 -- On GHC 7.0, reordering constructors from Tip | Bin to Bin | Tip
95 -- improves the benchmark by up to 10% on x86.
96
97 module Data.Set.Base (
98 -- * Set type
99 Set(..) -- instance Eq,Ord,Show,Read,Data,Typeable
100
101 -- * Operators
102 , (\\)
103
104 -- * Query
105 , null
106 , size
107 , member
108 , notMember
109 , lookupLT
110 , lookupGT
111 , lookupLE
112 , lookupGE
113 , isSubsetOf
114 , isProperSubsetOf
115
116 -- * Construction
117 , empty
118 , singleton
119 , insert
120 , delete
121
122 -- * Combine
123 , union
124 , unions
125 , difference
126 , intersection
127
128 -- * Filter
129 , filter
130 , partition
131 , split
132 , splitMember
133 , splitRoot
134
135 -- * Indexed
136 , lookupIndex
137 , findIndex
138 , elemAt
139 , deleteAt
140
141 -- * Map
142 , map
143 , mapMonotonic
144
145 -- * Folds
146 , foldr
147 , foldl
148 -- ** Strict folds
149 , foldr'
150 , foldl'
151 -- ** Legacy folds
152 , fold
153
154 -- * Min\/Max
155 , findMin
156 , findMax
157 , deleteMin
158 , deleteMax
159 , deleteFindMin
160 , deleteFindMax
161 , maxView
162 , minView
163
164 -- * Conversion
165
166 -- ** List
167 , elems
168 , toList
169 , fromList
170
171 -- ** Ordered list
172 , toAscList
173 , toDescList
174 , fromAscList
175 , fromDistinctAscList
176
177 -- * Debugging
178 , showTree
179 , showTreeWith
180 , valid
181
182 -- Internals (for testing)
183 , bin
184 , balanced
185 , link
186 , merge
187 ) where
188
189 import Prelude hiding (filter,foldl,foldr,null,map)
190 import qualified Data.List as List
191 import Data.Bits (shiftL, shiftR)
192 import Data.Monoid (Monoid(..))
193 import qualified Data.Foldable as Foldable
194 import Data.Typeable
195 import Control.DeepSeq (NFData(rnf))
196
197 import Data.StrictPair
198
199 #if __GLASGOW_HASKELL__
200 import GHC.Exts ( build )
201 #if __GLASGOW_HASKELL__ >= 708
202 import qualified GHC.Exts as GHCExts
203 #endif
204 import Text.Read
205 import Data.Data
206 #endif
207
208 -- Use macros to define strictness of functions.
209 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
210 -- We do not use BangPatterns, because they are not in any standard and we
211 -- want the compilers to be compiled by as many compilers as possible.
212 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
213 #define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
214 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
215
216 {--------------------------------------------------------------------
217 Operators
218 --------------------------------------------------------------------}
219 infixl 9 \\ --
220
221 -- | /O(n+m)/. See 'difference'.
222 (\\) :: Ord a => Set a -> Set a -> Set a
223 m1 \\ m2 = difference m1 m2
224 #if __GLASGOW_HASKELL__ >= 700
225 {-# INLINABLE (\\) #-}
226 #endif
227
228 {--------------------------------------------------------------------
229 Sets are size balanced trees
230 --------------------------------------------------------------------}
231 -- | A set of values @a@.
232
233 -- See Note: Order of constructors
234 data Set a = Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a)
235 | Tip
236
237 type Size = Int
238
239 #if __GLASGOW_HASKELL__ >= 708
240 type role Set nominal
241 #endif
242
243 instance Ord a => Monoid (Set a) where
244 mempty = empty
245 mappend = union
246 mconcat = unions
247
248 instance Foldable.Foldable Set where
249 fold t = go t
250 where go Tip = mempty
251 go (Bin 1 k _ _) = k
252 go (Bin _ k l r) = go l `mappend` (k `mappend` go r)
253 {-# INLINABLE fold #-}
254 foldr = foldr
255 {-# INLINE foldr #-}
256 foldl = foldl
257 {-# INLINE foldl #-}
258 foldMap f t = go t
259 where go Tip = mempty
260 go (Bin 1 k _ _) = f k
261 go (Bin _ k l r) = go l `mappend` (f k `mappend` go r)
262 {-# INLINE foldMap #-}
263
264 #if __GLASGOW_HASKELL__
265
266 {--------------------------------------------------------------------
267 A Data instance
268 --------------------------------------------------------------------}
269
270 -- This instance preserves data abstraction at the cost of inefficiency.
271 -- We provide limited reflection services for the sake of data abstraction.
272
273 instance (Data a, Ord a) => Data (Set a) where
274 gfoldl f z set = z fromList `f` (toList set)
275 toConstr _ = fromListConstr
276 gunfold k z c = case constrIndex c of
277 1 -> k (z fromList)
278 _ -> error "gunfold"
279 dataTypeOf _ = setDataType
280 dataCast1 f = gcast1 f
281
282 fromListConstr :: Constr
283 fromListConstr = mkConstr setDataType "fromList" [] Prefix
284
285 setDataType :: DataType
286 setDataType = mkDataType "Data.Set.Base.Set" [fromListConstr]
287
288 #endif
289
290 {--------------------------------------------------------------------
291 Query
292 --------------------------------------------------------------------}
293 -- | /O(1)/. Is this the empty set?
294 null :: Set a -> Bool
295 null Tip = True
296 null (Bin {}) = False
297 {-# INLINE null #-}
298
299 -- | /O(1)/. The number of elements in the set.
300 size :: Set a -> Int
301 size Tip = 0
302 size (Bin sz _ _ _) = sz
303 {-# INLINE size #-}
304
305 -- | /O(log n)/. Is the element in the set?
306 member :: Ord a => a -> Set a -> Bool
307 member = go
308 where
309 STRICT_1_OF_2(go)
310 go _ Tip = False
311 go x (Bin _ y l r) = case compare x y of
312 LT -> go x l
313 GT -> go x r
314 EQ -> True
315 #if __GLASGOW_HASKELL__ >= 700
316 {-# INLINABLE member #-}
317 #else
318 {-# INLINE member #-}
319 #endif
320
321 -- | /O(log n)/. Is the element not in the set?
322 notMember :: Ord a => a -> Set a -> Bool
323 notMember a t = not $ member a t
324 #if __GLASGOW_HASKELL__ >= 700
325 {-# INLINABLE notMember #-}
326 #else
327 {-# INLINE notMember #-}
328 #endif
329
330 -- | /O(log n)/. Find largest element smaller than the given one.
331 --
332 -- > lookupLT 3 (fromList [3, 5]) == Nothing
333 -- > lookupLT 5 (fromList [3, 5]) == Just 3
334 lookupLT :: Ord a => a -> Set a -> Maybe a
335 lookupLT = goNothing
336 where
337 STRICT_1_OF_2(goNothing)
338 goNothing _ Tip = Nothing
339 goNothing x (Bin _ y l r) | x <= y = goNothing x l
340 | otherwise = goJust x y r
341
342 STRICT_1_OF_3(goJust)
343 goJust _ best Tip = Just best
344 goJust x best (Bin _ y l r) | x <= y = goJust x best l
345 | otherwise = goJust x y r
346 #if __GLASGOW_HASKELL__ >= 700
347 {-# INLINABLE lookupLT #-}
348 #else
349 {-# INLINE lookupLT #-}
350 #endif
351
352 -- | /O(log n)/. Find smallest element greater than the given one.
353 --
354 -- > lookupGT 4 (fromList [3, 5]) == Just 5
355 -- > lookupGT 5 (fromList [3, 5]) == Nothing
356 lookupGT :: Ord a => a -> Set a -> Maybe a
357 lookupGT = goNothing
358 where
359 STRICT_1_OF_2(goNothing)
360 goNothing _ Tip = Nothing
361 goNothing x (Bin _ y l r) | x < y = goJust x y l
362 | otherwise = goNothing x r
363
364 STRICT_1_OF_3(goJust)
365 goJust _ best Tip = Just best
366 goJust x best (Bin _ y l r) | x < y = goJust x y l
367 | otherwise = goJust x best r
368 #if __GLASGOW_HASKELL__ >= 700
369 {-# INLINABLE lookupGT #-}
370 #else
371 {-# INLINE lookupGT #-}
372 #endif
373
374 -- | /O(log n)/. Find largest element smaller or equal to the given one.
375 --
376 -- > lookupLE 2 (fromList [3, 5]) == Nothing
377 -- > lookupLE 4 (fromList [3, 5]) == Just 3
378 -- > lookupLE 5 (fromList [3, 5]) == Just 5
379 lookupLE :: Ord a => a -> Set a -> Maybe a
380 lookupLE = goNothing
381 where
382 STRICT_1_OF_2(goNothing)
383 goNothing _ Tip = Nothing
384 goNothing x (Bin _ y l r) = case compare x y of LT -> goNothing x l
385 EQ -> Just y
386 GT -> goJust x y r
387
388 STRICT_1_OF_3(goJust)
389 goJust _ best Tip = Just best
390 goJust x best (Bin _ y l r) = case compare x y of LT -> goJust x best l
391 EQ -> Just y
392 GT -> goJust x y r
393 #if __GLASGOW_HASKELL__ >= 700
394 {-# INLINABLE lookupLE #-}
395 #else
396 {-# INLINE lookupLE #-}
397 #endif
398
399 -- | /O(log n)/. Find smallest element greater or equal to the given one.
400 --
401 -- > lookupGE 3 (fromList [3, 5]) == Just 3
402 -- > lookupGE 4 (fromList [3, 5]) == Just 5
403 -- > lookupGE 6 (fromList [3, 5]) == Nothing
404 lookupGE :: Ord a => a -> Set a -> Maybe a
405 lookupGE = goNothing
406 where
407 STRICT_1_OF_2(goNothing)
408 goNothing _ Tip = Nothing
409 goNothing x (Bin _ y l r) = case compare x y of LT -> goJust x y l
410 EQ -> Just y
411 GT -> goNothing x r
412
413 STRICT_1_OF_3(goJust)
414 goJust _ best Tip = Just best
415 goJust x best (Bin _ y l r) = case compare x y of LT -> goJust x y l
416 EQ -> Just y
417 GT -> goJust x best r
418 #if __GLASGOW_HASKELL__ >= 700
419 {-# INLINABLE lookupGE #-}
420 #else
421 {-# INLINE lookupGE #-}
422 #endif
423
424 {--------------------------------------------------------------------
425 Construction
426 --------------------------------------------------------------------}
427 -- | /O(1)/. The empty set.
428 empty :: Set a
429 empty = Tip
430 {-# INLINE empty #-}
431
432 -- | /O(1)/. Create a singleton set.
433 singleton :: a -> Set a
434 singleton x = Bin 1 x Tip Tip
435 {-# INLINE singleton #-}
436
437 {--------------------------------------------------------------------
438 Insertion, Deletion
439 --------------------------------------------------------------------}
440 -- | /O(log n)/. Insert an element in a set.
441 -- If the set already contains an element equal to the given value,
442 -- it is replaced with the new value.
443
444 -- See Note: Type of local 'go' function
445 insert :: Ord a => a -> Set a -> Set a
446 insert = go
447 where
448 go :: Ord a => a -> Set a -> Set a
449 STRICT_1_OF_2(go)
450 go x Tip = singleton x
451 go x (Bin sz y l r) = case compare x y of
452 LT -> balanceL y (go x l) r
453 GT -> balanceR y l (go x r)
454 EQ -> Bin sz x l r
455 #if __GLASGOW_HASKELL__ >= 700
456 {-# INLINABLE insert #-}
457 #else
458 {-# INLINE insert #-}
459 #endif
460
461 -- Insert an element to the set only if it is not in the set.
462 -- Used by `union`.
463
464 -- See Note: Type of local 'go' function
465 insertR :: Ord a => a -> Set a -> Set a
466 insertR = go
467 where
468 go :: Ord a => a -> Set a -> Set a
469 STRICT_1_OF_2(go)
470 go x Tip = singleton x
471 go x t@(Bin _ y l r) = case compare x y of
472 LT -> balanceL y (go x l) r
473 GT -> balanceR y l (go x r)
474 EQ -> t
475 #if __GLASGOW_HASKELL__ >= 700
476 {-# INLINABLE insertR #-}
477 #else
478 {-# INLINE insertR #-}
479 #endif
480
481 -- | /O(log n)/. Delete an element from a set.
482
483 -- See Note: Type of local 'go' function
484 delete :: Ord a => a -> Set a -> Set a
485 delete = go
486 where
487 go :: Ord a => a -> Set a -> Set a
488 STRICT_1_OF_2(go)
489 go _ Tip = Tip
490 go x (Bin _ y l r) = case compare x y of
491 LT -> balanceR y (go x l) r
492 GT -> balanceL y l (go x r)
493 EQ -> glue l r
494 #if __GLASGOW_HASKELL__ >= 700
495 {-# INLINABLE delete #-}
496 #else
497 {-# INLINE delete #-}
498 #endif
499
500 {--------------------------------------------------------------------
501 Subset
502 --------------------------------------------------------------------}
503 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
504 isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
505 isProperSubsetOf s1 s2
506 = (size s1 < size s2) && (isSubsetOf s1 s2)
507 #if __GLASGOW_HASKELL__ >= 700
508 {-# INLINABLE isProperSubsetOf #-}
509 #endif
510
511
512 -- | /O(n+m)/. Is this a subset?
513 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
514 isSubsetOf :: Ord a => Set a -> Set a -> Bool
515 isSubsetOf t1 t2
516 = (size t1 <= size t2) && (isSubsetOfX t1 t2)
517 #if __GLASGOW_HASKELL__ >= 700
518 {-# INLINABLE isSubsetOf #-}
519 #endif
520
521 isSubsetOfX :: Ord a => Set a -> Set a -> Bool
522 isSubsetOfX Tip _ = True
523 isSubsetOfX _ Tip = False
524 isSubsetOfX (Bin _ x l r) t
525 = found && isSubsetOfX l lt && isSubsetOfX r gt
526 where
527 (lt,found,gt) = splitMember x t
528 #if __GLASGOW_HASKELL__ >= 700
529 {-# INLINABLE isSubsetOfX #-}
530 #endif
531
532
533 {--------------------------------------------------------------------
534 Minimal, Maximal
535 --------------------------------------------------------------------}
536 -- | /O(log n)/. The minimal element of a set.
537 findMin :: Set a -> a
538 findMin (Bin _ x Tip _) = x
539 findMin (Bin _ _ l _) = findMin l
540 findMin Tip = error "Set.findMin: empty set has no minimal element"
541
542 -- | /O(log n)/. The maximal element of a set.
543 findMax :: Set a -> a
544 findMax (Bin _ x _ Tip) = x
545 findMax (Bin _ _ _ r) = findMax r
546 findMax Tip = error "Set.findMax: empty set has no maximal element"
547
548 -- | /O(log n)/. Delete the minimal element. Returns an empty set if the set is empty.
549 deleteMin :: Set a -> Set a
550 deleteMin (Bin _ _ Tip r) = r
551 deleteMin (Bin _ x l r) = balanceR x (deleteMin l) r
552 deleteMin Tip = Tip
553
554 -- | /O(log n)/. Delete the maximal element. Returns an empty set if the set is empty.
555 deleteMax :: Set a -> Set a
556 deleteMax (Bin _ _ l Tip) = l
557 deleteMax (Bin _ x l r) = balanceL x l (deleteMax r)
558 deleteMax Tip = Tip
559
560 {--------------------------------------------------------------------
561 Union.
562 --------------------------------------------------------------------}
563 -- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
564 unions :: Ord a => [Set a] -> Set a
565 unions = foldlStrict union empty
566 #if __GLASGOW_HASKELL__ >= 700
567 {-# INLINABLE unions #-}
568 #endif
569
570 -- | /O(n+m)/. The union of two sets, preferring the first set when
571 -- equal elements are encountered.
572 -- The implementation uses the efficient /hedge-union/ algorithm.
573 union :: Ord a => Set a -> Set a -> Set a
574 union Tip t2 = t2
575 union t1 Tip = t1
576 union t1 t2 = hedgeUnion NothingS NothingS t1 t2
577 #if __GLASGOW_HASKELL__ >= 700
578 {-# INLINABLE union #-}
579 #endif
580
581 hedgeUnion :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
582 hedgeUnion _ _ t1 Tip = t1
583 hedgeUnion blo bhi Tip (Bin _ x l r) = link x (filterGt blo l) (filterLt bhi r)
584 hedgeUnion _ _ t1 (Bin _ x Tip Tip) = insertR x t1 -- According to benchmarks, this special case increases
585 -- performance up to 30%. It does not help in difference or intersection.
586 hedgeUnion blo bhi (Bin _ x l r) t2 = link x (hedgeUnion blo bmi l (trim blo bmi t2))
587 (hedgeUnion bmi bhi r (trim bmi bhi t2))
588 where bmi = JustS x
589 #if __GLASGOW_HASKELL__ >= 700
590 {-# INLINABLE hedgeUnion #-}
591 #endif
592
593 {--------------------------------------------------------------------
594 Difference
595 --------------------------------------------------------------------}
596 -- | /O(n+m)/. Difference of two sets.
597 -- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
598 difference :: Ord a => Set a -> Set a -> Set a
599 difference Tip _ = Tip
600 difference t1 Tip = t1
601 difference t1 t2 = hedgeDiff NothingS NothingS t1 t2
602 #if __GLASGOW_HASKELL__ >= 700
603 {-# INLINABLE difference #-}
604 #endif
605
606 hedgeDiff :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
607 hedgeDiff _ _ Tip _ = Tip
608 hedgeDiff blo bhi (Bin _ x l r) Tip = link x (filterGt blo l) (filterLt bhi r)
609 hedgeDiff blo bhi t (Bin _ x l r) = merge (hedgeDiff blo bmi (trim blo bmi t) l)
610 (hedgeDiff bmi bhi (trim bmi bhi t) r)
611 where bmi = JustS x
612 #if __GLASGOW_HASKELL__ >= 700
613 {-# INLINABLE hedgeDiff #-}
614 #endif
615
616 {--------------------------------------------------------------------
617 Intersection
618 --------------------------------------------------------------------}
619 -- | /O(n+m)/. The intersection of two sets. The implementation uses an
620 -- efficient /hedge/ algorithm comparable with /hedge-union/. Elements of the
621 -- result come from the first set, so for example
622 --
623 -- > import qualified Data.Set as S
624 -- > data AB = A | B deriving Show
625 -- > instance Ord AB where compare _ _ = EQ
626 -- > instance Eq AB where _ == _ = True
627 -- > main = print (S.singleton A `S.intersection` S.singleton B,
628 -- > S.singleton B `S.intersection` S.singleton A)
629 --
630 -- prints @(fromList [A],fromList [B])@.
631 intersection :: Ord a => Set a -> Set a -> Set a
632 intersection Tip _ = Tip
633 intersection _ Tip = Tip
634 intersection t1 t2 = hedgeInt NothingS NothingS t1 t2
635 #if __GLASGOW_HASKELL__ >= 700
636 {-# INLINABLE intersection #-}
637 #endif
638
639 hedgeInt :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
640 hedgeInt _ _ _ Tip = Tip
641 hedgeInt _ _ Tip _ = Tip
642 hedgeInt blo bhi (Bin _ x l r) t2 = let l' = hedgeInt blo bmi l (trim blo bmi t2)
643 r' = hedgeInt bmi bhi r (trim bmi bhi t2)
644 in if x `member` t2 then link x l' r' else merge l' r'
645 where bmi = JustS x
646 #if __GLASGOW_HASKELL__ >= 700
647 {-# INLINABLE hedgeInt #-}
648 #endif
649
650 {--------------------------------------------------------------------
651 Filter and partition
652 --------------------------------------------------------------------}
653 -- | /O(n)/. Filter all elements that satisfy the predicate.
654 filter :: (a -> Bool) -> Set a -> Set a
655 filter _ Tip = Tip
656 filter p (Bin _ x l r)
657 | p x = link x (filter p l) (filter p r)
658 | otherwise = merge (filter p l) (filter p r)
659
660 -- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
661 -- the predicate and one with all elements that don't satisfy the predicate.
662 -- See also 'split'.
663 partition :: (a -> Bool) -> Set a -> (Set a,Set a)
664 partition p0 t0 = toPair $ go p0 t0
665 where
666 go _ Tip = (Tip :*: Tip)
667 go p (Bin _ x l r) = case (go p l, go p r) of
668 ((l1 :*: l2), (r1 :*: r2))
669 | p x -> link x l1 r1 :*: merge l2 r2
670 | otherwise -> merge l1 r1 :*: link x l2 r2
671
672 {----------------------------------------------------------------------
673 Map
674 ----------------------------------------------------------------------}
675
676 -- | /O(n*log n)/.
677 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
678 --
679 -- It's worth noting that the size of the result may be smaller if,
680 -- for some @(x,y)@, @x \/= y && f x == f y@
681
682 map :: Ord b => (a->b) -> Set a -> Set b
683 map f = fromList . List.map f . toList
684 #if __GLASGOW_HASKELL__ >= 700
685 {-# INLINABLE map #-}
686 #endif
687
688 -- | /O(n)/. The
689 --
690 -- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
691 -- /The precondition is not checked./
692 -- Semi-formally, we have:
693 --
694 -- > and [x < y ==> f x < f y | x <- ls, y <- ls]
695 -- > ==> mapMonotonic f s == map f s
696 -- > where ls = toList s
697
698 mapMonotonic :: (a->b) -> Set a -> Set b
699 mapMonotonic _ Tip = Tip
700 mapMonotonic f (Bin sz x l r) = Bin sz (f x) (mapMonotonic f l) (mapMonotonic f r)
701
702 {--------------------------------------------------------------------
703 Fold
704 --------------------------------------------------------------------}
705 -- | /O(n)/. Fold the elements in the set using the given right-associative
706 -- binary operator. This function is an equivalent of 'foldr' and is present
707 -- for compatibility only.
708 --
709 -- /Please note that fold will be deprecated in the future and removed./
710 fold :: (a -> b -> b) -> b -> Set a -> b
711 fold = foldr
712 {-# INLINE fold #-}
713
714 -- | /O(n)/. Fold the elements in the set using the given right-associative
715 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
716 --
717 -- For example,
718 --
719 -- > toAscList set = foldr (:) [] set
720 foldr :: (a -> b -> b) -> b -> Set a -> b
721 foldr f z = go z
722 where
723 go z' Tip = z'
724 go z' (Bin _ x l r) = go (f x (go z' r)) l
725 {-# INLINE foldr #-}
726
727 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
728 -- evaluated before using the result in the next application. This
729 -- function is strict in the starting value.
730 foldr' :: (a -> b -> b) -> b -> Set a -> b
731 foldr' f z = go z
732 where
733 STRICT_1_OF_2(go)
734 go z' Tip = z'
735 go z' (Bin _ x l r) = go (f x (go z' r)) l
736 {-# INLINE foldr' #-}
737
738 -- | /O(n)/. Fold the elements in the set using the given left-associative
739 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
740 --
741 -- For example,
742 --
743 -- > toDescList set = foldl (flip (:)) [] set
744 foldl :: (a -> b -> a) -> a -> Set b -> a
745 foldl f z = go z
746 where
747 go z' Tip = z'
748 go z' (Bin _ x l r) = go (f (go z' l) x) r
749 {-# INLINE foldl #-}
750
751 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
752 -- evaluated before using the result in the next application. This
753 -- function is strict in the starting value.
754 foldl' :: (a -> b -> a) -> a -> Set b -> a
755 foldl' f z = go z
756 where
757 STRICT_1_OF_2(go)
758 go z' Tip = z'
759 go z' (Bin _ x l r) = go (f (go z' l) x) r
760 {-# INLINE foldl' #-}
761
762 {--------------------------------------------------------------------
763 List variations
764 --------------------------------------------------------------------}
765 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
766 -- Subject to list fusion.
767 elems :: Set a -> [a]
768 elems = toAscList
769
770 {--------------------------------------------------------------------
771 Lists
772 --------------------------------------------------------------------}
773 #if __GLASGOW_HASKELL__ >= 708
774 instance (Ord a) => GHCExts.IsList (Set a) where
775 type Item (Set a) = a
776 fromList = fromList
777 toList = toList
778 #endif
779
780 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
781 toList :: Set a -> [a]
782 toList = toAscList
783
784 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list fusion.
785 toAscList :: Set a -> [a]
786 toAscList = foldr (:) []
787
788 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
789 -- fusion.
790 toDescList :: Set a -> [a]
791 toDescList = foldl (flip (:)) []
792
793 -- List fusion for the list generating functions.
794 #if __GLASGOW_HASKELL__
795 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
796 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
797 foldrFB :: (a -> b -> b) -> b -> Set a -> b
798 foldrFB = foldr
799 {-# INLINE[0] foldrFB #-}
800 foldlFB :: (a -> b -> a) -> a -> Set b -> a
801 foldlFB = foldl
802 {-# INLINE[0] foldlFB #-}
803
804 -- Inline elems and toList, so that we need to fuse only toAscList.
805 {-# INLINE elems #-}
806 {-# INLINE toList #-}
807
808 -- The fusion is enabled up to phase 2 included. If it does not succeed,
809 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
810 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
811 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
812 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
813 -- before phase 0, otherwise the fusion rules would not fire at all.
814 {-# NOINLINE[0] toAscList #-}
815 {-# NOINLINE[0] toDescList #-}
816 {-# RULES "Set.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
817 {-# RULES "Set.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
818 {-# RULES "Set.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
819 {-# RULES "Set.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
820 #endif
821
822 -- | /O(n*log n)/. Create a set from a list of elements.
823 --
824 -- If the elemens are ordered, linear-time implementation is used,
825 -- with the performance equal to 'fromDistinctAscList'.
826
827 -- For some reason, when 'singleton' is used in fromList or in
828 -- create, it is not inlined, so we inline it manually.
829 fromList :: Ord a => [a] -> Set a
830 fromList [] = Tip
831 fromList [x] = Bin 1 x Tip Tip
832 fromList (x0 : xs0) | not_ordered x0 xs0 = fromList' (Bin 1 x0 Tip Tip) xs0
833 | otherwise = go (1::Int) (Bin 1 x0 Tip Tip) xs0
834 where
835 not_ordered _ [] = False
836 not_ordered x (y : _) = x >= y
837 {-# INLINE not_ordered #-}
838
839 fromList' t0 xs = foldlStrict ins t0 xs
840 where ins t x = insert x t
841
842 STRICT_1_OF_3(go)
843 go _ t [] = t
844 go _ t [x] = insertMax x t
845 go s l xs@(x : xss) | not_ordered x xss = fromList' l xs
846 | otherwise = case create s xss of
847 (r, ys, []) -> go (s `shiftL` 1) (link x l r) ys
848 (r, _, ys) -> fromList' (link x l r) ys
849
850 -- The create is returning a triple (tree, xs, ys). Both xs and ys
851 -- represent not yet processed elements and only one of them can be nonempty.
852 -- If ys is nonempty, the keys in ys are not ordered with respect to tree
853 -- and must be inserted using fromList'. Otherwise the keys have been
854 -- ordered so far.
855 STRICT_1_OF_2(create)
856 create _ [] = (Tip, [], [])
857 create s xs@(x : xss)
858 | s == 1 = if not_ordered x xss then (Bin 1 x Tip Tip, [], xss)
859 else (Bin 1 x Tip Tip, xss, [])
860 | otherwise = case create (s `shiftR` 1) xs of
861 res@(_, [], _) -> res
862 (l, [y], zs) -> (insertMax y l, [], zs)
863 (l, ys@(y:yss), _) | not_ordered y yss -> (l, [], ys)
864 | otherwise -> case create (s `shiftR` 1) yss of
865 (r, zs, ws) -> (link y l r, zs, ws)
866 #if __GLASGOW_HASKELL__ >= 700
867 {-# INLINABLE fromList #-}
868 #endif
869
870 {--------------------------------------------------------------------
871 Building trees from ascending/descending lists can be done in linear time.
872
873 Note that if [xs] is ascending that:
874 fromAscList xs == fromList xs
875 --------------------------------------------------------------------}
876 -- | /O(n)/. Build a set from an ascending list in linear time.
877 -- /The precondition (input list is ascending) is not checked./
878 fromAscList :: Eq a => [a] -> Set a
879 fromAscList xs
880 = fromDistinctAscList (combineEq xs)
881 where
882 -- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
883 combineEq xs'
884 = case xs' of
885 [] -> []
886 [x] -> [x]
887 (x:xx) -> combineEq' x xx
888
889 combineEq' z [] = [z]
890 combineEq' z (x:xs')
891 | z==x = combineEq' z xs'
892 | otherwise = z:combineEq' x xs'
893 #if __GLASGOW_HASKELL__ >= 700
894 {-# INLINABLE fromAscList #-}
895 #endif
896
897
898 -- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
899 -- /The precondition (input list is strictly ascending) is not checked./
900
901 -- For some reason, when 'singleton' is used in fromDistinctAscList or in
902 -- create, it is not inlined, so we inline it manually.
903 fromDistinctAscList :: [a] -> Set a
904 fromDistinctAscList [] = Tip
905 fromDistinctAscList (x0 : xs0) = go (1::Int) (Bin 1 x0 Tip Tip) xs0
906 where
907 STRICT_1_OF_3(go)
908 go _ t [] = t
909 go s l (x : xs) = case create s xs of
910 (r, ys) -> go (s `shiftL` 1) (link x l r) ys
911
912 STRICT_1_OF_2(create)
913 create _ [] = (Tip, [])
914 create s xs@(x : xs')
915 | s == 1 = (Bin 1 x Tip Tip, xs')
916 | otherwise = case create (s `shiftR` 1) xs of
917 res@(_, []) -> res
918 (l, y:ys) -> case create (s `shiftR` 1) ys of
919 (r, zs) -> (link y l r, zs)
920
921 {--------------------------------------------------------------------
922 Eq converts the set to a list. In a lazy setting, this
923 actually seems one of the faster methods to compare two trees
924 and it is certainly the simplest :-)
925 --------------------------------------------------------------------}
926 instance Eq a => Eq (Set a) where
927 t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
928
929 {--------------------------------------------------------------------
930 Ord
931 --------------------------------------------------------------------}
932
933 instance Ord a => Ord (Set a) where
934 compare s1 s2 = compare (toAscList s1) (toAscList s2)
935
936 {--------------------------------------------------------------------
937 Show
938 --------------------------------------------------------------------}
939 instance Show a => Show (Set a) where
940 showsPrec p xs = showParen (p > 10) $
941 showString "fromList " . shows (toList xs)
942
943 {--------------------------------------------------------------------
944 Read
945 --------------------------------------------------------------------}
946 instance (Read a, Ord a) => Read (Set a) where
947 #ifdef __GLASGOW_HASKELL__
948 readPrec = parens $ prec 10 $ do
949 Ident "fromList" <- lexP
950 xs <- readPrec
951 return (fromList xs)
952
953 readListPrec = readListPrecDefault
954 #else
955 readsPrec p = readParen (p > 10) $ \ r -> do
956 ("fromList",s) <- lex r
957 (xs,t) <- reads s
958 return (fromList xs,t)
959 #endif
960
961 {--------------------------------------------------------------------
962 Typeable/Data
963 --------------------------------------------------------------------}
964
965 #include "Typeable.h"
966 INSTANCE_TYPEABLE1(Set,setTc,"Set")
967
968 {--------------------------------------------------------------------
969 NFData
970 --------------------------------------------------------------------}
971
972 instance NFData a => NFData (Set a) where
973 rnf Tip = ()
974 rnf (Bin _ y l r) = rnf y `seq` rnf l `seq` rnf r
975
976 {--------------------------------------------------------------------
977 Utility functions that return sub-ranges of the original
978 tree. Some functions take a `Maybe value` as an argument to
979 allow comparisons against infinite values. These are called `blow`
980 (Nothing is -\infty) and `bhigh` (here Nothing is +\infty).
981 We use MaybeS value, which is a Maybe strict in the Just case.
982
983 [trim blow bhigh t] A tree that is either empty or where [x > blow]
984 and [x < bhigh] for the value [x] of the root.
985 [filterGt blow t] A tree where for all values [k]. [k > blow]
986 [filterLt bhigh t] A tree where for all values [k]. [k < bhigh]
987
988 [split k t] Returns two trees [l] and [r] where all values
989 in [l] are <[k] and all keys in [r] are >[k].
990 [splitMember k t] Just like [split] but also returns whether [k]
991 was found in the tree.
992 --------------------------------------------------------------------}
993
994 data MaybeS a = NothingS | JustS !a
995
996 {--------------------------------------------------------------------
997 [trim blo bhi t] trims away all subtrees that surely contain no
998 values between the range [blo] to [bhi]. The returned tree is either
999 empty or the key of the root is between @blo@ and @bhi@.
1000 --------------------------------------------------------------------}
1001 trim :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a
1002 trim NothingS NothingS t = t
1003 trim (JustS lx) NothingS t = greater lx t where greater lo (Bin _ x _ r) | x <= lo = greater lo r
1004 greater _ t' = t'
1005 trim NothingS (JustS hx) t = lesser hx t where lesser hi (Bin _ x l _) | x >= hi = lesser hi l
1006 lesser _ t' = t'
1007 trim (JustS lx) (JustS hx) t = middle lx hx t where middle lo hi (Bin _ x _ r) | x <= lo = middle lo hi r
1008 middle lo hi (Bin _ x l _) | x >= hi = middle lo hi l
1009 middle _ _ t' = t'
1010 #if __GLASGOW_HASKELL__ >= 700
1011 {-# INLINABLE trim #-}
1012 #endif
1013
1014 {--------------------------------------------------------------------
1015 [filterGt b t] filter all values >[b] from tree [t]
1016 [filterLt b t] filter all values <[b] from tree [t]
1017 --------------------------------------------------------------------}
1018 filterGt :: Ord a => MaybeS a -> Set a -> Set a
1019 filterGt NothingS t = t
1020 filterGt (JustS b) t = filter' b t
1021 where filter' _ Tip = Tip
1022 filter' b' (Bin _ x l r) =
1023 case compare b' x of LT -> link x (filter' b' l) r
1024 EQ -> r
1025 GT -> filter' b' r
1026 #if __GLASGOW_HASKELL__ >= 700
1027 {-# INLINABLE filterGt #-}
1028 #endif
1029
1030 filterLt :: Ord a => MaybeS a -> Set a -> Set a
1031 filterLt NothingS t = t
1032 filterLt (JustS b) t = filter' b t
1033 where filter' _ Tip = Tip
1034 filter' b' (Bin _ x l r) =
1035 case compare x b' of LT -> link x l (filter' b' r)
1036 EQ -> l
1037 GT -> filter' b' l
1038 #if __GLASGOW_HASKELL__ >= 700
1039 {-# INLINABLE filterLt #-}
1040 #endif
1041
1042 {--------------------------------------------------------------------
1043 Split
1044 --------------------------------------------------------------------}
1045 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
1046 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
1047 -- comprises the elements of @set@ greater than @x@.
1048 split :: Ord a => a -> Set a -> (Set a,Set a)
1049 split x0 t0 = toPair $ go x0 t0
1050 where
1051 go _ Tip = (Tip :*: Tip)
1052 go x (Bin _ y l r)
1053 = case compare x y of
1054 LT -> let (lt :*: gt) = go x l in (lt :*: link y gt r)
1055 GT -> let (lt :*: gt) = go x r in (link y l lt :*: gt)
1056 EQ -> (l :*: r)
1057 #if __GLASGOW_HASKELL__ >= 700
1058 {-# INLINABLE split #-}
1059 #endif
1060
1061 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
1062 -- element was found in the original set.
1063 splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
1064 splitMember _ Tip = (Tip, False, Tip)
1065 splitMember x (Bin _ y l r)
1066 = case compare x y of
1067 LT -> let (lt, found, gt) = splitMember x l
1068 gt' = link y gt r
1069 in gt' `seq` (lt, found, gt')
1070 GT -> let (lt, found, gt) = splitMember x r
1071 lt' = link y l lt
1072 in lt' `seq` (lt', found, gt)
1073 EQ -> (l, True, r)
1074 #if __GLASGOW_HASKELL__ >= 700
1075 {-# INLINABLE splitMember #-}
1076 #endif
1077
1078 {--------------------------------------------------------------------
1079 Indexing
1080 --------------------------------------------------------------------}
1081
1082 -- | /O(log n)/. Return the /index/ of an element, which is its zero-based
1083 -- index in the sorted sequence of elements. The index is a number from /0/ up
1084 -- to, but not including, the 'size' of the set. Calls 'error' when the element
1085 -- is not a 'member' of the set.
1086 --
1087 -- > findIndex 2 (fromList [5,3]) Error: element is not in the set
1088 -- > findIndex 3 (fromList [5,3]) == 0
1089 -- > findIndex 5 (fromList [5,3]) == 1
1090 -- > findIndex 6 (fromList [5,3]) Error: element is not in the set
1091
1092 -- See Note: Type of local 'go' function
1093 findIndex :: Ord a => a -> Set a -> Int
1094 findIndex = go 0
1095 where
1096 go :: Ord a => Int -> a -> Set a -> Int
1097 STRICT_1_OF_3(go)
1098 STRICT_2_OF_3(go)
1099 go _ _ Tip = error "Set.findIndex: element is not in the set"
1100 go idx x (Bin _ kx l r) = case compare x kx of
1101 LT -> go idx x l
1102 GT -> go (idx + size l + 1) x r
1103 EQ -> idx + size l
1104 #if __GLASGOW_HASKELL__ >= 700
1105 {-# INLINABLE findIndex #-}
1106 #endif
1107
1108 -- | /O(log n)/. Lookup the /index/ of an element, which is its zero-based index in
1109 -- the sorted sequence of elements. The index is a number from /0/ up to, but not
1110 -- including, the 'size' of the set.
1111 --
1112 -- > isJust (lookupIndex 2 (fromList [5,3])) == False
1113 -- > fromJust (lookupIndex 3 (fromList [5,3])) == 0
1114 -- > fromJust (lookupIndex 5 (fromList [5,3])) == 1
1115 -- > isJust (lookupIndex 6 (fromList [5,3])) == False
1116
1117 -- See Note: Type of local 'go' function
1118 lookupIndex :: Ord a => a -> Set a -> Maybe Int
1119 lookupIndex = go 0
1120 where
1121 go :: Ord a => Int -> a -> Set a -> Maybe Int
1122 STRICT_1_OF_3(go)
1123 STRICT_2_OF_3(go)
1124 go _ _ Tip = Nothing
1125 go idx x (Bin _ kx l r) = case compare x kx of
1126 LT -> go idx x l
1127 GT -> go (idx + size l + 1) x r
1128 EQ -> Just $! idx + size l
1129 #if __GLASGOW_HASKELL__ >= 700
1130 {-# INLINABLE lookupIndex #-}
1131 #endif
1132
1133 -- | /O(log n)/. Retrieve an element by its /index/, i.e. by its zero-based
1134 -- index in the sorted sequence of elements. If the /index/ is out of range (less
1135 -- than zero, greater or equal to 'size' of the set), 'error' is called.
1136 --
1137 -- > elemAt 0 (fromList [5,3]) == 3
1138 -- > elemAt 1 (fromList [5,3]) == 5
1139 -- > elemAt 2 (fromList [5,3]) Error: index out of range
1140
1141 elemAt :: Int -> Set a -> a
1142 STRICT_1_OF_2(elemAt)
1143 elemAt _ Tip = error "Set.elemAt: index out of range"
1144 elemAt i (Bin _ x l r)
1145 = case compare i sizeL of
1146 LT -> elemAt i l
1147 GT -> elemAt (i-sizeL-1) r
1148 EQ -> x
1149 where
1150 sizeL = size l
1151
1152 -- | /O(log n)/. Delete the element at /index/, i.e. by its zero-based index in
1153 -- the sorted sequence of elements. If the /index/ is out of range (less than zero,
1154 -- greater or equal to 'size' of the set), 'error' is called.
1155 --
1156 -- > deleteAt 0 (fromList [5,3]) == singleton 5
1157 -- > deleteAt 1 (fromList [5,3]) == singleton 3
1158 -- > deleteAt 2 (fromList [5,3]) Error: index out of range
1159 -- > deleteAt (-1) (fromList [5,3]) Error: index out of range
1160
1161 deleteAt :: Int -> Set a -> Set a
1162 deleteAt i t = i `seq`
1163 case t of
1164 Tip -> error "Set.deleteAt: index out of range"
1165 Bin _ x l r -> case compare i sizeL of
1166 LT -> balanceR x (deleteAt i l) r
1167 GT -> balanceL x l (deleteAt (i-sizeL-1) r)
1168 EQ -> glue l r
1169 where
1170 sizeL = size l
1171
1172
1173 {--------------------------------------------------------------------
1174 Utility functions that maintain the balance properties of the tree.
1175 All constructors assume that all values in [l] < [x] and all values
1176 in [r] > [x], and that [l] and [r] are valid trees.
1177
1178 In order of sophistication:
1179 [Bin sz x l r] The type constructor.
1180 [bin x l r] Maintains the correct size, assumes that both [l]
1181 and [r] are balanced with respect to each other.
1182 [balance x l r] Restores the balance and size.
1183 Assumes that the original tree was balanced and
1184 that [l] or [r] has changed by at most one element.
1185 [link x l r] Restores balance and size.
1186
1187 Furthermore, we can construct a new tree from two trees. Both operations
1188 assume that all values in [l] < all values in [r] and that [l] and [r]
1189 are valid:
1190 [glue l r] Glues [l] and [r] together. Assumes that [l] and
1191 [r] are already balanced with respect to each other.
1192 [merge l r] Merges two trees and restores balance.
1193
1194 Note: in contrast to Adam's paper, we use (<=) comparisons instead
1195 of (<) comparisons in [link], [merge] and [balance].
1196 Quickcheck (on [difference]) showed that this was necessary in order
1197 to maintain the invariants. It is quite unsatisfactory that I haven't
1198 been able to find out why this is actually the case! Fortunately, it
1199 doesn't hurt to be a bit more conservative.
1200 --------------------------------------------------------------------}
1201
1202 {--------------------------------------------------------------------
1203 Link
1204 --------------------------------------------------------------------}
1205 link :: a -> Set a -> Set a -> Set a
1206 link x Tip r = insertMin x r
1207 link x l Tip = insertMax x l
1208 link x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
1209 | delta*sizeL < sizeR = balanceL z (link x l lz) rz
1210 | delta*sizeR < sizeL = balanceR y ly (link x ry r)
1211 | otherwise = bin x l r
1212
1213
1214 -- insertMin and insertMax don't perform potentially expensive comparisons.
1215 insertMax,insertMin :: a -> Set a -> Set a
1216 insertMax x t
1217 = case t of
1218 Tip -> singleton x
1219 Bin _ y l r
1220 -> balanceR y l (insertMax x r)
1221
1222 insertMin x t
1223 = case t of
1224 Tip -> singleton x
1225 Bin _ y l r
1226 -> balanceL y (insertMin x l) r
1227
1228 {--------------------------------------------------------------------
1229 [merge l r]: merges two trees.
1230 --------------------------------------------------------------------}
1231 merge :: Set a -> Set a -> Set a
1232 merge Tip r = r
1233 merge l Tip = l
1234 merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
1235 | delta*sizeL < sizeR = balanceL y (merge l ly) ry
1236 | delta*sizeR < sizeL = balanceR x lx (merge rx r)
1237 | otherwise = glue l r
1238
1239 {--------------------------------------------------------------------
1240 [glue l r]: glues two trees together.
1241 Assumes that [l] and [r] are already balanced with respect to each other.
1242 --------------------------------------------------------------------}
1243 glue :: Set a -> Set a -> Set a
1244 glue Tip r = r
1245 glue l Tip = l
1246 glue l r
1247 | size l > size r = let (m,l') = deleteFindMax l in balanceR m l' r
1248 | otherwise = let (m,r') = deleteFindMin r in balanceL m l r'
1249
1250 -- | /O(log n)/. Delete and find the minimal element.
1251 --
1252 -- > deleteFindMin set = (findMin set, deleteMin set)
1253
1254 deleteFindMin :: Set a -> (a,Set a)
1255 deleteFindMin t
1256 = case t of
1257 Bin _ x Tip r -> (x,r)
1258 Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balanceR x l' r)
1259 Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
1260
1261 -- | /O(log n)/. Delete and find the maximal element.
1262 --
1263 -- > deleteFindMax set = (findMax set, deleteMax set)
1264 deleteFindMax :: Set a -> (a,Set a)
1265 deleteFindMax t
1266 = case t of
1267 Bin _ x l Tip -> (x,l)
1268 Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balanceL x l r')
1269 Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
1270
1271 -- | /O(log n)/. Retrieves the minimal key of the set, and the set
1272 -- stripped of that element, or 'Nothing' if passed an empty set.
1273 minView :: Set a -> Maybe (a, Set a)
1274 minView Tip = Nothing
1275 minView x = Just (deleteFindMin x)
1276
1277 -- | /O(log n)/. Retrieves the maximal key of the set, and the set
1278 -- stripped of that element, or 'Nothing' if passed an empty set.
1279 maxView :: Set a -> Maybe (a, Set a)
1280 maxView Tip = Nothing
1281 maxView x = Just (deleteFindMax x)
1282
1283 {--------------------------------------------------------------------
1284 [balance x l r] balances two trees with value x.
1285 The sizes of the trees should balance after decreasing the
1286 size of one of them. (a rotation).
1287
1288 [delta] is the maximal relative difference between the sizes of
1289 two trees, it corresponds with the [w] in Adams' paper.
1290 [ratio] is the ratio between an outer and inner sibling of the
1291 heavier subtree in an unbalanced setting. It determines
1292 whether a double or single rotation should be performed
1293 to restore balance. It is correspondes with the inverse
1294 of $\alpha$ in Adam's article.
1295
1296 Note that according to the Adam's paper:
1297 - [delta] should be larger than 4.646 with a [ratio] of 2.
1298 - [delta] should be larger than 3.745 with a [ratio] of 1.534.
1299
1300 But the Adam's paper is errorneous:
1301 - it can be proved that for delta=2 and delta>=5 there does
1302 not exist any ratio that would work
1303 - delta=4.5 and ratio=2 does not work
1304
1305 That leaves two reasonable variants, delta=3 and delta=4,
1306 both with ratio=2.
1307
1308 - A lower [delta] leads to a more 'perfectly' balanced tree.
1309 - A higher [delta] performs less rebalancing.
1310
1311 In the benchmarks, delta=3 is faster on insert operations,
1312 and delta=4 has slightly better deletes. As the insert speedup
1313 is larger, we currently use delta=3.
1314
1315 --------------------------------------------------------------------}
1316 delta,ratio :: Int
1317 delta = 3
1318 ratio = 2
1319
1320 -- The balance function is equivalent to the following:
1321 --
1322 -- balance :: a -> Set a -> Set a -> Set a
1323 -- balance x l r
1324 -- | sizeL + sizeR <= 1 = Bin sizeX x l r
1325 -- | sizeR > delta*sizeL = rotateL x l r
1326 -- | sizeL > delta*sizeR = rotateR x l r
1327 -- | otherwise = Bin sizeX x l r
1328 -- where
1329 -- sizeL = size l
1330 -- sizeR = size r
1331 -- sizeX = sizeL + sizeR + 1
1332 --
1333 -- rotateL :: a -> Set a -> Set a -> Set a
1334 -- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
1335 -- | otherwise = doubleL x l r
1336 -- rotateR :: a -> Set a -> Set a -> Set a
1337 -- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
1338 -- | otherwise = doubleR x l r
1339 --
1340 -- singleL, singleR :: a -> Set a -> Set a -> Set a
1341 -- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
1342 -- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
1343 --
1344 -- doubleL, doubleR :: a -> Set a -> Set a -> Set a
1345 -- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
1346 -- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
1347 --
1348 -- It is only written in such a way that every node is pattern-matched only once.
1349 --
1350 -- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
1351 -- In case it is needed, it can be found in Data.Map.
1352
1353 -- Functions balanceL and balanceR are specialised versions of balance.
1354 -- balanceL only checks whether the left subtree is too big,
1355 -- balanceR only checks whether the right subtree is too big.
1356
1357 -- balanceL is called when left subtree might have been inserted to or when
1358 -- right subtree might have been deleted from.
1359 balanceL :: a -> Set a -> Set a -> Set a
1360 balanceL x l r = case r of
1361 Tip -> case l of
1362 Tip -> Bin 1 x Tip Tip
1363 (Bin _ _ Tip Tip) -> Bin 2 x l Tip
1364 (Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
1365 (Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
1366 (Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
1367 | lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
1368 | otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)
1369
1370 (Bin rs _ _ _) -> case l of
1371 Tip -> Bin (1+rs) x Tip r
1372
1373 (Bin ls lx ll lr)
1374 | ls > delta*rs -> case (ll, lr) of
1375 (Bin lls _ _ _, Bin lrs lrx lrl lrr)
1376 | lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
1377 | otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
1378 (_, _) -> error "Failure in Data.Map.balanceL"
1379 | otherwise -> Bin (1+ls+rs) x l r
1380 {-# NOINLINE balanceL #-}
1381
1382 -- balanceR is called when right subtree might have been inserted to or when
1383 -- left subtree might have been deleted from.
1384 balanceR :: a -> Set a -> Set a -> Set a
1385 balanceR x l r = case l of
1386 Tip -> case r of
1387 Tip -> Bin 1 x Tip Tip
1388 (Bin _ _ Tip Tip) -> Bin 2 x Tip r
1389 (Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
1390 (Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
1391 (Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
1392 | rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
1393 | otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)
1394
1395 (Bin ls _ _ _) -> case r of
1396 Tip -> Bin (1+ls) x l Tip
1397
1398 (Bin rs rx rl rr)
1399 | rs > delta*ls -> case (rl, rr) of
1400 (Bin rls rlx rll rlr, Bin rrs _ _ _)
1401 | rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
1402 | otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
1403 (_, _) -> error "Failure in Data.Map.balanceR"
1404 | otherwise -> Bin (1+ls+rs) x l r
1405 {-# NOINLINE balanceR #-}
1406
1407 {--------------------------------------------------------------------
1408 The bin constructor maintains the size of the tree
1409 --------------------------------------------------------------------}
1410 bin :: a -> Set a -> Set a -> Set a
1411 bin x l r
1412 = Bin (size l + size r + 1) x l r
1413 {-# INLINE bin #-}
1414
1415
1416 {--------------------------------------------------------------------
1417 Utilities
1418 --------------------------------------------------------------------}
1419 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1420 foldlStrict f = go
1421 where
1422 go z [] = z
1423 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1424 {-# INLINE foldlStrict #-}
1425
1426 -- | /O(1)/. Decompose a set into pieces based on the structure of the underlying
1427 -- tree. This function is useful for consuming a set in parallel.
1428 --
1429 -- No guarantee is made as to the sizes of the pieces; an internal, but
1430 -- deterministic process determines this. However, it is guaranteed that the pieces
1431 -- returned will be in ascending order (all elements in the first subset less than all
1432 -- elements in the second, and so on).
1433 --
1434 -- Examples:
1435 --
1436 -- > splitRoot (fromList [1..6]) ==
1437 -- > [fromList [1,2,3],fromList [4],fromList [5,6]]
1438 --
1439 -- > splitRoot empty == []
1440 --
1441 -- Note that the current implementation does not return more than three subsets,
1442 -- but you should not depend on this behaviour because it can change in the
1443 -- future without notice.
1444 splitRoot :: Set a -> [Set a]
1445 splitRoot orig =
1446 case orig of
1447 Tip -> []
1448 Bin _ v l r -> [l, singleton v, r]
1449 {-# INLINE splitRoot #-}
1450
1451
1452 {--------------------------------------------------------------------
1453 Debugging
1454 --------------------------------------------------------------------}
1455 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1456 -- in a compressed, hanging format.
1457 showTree :: Show a => Set a -> String
1458 showTree s
1459 = showTreeWith True False s
1460
1461
1462 {- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
1463 the tree that implements the set. If @hang@ is
1464 @True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
1465 @wide@ is 'True', an extra wide version is shown.
1466
1467 > Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
1468 > 4
1469 > +--2
1470 > | +--1
1471 > | +--3
1472 > +--5
1473 >
1474 > Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
1475 > 4
1476 > |
1477 > +--2
1478 > | |
1479 > | +--1
1480 > | |
1481 > | +--3
1482 > |
1483 > +--5
1484 >
1485 > Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
1486 > +--5
1487 > |
1488 > 4
1489 > |
1490 > | +--3
1491 > | |
1492 > +--2
1493 > |
1494 > +--1
1495
1496 -}
1497 showTreeWith :: Show a => Bool -> Bool -> Set a -> String
1498 showTreeWith hang wide t
1499 | hang = (showsTreeHang wide [] t) ""
1500 | otherwise = (showsTree wide [] [] t) ""
1501
1502 showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
1503 showsTree wide lbars rbars t
1504 = case t of
1505 Tip -> showsBars lbars . showString "|\n"
1506 Bin _ x Tip Tip
1507 -> showsBars lbars . shows x . showString "\n"
1508 Bin _ x l r
1509 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1510 showWide wide rbars .
1511 showsBars lbars . shows x . showString "\n" .
1512 showWide wide lbars .
1513 showsTree wide (withEmpty lbars) (withBar lbars) l
1514
1515 showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
1516 showsTreeHang wide bars t
1517 = case t of
1518 Tip -> showsBars bars . showString "|\n"
1519 Bin _ x Tip Tip
1520 -> showsBars bars . shows x . showString "\n"
1521 Bin _ x l r
1522 -> showsBars bars . shows x . showString "\n" .
1523 showWide wide bars .
1524 showsTreeHang wide (withBar bars) l .
1525 showWide wide bars .
1526 showsTreeHang wide (withEmpty bars) r
1527
1528 showWide :: Bool -> [String] -> String -> String
1529 showWide wide bars
1530 | wide = showString (concat (reverse bars)) . showString "|\n"
1531 | otherwise = id
1532
1533 showsBars :: [String] -> ShowS
1534 showsBars bars
1535 = case bars of
1536 [] -> id
1537 _ -> showString (concat (reverse (tail bars))) . showString node
1538
1539 node :: String
1540 node = "+--"
1541
1542 withBar, withEmpty :: [String] -> [String]
1543 withBar bars = "| ":bars
1544 withEmpty bars = " ":bars
1545
1546 {--------------------------------------------------------------------
1547 Assertions
1548 --------------------------------------------------------------------}
1549 -- | /O(n)/. Test if the internal set structure is valid.
1550 valid :: Ord a => Set a -> Bool
1551 valid t
1552 = balanced t && ordered t && validsize t
1553
1554 ordered :: Ord a => Set a -> Bool
1555 ordered t
1556 = bounded (const True) (const True) t
1557 where
1558 bounded lo hi t'
1559 = case t' of
1560 Tip -> True
1561 Bin _ x l r -> (lo x) && (hi x) && bounded lo (<x) l && bounded (>x) hi r
1562
1563 balanced :: Set a -> Bool
1564 balanced t
1565 = case t of
1566 Tip -> True
1567 Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
1568 balanced l && balanced r
1569
1570 validsize :: Set a -> Bool
1571 validsize t
1572 = (realsize t == Just (size t))
1573 where
1574 realsize t'
1575 = case t' of
1576 Tip -> Just 0
1577 Bin sz _ l r -> case (realsize l,realsize r) of
1578 (Just n,Just m) | n+m+1 == sz -> Just sz
1579 _ -> Nothing