c563d1c6aaf34ba6d3dbb9a2da764768e1dd1cff
[packages/containers.git] / Data / IntSet.hs
1 {-# OPTIONS -cpp #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.IntSet
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of integer sets.
12 --
13 -- Since many function names (but not the type name) clash with
14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
15 --
16 -- > import Data.IntSet (IntSet)
17 -- > import qualified Data.IntSet as IntSet
18 --
19 -- The implementation is based on /big-endian patricia trees/. This data
20 -- structure performs especially well on binary operations like 'union'
21 -- and 'intersection'. However, my benchmarks show that it is also
22 -- (much) faster on insertions and deletions when compared to a generic
23 -- size-balanced set implementation (see "Data.Set").
24 --
25 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
26 -- Workshop on ML, September 1998, pages 77-86,
27 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
28 --
29 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
30 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
31 -- October 1968, pages 514-534.
32 --
33 -- Many operations have a worst-case complexity of /O(min(n,W))/.
34 -- This means that the operation can become linear in the number of
35 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
36 -- (32 or 64).
37 -----------------------------------------------------------------------------
38
39 module Data.IntSet (
40 -- * Set type
41 IntSet -- instance Eq,Show
42
43 -- * Operators
44 , (\\)
45
46 -- * Query
47 , null
48 , size
49 , member
50 , notMember
51 , isSubsetOf
52 , isProperSubsetOf
53
54 -- * Construction
55 , empty
56 , singleton
57 , insert
58 , delete
59
60 -- * Combine
61 , union, unions
62 , difference
63 , intersection
64
65 -- * Filter
66 , filter
67 , partition
68 , split
69 , splitMember
70
71 -- * Min\/Max
72 , findMin
73 , findMax
74 , deleteMin
75 , deleteMax
76 , deleteFindMin
77 , deleteFindMax
78 , maxView
79 , minView
80
81 -- * Map
82 , map
83
84 -- * Fold
85 , fold
86
87 -- * Conversion
88 -- ** List
89 , elems
90 , toList
91 , fromList
92
93 -- ** Ordered list
94 , toAscList
95 , fromAscList
96 , fromDistinctAscList
97
98 -- * Debugging
99 , showTree
100 , showTreeWith
101 ) where
102
103
104 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
105 import Data.Bits
106
107 import qualified Data.List as List
108 import Data.Monoid (Monoid(..))
109 import Data.Maybe (fromMaybe)
110 import Data.Typeable
111
112 {-
113 -- just for testing
114 import Test.QuickCheck
115 import List (nub,sort)
116 import qualified List
117 import qualified Data.Set as Set
118 -}
119
120 #if __GLASGOW_HASKELL__
121 import Text.Read
122 import Data.Data (Data(..), mkNoRepType)
123 #endif
124
125 #if __GLASGOW_HASKELL__ >= 503
126 import GHC.Exts ( Word(..), Int(..), shiftRL# )
127 #elif __GLASGOW_HASKELL__
128 import Word
129 import GlaExts ( Word(..), Int(..), shiftRL# )
130 #else
131 import Data.Word
132 #endif
133
134 infixl 9 \\{-This comment teaches CPP correct behaviour -}
135
136 -- A "Nat" is a natural machine word (an unsigned Int)
137 type Nat = Word
138
139 natFromInt :: Int -> Nat
140 natFromInt i = fromIntegral i
141
142 intFromNat :: Nat -> Int
143 intFromNat w = fromIntegral w
144
145 shiftRL :: Nat -> Int -> Nat
146 #if __GLASGOW_HASKELL__
147 {--------------------------------------------------------------------
148 GHC: use unboxing to get @shiftRL@ inlined.
149 --------------------------------------------------------------------}
150 shiftRL (W# x) (I# i)
151 = W# (shiftRL# x i)
152 #else
153 shiftRL x i = shiftR x i
154 #endif
155
156 {--------------------------------------------------------------------
157 Operators
158 --------------------------------------------------------------------}
159 -- | /O(n+m)/. See 'difference'.
160 (\\) :: IntSet -> IntSet -> IntSet
161 m1 \\ m2 = difference m1 m2
162
163 {--------------------------------------------------------------------
164 Types
165 --------------------------------------------------------------------}
166 -- | A set of integers.
167 data IntSet = Nil
168 | Tip {-# UNPACK #-} !Int
169 | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
170 -- Invariant: Nil is never found as a child of Bin.
171 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
172 -- two elements of the set differ.
173 -- Invariant: Prefix is the common high-order bits that all elements share to
174 -- the left of the Mask bit.
175 -- Invariant: In Bin prefix mask left right, left consists of the elements that
176 -- don't have the mask bit set; right is all the elements that do.
177
178
179 type Prefix = Int
180 type Mask = Int
181
182 instance Monoid IntSet where
183 mempty = empty
184 mappend = union
185 mconcat = unions
186
187 #if __GLASGOW_HASKELL__
188
189 {--------------------------------------------------------------------
190 A Data instance
191 --------------------------------------------------------------------}
192
193 -- This instance preserves data abstraction at the cost of inefficiency.
194 -- We omit reflection services for the sake of data abstraction.
195
196 instance Data IntSet where
197 gfoldl f z is = z fromList `f` (toList is)
198 toConstr _ = error "toConstr"
199 gunfold _ _ = error "gunfold"
200 dataTypeOf _ = mkNoRepType "Data.IntSet.IntSet"
201
202 #endif
203
204 {--------------------------------------------------------------------
205 Query
206 --------------------------------------------------------------------}
207 -- | /O(1)/. Is the set empty?
208 null :: IntSet -> Bool
209 null Nil = True
210 null _ = False
211
212 -- | /O(n)/. Cardinality of the set.
213 size :: IntSet -> Int
214 size t
215 = case t of
216 Bin _ _ l r -> size l + size r
217 Tip _ -> 1
218 Nil -> 0
219
220 -- | /O(min(n,W))/. Is the value a member of the set?
221 member :: Int -> IntSet -> Bool
222 member x t
223 = case t of
224 Bin p m l r
225 | nomatch x p m -> False
226 | zero x m -> member x l
227 | otherwise -> member x r
228 Tip y -> (x==y)
229 Nil -> False
230
231 -- | /O(min(n,W))/. Is the element not in the set?
232 notMember :: Int -> IntSet -> Bool
233 notMember k = not . member k
234
235 -- 'lookup' is used by 'intersection' for left-biasing
236 lookup :: Int -> IntSet -> Maybe Int
237 lookup k t
238 = let nk = natFromInt k in seq nk (lookupN nk t)
239
240 lookupN :: Nat -> IntSet -> Maybe Int
241 lookupN k t
242 = case t of
243 Bin _ m l r
244 | zeroN k (natFromInt m) -> lookupN k l
245 | otherwise -> lookupN k r
246 Tip kx
247 | (k == natFromInt kx) -> Just kx
248 | otherwise -> Nothing
249 Nil -> Nothing
250
251 {--------------------------------------------------------------------
252 Construction
253 --------------------------------------------------------------------}
254 -- | /O(1)/. The empty set.
255 empty :: IntSet
256 empty
257 = Nil
258
259 -- | /O(1)/. A set of one element.
260 singleton :: Int -> IntSet
261 singleton x
262 = Tip x
263
264 {--------------------------------------------------------------------
265 Insert
266 --------------------------------------------------------------------}
267 -- | /O(min(n,W))/. Add a value to the set. When the value is already
268 -- an element of the set, it is replaced by the new one, ie. 'insert'
269 -- is left-biased.
270 insert :: Int -> IntSet -> IntSet
271 insert x t
272 = case t of
273 Bin p m l r
274 | nomatch x p m -> join x (Tip x) p t
275 | zero x m -> Bin p m (insert x l) r
276 | otherwise -> Bin p m l (insert x r)
277 Tip y
278 | x==y -> Tip x
279 | otherwise -> join x (Tip x) y t
280 Nil -> Tip x
281
282 -- right-biased insertion, used by 'union'
283 insertR :: Int -> IntSet -> IntSet
284 insertR x t
285 = case t of
286 Bin p m l r
287 | nomatch x p m -> join x (Tip x) p t
288 | zero x m -> Bin p m (insert x l) r
289 | otherwise -> Bin p m l (insert x r)
290 Tip y
291 | x==y -> t
292 | otherwise -> join x (Tip x) y t
293 Nil -> Tip x
294
295 -- | /O(min(n,W))/. Delete a value in the set. Returns the
296 -- original set when the value was not present.
297 delete :: Int -> IntSet -> IntSet
298 delete x t
299 = case t of
300 Bin p m l r
301 | nomatch x p m -> t
302 | zero x m -> bin p m (delete x l) r
303 | otherwise -> bin p m l (delete x r)
304 Tip y
305 | x==y -> Nil
306 | otherwise -> t
307 Nil -> Nil
308
309
310 {--------------------------------------------------------------------
311 Union
312 --------------------------------------------------------------------}
313 -- | The union of a list of sets.
314 unions :: [IntSet] -> IntSet
315 unions xs
316 = foldlStrict union empty xs
317
318
319 -- | /O(n+m)/. The union of two sets.
320 union :: IntSet -> IntSet -> IntSet
321 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
322 | shorter m1 m2 = union1
323 | shorter m2 m1 = union2
324 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
325 | otherwise = join p1 t1 p2 t2
326 where
327 union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
328 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
329 | otherwise = Bin p1 m1 l1 (union r1 t2)
330
331 union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
332 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
333 | otherwise = Bin p2 m2 l2 (union t1 r2)
334
335 union (Tip x) t = insert x t
336 union t (Tip x) = insertR x t -- right bias
337 union Nil t = t
338 union t Nil = t
339
340
341 {--------------------------------------------------------------------
342 Difference
343 --------------------------------------------------------------------}
344 -- | /O(n+m)/. Difference between two sets.
345 difference :: IntSet -> IntSet -> IntSet
346 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
347 | shorter m1 m2 = difference1
348 | shorter m2 m1 = difference2
349 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
350 | otherwise = t1
351 where
352 difference1 | nomatch p2 p1 m1 = t1
353 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
354 | otherwise = bin p1 m1 l1 (difference r1 t2)
355
356 difference2 | nomatch p1 p2 m2 = t1
357 | zero p1 m2 = difference t1 l2
358 | otherwise = difference t1 r2
359
360 difference t1@(Tip x) t2
361 | member x t2 = Nil
362 | otherwise = t1
363
364 difference Nil _ = Nil
365 difference t (Tip x) = delete x t
366 difference t Nil = t
367
368
369
370 {--------------------------------------------------------------------
371 Intersection
372 --------------------------------------------------------------------}
373 -- | /O(n+m)/. The intersection of two sets.
374 intersection :: IntSet -> IntSet -> IntSet
375 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
376 | shorter m1 m2 = intersection1
377 | shorter m2 m1 = intersection2
378 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
379 | otherwise = Nil
380 where
381 intersection1 | nomatch p2 p1 m1 = Nil
382 | zero p2 m1 = intersection l1 t2
383 | otherwise = intersection r1 t2
384
385 intersection2 | nomatch p1 p2 m2 = Nil
386 | zero p1 m2 = intersection t1 l2
387 | otherwise = intersection t1 r2
388
389 intersection t1@(Tip x) t2
390 | member x t2 = t1
391 | otherwise = Nil
392 intersection t (Tip x)
393 = case lookup x t of
394 Just y -> Tip y
395 Nothing -> Nil
396 intersection Nil _ = Nil
397 intersection _ Nil = Nil
398
399
400
401 {--------------------------------------------------------------------
402 Subset
403 --------------------------------------------------------------------}
404 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
405 isProperSubsetOf :: IntSet -> IntSet -> Bool
406 isProperSubsetOf t1 t2
407 = case subsetCmp t1 t2 of
408 LT -> True
409 _ -> False
410
411 subsetCmp :: IntSet -> IntSet -> Ordering
412 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
413 | shorter m1 m2 = GT
414 | shorter m2 m1 = case subsetCmpLt of
415 GT -> GT
416 _ -> LT
417 | p1 == p2 = subsetCmpEq
418 | otherwise = GT -- disjoint
419 where
420 subsetCmpLt | nomatch p1 p2 m2 = GT
421 | zero p1 m2 = subsetCmp t1 l2
422 | otherwise = subsetCmp t1 r2
423 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
424 (GT,_ ) -> GT
425 (_ ,GT) -> GT
426 (EQ,EQ) -> EQ
427 _ -> LT
428
429 subsetCmp (Bin _ _ _ _) _ = GT
430 subsetCmp (Tip x) (Tip y)
431 | x==y = EQ
432 | otherwise = GT -- disjoint
433 subsetCmp (Tip x) t
434 | member x t = LT
435 | otherwise = GT -- disjoint
436 subsetCmp Nil Nil = EQ
437 subsetCmp Nil _ = LT
438
439 -- | /O(n+m)/. Is this a subset?
440 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
441
442 isSubsetOf :: IntSet -> IntSet -> Bool
443 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
444 | shorter m1 m2 = False
445 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
446 else isSubsetOf t1 r2)
447 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
448 isSubsetOf (Bin _ _ _ _) _ = False
449 isSubsetOf (Tip x) t = member x t
450 isSubsetOf Nil _ = True
451
452
453 {--------------------------------------------------------------------
454 Filter
455 --------------------------------------------------------------------}
456 -- | /O(n)/. Filter all elements that satisfy some predicate.
457 filter :: (Int -> Bool) -> IntSet -> IntSet
458 filter predicate t
459 = case t of
460 Bin p m l r
461 -> bin p m (filter predicate l) (filter predicate r)
462 Tip x
463 | predicate x -> t
464 | otherwise -> Nil
465 Nil -> Nil
466
467 -- | /O(n)/. partition the set according to some predicate.
468 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
469 partition predicate t
470 = case t of
471 Bin p m l r
472 -> let (l1,l2) = partition predicate l
473 (r1,r2) = partition predicate r
474 in (bin p m l1 r1, bin p m l2 r2)
475 Tip x
476 | predicate x -> (t,Nil)
477 | otherwise -> (Nil,t)
478 Nil -> (Nil,Nil)
479
480
481 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
482 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
483 -- comprises the elements of @set@ greater than @x@.
484 --
485 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
486 split :: Int -> IntSet -> (IntSet,IntSet)
487 split x t
488 = case t of
489 Bin _ m l r
490 | m < 0 -> if x >= 0 then let (lt,gt) = split' x l in (union r lt, gt)
491 else let (lt,gt) = split' x r in (lt, union gt l)
492 -- handle negative numbers.
493 | otherwise -> split' x t
494 Tip y
495 | x>y -> (t,Nil)
496 | x<y -> (Nil,t)
497 | otherwise -> (Nil,Nil)
498 Nil -> (Nil, Nil)
499
500 split' :: Int -> IntSet -> (IntSet,IntSet)
501 split' x t
502 = case t of
503 Bin p m l r
504 | match x p m -> if zero x m then let (lt,gt) = split' x l in (lt,union gt r)
505 else let (lt,gt) = split' x r in (union l lt,gt)
506 | otherwise -> if x < p then (Nil, t)
507 else (t, Nil)
508 Tip y
509 | x>y -> (t,Nil)
510 | x<y -> (Nil,t)
511 | otherwise -> (Nil,Nil)
512 Nil -> (Nil,Nil)
513
514 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
515 -- element was found in the original set.
516 splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
517 splitMember x t
518 = case t of
519 Bin _ m l r
520 | m < 0 -> if x >= 0 then let (lt,found,gt) = splitMember' x l in (union r lt, found, gt)
521 else let (lt,found,gt) = splitMember' x r in (lt, found, union gt l)
522 -- handle negative numbers.
523 | otherwise -> splitMember' x t
524 Tip y
525 | x>y -> (t,False,Nil)
526 | x<y -> (Nil,False,t)
527 | otherwise -> (Nil,True,Nil)
528 Nil -> (Nil,False,Nil)
529
530 splitMember' :: Int -> IntSet -> (IntSet,Bool,IntSet)
531 splitMember' x t
532 = case t of
533 Bin p m l r
534 | match x p m -> if zero x m then let (lt,found,gt) = splitMember x l in (lt,found,union gt r)
535 else let (lt,found,gt) = splitMember x r in (union l lt,found,gt)
536 | otherwise -> if x < p then (Nil, False, t)
537 else (t, False, Nil)
538 Tip y
539 | x>y -> (t,False,Nil)
540 | x<y -> (Nil,False,t)
541 | otherwise -> (Nil,True,Nil)
542 Nil -> (Nil,False,Nil)
543
544 {----------------------------------------------------------------------
545 Min/Max
546 ----------------------------------------------------------------------}
547
548 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
549 -- stripped of that element, or 'Nothing' if passed an empty set.
550 maxView :: IntSet -> Maybe (Int, IntSet)
551 maxView t
552 = case t of
553 Bin p m l r | m < 0 -> let (result,t') = maxViewUnsigned l in Just (result, bin p m t' r)
554 Bin p m l r -> let (result,t') = maxViewUnsigned r in Just (result, bin p m l t')
555 Tip y -> Just (y,Nil)
556 Nil -> Nothing
557
558 maxViewUnsigned :: IntSet -> (Int, IntSet)
559 maxViewUnsigned t
560 = case t of
561 Bin p m l r -> let (result,t') = maxViewUnsigned r in (result, bin p m l t')
562 Tip y -> (y, Nil)
563 Nil -> error "maxViewUnsigned Nil"
564
565 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
566 -- stripped of that element, or 'Nothing' if passed an empty set.
567 minView :: IntSet -> Maybe (Int, IntSet)
568 minView t
569 = case t of
570 Bin p m l r | m < 0 -> let (result,t') = minViewUnsigned r in Just (result, bin p m l t')
571 Bin p m l r -> let (result,t') = minViewUnsigned l in Just (result, bin p m t' r)
572 Tip y -> Just (y, Nil)
573 Nil -> Nothing
574
575 minViewUnsigned :: IntSet -> (Int, IntSet)
576 minViewUnsigned t
577 = case t of
578 Bin p m l r -> let (result,t') = minViewUnsigned l in (result, bin p m t' r)
579 Tip y -> (y, Nil)
580 Nil -> error "minViewUnsigned Nil"
581
582 -- | /O(min(n,W))/. Delete and find the minimal element.
583 --
584 -- > deleteFindMin set = (findMin set, deleteMin set)
585 deleteFindMin :: IntSet -> (Int, IntSet)
586 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
587
588 -- | /O(min(n,W))/. Delete and find the maximal element.
589 --
590 -- > deleteFindMax set = (findMax set, deleteMax set)
591 deleteFindMax :: IntSet -> (Int, IntSet)
592 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
593
594 -- | /O(min(n,W))/. The minimal element of a set.
595 findMin :: IntSet -> Int
596 findMin = maybe (error "findMin: empty set has no minimal element") fst . minView
597
598 -- | /O(min(n,W))/. The maximal element of a set.
599 findMax :: IntSet -> Int
600 findMax = maybe (error "findMax: empty set has no maximal element") fst . maxView
601
602 -- | /O(min(n,W))/. Delete the minimal element.
603 deleteMin :: IntSet -> IntSet
604 deleteMin = maybe (error "deleteMin: empty set has no minimal element") snd . minView
605
606 -- | /O(min(n,W))/. Delete the maximal element.
607 deleteMax :: IntSet -> IntSet
608 deleteMax = maybe (error "deleteMax: empty set has no maximal element") snd . maxView
609
610 {----------------------------------------------------------------------
611 Map
612 ----------------------------------------------------------------------}
613
614 -- | /O(n*min(n,W))/.
615 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
616 --
617 -- It's worth noting that the size of the result may be smaller if,
618 -- for some @(x,y)@, @x \/= y && f x == f y@
619
620 map :: (Int->Int) -> IntSet -> IntSet
621 map f = fromList . List.map f . toList
622
623 {--------------------------------------------------------------------
624 Fold
625 --------------------------------------------------------------------}
626 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
627 --
628 -- > sum set == fold (+) 0 set
629 -- > elems set == fold (:) [] set
630 fold :: (Int -> b -> b) -> b -> IntSet -> b
631 fold f z t
632 = case t of
633 Bin 0 m l r | m < 0 -> foldr f (foldr f z l) r
634 -- put negative numbers before.
635 Bin _ _ _ _ -> foldr f z t
636 Tip x -> f x z
637 Nil -> z
638
639 foldr :: (Int -> b -> b) -> b -> IntSet -> b
640 foldr f z t
641 = case t of
642 Bin _ _ l r -> foldr f (foldr f z r) l
643 Tip x -> f x z
644 Nil -> z
645
646 {--------------------------------------------------------------------
647 List variations
648 --------------------------------------------------------------------}
649 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
650 elems :: IntSet -> [Int]
651 elems s
652 = toList s
653
654 {--------------------------------------------------------------------
655 Lists
656 --------------------------------------------------------------------}
657 -- | /O(n)/. Convert the set to a list of elements.
658 toList :: IntSet -> [Int]
659 toList t
660 = fold (:) [] t
661
662 -- | /O(n)/. Convert the set to an ascending list of elements.
663 toAscList :: IntSet -> [Int]
664 toAscList t = toList t
665
666 -- | /O(n*min(n,W))/. Create a set from a list of integers.
667 fromList :: [Int] -> IntSet
668 fromList xs
669 = foldlStrict ins empty xs
670 where
671 ins t x = insert x t
672
673 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
674 fromAscList :: [Int] -> IntSet
675 fromAscList xs
676 = fromList xs
677
678 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
679 fromDistinctAscList :: [Int] -> IntSet
680 fromDistinctAscList xs
681 = fromList xs
682
683
684 {--------------------------------------------------------------------
685 Eq
686 --------------------------------------------------------------------}
687 instance Eq IntSet where
688 t1 == t2 = equal t1 t2
689 t1 /= t2 = nequal t1 t2
690
691 equal :: IntSet -> IntSet -> Bool
692 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
693 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
694 equal (Tip x) (Tip y)
695 = (x==y)
696 equal Nil Nil = True
697 equal _ _ = False
698
699 nequal :: IntSet -> IntSet -> Bool
700 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
701 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
702 nequal (Tip x) (Tip y)
703 = (x/=y)
704 nequal Nil Nil = False
705 nequal _ _ = True
706
707 {--------------------------------------------------------------------
708 Ord
709 --------------------------------------------------------------------}
710
711 instance Ord IntSet where
712 compare s1 s2 = compare (toAscList s1) (toAscList s2)
713 -- tentative implementation. See if more efficient exists.
714
715 {--------------------------------------------------------------------
716 Show
717 --------------------------------------------------------------------}
718 instance Show IntSet where
719 showsPrec p xs = showParen (p > 10) $
720 showString "fromList " . shows (toList xs)
721
722 {-
723 XXX unused code
724 showSet :: [Int] -> ShowS
725 showSet []
726 = showString "{}"
727 showSet (x:xs)
728 = showChar '{' . shows x . showTail xs
729 where
730 showTail [] = showChar '}'
731 showTail (x':xs') = showChar ',' . shows x' . showTail xs'
732 -}
733
734 {--------------------------------------------------------------------
735 Read
736 --------------------------------------------------------------------}
737 instance Read IntSet where
738 #ifdef __GLASGOW_HASKELL__
739 readPrec = parens $ prec 10 $ do
740 Ident "fromList" <- lexP
741 xs <- readPrec
742 return (fromList xs)
743
744 readListPrec = readListPrecDefault
745 #else
746 readsPrec p = readParen (p > 10) $ \ r -> do
747 ("fromList",s) <- lex r
748 (xs,t) <- reads s
749 return (fromList xs,t)
750 #endif
751
752 {--------------------------------------------------------------------
753 Typeable
754 --------------------------------------------------------------------}
755
756 #include "Typeable.h"
757 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
758
759 {--------------------------------------------------------------------
760 Debugging
761 --------------------------------------------------------------------}
762 -- | /O(n)/. Show the tree that implements the set. The tree is shown
763 -- in a compressed, hanging format.
764 showTree :: IntSet -> String
765 showTree s
766 = showTreeWith True False s
767
768
769 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
770 the tree that implements the set. If @hang@ is
771 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
772 @wide@ is 'True', an extra wide version is shown.
773 -}
774 showTreeWith :: Bool -> Bool -> IntSet -> String
775 showTreeWith hang wide t
776 | hang = (showsTreeHang wide [] t) ""
777 | otherwise = (showsTree wide [] [] t) ""
778
779 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
780 showsTree wide lbars rbars t
781 = case t of
782 Bin p m l r
783 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
784 showWide wide rbars .
785 showsBars lbars . showString (showBin p m) . showString "\n" .
786 showWide wide lbars .
787 showsTree wide (withEmpty lbars) (withBar lbars) l
788 Tip x
789 -> showsBars lbars . showString " " . shows x . showString "\n"
790 Nil -> showsBars lbars . showString "|\n"
791
792 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
793 showsTreeHang wide bars t
794 = case t of
795 Bin p m l r
796 -> showsBars bars . showString (showBin p m) . showString "\n" .
797 showWide wide bars .
798 showsTreeHang wide (withBar bars) l .
799 showWide wide bars .
800 showsTreeHang wide (withEmpty bars) r
801 Tip x
802 -> showsBars bars . showString " " . shows x . showString "\n"
803 Nil -> showsBars bars . showString "|\n"
804
805 showBin :: Prefix -> Mask -> String
806 showBin _ _
807 = "*" -- ++ show (p,m)
808
809 showWide :: Bool -> [String] -> String -> String
810 showWide wide bars
811 | wide = showString (concat (reverse bars)) . showString "|\n"
812 | otherwise = id
813
814 showsBars :: [String] -> ShowS
815 showsBars bars
816 = case bars of
817 [] -> id
818 _ -> showString (concat (reverse (tail bars))) . showString node
819
820 node :: String
821 node = "+--"
822
823 withBar, withEmpty :: [String] -> [String]
824 withBar bars = "| ":bars
825 withEmpty bars = " ":bars
826
827
828 {--------------------------------------------------------------------
829 Helpers
830 --------------------------------------------------------------------}
831 {--------------------------------------------------------------------
832 Join
833 --------------------------------------------------------------------}
834 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
835 join p1 t1 p2 t2
836 | zero p1 m = Bin p m t1 t2
837 | otherwise = Bin p m t2 t1
838 where
839 m = branchMask p1 p2
840 p = mask p1 m
841
842 {--------------------------------------------------------------------
843 @bin@ assures that we never have empty trees within a tree.
844 --------------------------------------------------------------------}
845 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
846 bin _ _ l Nil = l
847 bin _ _ Nil r = r
848 bin p m l r = Bin p m l r
849
850
851 {--------------------------------------------------------------------
852 Endian independent bit twiddling
853 --------------------------------------------------------------------}
854 zero :: Int -> Mask -> Bool
855 zero i m
856 = (natFromInt i) .&. (natFromInt m) == 0
857
858 nomatch,match :: Int -> Prefix -> Mask -> Bool
859 nomatch i p m
860 = (mask i m) /= p
861
862 match i p m
863 = (mask i m) == p
864
865 -- Suppose a is largest such that 2^a divides 2*m.
866 -- Then mask i m is i with the low a bits zeroed out.
867 mask :: Int -> Mask -> Prefix
868 mask i m
869 = maskW (natFromInt i) (natFromInt m)
870
871 zeroN :: Nat -> Nat -> Bool
872 zeroN i m = (i .&. m) == 0
873
874 {--------------------------------------------------------------------
875 Big endian operations
876 --------------------------------------------------------------------}
877 maskW :: Nat -> Nat -> Prefix
878 maskW i m
879 = intFromNat (i .&. (complement (m-1) `xor` m))
880
881 shorter :: Mask -> Mask -> Bool
882 shorter m1 m2
883 = (natFromInt m1) > (natFromInt m2)
884
885 branchMask :: Prefix -> Prefix -> Mask
886 branchMask p1 p2
887 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
888
889 {----------------------------------------------------------------------
890 Finding the highest bit (mask) in a word [x] can be done efficiently in
891 three ways:
892 * convert to a floating point value and the mantissa tells us the
893 [log2(x)] that corresponds with the highest bit position. The mantissa
894 is retrieved either via the standard C function [frexp] or by some bit
895 twiddling on IEEE compatible numbers (float). Note that one needs to
896 use at least [double] precision for an accurate mantissa of 32 bit
897 numbers.
898 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
899 * use processor specific assembler instruction (asm).
900
901 The most portable way would be [bit], but is it efficient enough?
902 I have measured the cycle counts of the different methods on an AMD
903 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
904
905 highestBitMask: method cycles
906 --------------
907 frexp 200
908 float 33
909 bit 11
910 asm 12
911
912 highestBit: method cycles
913 --------------
914 frexp 195
915 float 33
916 bit 11
917 asm 11
918
919 Wow, the bit twiddling is on today's RISC like machines even faster
920 than a single CISC instruction (BSR)!
921 ----------------------------------------------------------------------}
922
923 {----------------------------------------------------------------------
924 [highestBitMask] returns a word where only the highest bit is set.
925 It is found by first setting all bits in lower positions than the
926 highest bit and than taking an exclusive or with the original value.
927 Allthough the function may look expensive, GHC compiles this into
928 excellent C code that subsequently compiled into highly efficient
929 machine code. The algorithm is derived from Jorg Arndt's FXT library.
930 ----------------------------------------------------------------------}
931 highestBitMask :: Nat -> Nat
932 highestBitMask x0
933 = case (x0 .|. shiftRL x0 1) of
934 x1 -> case (x1 .|. shiftRL x1 2) of
935 x2 -> case (x2 .|. shiftRL x2 4) of
936 x3 -> case (x3 .|. shiftRL x3 8) of
937 x4 -> case (x4 .|. shiftRL x4 16) of
938 x5 -> case (x5 .|. shiftRL x5 32) of -- for 64 bit platforms
939 x6 -> (x6 `xor` (shiftRL x6 1))
940
941
942 {--------------------------------------------------------------------
943 Utilities
944 --------------------------------------------------------------------}
945 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
946 foldlStrict f z xs
947 = case xs of
948 [] -> z
949 (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
950
951
952 {-
953 {--------------------------------------------------------------------
954 Testing
955 --------------------------------------------------------------------}
956 testTree :: [Int] -> IntSet
957 testTree xs = fromList xs
958 test1 = testTree [1..20]
959 test2 = testTree [30,29..10]
960 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
961
962 {--------------------------------------------------------------------
963 QuickCheck
964 --------------------------------------------------------------------}
965 qcheck prop
966 = check config prop
967 where
968 config = Config
969 { configMaxTest = 500
970 , configMaxFail = 5000
971 , configSize = \n -> (div n 2 + 3)
972 , configEvery = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
973 }
974
975
976 {--------------------------------------------------------------------
977 Arbitrary, reasonably balanced trees
978 --------------------------------------------------------------------}
979 instance Arbitrary IntSet where
980 arbitrary = do{ xs <- arbitrary
981 ; return (fromList xs)
982 }
983
984
985 {--------------------------------------------------------------------
986 Single, Insert, Delete
987 --------------------------------------------------------------------}
988 prop_Single :: Int -> Bool
989 prop_Single x
990 = (insert x empty == singleton x)
991
992 prop_InsertDelete :: Int -> IntSet -> Property
993 prop_InsertDelete k t
994 = not (member k t) ==> delete k (insert k t) == t
995
996
997 {--------------------------------------------------------------------
998 Union
999 --------------------------------------------------------------------}
1000 prop_UnionInsert :: Int -> IntSet -> Bool
1001 prop_UnionInsert x t
1002 = union t (singleton x) == insert x t
1003
1004 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
1005 prop_UnionAssoc t1 t2 t3
1006 = union t1 (union t2 t3) == union (union t1 t2) t3
1007
1008 prop_UnionComm :: IntSet -> IntSet -> Bool
1009 prop_UnionComm t1 t2
1010 = (union t1 t2 == union t2 t1)
1011
1012 prop_Diff :: [Int] -> [Int] -> Bool
1013 prop_Diff xs ys
1014 = toAscList (difference (fromList xs) (fromList ys))
1015 == List.sort ((List.\\) (nub xs) (nub ys))
1016
1017 prop_Int :: [Int] -> [Int] -> Bool
1018 prop_Int xs ys
1019 = toAscList (intersection (fromList xs) (fromList ys))
1020 == List.sort (nub ((List.intersect) (xs) (ys)))
1021
1022 {--------------------------------------------------------------------
1023 Lists
1024 --------------------------------------------------------------------}
1025 prop_Ordered
1026 = forAll (choose (5,100)) $ \n ->
1027 let xs = [0..n::Int]
1028 in fromAscList xs == fromList xs
1029
1030 prop_List :: [Int] -> Bool
1031 prop_List xs
1032 = (sort (nub xs) == toAscList (fromList xs))
1033
1034 {--------------------------------------------------------------------
1035 Bin invariants
1036 --------------------------------------------------------------------}
1037 powersOf2 :: IntSet
1038 powersOf2 = fromList [2^i | i <- [0..63]]
1039
1040 -- Check the invariant that the mask is a power of 2.
1041 prop_MaskPow2 :: IntSet -> Bool
1042 prop_MaskPow2 (Bin _ msk left right) = member msk powersOf2 && prop_MaskPow2 left && prop_MaskPow2 right
1043 prop_MaskPow2 _ = True
1044
1045 -- Check that the prefix satisfies its invariant.
1046 prop_Prefix :: IntSet -> Bool
1047 prop_Prefix s@(Bin prefix msk left right) = all (\elem -> match elem prefix msk) (toList s) && prop_Prefix left && prop_Prefix right
1048 prop_Prefix _ = True
1049
1050 -- Check that the left elements don't have the mask bit set, and the right
1051 -- ones do.
1052 prop_LeftRight :: IntSet -> Bool
1053 prop_LeftRight (Bin _ msk left right) = and [x .&. msk == 0 | x <- toList left] && and [x .&. msk == msk | x <- toList right]
1054 prop_LeftRight _ = True
1055
1056 {--------------------------------------------------------------------
1057 IntSet operations are like Set operations
1058 --------------------------------------------------------------------}
1059 toSet :: IntSet -> Set.Set Int
1060 toSet = Set.fromList . toList
1061
1062 -- Check that IntSet.isProperSubsetOf is the same as Set.isProperSubsetOf.
1063 prop_isProperSubsetOf :: IntSet -> IntSet -> Bool
1064 prop_isProperSubsetOf a b = isProperSubsetOf a b == Set.isProperSubsetOf (toSet a) (toSet b)
1065
1066 -- In the above test, isProperSubsetOf almost always returns False (since a
1067 -- random set is almost never a subset of another random set). So this second
1068 -- test checks the True case.
1069 prop_isProperSubsetOf2 :: IntSet -> IntSet -> Bool
1070 prop_isProperSubsetOf2 a b = isProperSubsetOf a c == (a /= c) where
1071 c = union a b
1072 -}