Add min/max handling operations for IntSet/IntMap
[packages/random.git] / Data / IntSet.hs
1 {-# OPTIONS -cpp -fglasgow-exts #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.IntSet
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of integer sets.
12 --
13 -- Since many function names (but not the type name) clash with
14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
15 --
16 -- > import Data.IntSet (IntSet)
17 -- > import qualified Data.IntSet as IntSet
18 --
19 -- The implementation is based on /big-endian patricia trees/. This data
20 -- structure performs especially well on binary operations like 'union'
21 -- and 'intersection'. However, my benchmarks show that it is also
22 -- (much) faster on insertions and deletions when compared to a generic
23 -- size-balanced set implementation (see "Data.Set").
24 --
25 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
26 -- Workshop on ML, September 1998, pages 77-86,
27 -- <http://www.cse.ogi.edu/~andy/pub/finite.htm>
28 --
29 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
30 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
31 -- October 1968, pages 514-534.
32 --
33 -- Many operations have a worst-case complexity of /O(min(n,W))/.
34 -- This means that the operation can become linear in the number of
35 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
36 -- (32 or 64).
37 -----------------------------------------------------------------------------
38
39 module Data.IntSet (
40 -- * Set type
41 IntSet -- instance Eq,Show
42
43 -- * Operators
44 , (\\)
45
46 -- * Query
47 , null
48 , size
49 , member
50 , notMember
51 , isSubsetOf
52 , isProperSubsetOf
53
54 -- * Construction
55 , empty
56 , singleton
57 , insert
58 , delete
59
60 -- * Combine
61 , union, unions
62 , difference
63 , intersection
64
65 -- * Filter
66 , filter
67 , partition
68 , split
69 , splitMember
70
71 -- * Min\/Max
72 , findMin
73 , findMax
74 , deleteMin
75 , deleteMax
76 , deleteFindMin
77 , deleteFindMax
78 , maxView
79 , minView
80
81 -- * Map
82 , map
83
84 -- * Fold
85 , fold
86
87 -- * Conversion
88 -- ** List
89 , elems
90 , toList
91 , fromList
92
93 -- ** Ordered list
94 , toAscList
95 , fromAscList
96 , fromDistinctAscList
97
98 -- * Debugging
99 , showTree
100 , showTreeWith
101 ) where
102
103
104 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
105 import Data.Bits
106
107 import qualified Data.List as List
108 import Data.Monoid (Monoid(..))
109 import Data.Typeable
110
111 {-
112 -- just for testing
113 import QuickCheck
114 import List (nub,sort)
115 import qualified List
116 -}
117
118 #if __GLASGOW_HASKELL__
119 import Text.Read
120 import Data.Generics.Basics (Data(..), mkNorepType)
121 import Data.Generics.Instances ()
122 #endif
123
124 #if __GLASGOW_HASKELL__ >= 503
125 import GHC.Exts ( Word(..), Int(..), shiftRL# )
126 #elif __GLASGOW_HASKELL__
127 import Word
128 import GlaExts ( Word(..), Int(..), shiftRL# )
129 #else
130 import Data.Word
131 #endif
132
133 infixl 9 \\{-This comment teaches CPP correct behaviour -}
134
135 -- A "Nat" is a natural machine word (an unsigned Int)
136 type Nat = Word
137
138 natFromInt :: Int -> Nat
139 natFromInt i = fromIntegral i
140
141 intFromNat :: Nat -> Int
142 intFromNat w = fromIntegral w
143
144 shiftRL :: Nat -> Int -> Nat
145 #if __GLASGOW_HASKELL__
146 {--------------------------------------------------------------------
147 GHC: use unboxing to get @shiftRL@ inlined.
148 --------------------------------------------------------------------}
149 shiftRL (W# x) (I# i)
150 = W# (shiftRL# x i)
151 #else
152 shiftRL x i = shiftR x i
153 #endif
154
155 {--------------------------------------------------------------------
156 Operators
157 --------------------------------------------------------------------}
158 -- | /O(n+m)/. See 'difference'.
159 (\\) :: IntSet -> IntSet -> IntSet
160 m1 \\ m2 = difference m1 m2
161
162 {--------------------------------------------------------------------
163 Types
164 --------------------------------------------------------------------}
165 -- | A set of integers.
166 data IntSet = Nil
167 | Tip {-# UNPACK #-} !Int
168 | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
169 -- Invariant: Nil is never found as a child of Bin.
170
171
172 type Prefix = Int
173 type Mask = Int
174
175 instance Monoid IntSet where
176 mempty = empty
177 mappend = union
178 mconcat = unions
179
180 #if __GLASGOW_HASKELL__
181
182 {--------------------------------------------------------------------
183 A Data instance
184 --------------------------------------------------------------------}
185
186 -- This instance preserves data abstraction at the cost of inefficiency.
187 -- We omit reflection services for the sake of data abstraction.
188
189 instance Data IntSet where
190 gfoldl f z is = z fromList `f` (toList is)
191 toConstr _ = error "toConstr"
192 gunfold _ _ = error "gunfold"
193 dataTypeOf _ = mkNorepType "Data.IntSet.IntSet"
194
195 #endif
196
197 {--------------------------------------------------------------------
198 Query
199 --------------------------------------------------------------------}
200 -- | /O(1)/. Is the set empty?
201 null :: IntSet -> Bool
202 null Nil = True
203 null other = False
204
205 -- | /O(n)/. Cardinality of the set.
206 size :: IntSet -> Int
207 size t
208 = case t of
209 Bin p m l r -> size l + size r
210 Tip y -> 1
211 Nil -> 0
212
213 -- | /O(min(n,W))/. Is the value a member of the set?
214 member :: Int -> IntSet -> Bool
215 member x t
216 = case t of
217 Bin p m l r
218 | nomatch x p m -> False
219 | zero x m -> member x l
220 | otherwise -> member x r
221 Tip y -> (x==y)
222 Nil -> False
223
224 -- | /O(min(n,W))/. Is the element not in the set?
225 notMember :: Int -> IntSet -> Bool
226 notMember k = not . member k
227
228 -- 'lookup' is used by 'intersection' for left-biasing
229 lookup :: Int -> IntSet -> Maybe Int
230 lookup k t
231 = let nk = natFromInt k in seq nk (lookupN nk t)
232
233 lookupN :: Nat -> IntSet -> Maybe Int
234 lookupN k t
235 = case t of
236 Bin p m l r
237 | zeroN k (natFromInt m) -> lookupN k l
238 | otherwise -> lookupN k r
239 Tip kx
240 | (k == natFromInt kx) -> Just kx
241 | otherwise -> Nothing
242 Nil -> Nothing
243
244 {--------------------------------------------------------------------
245 Construction
246 --------------------------------------------------------------------}
247 -- | /O(1)/. The empty set.
248 empty :: IntSet
249 empty
250 = Nil
251
252 -- | /O(1)/. A set of one element.
253 singleton :: Int -> IntSet
254 singleton x
255 = Tip x
256
257 {--------------------------------------------------------------------
258 Insert
259 --------------------------------------------------------------------}
260 -- | /O(min(n,W))/. Add a value to the set. When the value is already
261 -- an element of the set, it is replaced by the new one, ie. 'insert'
262 -- is left-biased.
263 insert :: Int -> IntSet -> IntSet
264 insert x t
265 = case t of
266 Bin p m l r
267 | nomatch x p m -> join x (Tip x) p t
268 | zero x m -> Bin p m (insert x l) r
269 | otherwise -> Bin p m l (insert x r)
270 Tip y
271 | x==y -> Tip x
272 | otherwise -> join x (Tip x) y t
273 Nil -> Tip x
274
275 -- right-biased insertion, used by 'union'
276 insertR :: Int -> IntSet -> IntSet
277 insertR x t
278 = case t of
279 Bin p m l r
280 | nomatch x p m -> join x (Tip x) p t
281 | zero x m -> Bin p m (insert x l) r
282 | otherwise -> Bin p m l (insert x r)
283 Tip y
284 | x==y -> t
285 | otherwise -> join x (Tip x) y t
286 Nil -> Tip x
287
288 -- | /O(min(n,W))/. Delete a value in the set. Returns the
289 -- original set when the value was not present.
290 delete :: Int -> IntSet -> IntSet
291 delete x t
292 = case t of
293 Bin p m l r
294 | nomatch x p m -> t
295 | zero x m -> bin p m (delete x l) r
296 | otherwise -> bin p m l (delete x r)
297 Tip y
298 | x==y -> Nil
299 | otherwise -> t
300 Nil -> Nil
301
302
303 {--------------------------------------------------------------------
304 Union
305 --------------------------------------------------------------------}
306 -- | The union of a list of sets.
307 unions :: [IntSet] -> IntSet
308 unions xs
309 = foldlStrict union empty xs
310
311
312 -- | /O(n+m)/. The union of two sets.
313 union :: IntSet -> IntSet -> IntSet
314 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
315 | shorter m1 m2 = union1
316 | shorter m2 m1 = union2
317 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
318 | otherwise = join p1 t1 p2 t2
319 where
320 union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
321 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
322 | otherwise = Bin p1 m1 l1 (union r1 t2)
323
324 union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
325 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
326 | otherwise = Bin p2 m2 l2 (union t1 r2)
327
328 union (Tip x) t = insert x t
329 union t (Tip x) = insertR x t -- right bias
330 union Nil t = t
331 union t Nil = t
332
333
334 {--------------------------------------------------------------------
335 Difference
336 --------------------------------------------------------------------}
337 -- | /O(n+m)/. Difference between two sets.
338 difference :: IntSet -> IntSet -> IntSet
339 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
340 | shorter m1 m2 = difference1
341 | shorter m2 m1 = difference2
342 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
343 | otherwise = t1
344 where
345 difference1 | nomatch p2 p1 m1 = t1
346 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
347 | otherwise = bin p1 m1 l1 (difference r1 t2)
348
349 difference2 | nomatch p1 p2 m2 = t1
350 | zero p1 m2 = difference t1 l2
351 | otherwise = difference t1 r2
352
353 difference t1@(Tip x) t2
354 | member x t2 = Nil
355 | otherwise = t1
356
357 difference Nil t = Nil
358 difference t (Tip x) = delete x t
359 difference t Nil = t
360
361
362
363 {--------------------------------------------------------------------
364 Intersection
365 --------------------------------------------------------------------}
366 -- | /O(n+m)/. The intersection of two sets.
367 intersection :: IntSet -> IntSet -> IntSet
368 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
369 | shorter m1 m2 = intersection1
370 | shorter m2 m1 = intersection2
371 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
372 | otherwise = Nil
373 where
374 intersection1 | nomatch p2 p1 m1 = Nil
375 | zero p2 m1 = intersection l1 t2
376 | otherwise = intersection r1 t2
377
378 intersection2 | nomatch p1 p2 m2 = Nil
379 | zero p1 m2 = intersection t1 l2
380 | otherwise = intersection t1 r2
381
382 intersection t1@(Tip x) t2
383 | member x t2 = t1
384 | otherwise = Nil
385 intersection t (Tip x)
386 = case lookup x t of
387 Just y -> Tip y
388 Nothing -> Nil
389 intersection Nil t = Nil
390 intersection t Nil = Nil
391
392
393
394 {--------------------------------------------------------------------
395 Subset
396 --------------------------------------------------------------------}
397 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
398 isProperSubsetOf :: IntSet -> IntSet -> Bool
399 isProperSubsetOf t1 t2
400 = case subsetCmp t1 t2 of
401 LT -> True
402 ge -> False
403
404 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
405 | shorter m1 m2 = GT
406 | shorter m2 m1 = subsetCmpLt
407 | p1 == p2 = subsetCmpEq
408 | otherwise = GT -- disjoint
409 where
410 subsetCmpLt | nomatch p1 p2 m2 = GT
411 | zero p1 m2 = subsetCmp t1 l2
412 | otherwise = subsetCmp t1 r2
413 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
414 (GT,_ ) -> GT
415 (_ ,GT) -> GT
416 (EQ,EQ) -> EQ
417 other -> LT
418
419 subsetCmp (Bin p m l r) t = GT
420 subsetCmp (Tip x) (Tip y)
421 | x==y = EQ
422 | otherwise = GT -- disjoint
423 subsetCmp (Tip x) t
424 | member x t = LT
425 | otherwise = GT -- disjoint
426 subsetCmp Nil Nil = EQ
427 subsetCmp Nil t = LT
428
429 -- | /O(n+m)/. Is this a subset?
430 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
431
432 isSubsetOf :: IntSet -> IntSet -> Bool
433 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
434 | shorter m1 m2 = False
435 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
436 else isSubsetOf t1 r2)
437 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
438 isSubsetOf (Bin p m l r) t = False
439 isSubsetOf (Tip x) t = member x t
440 isSubsetOf Nil t = True
441
442
443 {--------------------------------------------------------------------
444 Filter
445 --------------------------------------------------------------------}
446 -- | /O(n)/. Filter all elements that satisfy some predicate.
447 filter :: (Int -> Bool) -> IntSet -> IntSet
448 filter pred t
449 = case t of
450 Bin p m l r
451 -> bin p m (filter pred l) (filter pred r)
452 Tip x
453 | pred x -> t
454 | otherwise -> Nil
455 Nil -> Nil
456
457 -- | /O(n)/. partition the set according to some predicate.
458 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
459 partition pred t
460 = case t of
461 Bin p m l r
462 -> let (l1,l2) = partition pred l
463 (r1,r2) = partition pred r
464 in (bin p m l1 r1, bin p m l2 r2)
465 Tip x
466 | pred x -> (t,Nil)
467 | otherwise -> (Nil,t)
468 Nil -> (Nil,Nil)
469
470
471 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
472 -- where all elements in @set1@ are lower than @x@ and all elements in
473 -- @set2@ larger than @x@.
474 --
475 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
476 split :: Int -> IntSet -> (IntSet,IntSet)
477 split x t
478 = case t of
479 Bin p m l r
480 | m < 0 -> if x >= 0 then let (lt,gt) = split' x l in (union r lt, gt)
481 else let (lt,gt) = split' x r in (lt, union gt l)
482 -- handle negative numbers.
483 | otherwise -> split' x t
484 Tip y
485 | x>y -> (t,Nil)
486 | x<y -> (Nil,t)
487 | otherwise -> (Nil,Nil)
488 Nil -> (Nil, Nil)
489
490 split' :: Int -> IntSet -> (IntSet,IntSet)
491 split' x t
492 = case t of
493 Bin p m l r
494 | match x p m -> if zero x m then let (lt,gt) = split' x l in (lt,union gt r)
495 else let (lt,gt) = split' x r in (union l lt,gt)
496 | otherwise -> if x < p then (Nil, t)
497 else (t, Nil)
498 Tip y
499 | x>y -> (t,Nil)
500 | x<y -> (Nil,t)
501 | otherwise -> (Nil,Nil)
502 Nil -> (Nil,Nil)
503
504 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
505 -- element was found in the original set.
506 splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
507 splitMember x t
508 = case t of
509 Bin p m l r
510 | m < 0 -> if x >= 0 then let (lt,found,gt) = splitMember' x l in (union r lt, found, gt)
511 else let (lt,found,gt) = splitMember' x r in (lt, found, union gt l)
512 -- handle negative numbers.
513 | otherwise -> splitMember' x t
514 Tip y
515 | x>y -> (t,False,Nil)
516 | x<y -> (Nil,False,t)
517 | otherwise -> (Nil,True,Nil)
518 Nil -> (Nil,False,Nil)
519
520 splitMember' :: Int -> IntSet -> (IntSet,Bool,IntSet)
521 splitMember' x t
522 = case t of
523 Bin p m l r
524 | match x p m -> if zero x m then let (lt,found,gt) = splitMember x l in (lt,found,union gt r)
525 else let (lt,found,gt) = splitMember x r in (union l lt,found,gt)
526 | otherwise -> if x < p then (Nil, False, t)
527 else (t, False, Nil)
528 Tip y
529 | x>y -> (t,False,Nil)
530 | x<y -> (Nil,False,t)
531 | otherwise -> (Nil,True,Nil)
532 Nil -> (Nil,False,Nil)
533
534 {----------------------------------------------------------------------
535 Min/Max
536 ----------------------------------------------------------------------}
537
538 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set stripped from that element
539 -- @fail@s (in the monad) when passed an empty set.
540 maxView :: (Monad m) => IntSet -> m (Int, IntSet)
541 maxView t
542 = case t of
543 Bin p m l r | m < 0 -> let (result,t') = maxViewUnsigned l in return (result, bin p m t' r)
544 Bin p m l r -> let (result,t') = maxViewUnsigned r in return (result, bin p m l t')
545 Tip y -> return (y,Nil)
546 Nil -> fail "maxView: empty set has no maximal element"
547
548 maxViewUnsigned :: IntSet -> (Int, IntSet)
549 maxViewUnsigned t
550 = case t of
551 Bin p m l r -> let (result,t') = maxViewUnsigned r in (result, bin p m l t')
552 Tip y -> (y, Nil)
553
554 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set stripped from that element
555 -- @fail@s (in the monad) when passed an empty set.
556 minView :: (Monad m) => IntSet -> m (Int, IntSet)
557 minView t
558 = case t of
559 Bin p m l r | m < 0 -> let (result,t') = minViewUnsigned r in return (result, bin p m l t')
560 Bin p m l r -> let (result,t') = minViewUnsigned l in return (result, bin p m t' r)
561 Tip y -> return (y, Nil)
562 Nil -> fail "minView: empty set has no minimal element"
563
564 minViewUnsigned :: IntSet -> (Int, IntSet)
565 minViewUnsigned t
566 = case t of
567 Bin p m l r -> let (result,t') = minViewUnsigned l in (result, bin p m t' r)
568 Tip y -> (y, Nil)
569
570
571 -- Duplicate the Identity monad here because base < mtl.
572 newtype Identity a = Identity { runIdentity :: a }
573 instance Monad Identity where
574 return a = Identity a
575 m >>= k = k (runIdentity m)
576
577
578 -- | /O(min(n,W))/. Delete and find the minimal element.
579 --
580 -- > deleteFindMin set = (findMin set, deleteMin set)
581 deleteFindMin :: IntSet -> (Int, IntSet)
582 deleteFindMin = runIdentity . minView
583
584 -- | /O(min(n,W))/. Delete and find the maximal element.
585 --
586 -- > deleteFindMax set = (findMax set, deleteMax set)
587 deleteFindMax :: IntSet -> (Int, IntSet)
588 deleteFindMax = runIdentity . maxView
589
590 -- | /O(min(n,W))/. The minimal element of a set.
591 findMin :: IntSet -> Int
592 findMin = fst . runIdentity . minView
593
594 -- | /O(min(n,W))/. The maximal element of a set.
595 findMax :: IntSet -> Int
596 findMax = fst . runIdentity . maxView
597
598 -- | /O(min(n,W))/. Delete the minimal element.
599 deleteMin :: IntSet -> IntSet
600 deleteMin = snd . runIdentity . minView
601
602 -- | /O(min(n,W))/. Delete the maximal element.
603 deleteMax :: IntSet -> IntSet
604 deleteMax = snd . runIdentity . maxView
605
606
607
608 {----------------------------------------------------------------------
609 Map
610 ----------------------------------------------------------------------}
611
612 -- | /O(n*min(n,W))/.
613 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
614 --
615 -- It's worth noting that the size of the result may be smaller if,
616 -- for some @(x,y)@, @x \/= y && f x == f y@
617
618 map :: (Int->Int) -> IntSet -> IntSet
619 map f = fromList . List.map f . toList
620
621 {--------------------------------------------------------------------
622 Fold
623 --------------------------------------------------------------------}
624 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
625 --
626 -- > sum set == fold (+) 0 set
627 -- > elems set == fold (:) [] set
628 fold :: (Int -> b -> b) -> b -> IntSet -> b
629 fold f z t
630 = case t of
631 Bin 0 m l r | m < 0 -> foldr f (foldr f z l) r
632 -- put negative numbers before.
633 Bin p m l r -> foldr f z t
634 Tip x -> f x z
635 Nil -> z
636
637 foldr :: (Int -> b -> b) -> b -> IntSet -> b
638 foldr f z t
639 = case t of
640 Bin p m l r -> foldr f (foldr f z r) l
641 Tip x -> f x z
642 Nil -> z
643
644 {--------------------------------------------------------------------
645 List variations
646 --------------------------------------------------------------------}
647 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
648 elems :: IntSet -> [Int]
649 elems s
650 = toList s
651
652 {--------------------------------------------------------------------
653 Lists
654 --------------------------------------------------------------------}
655 -- | /O(n)/. Convert the set to a list of elements.
656 toList :: IntSet -> [Int]
657 toList t
658 = fold (:) [] t
659
660 -- | /O(n)/. Convert the set to an ascending list of elements.
661 toAscList :: IntSet -> [Int]
662 toAscList t = toList t
663
664 -- | /O(n*min(n,W))/. Create a set from a list of integers.
665 fromList :: [Int] -> IntSet
666 fromList xs
667 = foldlStrict ins empty xs
668 where
669 ins t x = insert x t
670
671 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
672 fromAscList :: [Int] -> IntSet
673 fromAscList xs
674 = fromList xs
675
676 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
677 fromDistinctAscList :: [Int] -> IntSet
678 fromDistinctAscList xs
679 = fromList xs
680
681
682 {--------------------------------------------------------------------
683 Eq
684 --------------------------------------------------------------------}
685 instance Eq IntSet where
686 t1 == t2 = equal t1 t2
687 t1 /= t2 = nequal t1 t2
688
689 equal :: IntSet -> IntSet -> Bool
690 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
691 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
692 equal (Tip x) (Tip y)
693 = (x==y)
694 equal Nil Nil = True
695 equal t1 t2 = False
696
697 nequal :: IntSet -> IntSet -> Bool
698 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
699 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
700 nequal (Tip x) (Tip y)
701 = (x/=y)
702 nequal Nil Nil = False
703 nequal t1 t2 = True
704
705 {--------------------------------------------------------------------
706 Ord
707 --------------------------------------------------------------------}
708
709 instance Ord IntSet where
710 compare s1 s2 = compare (toAscList s1) (toAscList s2)
711 -- tentative implementation. See if more efficient exists.
712
713 {--------------------------------------------------------------------
714 Show
715 --------------------------------------------------------------------}
716 instance Show IntSet where
717 showsPrec p xs = showParen (p > 10) $
718 showString "fromList " . shows (toList xs)
719
720 showSet :: [Int] -> ShowS
721 showSet []
722 = showString "{}"
723 showSet (x:xs)
724 = showChar '{' . shows x . showTail xs
725 where
726 showTail [] = showChar '}'
727 showTail (x:xs) = showChar ',' . shows x . showTail xs
728
729 {--------------------------------------------------------------------
730 Read
731 --------------------------------------------------------------------}
732 instance Read IntSet where
733 #ifdef __GLASGOW_HASKELL__
734 readPrec = parens $ prec 10 $ do
735 Ident "fromList" <- lexP
736 xs <- readPrec
737 return (fromList xs)
738
739 readListPrec = readListPrecDefault
740 #else
741 readsPrec p = readParen (p > 10) $ \ r -> do
742 ("fromList",s) <- lex r
743 (xs,t) <- reads s
744 return (fromList xs,t)
745 #endif
746
747 {--------------------------------------------------------------------
748 Typeable
749 --------------------------------------------------------------------}
750
751 #include "Typeable.h"
752 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
753
754 {--------------------------------------------------------------------
755 Debugging
756 --------------------------------------------------------------------}
757 -- | /O(n)/. Show the tree that implements the set. The tree is shown
758 -- in a compressed, hanging format.
759 showTree :: IntSet -> String
760 showTree s
761 = showTreeWith True False s
762
763
764 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
765 the tree that implements the set. If @hang@ is
766 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
767 @wide@ is 'True', an extra wide version is shown.
768 -}
769 showTreeWith :: Bool -> Bool -> IntSet -> String
770 showTreeWith hang wide t
771 | hang = (showsTreeHang wide [] t) ""
772 | otherwise = (showsTree wide [] [] t) ""
773
774 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
775 showsTree wide lbars rbars t
776 = case t of
777 Bin p m l r
778 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
779 showWide wide rbars .
780 showsBars lbars . showString (showBin p m) . showString "\n" .
781 showWide wide lbars .
782 showsTree wide (withEmpty lbars) (withBar lbars) l
783 Tip x
784 -> showsBars lbars . showString " " . shows x . showString "\n"
785 Nil -> showsBars lbars . showString "|\n"
786
787 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
788 showsTreeHang wide bars t
789 = case t of
790 Bin p m l r
791 -> showsBars bars . showString (showBin p m) . showString "\n" .
792 showWide wide bars .
793 showsTreeHang wide (withBar bars) l .
794 showWide wide bars .
795 showsTreeHang wide (withEmpty bars) r
796 Tip x
797 -> showsBars bars . showString " " . shows x . showString "\n"
798 Nil -> showsBars bars . showString "|\n"
799
800 showBin p m
801 = "*" -- ++ show (p,m)
802
803 showWide wide bars
804 | wide = showString (concat (reverse bars)) . showString "|\n"
805 | otherwise = id
806
807 showsBars :: [String] -> ShowS
808 showsBars bars
809 = case bars of
810 [] -> id
811 _ -> showString (concat (reverse (tail bars))) . showString node
812
813 node = "+--"
814 withBar bars = "| ":bars
815 withEmpty bars = " ":bars
816
817
818 {--------------------------------------------------------------------
819 Helpers
820 --------------------------------------------------------------------}
821 {--------------------------------------------------------------------
822 Join
823 --------------------------------------------------------------------}
824 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
825 join p1 t1 p2 t2
826 | zero p1 m = Bin p m t1 t2
827 | otherwise = Bin p m t2 t1
828 where
829 m = branchMask p1 p2
830 p = mask p1 m
831
832 {--------------------------------------------------------------------
833 @bin@ assures that we never have empty trees within a tree.
834 --------------------------------------------------------------------}
835 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
836 bin p m l Nil = l
837 bin p m Nil r = r
838 bin p m l r = Bin p m l r
839
840
841 {--------------------------------------------------------------------
842 Endian independent bit twiddling
843 --------------------------------------------------------------------}
844 zero :: Int -> Mask -> Bool
845 zero i m
846 = (natFromInt i) .&. (natFromInt m) == 0
847
848 nomatch,match :: Int -> Prefix -> Mask -> Bool
849 nomatch i p m
850 = (mask i m) /= p
851
852 match i p m
853 = (mask i m) == p
854
855 mask :: Int -> Mask -> Prefix
856 mask i m
857 = maskW (natFromInt i) (natFromInt m)
858
859 zeroN :: Nat -> Nat -> Bool
860 zeroN i m = (i .&. m) == 0
861
862 {--------------------------------------------------------------------
863 Big endian operations
864 --------------------------------------------------------------------}
865 maskW :: Nat -> Nat -> Prefix
866 maskW i m
867 = intFromNat (i .&. (complement (m-1) `xor` m))
868
869 shorter :: Mask -> Mask -> Bool
870 shorter m1 m2
871 = (natFromInt m1) > (natFromInt m2)
872
873 branchMask :: Prefix -> Prefix -> Mask
874 branchMask p1 p2
875 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
876
877 {----------------------------------------------------------------------
878 Finding the highest bit (mask) in a word [x] can be done efficiently in
879 three ways:
880 * convert to a floating point value and the mantissa tells us the
881 [log2(x)] that corresponds with the highest bit position. The mantissa
882 is retrieved either via the standard C function [frexp] or by some bit
883 twiddling on IEEE compatible numbers (float). Note that one needs to
884 use at least [double] precision for an accurate mantissa of 32 bit
885 numbers.
886 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
887 * use processor specific assembler instruction (asm).
888
889 The most portable way would be [bit], but is it efficient enough?
890 I have measured the cycle counts of the different methods on an AMD
891 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
892
893 highestBitMask: method cycles
894 --------------
895 frexp 200
896 float 33
897 bit 11
898 asm 12
899
900 highestBit: method cycles
901 --------------
902 frexp 195
903 float 33
904 bit 11
905 asm 11
906
907 Wow, the bit twiddling is on today's RISC like machines even faster
908 than a single CISC instruction (BSR)!
909 ----------------------------------------------------------------------}
910
911 {----------------------------------------------------------------------
912 [highestBitMask] returns a word where only the highest bit is set.
913 It is found by first setting all bits in lower positions than the
914 highest bit and than taking an exclusive or with the original value.
915 Allthough the function may look expensive, GHC compiles this into
916 excellent C code that subsequently compiled into highly efficient
917 machine code. The algorithm is derived from Jorg Arndt's FXT library.
918 ----------------------------------------------------------------------}
919 highestBitMask :: Nat -> Nat
920 highestBitMask x
921 = case (x .|. shiftRL x 1) of
922 x -> case (x .|. shiftRL x 2) of
923 x -> case (x .|. shiftRL x 4) of
924 x -> case (x .|. shiftRL x 8) of
925 x -> case (x .|. shiftRL x 16) of
926 x -> case (x .|. shiftRL x 32) of -- for 64 bit platforms
927 x -> (x `xor` (shiftRL x 1))
928
929
930 {--------------------------------------------------------------------
931 Utilities
932 --------------------------------------------------------------------}
933 foldlStrict f z xs
934 = case xs of
935 [] -> z
936 (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
937
938
939 {-
940 {--------------------------------------------------------------------
941 Testing
942 --------------------------------------------------------------------}
943 testTree :: [Int] -> IntSet
944 testTree xs = fromList xs
945 test1 = testTree [1..20]
946 test2 = testTree [30,29..10]
947 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
948
949 {--------------------------------------------------------------------
950 QuickCheck
951 --------------------------------------------------------------------}
952 qcheck prop
953 = check config prop
954 where
955 config = Config
956 { configMaxTest = 500
957 , configMaxFail = 5000
958 , configSize = \n -> (div n 2 + 3)
959 , configEvery = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
960 }
961
962
963 {--------------------------------------------------------------------
964 Arbitrary, reasonably balanced trees
965 --------------------------------------------------------------------}
966 instance Arbitrary IntSet where
967 arbitrary = do{ xs <- arbitrary
968 ; return (fromList xs)
969 }
970
971
972 {--------------------------------------------------------------------
973 Single, Insert, Delete
974 --------------------------------------------------------------------}
975 prop_Single :: Int -> Bool
976 prop_Single x
977 = (insert x empty == singleton x)
978
979 prop_InsertDelete :: Int -> IntSet -> Property
980 prop_InsertDelete k t
981 = not (member k t) ==> delete k (insert k t) == t
982
983
984 {--------------------------------------------------------------------
985 Union
986 --------------------------------------------------------------------}
987 prop_UnionInsert :: Int -> IntSet -> Bool
988 prop_UnionInsert x t
989 = union t (singleton x) == insert x t
990
991 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
992 prop_UnionAssoc t1 t2 t3
993 = union t1 (union t2 t3) == union (union t1 t2) t3
994
995 prop_UnionComm :: IntSet -> IntSet -> Bool
996 prop_UnionComm t1 t2
997 = (union t1 t2 == union t2 t1)
998
999 prop_Diff :: [Int] -> [Int] -> Bool
1000 prop_Diff xs ys
1001 = toAscList (difference (fromList xs) (fromList ys))
1002 == List.sort ((List.\\) (nub xs) (nub ys))
1003
1004 prop_Int :: [Int] -> [Int] -> Bool
1005 prop_Int xs ys
1006 = toAscList (intersection (fromList xs) (fromList ys))
1007 == List.sort (nub ((List.intersect) (xs) (ys)))
1008
1009 {--------------------------------------------------------------------
1010 Lists
1011 --------------------------------------------------------------------}
1012 prop_Ordered
1013 = forAll (choose (5,100)) $ \n ->
1014 let xs = [0..n::Int]
1015 in fromAscList xs == fromList xs
1016
1017 prop_List :: [Int] -> Bool
1018 prop_List xs
1019 = (sort (nub xs) == toAscList (fromList xs))
1020 -}