O(n) fromAscList IntSet / IntMap
[packages/containers.git] / Data / IntSet.hs
1 {-# OPTIONS -cpp #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.IntSet
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of integer sets.
12 --
13 -- Since many function names (but not the type name) clash with
14 -- "Prelude" names, this module is usually imported @qualified@, e.g.
15 --
16 -- > import Data.IntSet (IntSet)
17 -- > import qualified Data.IntSet as IntSet
18 --
19 -- The implementation is based on /big-endian patricia trees/. This data
20 -- structure performs especially well on binary operations like 'union'
21 -- and 'intersection'. However, my benchmarks show that it is also
22 -- (much) faster on insertions and deletions when compared to a generic
23 -- size-balanced set implementation (see "Data.Set").
24 --
25 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
26 -- Workshop on ML, September 1998, pages 77-86,
27 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
28 --
29 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
30 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
31 -- October 1968, pages 514-534.
32 --
33 -- Many operations have a worst-case complexity of /O(min(n,W))/.
34 -- This means that the operation can become linear in the number of
35 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
36 -- (32 or 64).
37 -----------------------------------------------------------------------------
38
39 module Data.IntSet (
40 -- * Set type
41 IntSet -- instance Eq,Show
42
43 -- * Operators
44 , (\\)
45
46 -- * Query
47 , null
48 , size
49 , member
50 , notMember
51 , isSubsetOf
52 , isProperSubsetOf
53
54 -- * Construction
55 , empty
56 , singleton
57 , insert
58 , delete
59
60 -- * Combine
61 , union, unions
62 , difference
63 , intersection
64
65 -- * Filter
66 , filter
67 , partition
68 , split
69 , splitMember
70
71 -- * Min\/Max
72 , findMin
73 , findMax
74 , deleteMin
75 , deleteMax
76 , deleteFindMin
77 , deleteFindMax
78 , maxView
79 , minView
80
81 -- * Map
82 , map
83
84 -- * Fold
85 , fold
86
87 -- * Conversion
88 -- ** List
89 , elems
90 , toList
91 , fromList
92
93 -- ** Ordered list
94 , toAscList
95 , fromAscList
96 , fromDistinctAscList
97
98 -- * Debugging
99 , showTree
100 , showTreeWith
101 ) where
102
103
104 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
105 import Data.Bits
106
107 import qualified Data.List as List
108 import Data.Monoid (Monoid(..))
109 import Data.Maybe (fromMaybe)
110 import Data.Typeable
111
112 {-
113 -- just for testing
114 import Test.QuickCheck
115 import List (nub,sort)
116 import qualified List
117 import qualified Data.Set as Set
118 -}
119
120 #if __GLASGOW_HASKELL__
121 import Text.Read
122 import Data.Data (Data(..), mkNoRepType)
123 #endif
124
125 #if __GLASGOW_HASKELL__ >= 503
126 import GHC.Exts ( Word(..), Int(..), shiftRL# )
127 #elif __GLASGOW_HASKELL__
128 import Word
129 import GlaExts ( Word(..), Int(..), shiftRL# )
130 #else
131 import Data.Word
132 #endif
133
134 infixl 9 \\{-This comment teaches CPP correct behaviour -}
135
136 -- A "Nat" is a natural machine word (an unsigned Int)
137 type Nat = Word
138
139 natFromInt :: Int -> Nat
140 natFromInt i = fromIntegral i
141
142 intFromNat :: Nat -> Int
143 intFromNat w = fromIntegral w
144
145 shiftRL :: Nat -> Int -> Nat
146 #if __GLASGOW_HASKELL__
147 {--------------------------------------------------------------------
148 GHC: use unboxing to get @shiftRL@ inlined.
149 --------------------------------------------------------------------}
150 shiftRL (W# x) (I# i)
151 = W# (shiftRL# x i)
152 #else
153 shiftRL x i = shiftR x i
154 #endif
155
156 {--------------------------------------------------------------------
157 Operators
158 --------------------------------------------------------------------}
159 -- | /O(n+m)/. See 'difference'.
160 (\\) :: IntSet -> IntSet -> IntSet
161 m1 \\ m2 = difference m1 m2
162
163 {--------------------------------------------------------------------
164 Types
165 --------------------------------------------------------------------}
166 -- | A set of integers.
167 data IntSet = Nil
168 | Tip {-# UNPACK #-} !Int
169 | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
170 -- Invariant: Nil is never found as a child of Bin.
171 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
172 -- two elements of the set differ.
173 -- Invariant: Prefix is the common high-order bits that all elements share to
174 -- the left of the Mask bit.
175 -- Invariant: In Bin prefix mask left right, left consists of the elements that
176 -- don't have the mask bit set; right is all the elements that do.
177
178
179 type Prefix = Int
180 type Mask = Int
181
182 instance Monoid IntSet where
183 mempty = empty
184 mappend = union
185 mconcat = unions
186
187 #if __GLASGOW_HASKELL__
188
189 {--------------------------------------------------------------------
190 A Data instance
191 --------------------------------------------------------------------}
192
193 -- This instance preserves data abstraction at the cost of inefficiency.
194 -- We omit reflection services for the sake of data abstraction.
195
196 instance Data IntSet where
197 gfoldl f z is = z fromList `f` (toList is)
198 toConstr _ = error "toConstr"
199 gunfold _ _ = error "gunfold"
200 dataTypeOf _ = mkNoRepType "Data.IntSet.IntSet"
201
202 #endif
203
204 {--------------------------------------------------------------------
205 Query
206 --------------------------------------------------------------------}
207 -- | /O(1)/. Is the set empty?
208 null :: IntSet -> Bool
209 null Nil = True
210 null _ = False
211
212 -- | /O(n)/. Cardinality of the set.
213 size :: IntSet -> Int
214 size t
215 = case t of
216 Bin _ _ l r -> size l + size r
217 Tip _ -> 1
218 Nil -> 0
219
220 -- | /O(min(n,W))/. Is the value a member of the set?
221 member :: Int -> IntSet -> Bool
222 member x t
223 = case t of
224 Bin p m l r
225 | nomatch x p m -> False
226 | zero x m -> member x l
227 | otherwise -> member x r
228 Tip y -> (x==y)
229 Nil -> False
230
231 -- | /O(min(n,W))/. Is the element not in the set?
232 notMember :: Int -> IntSet -> Bool
233 notMember k = not . member k
234
235 -- 'lookup' is used by 'intersection' for left-biasing
236 lookup :: Int -> IntSet -> Maybe Int
237 lookup k t
238 = let nk = natFromInt k in seq nk (lookupN nk t)
239
240 lookupN :: Nat -> IntSet -> Maybe Int
241 lookupN k t
242 = case t of
243 Bin _ m l r
244 | zeroN k (natFromInt m) -> lookupN k l
245 | otherwise -> lookupN k r
246 Tip kx
247 | (k == natFromInt kx) -> Just kx
248 | otherwise -> Nothing
249 Nil -> Nothing
250
251 {--------------------------------------------------------------------
252 Construction
253 --------------------------------------------------------------------}
254 -- | /O(1)/. The empty set.
255 empty :: IntSet
256 empty
257 = Nil
258
259 -- | /O(1)/. A set of one element.
260 singleton :: Int -> IntSet
261 singleton x
262 = Tip x
263
264 {--------------------------------------------------------------------
265 Insert
266 --------------------------------------------------------------------}
267 -- | /O(min(n,W))/. Add a value to the set. When the value is already
268 -- an element of the set, it is replaced by the new one, ie. 'insert'
269 -- is left-biased.
270 insert :: Int -> IntSet -> IntSet
271 insert x t
272 = case t of
273 Bin p m l r
274 | nomatch x p m -> join x (Tip x) p t
275 | zero x m -> Bin p m (insert x l) r
276 | otherwise -> Bin p m l (insert x r)
277 Tip y
278 | x==y -> Tip x
279 | otherwise -> join x (Tip x) y t
280 Nil -> Tip x
281
282 -- right-biased insertion, used by 'union'
283 insertR :: Int -> IntSet -> IntSet
284 insertR x t
285 = case t of
286 Bin p m l r
287 | nomatch x p m -> join x (Tip x) p t
288 | zero x m -> Bin p m (insert x l) r
289 | otherwise -> Bin p m l (insert x r)
290 Tip y
291 | x==y -> t
292 | otherwise -> join x (Tip x) y t
293 Nil -> Tip x
294
295 -- | /O(min(n,W))/. Delete a value in the set. Returns the
296 -- original set when the value was not present.
297 delete :: Int -> IntSet -> IntSet
298 delete x t
299 = case t of
300 Bin p m l r
301 | nomatch x p m -> t
302 | zero x m -> bin p m (delete x l) r
303 | otherwise -> bin p m l (delete x r)
304 Tip y
305 | x==y -> Nil
306 | otherwise -> t
307 Nil -> Nil
308
309
310 {--------------------------------------------------------------------
311 Union
312 --------------------------------------------------------------------}
313 -- | The union of a list of sets.
314 unions :: [IntSet] -> IntSet
315 unions xs
316 = foldlStrict union empty xs
317
318
319 -- | /O(n+m)/. The union of two sets.
320 union :: IntSet -> IntSet -> IntSet
321 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
322 | shorter m1 m2 = union1
323 | shorter m2 m1 = union2
324 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
325 | otherwise = join p1 t1 p2 t2
326 where
327 union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
328 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
329 | otherwise = Bin p1 m1 l1 (union r1 t2)
330
331 union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
332 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
333 | otherwise = Bin p2 m2 l2 (union t1 r2)
334
335 union (Tip x) t = insert x t
336 union t (Tip x) = insertR x t -- right bias
337 union Nil t = t
338 union t Nil = t
339
340
341 {--------------------------------------------------------------------
342 Difference
343 --------------------------------------------------------------------}
344 -- | /O(n+m)/. Difference between two sets.
345 difference :: IntSet -> IntSet -> IntSet
346 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
347 | shorter m1 m2 = difference1
348 | shorter m2 m1 = difference2
349 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
350 | otherwise = t1
351 where
352 difference1 | nomatch p2 p1 m1 = t1
353 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
354 | otherwise = bin p1 m1 l1 (difference r1 t2)
355
356 difference2 | nomatch p1 p2 m2 = t1
357 | zero p1 m2 = difference t1 l2
358 | otherwise = difference t1 r2
359
360 difference t1@(Tip x) t2
361 | member x t2 = Nil
362 | otherwise = t1
363
364 difference Nil _ = Nil
365 difference t (Tip x) = delete x t
366 difference t Nil = t
367
368
369
370 {--------------------------------------------------------------------
371 Intersection
372 --------------------------------------------------------------------}
373 -- | /O(n+m)/. The intersection of two sets.
374 intersection :: IntSet -> IntSet -> IntSet
375 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
376 | shorter m1 m2 = intersection1
377 | shorter m2 m1 = intersection2
378 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
379 | otherwise = Nil
380 where
381 intersection1 | nomatch p2 p1 m1 = Nil
382 | zero p2 m1 = intersection l1 t2
383 | otherwise = intersection r1 t2
384
385 intersection2 | nomatch p1 p2 m2 = Nil
386 | zero p1 m2 = intersection t1 l2
387 | otherwise = intersection t1 r2
388
389 intersection t1@(Tip x) t2
390 | member x t2 = t1
391 | otherwise = Nil
392 intersection t (Tip x)
393 = case lookup x t of
394 Just y -> Tip y
395 Nothing -> Nil
396 intersection Nil _ = Nil
397 intersection _ Nil = Nil
398
399
400
401 {--------------------------------------------------------------------
402 Subset
403 --------------------------------------------------------------------}
404 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
405 isProperSubsetOf :: IntSet -> IntSet -> Bool
406 isProperSubsetOf t1 t2
407 = case subsetCmp t1 t2 of
408 LT -> True
409 _ -> False
410
411 subsetCmp :: IntSet -> IntSet -> Ordering
412 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
413 | shorter m1 m2 = GT
414 | shorter m2 m1 = case subsetCmpLt of
415 GT -> GT
416 _ -> LT
417 | p1 == p2 = subsetCmpEq
418 | otherwise = GT -- disjoint
419 where
420 subsetCmpLt | nomatch p1 p2 m2 = GT
421 | zero p1 m2 = subsetCmp t1 l2
422 | otherwise = subsetCmp t1 r2
423 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
424 (GT,_ ) -> GT
425 (_ ,GT) -> GT
426 (EQ,EQ) -> EQ
427 _ -> LT
428
429 subsetCmp (Bin _ _ _ _) _ = GT
430 subsetCmp (Tip x) (Tip y)
431 | x==y = EQ
432 | otherwise = GT -- disjoint
433 subsetCmp (Tip x) t
434 | member x t = LT
435 | otherwise = GT -- disjoint
436 subsetCmp Nil Nil = EQ
437 subsetCmp Nil _ = LT
438
439 -- | /O(n+m)/. Is this a subset?
440 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
441
442 isSubsetOf :: IntSet -> IntSet -> Bool
443 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
444 | shorter m1 m2 = False
445 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
446 else isSubsetOf t1 r2)
447 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
448 isSubsetOf (Bin _ _ _ _) _ = False
449 isSubsetOf (Tip x) t = member x t
450 isSubsetOf Nil _ = True
451
452
453 {--------------------------------------------------------------------
454 Filter
455 --------------------------------------------------------------------}
456 -- | /O(n)/. Filter all elements that satisfy some predicate.
457 filter :: (Int -> Bool) -> IntSet -> IntSet
458 filter predicate t
459 = case t of
460 Bin p m l r
461 -> bin p m (filter predicate l) (filter predicate r)
462 Tip x
463 | predicate x -> t
464 | otherwise -> Nil
465 Nil -> Nil
466
467 -- | /O(n)/. partition the set according to some predicate.
468 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
469 partition predicate t
470 = case t of
471 Bin p m l r
472 -> let (l1,l2) = partition predicate l
473 (r1,r2) = partition predicate r
474 in (bin p m l1 r1, bin p m l2 r2)
475 Tip x
476 | predicate x -> (t,Nil)
477 | otherwise -> (Nil,t)
478 Nil -> (Nil,Nil)
479
480
481 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
482 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
483 -- comprises the elements of @set@ greater than @x@.
484 --
485 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
486 split :: Int -> IntSet -> (IntSet,IntSet)
487 split x t
488 = case t of
489 Bin _ m l r
490 | m < 0 -> if x >= 0 then let (lt,gt) = split' x l in (union r lt, gt)
491 else let (lt,gt) = split' x r in (lt, union gt l)
492 -- handle negative numbers.
493 | otherwise -> split' x t
494 Tip y
495 | x>y -> (t,Nil)
496 | x<y -> (Nil,t)
497 | otherwise -> (Nil,Nil)
498 Nil -> (Nil, Nil)
499
500 split' :: Int -> IntSet -> (IntSet,IntSet)
501 split' x t
502 = case t of
503 Bin p m l r
504 | match x p m -> if zero x m then let (lt,gt) = split' x l in (lt,union gt r)
505 else let (lt,gt) = split' x r in (union l lt,gt)
506 | otherwise -> if x < p then (Nil, t)
507 else (t, Nil)
508 Tip y
509 | x>y -> (t,Nil)
510 | x<y -> (Nil,t)
511 | otherwise -> (Nil,Nil)
512 Nil -> (Nil,Nil)
513
514 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
515 -- element was found in the original set.
516 splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
517 splitMember x t
518 = case t of
519 Bin _ m l r
520 | m < 0 -> if x >= 0 then let (lt,found,gt) = splitMember' x l in (union r lt, found, gt)
521 else let (lt,found,gt) = splitMember' x r in (lt, found, union gt l)
522 -- handle negative numbers.
523 | otherwise -> splitMember' x t
524 Tip y
525 | x>y -> (t,False,Nil)
526 | x<y -> (Nil,False,t)
527 | otherwise -> (Nil,True,Nil)
528 Nil -> (Nil,False,Nil)
529
530 splitMember' :: Int -> IntSet -> (IntSet,Bool,IntSet)
531 splitMember' x t
532 = case t of
533 Bin p m l r
534 | match x p m -> if zero x m then let (lt,found,gt) = splitMember x l in (lt,found,union gt r)
535 else let (lt,found,gt) = splitMember x r in (union l lt,found,gt)
536 | otherwise -> if x < p then (Nil, False, t)
537 else (t, False, Nil)
538 Tip y
539 | x>y -> (t,False,Nil)
540 | x<y -> (Nil,False,t)
541 | otherwise -> (Nil,True,Nil)
542 Nil -> (Nil,False,Nil)
543
544 {----------------------------------------------------------------------
545 Min/Max
546 ----------------------------------------------------------------------}
547
548 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
549 -- stripped of that element, or 'Nothing' if passed an empty set.
550 maxView :: IntSet -> Maybe (Int, IntSet)
551 maxView t
552 = case t of
553 Bin p m l r | m < 0 -> let (result,t') = maxViewUnsigned l in Just (result, bin p m t' r)
554 Bin p m l r -> let (result,t') = maxViewUnsigned r in Just (result, bin p m l t')
555 Tip y -> Just (y,Nil)
556 Nil -> Nothing
557
558 maxViewUnsigned :: IntSet -> (Int, IntSet)
559 maxViewUnsigned t
560 = case t of
561 Bin p m l r -> let (result,t') = maxViewUnsigned r in (result, bin p m l t')
562 Tip y -> (y, Nil)
563 Nil -> error "maxViewUnsigned Nil"
564
565 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
566 -- stripped of that element, or 'Nothing' if passed an empty set.
567 minView :: IntSet -> Maybe (Int, IntSet)
568 minView t
569 = case t of
570 Bin p m l r | m < 0 -> let (result,t') = minViewUnsigned r in Just (result, bin p m l t')
571 Bin p m l r -> let (result,t') = minViewUnsigned l in Just (result, bin p m t' r)
572 Tip y -> Just (y, Nil)
573 Nil -> Nothing
574
575 minViewUnsigned :: IntSet -> (Int, IntSet)
576 minViewUnsigned t
577 = case t of
578 Bin p m l r -> let (result,t') = minViewUnsigned l in (result, bin p m t' r)
579 Tip y -> (y, Nil)
580 Nil -> error "minViewUnsigned Nil"
581
582 -- | /O(min(n,W))/. Delete and find the minimal element.
583 --
584 -- > deleteFindMin set = (findMin set, deleteMin set)
585 deleteFindMin :: IntSet -> (Int, IntSet)
586 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
587
588 -- | /O(min(n,W))/. Delete and find the maximal element.
589 --
590 -- > deleteFindMax set = (findMax set, deleteMax set)
591 deleteFindMax :: IntSet -> (Int, IntSet)
592 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
593
594 -- | /O(min(n,W))/. The minimal element of a set.
595 findMin :: IntSet -> Int
596 findMin = maybe (error "findMin: empty set has no minimal element") fst . minView
597
598 -- | /O(min(n,W))/. The maximal element of a set.
599 findMax :: IntSet -> Int
600 findMax = maybe (error "findMax: empty set has no maximal element") fst . maxView
601
602 -- | /O(min(n,W))/. Delete the minimal element.
603 deleteMin :: IntSet -> IntSet
604 deleteMin = maybe (error "deleteMin: empty set has no minimal element") snd . minView
605
606 -- | /O(min(n,W))/. Delete the maximal element.
607 deleteMax :: IntSet -> IntSet
608 deleteMax = maybe (error "deleteMax: empty set has no maximal element") snd . maxView
609
610 {----------------------------------------------------------------------
611 Map
612 ----------------------------------------------------------------------}
613
614 -- | /O(n*min(n,W))/.
615 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
616 --
617 -- It's worth noting that the size of the result may be smaller if,
618 -- for some @(x,y)@, @x \/= y && f x == f y@
619
620 map :: (Int->Int) -> IntSet -> IntSet
621 map f = fromList . List.map f . toList
622
623 {--------------------------------------------------------------------
624 Fold
625 --------------------------------------------------------------------}
626 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
627 --
628 -- > sum set == fold (+) 0 set
629 -- > elems set == fold (:) [] set
630 fold :: (Int -> b -> b) -> b -> IntSet -> b
631 fold f z t
632 = case t of
633 Bin 0 m l r | m < 0 -> foldr f (foldr f z l) r
634 -- put negative numbers before.
635 Bin _ _ _ _ -> foldr f z t
636 Tip x -> f x z
637 Nil -> z
638
639 foldr :: (Int -> b -> b) -> b -> IntSet -> b
640 foldr f z t
641 = case t of
642 Bin _ _ l r -> foldr f (foldr f z r) l
643 Tip x -> f x z
644 Nil -> z
645
646 {--------------------------------------------------------------------
647 List variations
648 --------------------------------------------------------------------}
649 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
650 elems :: IntSet -> [Int]
651 elems s
652 = toList s
653
654 {--------------------------------------------------------------------
655 Lists
656 --------------------------------------------------------------------}
657 -- | /O(n)/. Convert the set to a list of elements.
658 toList :: IntSet -> [Int]
659 toList t
660 = fold (:) [] t
661
662 -- | /O(n)/. Convert the set to an ascending list of elements.
663 toAscList :: IntSet -> [Int]
664 toAscList t = toList t
665
666 -- | /O(n*min(n,W))/. Create a set from a list of integers.
667 fromList :: [Int] -> IntSet
668 fromList xs
669 = foldlStrict ins empty xs
670 where
671 ins t x = insert x t
672
673 -- | /O(n)/. Build a set from an ascending list of elements.
674 -- /The precondition (input list is ascending) is not checked./
675 fromAscList :: [Int] -> IntSet
676 fromAscList [] = Nil
677 fromAscList (x:xs) = fromDistinctAscList (combineEq x xs)
678 where
679 combineEq x' [] = [x']
680 combineEq x' (x:xs)
681 | x==x' = combineEq x' xs
682 | otherwise = x' : combineEq x xs
683
684 -- | /O(n)/. Build a set from an ascending list of distinct elements.
685 -- /The precondition (input list is strictly ascending) is not checked./
686 fromDistinctAscList :: [Int] -> IntSet
687 fromDistinctAscList [] = Nil
688 fromDistinctAscList (z:zs) = work z zs Nada
689 where
690 work x [] stk = finish x (Tip x) stk
691 work x (z:zs) stk = reduce z zs (branchMask z x) x (Tip x) stk
692
693 reduce z zs _ px tx Nada = work z zs (Push px tx Nada)
694 reduce z zs m px tx stk@(Push py ty stk') =
695 let mxy = branchMask px py
696 pxy = mask px mxy
697 in if shorter m mxy
698 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
699 else work z zs (Push px tx stk)
700
701 finish _ t Nada = t
702 finish px tx (Push py ty stk) = finish p (join py ty px tx) stk
703 where m = branchMask px py
704 p = mask px m
705
706 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
707
708
709 {--------------------------------------------------------------------
710 Eq
711 --------------------------------------------------------------------}
712 instance Eq IntSet where
713 t1 == t2 = equal t1 t2
714 t1 /= t2 = nequal t1 t2
715
716 equal :: IntSet -> IntSet -> Bool
717 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
718 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
719 equal (Tip x) (Tip y)
720 = (x==y)
721 equal Nil Nil = True
722 equal _ _ = False
723
724 nequal :: IntSet -> IntSet -> Bool
725 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
726 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
727 nequal (Tip x) (Tip y)
728 = (x/=y)
729 nequal Nil Nil = False
730 nequal _ _ = True
731
732 {--------------------------------------------------------------------
733 Ord
734 --------------------------------------------------------------------}
735
736 instance Ord IntSet where
737 compare s1 s2 = compare (toAscList s1) (toAscList s2)
738 -- tentative implementation. See if more efficient exists.
739
740 {--------------------------------------------------------------------
741 Show
742 --------------------------------------------------------------------}
743 instance Show IntSet where
744 showsPrec p xs = showParen (p > 10) $
745 showString "fromList " . shows (toList xs)
746
747 {-
748 XXX unused code
749 showSet :: [Int] -> ShowS
750 showSet []
751 = showString "{}"
752 showSet (x:xs)
753 = showChar '{' . shows x . showTail xs
754 where
755 showTail [] = showChar '}'
756 showTail (x':xs') = showChar ',' . shows x' . showTail xs'
757 -}
758
759 {--------------------------------------------------------------------
760 Read
761 --------------------------------------------------------------------}
762 instance Read IntSet where
763 #ifdef __GLASGOW_HASKELL__
764 readPrec = parens $ prec 10 $ do
765 Ident "fromList" <- lexP
766 xs <- readPrec
767 return (fromList xs)
768
769 readListPrec = readListPrecDefault
770 #else
771 readsPrec p = readParen (p > 10) $ \ r -> do
772 ("fromList",s) <- lex r
773 (xs,t) <- reads s
774 return (fromList xs,t)
775 #endif
776
777 {--------------------------------------------------------------------
778 Typeable
779 --------------------------------------------------------------------}
780
781 #include "Typeable.h"
782 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
783
784 {--------------------------------------------------------------------
785 Debugging
786 --------------------------------------------------------------------}
787 -- | /O(n)/. Show the tree that implements the set. The tree is shown
788 -- in a compressed, hanging format.
789 showTree :: IntSet -> String
790 showTree s
791 = showTreeWith True False s
792
793
794 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
795 the tree that implements the set. If @hang@ is
796 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
797 @wide@ is 'True', an extra wide version is shown.
798 -}
799 showTreeWith :: Bool -> Bool -> IntSet -> String
800 showTreeWith hang wide t
801 | hang = (showsTreeHang wide [] t) ""
802 | otherwise = (showsTree wide [] [] t) ""
803
804 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
805 showsTree wide lbars rbars t
806 = case t of
807 Bin p m l r
808 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
809 showWide wide rbars .
810 showsBars lbars . showString (showBin p m) . showString "\n" .
811 showWide wide lbars .
812 showsTree wide (withEmpty lbars) (withBar lbars) l
813 Tip x
814 -> showsBars lbars . showString " " . shows x . showString "\n"
815 Nil -> showsBars lbars . showString "|\n"
816
817 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
818 showsTreeHang wide bars t
819 = case t of
820 Bin p m l r
821 -> showsBars bars . showString (showBin p m) . showString "\n" .
822 showWide wide bars .
823 showsTreeHang wide (withBar bars) l .
824 showWide wide bars .
825 showsTreeHang wide (withEmpty bars) r
826 Tip x
827 -> showsBars bars . showString " " . shows x . showString "\n"
828 Nil -> showsBars bars . showString "|\n"
829
830 showBin :: Prefix -> Mask -> String
831 showBin _ _
832 = "*" -- ++ show (p,m)
833
834 showWide :: Bool -> [String] -> String -> String
835 showWide wide bars
836 | wide = showString (concat (reverse bars)) . showString "|\n"
837 | otherwise = id
838
839 showsBars :: [String] -> ShowS
840 showsBars bars
841 = case bars of
842 [] -> id
843 _ -> showString (concat (reverse (tail bars))) . showString node
844
845 node :: String
846 node = "+--"
847
848 withBar, withEmpty :: [String] -> [String]
849 withBar bars = "| ":bars
850 withEmpty bars = " ":bars
851
852
853 {--------------------------------------------------------------------
854 Helpers
855 --------------------------------------------------------------------}
856 {--------------------------------------------------------------------
857 Join
858 --------------------------------------------------------------------}
859 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
860 join p1 t1 p2 t2
861 | zero p1 m = Bin p m t1 t2
862 | otherwise = Bin p m t2 t1
863 where
864 m = branchMask p1 p2
865 p = mask p1 m
866
867 {--------------------------------------------------------------------
868 @bin@ assures that we never have empty trees within a tree.
869 --------------------------------------------------------------------}
870 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
871 bin _ _ l Nil = l
872 bin _ _ Nil r = r
873 bin p m l r = Bin p m l r
874
875
876 {--------------------------------------------------------------------
877 Endian independent bit twiddling
878 --------------------------------------------------------------------}
879 zero :: Int -> Mask -> Bool
880 zero i m
881 = (natFromInt i) .&. (natFromInt m) == 0
882
883 nomatch,match :: Int -> Prefix -> Mask -> Bool
884 nomatch i p m
885 = (mask i m) /= p
886
887 match i p m
888 = (mask i m) == p
889
890 -- Suppose a is largest such that 2^a divides 2*m.
891 -- Then mask i m is i with the low a bits zeroed out.
892 mask :: Int -> Mask -> Prefix
893 mask i m
894 = maskW (natFromInt i) (natFromInt m)
895
896 zeroN :: Nat -> Nat -> Bool
897 zeroN i m = (i .&. m) == 0
898
899 {--------------------------------------------------------------------
900 Big endian operations
901 --------------------------------------------------------------------}
902 maskW :: Nat -> Nat -> Prefix
903 maskW i m
904 = intFromNat (i .&. (complement (m-1) `xor` m))
905
906 shorter :: Mask -> Mask -> Bool
907 shorter m1 m2
908 = (natFromInt m1) > (natFromInt m2)
909
910 branchMask :: Prefix -> Prefix -> Mask
911 branchMask p1 p2
912 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
913
914 {----------------------------------------------------------------------
915 Finding the highest bit (mask) in a word [x] can be done efficiently in
916 three ways:
917 * convert to a floating point value and the mantissa tells us the
918 [log2(x)] that corresponds with the highest bit position. The mantissa
919 is retrieved either via the standard C function [frexp] or by some bit
920 twiddling on IEEE compatible numbers (float). Note that one needs to
921 use at least [double] precision for an accurate mantissa of 32 bit
922 numbers.
923 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
924 * use processor specific assembler instruction (asm).
925
926 The most portable way would be [bit], but is it efficient enough?
927 I have measured the cycle counts of the different methods on an AMD
928 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
929
930 highestBitMask: method cycles
931 --------------
932 frexp 200
933 float 33
934 bit 11
935 asm 12
936
937 highestBit: method cycles
938 --------------
939 frexp 195
940 float 33
941 bit 11
942 asm 11
943
944 Wow, the bit twiddling is on today's RISC like machines even faster
945 than a single CISC instruction (BSR)!
946 ----------------------------------------------------------------------}
947
948 {----------------------------------------------------------------------
949 [highestBitMask] returns a word where only the highest bit is set.
950 It is found by first setting all bits in lower positions than the
951 highest bit and than taking an exclusive or with the original value.
952 Allthough the function may look expensive, GHC compiles this into
953 excellent C code that subsequently compiled into highly efficient
954 machine code. The algorithm is derived from Jorg Arndt's FXT library.
955 ----------------------------------------------------------------------}
956 highestBitMask :: Nat -> Nat
957 highestBitMask x0
958 = case (x0 .|. shiftRL x0 1) of
959 x1 -> case (x1 .|. shiftRL x1 2) of
960 x2 -> case (x2 .|. shiftRL x2 4) of
961 x3 -> case (x3 .|. shiftRL x3 8) of
962 x4 -> case (x4 .|. shiftRL x4 16) of
963 x5 -> case (x5 .|. shiftRL x5 32) of -- for 64 bit platforms
964 x6 -> (x6 `xor` (shiftRL x6 1))
965
966
967 {--------------------------------------------------------------------
968 Utilities
969 --------------------------------------------------------------------}
970 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
971 foldlStrict f z xs
972 = case xs of
973 [] -> z
974 (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
975
976
977 {-
978 {--------------------------------------------------------------------
979 Testing
980 --------------------------------------------------------------------}
981 testTree :: [Int] -> IntSet
982 testTree xs = fromList xs
983 test1 = testTree [1..20]
984 test2 = testTree [30,29..10]
985 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
986
987 {--------------------------------------------------------------------
988 QuickCheck
989 --------------------------------------------------------------------}
990 qcheck prop
991 = check config prop
992 where
993 config = Config
994 { configMaxTest = 500
995 , configMaxFail = 5000
996 , configSize = \n -> (div n 2 + 3)
997 , configEvery = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
998 }
999
1000
1001 {--------------------------------------------------------------------
1002 Arbitrary, reasonably balanced trees
1003 --------------------------------------------------------------------}
1004 instance Arbitrary IntSet where
1005 arbitrary = do{ xs <- arbitrary
1006 ; return (fromList xs)
1007 }
1008
1009
1010 {--------------------------------------------------------------------
1011 Single, Insert, Delete
1012 --------------------------------------------------------------------}
1013 prop_Single :: Int -> Bool
1014 prop_Single x
1015 = (insert x empty == singleton x)
1016
1017 prop_InsertDelete :: Int -> IntSet -> Property
1018 prop_InsertDelete k t
1019 = not (member k t) ==> delete k (insert k t) == t
1020
1021
1022 {--------------------------------------------------------------------
1023 Union
1024 --------------------------------------------------------------------}
1025 prop_UnionInsert :: Int -> IntSet -> Bool
1026 prop_UnionInsert x t
1027 = union t (singleton x) == insert x t
1028
1029 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
1030 prop_UnionAssoc t1 t2 t3
1031 = union t1 (union t2 t3) == union (union t1 t2) t3
1032
1033 prop_UnionComm :: IntSet -> IntSet -> Bool
1034 prop_UnionComm t1 t2
1035 = (union t1 t2 == union t2 t1)
1036
1037 prop_Diff :: [Int] -> [Int] -> Bool
1038 prop_Diff xs ys
1039 = toAscList (difference (fromList xs) (fromList ys))
1040 == List.sort ((List.\\) (nub xs) (nub ys))
1041
1042 prop_Int :: [Int] -> [Int] -> Bool
1043 prop_Int xs ys
1044 = toAscList (intersection (fromList xs) (fromList ys))
1045 == List.sort (nub ((List.intersect) (xs) (ys)))
1046
1047 {--------------------------------------------------------------------
1048 Lists
1049 --------------------------------------------------------------------}
1050 prop_Ordered
1051 = forAll (choose (5,100)) $ \n ->
1052 let xs = concat [[i-n,i-n]|i<-[0..2*n :: Int]]
1053 in fromAscList xs == fromList xs
1054
1055 prop_List :: [Int] -> Bool
1056 prop_List xs
1057 = (sort (nub xs) == toAscList (fromList xs))
1058
1059 {--------------------------------------------------------------------
1060 Bin invariants
1061 --------------------------------------------------------------------}
1062 powersOf2 :: IntSet
1063 powersOf2 = fromList [2^i | i <- [0..63]]
1064
1065 -- Check the invariant that the mask is a power of 2.
1066 prop_MaskPow2 :: IntSet -> Bool
1067 prop_MaskPow2 (Bin _ msk left right) = member msk powersOf2 && prop_MaskPow2 left && prop_MaskPow2 right
1068 prop_MaskPow2 _ = True
1069
1070 -- Check that the prefix satisfies its invariant.
1071 prop_Prefix :: IntSet -> Bool
1072 prop_Prefix s@(Bin prefix msk left right) = all (\elem -> match elem prefix msk) (toList s) && prop_Prefix left && prop_Prefix right
1073 prop_Prefix _ = True
1074
1075 -- Check that the left elements don't have the mask bit set, and the right
1076 -- ones do.
1077 prop_LeftRight :: IntSet -> Bool
1078 prop_LeftRight (Bin _ msk left right) = and [x .&. msk == 0 | x <- toList left] && and [x .&. msk == msk | x <- toList right]
1079 prop_LeftRight _ = True
1080
1081 {--------------------------------------------------------------------
1082 IntSet operations are like Set operations
1083 --------------------------------------------------------------------}
1084 toSet :: IntSet -> Set.Set Int
1085 toSet = Set.fromList . toList
1086
1087 -- Check that IntSet.isProperSubsetOf is the same as Set.isProperSubsetOf.
1088 prop_isProperSubsetOf :: IntSet -> IntSet -> Bool
1089 prop_isProperSubsetOf a b = isProperSubsetOf a b == Set.isProperSubsetOf (toSet a) (toSet b)
1090
1091 -- In the above test, isProperSubsetOf almost always returns False (since a
1092 -- random set is almost never a subset of another random set). So this second
1093 -- test checks the True case.
1094 prop_isProperSubsetOf2 :: IntSet -> IntSet -> Bool
1095 prop_isProperSubsetOf2 a b = isProperSubsetOf a c == (a /= c) where
1096 c = union a b
1097 -}