[project @ 2005-10-21 10:26:57 by ross]
[packages/old-time.git] / Data / IntSet.hs
1 {-# OPTIONS -cpp -fglasgow-exts #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : Data.IntSet
5 -- Copyright : (c) Daan Leijen 2002
6 -- License : BSD-style
7 -- Maintainer : libraries@haskell.org
8 -- Stability : provisional
9 -- Portability : portable
10 --
11 -- An efficient implementation of integer sets.
12 --
13 -- This module is intended to be imported @qualified@, to avoid name
14 -- clashes with "Prelude" functions. eg.
15 --
16 -- > import Data.IntSet as Set
17 --
18 -- The implementation is based on /big-endian patricia trees/. This data
19 -- structure performs especially well on binary operations like 'union'
20 -- and 'intersection'. However, my benchmarks show that it is also
21 -- (much) faster on insertions and deletions when compared to a generic
22 -- size-balanced set implementation (see "Data.Set").
23 --
24 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
25 -- Workshop on ML, September 1998, pages 77-86,
26 -- <http://www.cse.ogi.edu/~andy/pub/finite.htm>
27 --
28 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
29 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
30 -- October 1968, pages 514-534.
31 --
32 -- Many operations have a worst-case complexity of /O(min(n,W))/.
33 -- This means that the operation can become linear in the number of
34 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
35 -- (32 or 64).
36 -----------------------------------------------------------------------------
37
38 module Data.IntSet (
39 -- * Set type
40 IntSet -- instance Eq,Show
41
42 -- * Operators
43 , (\\)
44
45 -- * Query
46 , null
47 , size
48 , member
49 , isSubsetOf
50 , isProperSubsetOf
51
52 -- * Construction
53 , empty
54 , singleton
55 , insert
56 , delete
57
58 -- * Combine
59 , union, unions
60 , difference
61 , intersection
62
63 -- * Filter
64 , filter
65 , partition
66 , split
67 , splitMember
68
69 -- * Map
70 , map
71
72 -- * Fold
73 , fold
74
75 -- * Conversion
76 -- ** List
77 , elems
78 , toList
79 , fromList
80
81 -- ** Ordered list
82 , toAscList
83 , fromAscList
84 , fromDistinctAscList
85
86 -- * Debugging
87 , showTree
88 , showTreeWith
89 ) where
90
91
92 import Prelude hiding (lookup,filter,foldr,foldl,null,map)
93 import Data.Bits
94 import Data.Int
95
96 import qualified Data.List as List
97 import Data.Typeable
98
99 {-
100 -- just for testing
101 import QuickCheck
102 import List (nub,sort)
103 import qualified List
104 -}
105
106 #if __GLASGOW_HASKELL__
107 import Data.Generics.Basics
108 import Data.Generics.Instances
109 #endif
110
111 #if __GLASGOW_HASKELL__ >= 503
112 import GHC.Word
113 import GHC.Exts ( Word(..), Int(..), shiftRL# )
114 #elif __GLASGOW_HASKELL__
115 import Word
116 import GlaExts ( Word(..), Int(..), shiftRL# )
117 #else
118 import Data.Word
119 #endif
120
121 infixl 9 \\{-This comment teaches CPP correct behaviour -}
122
123 -- A "Nat" is a natural machine word (an unsigned Int)
124 type Nat = Word
125
126 natFromInt :: Int -> Nat
127 natFromInt i = fromIntegral i
128
129 intFromNat :: Nat -> Int
130 intFromNat w = fromIntegral w
131
132 shiftRL :: Nat -> Int -> Nat
133 #if __GLASGOW_HASKELL__
134 {--------------------------------------------------------------------
135 GHC: use unboxing to get @shiftRL@ inlined.
136 --------------------------------------------------------------------}
137 shiftRL (W# x) (I# i)
138 = W# (shiftRL# x i)
139 #else
140 shiftRL x i = shiftR x i
141 #endif
142
143 {--------------------------------------------------------------------
144 Operators
145 --------------------------------------------------------------------}
146 -- | /O(n+m)/. See 'difference'.
147 (\\) :: IntSet -> IntSet -> IntSet
148 m1 \\ m2 = difference m1 m2
149
150 {--------------------------------------------------------------------
151 Types
152 --------------------------------------------------------------------}
153 -- | A set of integers.
154 data IntSet = Nil
155 | Tip {-# UNPACK #-} !Int
156 | Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
157
158 type Prefix = Int
159 type Mask = Int
160
161 #if __GLASGOW_HASKELL__
162
163 {--------------------------------------------------------------------
164 A Data instance
165 --------------------------------------------------------------------}
166
167 -- This instance preserves data abstraction at the cost of inefficiency.
168 -- We omit reflection services for the sake of data abstraction.
169
170 instance Data IntSet where
171 gfoldl f z is = z fromList `f` (toList is)
172 toConstr _ = error "toConstr"
173 gunfold _ _ = error "gunfold"
174 dataTypeOf _ = mkNorepType "Data.IntSet.IntSet"
175
176 #endif
177
178 {--------------------------------------------------------------------
179 Query
180 --------------------------------------------------------------------}
181 -- | /O(1)/. Is the set empty?
182 null :: IntSet -> Bool
183 null Nil = True
184 null other = False
185
186 -- | /O(n)/. Cardinality of the set.
187 size :: IntSet -> Int
188 size t
189 = case t of
190 Bin p m l r -> size l + size r
191 Tip y -> 1
192 Nil -> 0
193
194 -- | /O(min(n,W))/. Is the value a member of the set?
195 member :: Int -> IntSet -> Bool
196 member x t
197 = case t of
198 Bin p m l r
199 | nomatch x p m -> False
200 | zero x m -> member x l
201 | otherwise -> member x r
202 Tip y -> (x==y)
203 Nil -> False
204
205 -- 'lookup' is used by 'intersection' for left-biasing
206 lookup :: Int -> IntSet -> Maybe Int
207 lookup k t
208 = let nk = natFromInt k in seq nk (lookupN nk t)
209
210 lookupN :: Nat -> IntSet -> Maybe Int
211 lookupN k t
212 = case t of
213 Bin p m l r
214 | zeroN k (natFromInt m) -> lookupN k l
215 | otherwise -> lookupN k r
216 Tip kx
217 | (k == natFromInt kx) -> Just kx
218 | otherwise -> Nothing
219 Nil -> Nothing
220
221 {--------------------------------------------------------------------
222 Construction
223 --------------------------------------------------------------------}
224 -- | /O(1)/. The empty set.
225 empty :: IntSet
226 empty
227 = Nil
228
229 -- | /O(1)/. A set of one element.
230 singleton :: Int -> IntSet
231 singleton x
232 = Tip x
233
234 {--------------------------------------------------------------------
235 Insert
236 --------------------------------------------------------------------}
237 -- | /O(min(n,W))/. Add a value to the set. When the value is already
238 -- an element of the set, it is replaced by the new one, ie. 'insert'
239 -- is left-biased.
240 insert :: Int -> IntSet -> IntSet
241 insert x t
242 = case t of
243 Bin p m l r
244 | nomatch x p m -> join x (Tip x) p t
245 | zero x m -> Bin p m (insert x l) r
246 | otherwise -> Bin p m l (insert x r)
247 Tip y
248 | x==y -> Tip x
249 | otherwise -> join x (Tip x) y t
250 Nil -> Tip x
251
252 -- right-biased insertion, used by 'union'
253 insertR :: Int -> IntSet -> IntSet
254 insertR x t
255 = case t of
256 Bin p m l r
257 | nomatch x p m -> join x (Tip x) p t
258 | zero x m -> Bin p m (insert x l) r
259 | otherwise -> Bin p m l (insert x r)
260 Tip y
261 | x==y -> t
262 | otherwise -> join x (Tip x) y t
263 Nil -> Tip x
264
265 -- | /O(min(n,W))/. Delete a value in the set. Returns the
266 -- original set when the value was not present.
267 delete :: Int -> IntSet -> IntSet
268 delete x t
269 = case t of
270 Bin p m l r
271 | nomatch x p m -> t
272 | zero x m -> bin p m (delete x l) r
273 | otherwise -> bin p m l (delete x r)
274 Tip y
275 | x==y -> Nil
276 | otherwise -> t
277 Nil -> Nil
278
279
280 {--------------------------------------------------------------------
281 Union
282 --------------------------------------------------------------------}
283 -- | The union of a list of sets.
284 unions :: [IntSet] -> IntSet
285 unions xs
286 = foldlStrict union empty xs
287
288
289 -- | /O(n+m)/. The union of two sets.
290 union :: IntSet -> IntSet -> IntSet
291 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
292 | shorter m1 m2 = union1
293 | shorter m2 m1 = union2
294 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
295 | otherwise = join p1 t1 p2 t2
296 where
297 union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
298 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
299 | otherwise = Bin p1 m1 l1 (union r1 t2)
300
301 union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
302 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
303 | otherwise = Bin p2 m2 l2 (union t1 r2)
304
305 union (Tip x) t = insert x t
306 union t (Tip x) = insertR x t -- right bias
307 union Nil t = t
308 union t Nil = t
309
310
311 {--------------------------------------------------------------------
312 Difference
313 --------------------------------------------------------------------}
314 -- | /O(n+m)/. Difference between two sets.
315 difference :: IntSet -> IntSet -> IntSet
316 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
317 | shorter m1 m2 = difference1
318 | shorter m2 m1 = difference2
319 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
320 | otherwise = t1
321 where
322 difference1 | nomatch p2 p1 m1 = t1
323 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
324 | otherwise = bin p1 m1 l1 (difference r1 t2)
325
326 difference2 | nomatch p1 p2 m2 = t1
327 | zero p1 m2 = difference t1 l2
328 | otherwise = difference t1 r2
329
330 difference t1@(Tip x) t2
331 | member x t2 = Nil
332 | otherwise = t1
333
334 difference Nil t = Nil
335 difference t (Tip x) = delete x t
336 difference t Nil = t
337
338
339
340 {--------------------------------------------------------------------
341 Intersection
342 --------------------------------------------------------------------}
343 -- | /O(n+m)/. The intersection of two sets.
344 intersection :: IntSet -> IntSet -> IntSet
345 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
346 | shorter m1 m2 = intersection1
347 | shorter m2 m1 = intersection2
348 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
349 | otherwise = Nil
350 where
351 intersection1 | nomatch p2 p1 m1 = Nil
352 | zero p2 m1 = intersection l1 t2
353 | otherwise = intersection r1 t2
354
355 intersection2 | nomatch p1 p2 m2 = Nil
356 | zero p1 m2 = intersection t1 l2
357 | otherwise = intersection t1 r2
358
359 intersection t1@(Tip x) t2
360 | member x t2 = t1
361 | otherwise = Nil
362 intersection t (Tip x)
363 = case lookup x t of
364 Just y -> Tip y
365 Nothing -> Nil
366 intersection Nil t = Nil
367 intersection t Nil = Nil
368
369
370
371 {--------------------------------------------------------------------
372 Subset
373 --------------------------------------------------------------------}
374 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
375 isProperSubsetOf :: IntSet -> IntSet -> Bool
376 isProperSubsetOf t1 t2
377 = case subsetCmp t1 t2 of
378 LT -> True
379 ge -> False
380
381 subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
382 | shorter m1 m2 = GT
383 | shorter m2 m1 = subsetCmpLt
384 | p1 == p2 = subsetCmpEq
385 | otherwise = GT -- disjoint
386 where
387 subsetCmpLt | nomatch p1 p2 m2 = GT
388 | zero p1 m2 = subsetCmp t1 l2
389 | otherwise = subsetCmp t1 r2
390 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
391 (GT,_ ) -> GT
392 (_ ,GT) -> GT
393 (EQ,EQ) -> EQ
394 other -> LT
395
396 subsetCmp (Bin p m l r) t = GT
397 subsetCmp (Tip x) (Tip y)
398 | x==y = EQ
399 | otherwise = GT -- disjoint
400 subsetCmp (Tip x) t
401 | member x t = LT
402 | otherwise = GT -- disjoint
403 subsetCmp Nil Nil = EQ
404 subsetCmp Nil t = LT
405
406 -- | /O(n+m)/. Is this a subset?
407 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
408
409 isSubsetOf :: IntSet -> IntSet -> Bool
410 isSubsetOf t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
411 | shorter m1 m2 = False
412 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
413 else isSubsetOf t1 r2)
414 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
415 isSubsetOf (Bin p m l r) t = False
416 isSubsetOf (Tip x) t = member x t
417 isSubsetOf Nil t = True
418
419
420 {--------------------------------------------------------------------
421 Filter
422 --------------------------------------------------------------------}
423 -- | /O(n)/. Filter all elements that satisfy some predicate.
424 filter :: (Int -> Bool) -> IntSet -> IntSet
425 filter pred t
426 = case t of
427 Bin p m l r
428 -> bin p m (filter pred l) (filter pred r)
429 Tip x
430 | pred x -> t
431 | otherwise -> Nil
432 Nil -> Nil
433
434 -- | /O(n)/. partition the set according to some predicate.
435 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
436 partition pred t
437 = case t of
438 Bin p m l r
439 -> let (l1,l2) = partition pred l
440 (r1,r2) = partition pred r
441 in (bin p m l1 r1, bin p m l2 r2)
442 Tip x
443 | pred x -> (t,Nil)
444 | otherwise -> (Nil,t)
445 Nil -> (Nil,Nil)
446
447
448 -- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
449 -- where all elements in @set1@ are lower than @x@ and all elements in
450 -- @set2@ larger than @x@.
451 --
452 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [3,4])
453 split :: Int -> IntSet -> (IntSet,IntSet)
454 split x t
455 = case t of
456 Bin p m l r
457 | zero x m -> let (lt,gt) = split x l in (lt,union gt r)
458 | otherwise -> let (lt,gt) = split x r in (union l lt,gt)
459 Tip y
460 | x>y -> (t,Nil)
461 | x<y -> (Nil,t)
462 | otherwise -> (Nil,Nil)
463 Nil -> (Nil,Nil)
464
465 -- | /O(log n)/. Performs a 'split' but also returns whether the pivot
466 -- element was found in the original set.
467 splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
468 splitMember x t
469 = case t of
470 Bin p m l r
471 | zero x m -> let (lt,found,gt) = splitMember x l in (lt,found,union gt r)
472 | otherwise -> let (lt,found,gt) = splitMember x r in (union l lt,found,gt)
473 Tip y
474 | x>y -> (t,False,Nil)
475 | x<y -> (Nil,False,t)
476 | otherwise -> (Nil,True,Nil)
477 Nil -> (Nil,False,Nil)
478
479 {----------------------------------------------------------------------
480 Map
481 ----------------------------------------------------------------------}
482
483 -- | /O(n*min(n,W))/.
484 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
485 --
486 -- It's worth noting that the size of the result may be smaller if,
487 -- for some @(x,y)@, @x \/= y && f x == f y@
488
489 map :: (Int->Int) -> IntSet -> IntSet
490 map f = fromList . List.map f . toList
491
492 {--------------------------------------------------------------------
493 Fold
494 --------------------------------------------------------------------}
495 -- | /O(n)/. Fold over the elements of a set in an unspecified order.
496 --
497 -- > sum set == fold (+) 0 set
498 -- > elems set == fold (:) [] set
499 fold :: (Int -> b -> b) -> b -> IntSet -> b
500 fold f z t
501 = foldr f z t
502
503 foldr :: (Int -> b -> b) -> b -> IntSet -> b
504 foldr f z t
505 = case t of
506 Bin p m l r -> foldr f (foldr f z r) l
507 Tip x -> f x z
508 Nil -> z
509
510 {--------------------------------------------------------------------
511 List variations
512 --------------------------------------------------------------------}
513 -- | /O(n)/. The elements of a set. (For sets, this is equivalent to toList)
514 elems :: IntSet -> [Int]
515 elems s
516 = toList s
517
518 {--------------------------------------------------------------------
519 Lists
520 --------------------------------------------------------------------}
521 -- | /O(n)/. Convert the set to a list of elements.
522 toList :: IntSet -> [Int]
523 toList t
524 = fold (:) [] t
525
526 -- | /O(n)/. Convert the set to an ascending list of elements.
527 toAscList :: IntSet -> [Int]
528 toAscList t
529 = -- NOTE: the following algorithm only works for big-endian trees
530 let (pos,neg) = span (>=0) (foldr (:) [] t) in neg ++ pos
531
532 -- | /O(n*min(n,W))/. Create a set from a list of integers.
533 fromList :: [Int] -> IntSet
534 fromList xs
535 = foldlStrict ins empty xs
536 where
537 ins t x = insert x t
538
539 -- | /O(n*min(n,W))/. Build a set from an ascending list of elements.
540 fromAscList :: [Int] -> IntSet
541 fromAscList xs
542 = fromList xs
543
544 -- | /O(n*min(n,W))/. Build a set from an ascending list of distinct elements.
545 fromDistinctAscList :: [Int] -> IntSet
546 fromDistinctAscList xs
547 = fromList xs
548
549
550 {--------------------------------------------------------------------
551 Eq
552 --------------------------------------------------------------------}
553 instance Eq IntSet where
554 t1 == t2 = equal t1 t2
555 t1 /= t2 = nequal t1 t2
556
557 equal :: IntSet -> IntSet -> Bool
558 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
559 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
560 equal (Tip x) (Tip y)
561 = (x==y)
562 equal Nil Nil = True
563 equal t1 t2 = False
564
565 nequal :: IntSet -> IntSet -> Bool
566 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
567 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
568 nequal (Tip x) (Tip y)
569 = (x/=y)
570 nequal Nil Nil = False
571 nequal t1 t2 = True
572
573 {--------------------------------------------------------------------
574 Ord
575 --------------------------------------------------------------------}
576
577 instance Ord IntSet where
578 compare s1 s2 = compare (toAscList s1) (toAscList s2)
579 -- tentative implementation. See if more efficient exists.
580
581 {--------------------------------------------------------------------
582 Show
583 --------------------------------------------------------------------}
584 instance Show IntSet where
585 showsPrec d s = showSet (toList s)
586
587 showSet :: [Int] -> ShowS
588 showSet []
589 = showString "{}"
590 showSet (x:xs)
591 = showChar '{' . shows x . showTail xs
592 where
593 showTail [] = showChar '}'
594 showTail (x:xs) = showChar ',' . shows x . showTail xs
595
596 {--------------------------------------------------------------------
597 Typeable
598 --------------------------------------------------------------------}
599
600 #include "Typeable.h"
601 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
602
603 {--------------------------------------------------------------------
604 Debugging
605 --------------------------------------------------------------------}
606 -- | /O(n)/. Show the tree that implements the set. The tree is shown
607 -- in a compressed, hanging format.
608 showTree :: IntSet -> String
609 showTree s
610 = showTreeWith True False s
611
612
613 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
614 the tree that implements the set. If @hang@ is
615 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
616 @wide@ is 'True', an extra wide version is shown.
617 -}
618 showTreeWith :: Bool -> Bool -> IntSet -> String
619 showTreeWith hang wide t
620 | hang = (showsTreeHang wide [] t) ""
621 | otherwise = (showsTree wide [] [] t) ""
622
623 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
624 showsTree wide lbars rbars t
625 = case t of
626 Bin p m l r
627 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
628 showWide wide rbars .
629 showsBars lbars . showString (showBin p m) . showString "\n" .
630 showWide wide lbars .
631 showsTree wide (withEmpty lbars) (withBar lbars) l
632 Tip x
633 -> showsBars lbars . showString " " . shows x . showString "\n"
634 Nil -> showsBars lbars . showString "|\n"
635
636 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
637 showsTreeHang wide bars t
638 = case t of
639 Bin p m l r
640 -> showsBars bars . showString (showBin p m) . showString "\n" .
641 showWide wide bars .
642 showsTreeHang wide (withBar bars) l .
643 showWide wide bars .
644 showsTreeHang wide (withEmpty bars) r
645 Tip x
646 -> showsBars bars . showString " " . shows x . showString "\n"
647 Nil -> showsBars bars . showString "|\n"
648
649 showBin p m
650 = "*" -- ++ show (p,m)
651
652 showWide wide bars
653 | wide = showString (concat (reverse bars)) . showString "|\n"
654 | otherwise = id
655
656 showsBars :: [String] -> ShowS
657 showsBars bars
658 = case bars of
659 [] -> id
660 _ -> showString (concat (reverse (tail bars))) . showString node
661
662 node = "+--"
663 withBar bars = "| ":bars
664 withEmpty bars = " ":bars
665
666
667 {--------------------------------------------------------------------
668 Helpers
669 --------------------------------------------------------------------}
670 {--------------------------------------------------------------------
671 Join
672 --------------------------------------------------------------------}
673 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
674 join p1 t1 p2 t2
675 | zero p1 m = Bin p m t1 t2
676 | otherwise = Bin p m t2 t1
677 where
678 m = branchMask p1 p2
679 p = mask p1 m
680
681 {--------------------------------------------------------------------
682 @bin@ assures that we never have empty trees within a tree.
683 --------------------------------------------------------------------}
684 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
685 bin p m l Nil = l
686 bin p m Nil r = r
687 bin p m l r = Bin p m l r
688
689
690 {--------------------------------------------------------------------
691 Endian independent bit twiddling
692 --------------------------------------------------------------------}
693 zero :: Int -> Mask -> Bool
694 zero i m
695 = (natFromInt i) .&. (natFromInt m) == 0
696
697 nomatch,match :: Int -> Prefix -> Mask -> Bool
698 nomatch i p m
699 = (mask i m) /= p
700
701 match i p m
702 = (mask i m) == p
703
704 mask :: Int -> Mask -> Prefix
705 mask i m
706 = maskW (natFromInt i) (natFromInt m)
707
708 zeroN :: Nat -> Nat -> Bool
709 zeroN i m = (i .&. m) == 0
710
711 {--------------------------------------------------------------------
712 Big endian operations
713 --------------------------------------------------------------------}
714 maskW :: Nat -> Nat -> Prefix
715 maskW i m
716 = intFromNat (i .&. (complement (m-1) `xor` m))
717
718 shorter :: Mask -> Mask -> Bool
719 shorter m1 m2
720 = (natFromInt m1) > (natFromInt m2)
721
722 branchMask :: Prefix -> Prefix -> Mask
723 branchMask p1 p2
724 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
725
726 {----------------------------------------------------------------------
727 Finding the highest bit (mask) in a word [x] can be done efficiently in
728 three ways:
729 * convert to a floating point value and the mantissa tells us the
730 [log2(x)] that corresponds with the highest bit position. The mantissa
731 is retrieved either via the standard C function [frexp] or by some bit
732 twiddling on IEEE compatible numbers (float). Note that one needs to
733 use at least [double] precision for an accurate mantissa of 32 bit
734 numbers.
735 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
736 * use processor specific assembler instruction (asm).
737
738 The most portable way would be [bit], but is it efficient enough?
739 I have measured the cycle counts of the different methods on an AMD
740 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
741
742 highestBitMask: method cycles
743 --------------
744 frexp 200
745 float 33
746 bit 11
747 asm 12
748
749 highestBit: method cycles
750 --------------
751 frexp 195
752 float 33
753 bit 11
754 asm 11
755
756 Wow, the bit twiddling is on today's RISC like machines even faster
757 than a single CISC instruction (BSR)!
758 ----------------------------------------------------------------------}
759
760 {----------------------------------------------------------------------
761 [highestBitMask] returns a word where only the highest bit is set.
762 It is found by first setting all bits in lower positions than the
763 highest bit and than taking an exclusive or with the original value.
764 Allthough the function may look expensive, GHC compiles this into
765 excellent C code that subsequently compiled into highly efficient
766 machine code. The algorithm is derived from Jorg Arndt's FXT library.
767 ----------------------------------------------------------------------}
768 highestBitMask :: Nat -> Nat
769 highestBitMask x
770 = case (x .|. shiftRL x 1) of
771 x -> case (x .|. shiftRL x 2) of
772 x -> case (x .|. shiftRL x 4) of
773 x -> case (x .|. shiftRL x 8) of
774 x -> case (x .|. shiftRL x 16) of
775 x -> case (x .|. shiftRL x 32) of -- for 64 bit platforms
776 x -> (x `xor` (shiftRL x 1))
777
778
779 {--------------------------------------------------------------------
780 Utilities
781 --------------------------------------------------------------------}
782 foldlStrict f z xs
783 = case xs of
784 [] -> z
785 (x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
786
787
788 {-
789 {--------------------------------------------------------------------
790 Testing
791 --------------------------------------------------------------------}
792 testTree :: [Int] -> IntSet
793 testTree xs = fromList xs
794 test1 = testTree [1..20]
795 test2 = testTree [30,29..10]
796 test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
797
798 {--------------------------------------------------------------------
799 QuickCheck
800 --------------------------------------------------------------------}
801 qcheck prop
802 = check config prop
803 where
804 config = Config
805 { configMaxTest = 500
806 , configMaxFail = 5000
807 , configSize = \n -> (div n 2 + 3)
808 , configEvery = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
809 }
810
811
812 {--------------------------------------------------------------------
813 Arbitrary, reasonably balanced trees
814 --------------------------------------------------------------------}
815 instance Arbitrary IntSet where
816 arbitrary = do{ xs <- arbitrary
817 ; return (fromList xs)
818 }
819
820
821 {--------------------------------------------------------------------
822 Single, Insert, Delete
823 --------------------------------------------------------------------}
824 prop_Single :: Int -> Bool
825 prop_Single x
826 = (insert x empty == singleton x)
827
828 prop_InsertDelete :: Int -> IntSet -> Property
829 prop_InsertDelete k t
830 = not (member k t) ==> delete k (insert k t) == t
831
832
833 {--------------------------------------------------------------------
834 Union
835 --------------------------------------------------------------------}
836 prop_UnionInsert :: Int -> IntSet -> Bool
837 prop_UnionInsert x t
838 = union t (singleton x) == insert x t
839
840 prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
841 prop_UnionAssoc t1 t2 t3
842 = union t1 (union t2 t3) == union (union t1 t2) t3
843
844 prop_UnionComm :: IntSet -> IntSet -> Bool
845 prop_UnionComm t1 t2
846 = (union t1 t2 == union t2 t1)
847
848 prop_Diff :: [Int] -> [Int] -> Bool
849 prop_Diff xs ys
850 = toAscList (difference (fromList xs) (fromList ys))
851 == List.sort ((List.\\) (nub xs) (nub ys))
852
853 prop_Int :: [Int] -> [Int] -> Bool
854 prop_Int xs ys
855 = toAscList (intersection (fromList xs) (fromList ys))
856 == List.sort (nub ((List.intersect) (xs) (ys)))
857
858 {--------------------------------------------------------------------
859 Lists
860 --------------------------------------------------------------------}
861 prop_Ordered
862 = forAll (choose (5,100)) $ \n ->
863 let xs = [0..n::Int]
864 in fromAscList xs == fromList xs
865
866 prop_List :: [Int] -> Bool
867 prop_List xs
868 = (sort (nub xs) == toAscList (fromList xs))
869 -}