Merge changes release by GHC HQ as 0.7.4.1
[packages/containers.git] / Data / IntSet / Base.hs
1 {-# LANGUAGE CPP #-}
2 #if __GLASGOW_HASKELL__
3 {-# LANGUAGE MagicHash, BangPatterns, DeriveDataTypeable, StandaloneDeriving #-}
4 #endif
5 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
6 {-# LANGUAGE Trustworthy #-}
7 #endif
8 -----------------------------------------------------------------------------
9 -- |
10 -- Module : Data.IntSet.Base
11 -- Copyright : (c) Daan Leijen 2002
12 -- (c) Joachim Breitner 2011
13 -- License : BSD-style
14 -- Maintainer : libraries@haskell.org
15 -- Stability : provisional
16 -- Portability : portable
17 --
18 -- An efficient implementation of integer sets.
19 --
20 -- These modules are intended to be imported qualified, to avoid name
21 -- clashes with Prelude functions, e.g.
22 --
23 -- > import Data.IntSet (IntSet)
24 -- > import qualified Data.IntSet as IntSet
25 --
26 -- The implementation is based on /big-endian patricia trees/. This data
27 -- structure performs especially well on binary operations like 'union'
28 -- and 'intersection'. However, my benchmarks show that it is also
29 -- (much) faster on insertions and deletions when compared to a generic
30 -- size-balanced set implementation (see "Data.Set").
31 --
32 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
33 -- Workshop on ML, September 1998, pages 77-86,
34 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
35 --
36 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
37 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
38 -- October 1968, pages 514-534.
39 --
40 -- Additionally, this implementation places bitmaps in the leaves of the tree.
41 -- Their size is the natural size of a machine word (32 or 64 bits) and greatly
42 -- reduce memory footprint and execution times for dense sets, e.g. sets where
43 -- it is likely that many values lie close to each other. The asymptotics are
44 -- not affected by this optimization.
45 --
46 -- Many operations have a worst-case complexity of /O(min(n,W))/.
47 -- This means that the operation can become linear in the number of
48 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
49 -- (32 or 64).
50 -----------------------------------------------------------------------------
51
52 -- [Note: INLINE bit fiddling]
53 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
54 -- It is essential that the bit fiddling functions like mask, zero, branchMask
55 -- etc are inlined. If they do not, the memory allocation skyrockets. The GHC
56 -- usually gets it right, but it is disastrous if it does not. Therefore we
57 -- explicitly mark these functions INLINE.
58
59
60 -- [Note: Local 'go' functions and capturing]
61 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
62 -- Care must be taken when using 'go' function which captures an argument.
63 -- Sometimes (for example when the argument is passed to a data constructor,
64 -- as in insert), GHC heap-allocates more than necessary. Therefore C-- code
65 -- must be checked for increased allocation when creating and modifying such
66 -- functions.
67
68
69 -- [Note: Order of constructors]
70 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
71 -- The order of constructors of IntSet matters when considering performance.
72 -- Currently in GHC 7.0, when type has 3 constructors, they are matched from
73 -- the first to the last -- the best performance is achieved when the
74 -- constructors are ordered by frequency.
75 -- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil
76 -- improves the benchmark by circa 10%.
77
78 module Data.IntSet.Base (
79 -- * Set type
80 IntSet(..) -- instance Eq,Show
81
82 -- * Operators
83 , (\\)
84
85 -- * Query
86 , null
87 , size
88 , member
89 , notMember
90 , lookupLT
91 , lookupGT
92 , lookupLE
93 , lookupGE
94 , isSubsetOf
95 , isProperSubsetOf
96
97 -- * Construction
98 , empty
99 , singleton
100 , insert
101 , delete
102
103 -- * Combine
104 , union
105 , unions
106 , difference
107 , intersection
108
109 -- * Filter
110 , filter
111 , partition
112 , split
113 , splitMember
114
115 -- * Map
116 , map
117
118 -- * Folds
119 , foldr
120 , foldl
121 -- ** Strict folds
122 , foldr'
123 , foldl'
124 -- ** Legacy folds
125 , fold
126
127 -- * Min\/Max
128 , findMin
129 , findMax
130 , deleteMin
131 , deleteMax
132 , deleteFindMin
133 , deleteFindMax
134 , maxView
135 , minView
136
137 -- * Conversion
138
139 -- ** List
140 , elems
141 , toList
142 , fromList
143
144 -- ** Ordered list
145 , toAscList
146 , toDescList
147 , fromAscList
148 , fromDistinctAscList
149
150 -- * Debugging
151 , showTree
152 , showTreeWith
153
154 -- * Internals
155 , match
156 , suffixBitMask
157 , prefixBitMask
158 , bitmapOf
159 ) where
160
161
162 import Prelude hiding (filter,foldr,foldl,null,map)
163 import Data.Bits
164
165 import qualified Data.List as List
166 import Data.Monoid (Monoid(..))
167 import Data.Maybe (fromMaybe)
168 import Data.Typeable
169 import Control.DeepSeq (NFData)
170
171 #if __GLASGOW_HASKELL__
172 import Text.Read
173 import Data.Data (Data(..), mkNoRepType)
174 #endif
175
176 #if __GLASGOW_HASKELL__
177 import GHC.Exts ( Word(..), Int(..), build )
178 import GHC.Prim ( uncheckedShiftL#, uncheckedShiftRL#, indexInt8OffAddr# )
179 #else
180 import Data.Word
181 #endif
182
183 -- On GHC, include MachDeps.h to get WORD_SIZE_IN_BITS macro.
184 #if defined(__GLASGOW_HASKELL__)
185 #include "MachDeps.h"
186 #endif
187
188 -- Use macros to define strictness of functions.
189 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
190 -- We do not use BangPatterns, because they are not in any standard and we
191 -- want the compilers to be compiled by as many compilers as possible.
192 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
193 #define STRICT_2_OF_2(fn) fn _ arg | arg `seq` False = undefined
194 #define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
195 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
196
197 infixl 9 \\{-This comment teaches CPP correct behaviour -}
198
199 -- A "Nat" is a natural machine word (an unsigned Int)
200 type Nat = Word
201
202 natFromInt :: Int -> Nat
203 natFromInt i = fromIntegral i
204 {-# INLINE natFromInt #-}
205
206 intFromNat :: Nat -> Int
207 intFromNat w = fromIntegral w
208 {-# INLINE intFromNat #-}
209
210 -- Right and left logical shifts.
211 shiftRL, shiftLL :: Nat -> Int -> Nat
212 #if __GLASGOW_HASKELL__
213 {--------------------------------------------------------------------
214 GHC: use unboxing to get @shiftRL@ and @shiftLL@ inlined.
215 --------------------------------------------------------------------}
216 shiftRL (W# x) (I# i) = W# (uncheckedShiftRL# x i)
217 shiftLL (W# x) (I# i) = W# (uncheckedShiftL# x i)
218 #else
219 shiftRL x i = shiftR x i
220 shiftLL x i = shiftL x i
221 #endif
222 {-# INLINE shiftRL #-}
223 {-# INLINE shiftLL #-}
224
225 {--------------------------------------------------------------------
226 Operators
227 --------------------------------------------------------------------}
228 -- | /O(n+m)/. See 'difference'.
229 (\\) :: IntSet -> IntSet -> IntSet
230 m1 \\ m2 = difference m1 m2
231
232 {--------------------------------------------------------------------
233 Types
234 --------------------------------------------------------------------}
235
236 -- | A set of integers.
237
238 -- See Note: Order of constructors
239 data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
240 -- Invariant: Nil is never found as a child of Bin.
241 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
242 -- two elements of the set differ.
243 -- Invariant: Prefix is the common high-order bits that all elements share to
244 -- the left of the Mask bit.
245 -- Invariant: In Bin prefix mask left right, left consists of the elements that
246 -- don't have the mask bit set; right is all the elements that do.
247 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
248 -- Invariant: The Prefix is zero for all but the last 5 (on 32 bit arches) or 6
249 -- bits (on 64 bit arches). The values of the map represented by a tip
250 -- are the prefix plus the indices of the set bits in the bit map.
251 | Nil
252
253 -- A number stored in a set is stored as
254 -- * Prefix (all but last 5-6 bits) and
255 -- * BitMap (last 5-6 bits stored as a bitmask)
256 -- Last 5-6 bits are called a Suffix.
257
258 type Prefix = Int
259 type Mask = Int
260 type BitMap = Word
261
262 instance Monoid IntSet where
263 mempty = empty
264 mappend = union
265 mconcat = unions
266
267 #if __GLASGOW_HASKELL__
268
269 {--------------------------------------------------------------------
270 A Data instance
271 --------------------------------------------------------------------}
272
273 -- This instance preserves data abstraction at the cost of inefficiency.
274 -- We omit reflection services for the sake of data abstraction.
275
276 instance Data IntSet where
277 gfoldl f z is = z fromList `f` (toList is)
278 toConstr _ = error "toConstr"
279 gunfold _ _ = error "gunfold"
280 dataTypeOf _ = mkNoRepType "Data.IntSet.IntSet"
281
282 #endif
283
284 {--------------------------------------------------------------------
285 Query
286 --------------------------------------------------------------------}
287 -- | /O(1)/. Is the set empty?
288 null :: IntSet -> Bool
289 null Nil = True
290 null _ = False
291 {-# INLINE null #-}
292
293 -- | /O(n)/. Cardinality of the set.
294 size :: IntSet -> Int
295 size t
296 = case t of
297 Bin _ _ l r -> size l + size r
298 Tip _ bm -> bitcount 0 bm
299 Nil -> 0
300
301 -- | /O(min(n,W))/. Is the value a member of the set?
302
303 -- See Note: Local 'go' functions and capturing]
304 member :: Int -> IntSet -> Bool
305 member x = x `seq` go
306 where
307 go (Bin p m l r)
308 | nomatch x p m = False
309 | zero x m = go l
310 | otherwise = go r
311 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0
312 go Nil = False
313
314 -- | /O(min(n,W))/. Is the element not in the set?
315 notMember :: Int -> IntSet -> Bool
316 notMember k = not . member k
317
318 -- | /O(log n)/. Find largest element smaller than the given one.
319 --
320 -- > lookupLT 3 (fromList [3, 5]) == Nothing
321 -- > lookupLT 5 (fromList [3, 5]) == Just 3
322
323 -- See Note: Local 'go' functions and capturing.
324 lookupLT :: Int -> IntSet -> Maybe Int
325 lookupLT x t = x `seq` case t of
326 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
327 _ -> go Nil t
328 where
329 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
330 | zero x m = go def l
331 | otherwise = go l r
332 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
333 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT
334 | otherwise = unsafeFindMax def
335 where maskLT = (bitmapOf x - 1) .&. bm
336 go def Nil = unsafeFindMax def
337
338
339 -- | /O(log n)/. Find smallest element greater than the given one.
340 --
341 -- > lookupGT 4 (fromList [3, 5]) == Just 5
342 -- > lookupGT 5 (fromList [3, 5]) == Nothing
343
344 -- See Note: Local 'go' functions and capturing.
345 lookupGT :: Int -> IntSet -> Maybe Int
346 lookupGT x t = x `seq` case t of
347 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
348 _ -> go Nil t
349 where
350 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
351 | zero x m = go r l
352 | otherwise = go def r
353 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
354 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT
355 | otherwise = unsafeFindMin def
356 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm
357 go def Nil = unsafeFindMin def
358
359
360 -- | /O(log n)/. Find largest element smaller or equal to the given one.
361 --
362 -- > lookupLE 2 (fromList [3, 5]) == Nothing
363 -- > lookupLE 4 (fromList [3, 5]) == Just 3
364 -- > lookupLE 5 (fromList [3, 5]) == Just 5
365
366 -- See Note: Local 'go' functions and capturing.
367 lookupLE :: Int -> IntSet -> Maybe Int
368 lookupLE x t = x `seq` case t of
369 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
370 _ -> go Nil t
371 where
372 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
373 | zero x m = go def l
374 | otherwise = go l r
375 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
376 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE
377 | otherwise = unsafeFindMax def
378 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm
379 go def Nil = unsafeFindMax def
380
381
382 -- | /O(log n)/. Find smallest element greater or equal to the given one.
383 --
384 -- > lookupGE 3 (fromList [3, 5]) == Just 3
385 -- > lookupGE 4 (fromList [3, 5]) == Just 5
386 -- > lookupGE 6 (fromList [3, 5]) == Nothing
387
388 -- See Note: Local 'go' functions and capturing.
389 lookupGE :: Int -> IntSet -> Maybe Int
390 lookupGE x t = x `seq` case t of
391 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
392 _ -> go Nil t
393 where
394 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
395 | zero x m = go r l
396 | otherwise = go def r
397 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
398 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE
399 | otherwise = unsafeFindMin def
400 where maskGE = (- (bitmapOf x)) .&. bm
401 go def Nil = unsafeFindMin def
402
403
404
405 -- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is
406 -- given, it has m > 0.
407 unsafeFindMin :: IntSet -> Maybe Int
408 unsafeFindMin Nil = Nothing
409 unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm
410 unsafeFindMin (Bin _ _ l _) = unsafeFindMin l
411
412 -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is
413 -- given, it has m > 0.
414 unsafeFindMax :: IntSet -> Maybe Int
415 unsafeFindMax Nil = Nothing
416 unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm
417 unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r
418
419 {--------------------------------------------------------------------
420 Construction
421 --------------------------------------------------------------------}
422 -- | /O(1)/. The empty set.
423 empty :: IntSet
424 empty
425 = Nil
426 {-# INLINE empty #-}
427
428 -- | /O(1)/. A set of one element.
429 singleton :: Int -> IntSet
430 singleton x
431 = Tip (prefixOf x) (bitmapOf x)
432 {-# INLINE singleton #-}
433
434 {--------------------------------------------------------------------
435 Insert
436 --------------------------------------------------------------------}
437 -- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for
438 -- IntSets.
439 insert :: Int -> IntSet -> IntSet
440 insert x = x `seq` insertBM (prefixOf x) (bitmapOf x)
441
442 -- Helper function for insert and union.
443 insertBM :: Prefix -> BitMap -> IntSet -> IntSet
444 insertBM kx bm t = kx `seq` bm `seq`
445 case t of
446 Bin p m l r
447 | nomatch kx p m -> join kx (Tip kx bm) p t
448 | zero kx m -> Bin p m (insertBM kx bm l) r
449 | otherwise -> Bin p m l (insertBM kx bm r)
450 Tip kx' bm'
451 | kx' == kx -> Tip kx' (bm .|. bm')
452 | otherwise -> join kx (Tip kx bm) kx' t
453 Nil -> Tip kx bm
454
455 -- | /O(min(n,W))/. Delete a value in the set. Returns the
456 -- original set when the value was not present.
457 delete :: Int -> IntSet -> IntSet
458 delete x = x `seq` deleteBM (prefixOf x) (bitmapOf x)
459
460 -- Deletes all values mentioned in the BitMap from the set.
461 -- Helper function for delete and difference.
462 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
463 deleteBM kx bm t = kx `seq` bm `seq`
464 case t of
465 Bin p m l r
466 | nomatch kx p m -> t
467 | zero kx m -> bin p m (deleteBM kx bm l) r
468 | otherwise -> bin p m l (deleteBM kx bm r)
469 Tip kx' bm'
470 | kx' == kx -> tip kx (bm' .&. complement bm)
471 | otherwise -> t
472 Nil -> Nil
473
474
475 {--------------------------------------------------------------------
476 Union
477 --------------------------------------------------------------------}
478 -- | The union of a list of sets.
479 unions :: [IntSet] -> IntSet
480 unions xs
481 = foldlStrict union empty xs
482
483
484 -- | /O(n+m)/. The union of two sets.
485 union :: IntSet -> IntSet -> IntSet
486 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
487 | shorter m1 m2 = union1
488 | shorter m2 m1 = union2
489 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
490 | otherwise = join p1 t1 p2 t2
491 where
492 union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
493 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
494 | otherwise = Bin p1 m1 l1 (union r1 t2)
495
496 union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
497 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
498 | otherwise = Bin p2 m2 l2 (union t1 r2)
499
500 union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t
501 union t@(Bin _ _ _ _) Nil = t
502 union (Tip kx bm) t = insertBM kx bm t
503 union Nil t = t
504
505
506 {--------------------------------------------------------------------
507 Difference
508 --------------------------------------------------------------------}
509 -- | /O(n+m)/. Difference between two sets.
510 difference :: IntSet -> IntSet -> IntSet
511 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
512 | shorter m1 m2 = difference1
513 | shorter m2 m1 = difference2
514 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
515 | otherwise = t1
516 where
517 difference1 | nomatch p2 p1 m1 = t1
518 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
519 | otherwise = bin p1 m1 l1 (difference r1 t2)
520
521 difference2 | nomatch p1 p2 m2 = t1
522 | zero p1 m2 = difference t1 l2
523 | otherwise = difference t1 r2
524
525 difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t
526 difference t@(Bin _ _ _ _) Nil = t
527
528 difference t1@(Tip kx bm) t2 = differenceTip t2
529 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1
530 | zero kx m2 = differenceTip l2
531 | otherwise = differenceTip r2
532 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2)
533 | otherwise = t1
534 differenceTip Nil = t1
535
536 difference Nil _ = Nil
537
538
539
540 {--------------------------------------------------------------------
541 Intersection
542 --------------------------------------------------------------------}
543 -- | /O(n+m)/. The intersection of two sets.
544 intersection :: IntSet -> IntSet -> IntSet
545 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
546 | shorter m1 m2 = intersection1
547 | shorter m2 m1 = intersection2
548 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
549 | otherwise = Nil
550 where
551 intersection1 | nomatch p2 p1 m1 = Nil
552 | zero p2 m1 = intersection l1 t2
553 | otherwise = intersection r1 t2
554
555 intersection2 | nomatch p1 p2 m2 = Nil
556 | zero p1 m2 = intersection t1 l2
557 | otherwise = intersection t1 r2
558
559 intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1
560 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil
561 | zero kx2 m1 = intersectBM l1
562 | otherwise = intersectBM r1
563 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
564 | otherwise = Nil
565 intersectBM Nil = Nil
566
567 intersection (Bin _ _ _ _) Nil = Nil
568
569 intersection (Tip kx1 bm1) t2 = intersectBM t2
570 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil
571 | zero kx1 m2 = intersectBM l2
572 | otherwise = intersectBM r2
573 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
574 | otherwise = Nil
575 intersectBM Nil = Nil
576
577 intersection Nil _ = Nil
578
579 {--------------------------------------------------------------------
580 Subset
581 --------------------------------------------------------------------}
582 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
583 isProperSubsetOf :: IntSet -> IntSet -> Bool
584 isProperSubsetOf t1 t2
585 = case subsetCmp t1 t2 of
586 LT -> True
587 _ -> False
588
589 subsetCmp :: IntSet -> IntSet -> Ordering
590 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
591 | shorter m1 m2 = GT
592 | shorter m2 m1 = case subsetCmpLt of
593 GT -> GT
594 _ -> LT
595 | p1 == p2 = subsetCmpEq
596 | otherwise = GT -- disjoint
597 where
598 subsetCmpLt | nomatch p1 p2 m2 = GT
599 | zero p1 m2 = subsetCmp t1 l2
600 | otherwise = subsetCmp t1 r2
601 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
602 (GT,_ ) -> GT
603 (_ ,GT) -> GT
604 (EQ,EQ) -> EQ
605 _ -> LT
606
607 subsetCmp (Bin _ _ _ _) _ = GT
608 subsetCmp (Tip kx1 bm1) (Tip kx2 bm2)
609 | kx1 /= kx2 = GT -- disjoint
610 | bm1 == bm2 = EQ
611 | bm1 .&. complement bm2 == 0 = LT
612 | otherwise = GT
613 subsetCmp t1@(Tip kx _) (Bin p m l r)
614 | nomatch kx p m = GT
615 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT
616 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT
617 subsetCmp (Tip _ _) Nil = GT -- disjoint
618 subsetCmp Nil Nil = EQ
619 subsetCmp Nil _ = LT
620
621 -- | /O(n+m)/. Is this a subset?
622 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
623
624 isSubsetOf :: IntSet -> IntSet -> Bool
625 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
626 | shorter m1 m2 = False
627 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
628 else isSubsetOf t1 r2)
629 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
630 isSubsetOf (Bin _ _ _ _) _ = False
631 isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0
632 isSubsetOf t1@(Tip kx _) (Bin p m l r)
633 | nomatch kx p m = False
634 | zero kx m = isSubsetOf t1 l
635 | otherwise = isSubsetOf t1 r
636 isSubsetOf (Tip _ _) Nil = False
637 isSubsetOf Nil _ = True
638
639
640 {--------------------------------------------------------------------
641 Filter
642 --------------------------------------------------------------------}
643 -- | /O(n)/. Filter all elements that satisfy some predicate.
644 filter :: (Int -> Bool) -> IntSet -> IntSet
645 filter predicate t
646 = case t of
647 Bin p m l r
648 -> bin p m (filter predicate l) (filter predicate r)
649 Tip kx bm
650 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm)
651 Nil -> Nil
652 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
653 | otherwise = bm
654 {-# INLINE bitPred #-}
655
656 -- | /O(n)/. partition the set according to some predicate.
657 partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
658 partition predicate t
659 = case t of
660 Bin p m l r
661 -> let (l1,l2) = partition predicate l
662 (r1,r2) = partition predicate r
663 in (bin p m l1 r1, bin p m l2 r2)
664 Tip kx bm
665 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm
666 in (tip kx bm1, tip kx (bm `xor` bm1))
667 Nil -> (Nil,Nil)
668 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
669 | otherwise = bm
670 {-# INLINE bitPred #-}
671
672
673 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
674 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
675 -- comprises the elements of @set@ greater than @x@.
676 --
677 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
678 split :: Int -> IntSet -> (IntSet,IntSet)
679 split x t =
680 case t of Bin _ m l r | m < 0 -> if x >= 0 then case go x l of (lt, gt) -> (union lt r, gt)
681 else case go x r of (lt, gt) -> (lt, union gt l)
682 _ -> go x t
683 where
684 go x' t'@(Bin p m l r) | match x' p m = if zero x' m then case go x' l of (lt, gt) -> (lt, union gt r)
685 else case go x' r of (lt, gt) -> (union lt l, gt)
686 | otherwise = if x' < p then (Nil, t')
687 else (t', Nil)
688 go x' t'@(Tip kx' bm) | kx' > x' = (Nil, t')
689 -- equivalent to kx' > prefixOf x'
690 | kx' < prefixOf x' = (t', Nil)
691 | otherwise = (tip kx' (bm .&. lowerBitmap), tip kx' (bm .&. higherBitmap))
692 where lowerBitmap = bitmapOf x' - 1
693 higherBitmap = complement (lowerBitmap + bitmapOf x')
694 go _ Nil = (Nil, Nil)
695
696 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
697 -- element was found in the original set.
698 splitMember :: Int -> IntSet -> (IntSet,Bool,IntSet)
699 splitMember x t =
700 case t of Bin _ m l r | m < 0 -> if x >= 0 then case go x l of (lt, fnd, gt) -> (union lt r, fnd, gt)
701 else case go x r of (lt, fnd, gt) -> (lt, fnd, union gt l)
702 _ -> go x t
703 where
704 go x' t'@(Bin p m l r) | match x' p m = if zero x' m then case go x' l of (lt, fnd, gt) -> (lt, fnd, union gt r)
705 else case go x' r of (lt, fnd, gt) -> (union lt l, fnd, gt)
706 | otherwise = if x' < p then (Nil, False, t')
707 else (t', False, Nil)
708 go x' t'@(Tip kx' bm) | kx' > x' = (Nil, False, t')
709 -- equivalent to kx' > prefixOf x'
710 | kx' < prefixOf x' = (t', False, Nil)
711 | otherwise = (tip kx' (bm .&. lowerBitmap), (bm .&. bitmapOfx') /= 0, tip kx' (bm .&. higherBitmap))
712 where bitmapOfx' = bitmapOf x'
713 lowerBitmap = bitmapOfx' - 1
714 higherBitmap = complement (lowerBitmap + bitmapOfx')
715 go _ Nil = (Nil, False, Nil)
716
717
718 {----------------------------------------------------------------------
719 Min/Max
720 ----------------------------------------------------------------------}
721
722 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
723 -- stripped of that element, or 'Nothing' if passed an empty set.
724 maxView :: IntSet -> Maybe (Int, IntSet)
725 maxView t =
726 case t of Nil -> Nothing
727 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r)
728 _ -> Just (go t)
729 where
730 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r')
731 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
732 go Nil = error "maxView Nil"
733
734 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
735 -- stripped of that element, or 'Nothing' if passed an empty set.
736 minView :: IntSet -> Maybe (Int, IntSet)
737 minView t =
738 case t of Nil -> Nothing
739 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r')
740 _ -> Just (go t)
741 where
742 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r)
743 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
744 go Nil = error "minView Nil"
745
746 -- | /O(min(n,W))/. Delete and find the minimal element.
747 --
748 -- > deleteFindMin set = (findMin set, deleteMin set)
749 deleteFindMin :: IntSet -> (Int, IntSet)
750 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
751
752 -- | /O(min(n,W))/. Delete and find the maximal element.
753 --
754 -- > deleteFindMax set = (findMax set, deleteMax set)
755 deleteFindMax :: IntSet -> (Int, IntSet)
756 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
757
758
759 -- | /O(min(n,W))/. The minimal element of the set.
760 findMin :: IntSet -> Int
761 findMin Nil = error "findMin: empty set has no minimal element"
762 findMin (Tip kx bm) = kx + lowestBitSet bm
763 findMin (Bin _ m l r)
764 | m < 0 = find r
765 | otherwise = find l
766 where find (Tip kx bm) = kx + lowestBitSet bm
767 find (Bin _ _ l' _) = find l'
768 find Nil = error "findMin Nil"
769
770 -- | /O(min(n,W))/. The maximal element of a set.
771 findMax :: IntSet -> Int
772 findMax Nil = error "findMax: empty set has no maximal element"
773 findMax (Tip kx bm) = kx + highestBitSet bm
774 findMax (Bin _ m l r)
775 | m < 0 = find l
776 | otherwise = find r
777 where find (Tip kx bm) = kx + highestBitSet bm
778 find (Bin _ _ _ r') = find r'
779 find Nil = error "findMax Nil"
780
781
782 -- | /O(min(n,W))/. Delete the minimal element.
783 deleteMin :: IntSet -> IntSet
784 deleteMin = maybe Nil snd . minView
785
786 -- | /O(min(n,W))/. Delete the maximal element.
787 deleteMax :: IntSet -> IntSet
788 deleteMax = maybe Nil snd . maxView
789
790 {----------------------------------------------------------------------
791 Map
792 ----------------------------------------------------------------------}
793
794 -- | /O(n*min(n,W))/.
795 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
796 --
797 -- It's worth noting that the size of the result may be smaller if,
798 -- for some @(x,y)@, @x \/= y && f x == f y@
799
800 map :: (Int->Int) -> IntSet -> IntSet
801 map f = fromList . List.map f . toList
802
803 {--------------------------------------------------------------------
804 Fold
805 --------------------------------------------------------------------}
806 -- | /O(n)/. Fold the elements in the set using the given right-associative
807 -- binary operator. This function is an equivalent of 'foldr' and is present
808 -- for compatibility only.
809 --
810 -- /Please note that fold will be deprecated in the future and removed./
811 fold :: (Int -> b -> b) -> b -> IntSet -> b
812 fold = foldr
813 {-# INLINE fold #-}
814
815 -- | /O(n)/. Fold the elements in the set using the given right-associative
816 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
817 --
818 -- For example,
819 --
820 -- > toAscList set = foldr (:) [] set
821 foldr :: (Int -> b -> b) -> b -> IntSet -> b
822 foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
823 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
824 | otherwise -> go (go z r) l
825 _ -> go z t
826 where
827 go z' Nil = z'
828 go z' (Tip kx bm) = foldrBits kx f z' bm
829 go z' (Bin _ _ l r) = go (go z' r) l
830 {-# INLINE foldr #-}
831
832 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
833 -- evaluated before using the result in the next application. This
834 -- function is strict in the starting value.
835 foldr' :: (Int -> b -> b) -> b -> IntSet -> b
836 foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
837 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
838 | otherwise -> go (go z r) l
839 _ -> go z t
840 where
841 STRICT_1_OF_2(go)
842 go z' Nil = z'
843 go z' (Tip kx bm) = foldr'Bits kx f z' bm
844 go z' (Bin _ _ l r) = go (go z' r) l
845 {-# INLINE foldr' #-}
846
847 -- | /O(n)/. Fold the elements in the set using the given left-associative
848 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
849 --
850 -- For example,
851 --
852 -- > toDescList set = foldl (flip (:)) [] set
853 foldl :: (a -> Int -> a) -> a -> IntSet -> a
854 foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
855 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
856 | otherwise -> go (go z l) r
857 _ -> go z t
858 where
859 STRICT_1_OF_2(go)
860 go z' Nil = z'
861 go z' (Tip kx bm) = foldlBits kx f z' bm
862 go z' (Bin _ _ l r) = go (go z' l) r
863 {-# INLINE foldl #-}
864
865 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
866 -- evaluated before using the result in the next application. This
867 -- function is strict in the starting value.
868 foldl' :: (a -> Int -> a) -> a -> IntSet -> a
869 foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
870 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
871 | otherwise -> go (go z l) r
872 _ -> go z t
873 where
874 STRICT_1_OF_2(go)
875 go z' Nil = z'
876 go z' (Tip kx bm) = foldl'Bits kx f z' bm
877 go z' (Bin _ _ l r) = go (go z' l) r
878 {-# INLINE foldl' #-}
879
880 {--------------------------------------------------------------------
881 List variations
882 --------------------------------------------------------------------}
883 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
884 -- Subject to list fusion.
885 elems :: IntSet -> [Int]
886 elems
887 = toAscList
888
889 {--------------------------------------------------------------------
890 Lists
891 --------------------------------------------------------------------}
892 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
893 toList :: IntSet -> [Int]
894 toList
895 = toAscList
896
897 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list
898 -- fusion.
899 toAscList :: IntSet -> [Int]
900 toAscList = foldr (:) []
901
902 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
903 -- fusion.
904 toDescList :: IntSet -> [Int]
905 toDescList = foldl (flip (:)) []
906
907 -- List fusion for the list generating functions.
908 #if __GLASGOW_HASKELL__
909 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
910 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
911 foldrFB :: (Int -> b -> b) -> b -> IntSet -> b
912 foldrFB = foldr
913 {-# INLINE[0] foldrFB #-}
914 foldlFB :: (a -> Int -> a) -> a -> IntSet -> a
915 foldlFB = foldl
916 {-# INLINE[0] foldlFB #-}
917
918 -- Inline elems and toList, so that we need to fuse only toAscList.
919 {-# INLINE elems #-}
920 {-# INLINE toList #-}
921
922 -- The fusion is enabled up to phase 2 included. If it does not succeed,
923 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
924 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
925 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
926 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
927 -- before phase 0, otherwise the fusion rules would not fire at all.
928 {-# NOINLINE[0] toAscList #-}
929 {-# NOINLINE[0] toDescList #-}
930 {-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
931 {-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
932 {-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
933 {-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
934 #endif
935
936
937 -- | /O(n*min(n,W))/. Create a set from a list of integers.
938 fromList :: [Int] -> IntSet
939 fromList xs
940 = foldlStrict ins empty xs
941 where
942 ins t x = insert x t
943
944 -- | /O(n)/. Build a set from an ascending list of elements.
945 -- /The precondition (input list is ascending) is not checked./
946 fromAscList :: [Int] -> IntSet
947 fromAscList [] = Nil
948 fromAscList (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
949 where
950 combineEq x' [] = [x']
951 combineEq x' (x:xs)
952 | x==x' = combineEq x' xs
953 | otherwise = x' : combineEq x xs
954
955 -- | /O(n)/. Build a set from an ascending list of distinct elements.
956 -- /The precondition (input list is strictly ascending) is not checked./
957 fromDistinctAscList :: [Int] -> IntSet
958 fromDistinctAscList [] = Nil
959 fromDistinctAscList (z0 : zs0) = work (prefixOf z0) (bitmapOf z0) zs0 Nada
960 where
961 -- 'work' accumulates all values that go into one tip, before passing this Tip
962 -- to 'reduce'
963 work kx bm [] stk = finish kx (Tip kx bm) stk
964 work kx bm (z:zs) stk | kx == prefixOf z = work kx (bm .|. bitmapOf z) zs stk
965 work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
966
967 reduce z zs _ px tx Nada = work (prefixOf z) (bitmapOf z) zs (Push px tx Nada)
968 reduce z zs m px tx stk@(Push py ty stk') =
969 let mxy = branchMask px py
970 pxy = mask px mxy
971 in if shorter m mxy
972 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
973 else work (prefixOf z) (bitmapOf z) zs (Push px tx stk)
974
975 finish _ t Nada = t
976 finish px tx (Push py ty stk) = finish p (join py ty px tx) stk
977 where m = branchMask px py
978 p = mask px m
979
980 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
981
982
983 {--------------------------------------------------------------------
984 Eq
985 --------------------------------------------------------------------}
986 instance Eq IntSet where
987 t1 == t2 = equal t1 t2
988 t1 /= t2 = nequal t1 t2
989
990 equal :: IntSet -> IntSet -> Bool
991 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
992 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
993 equal (Tip kx1 bm1) (Tip kx2 bm2)
994 = kx1 == kx2 && bm1 == bm2
995 equal Nil Nil = True
996 equal _ _ = False
997
998 nequal :: IntSet -> IntSet -> Bool
999 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1000 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
1001 nequal (Tip kx1 bm1) (Tip kx2 bm2)
1002 = kx1 /= kx2 || bm1 /= bm2
1003 nequal Nil Nil = False
1004 nequal _ _ = True
1005
1006 {--------------------------------------------------------------------
1007 Ord
1008 --------------------------------------------------------------------}
1009
1010 instance Ord IntSet where
1011 compare s1 s2 = compare (toAscList s1) (toAscList s2)
1012 -- tentative implementation. See if more efficient exists.
1013
1014 {--------------------------------------------------------------------
1015 Show
1016 --------------------------------------------------------------------}
1017 instance Show IntSet where
1018 showsPrec p xs = showParen (p > 10) $
1019 showString "fromList " . shows (toList xs)
1020
1021 {--------------------------------------------------------------------
1022 Read
1023 --------------------------------------------------------------------}
1024 instance Read IntSet where
1025 #ifdef __GLASGOW_HASKELL__
1026 readPrec = parens $ prec 10 $ do
1027 Ident "fromList" <- lexP
1028 xs <- readPrec
1029 return (fromList xs)
1030
1031 readListPrec = readListPrecDefault
1032 #else
1033 readsPrec p = readParen (p > 10) $ \ r -> do
1034 ("fromList",s) <- lex r
1035 (xs,t) <- reads s
1036 return (fromList xs,t)
1037 #endif
1038
1039 {--------------------------------------------------------------------
1040 Typeable
1041 --------------------------------------------------------------------}
1042
1043 #include "Typeable.h"
1044 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
1045
1046 {--------------------------------------------------------------------
1047 NFData
1048 --------------------------------------------------------------------}
1049
1050 -- The IntSet constructors consist only of strict fields of Ints and
1051 -- IntSets, thus the default NFData instance which evaluates to whnf
1052 -- should suffice
1053 instance NFData IntSet
1054
1055 {--------------------------------------------------------------------
1056 Debugging
1057 --------------------------------------------------------------------}
1058 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1059 -- in a compressed, hanging format.
1060 showTree :: IntSet -> String
1061 showTree s
1062 = showTreeWith True False s
1063
1064
1065 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
1066 the tree that implements the set. If @hang@ is
1067 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
1068 @wide@ is 'True', an extra wide version is shown.
1069 -}
1070 showTreeWith :: Bool -> Bool -> IntSet -> String
1071 showTreeWith hang wide t
1072 | hang = (showsTreeHang wide [] t) ""
1073 | otherwise = (showsTree wide [] [] t) ""
1074
1075 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
1076 showsTree wide lbars rbars t
1077 = case t of
1078 Bin p m l r
1079 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1080 showWide wide rbars .
1081 showsBars lbars . showString (showBin p m) . showString "\n" .
1082 showWide wide lbars .
1083 showsTree wide (withEmpty lbars) (withBar lbars) l
1084 Tip kx bm
1085 -> showsBars lbars . showString " " . shows kx . showString " + " .
1086 showsBitMap bm . showString "\n"
1087 Nil -> showsBars lbars . showString "|\n"
1088
1089 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
1090 showsTreeHang wide bars t
1091 = case t of
1092 Bin p m l r
1093 -> showsBars bars . showString (showBin p m) . showString "\n" .
1094 showWide wide bars .
1095 showsTreeHang wide (withBar bars) l .
1096 showWide wide bars .
1097 showsTreeHang wide (withEmpty bars) r
1098 Tip kx bm
1099 -> showsBars bars . showString " " . shows kx . showString " + " .
1100 showsBitMap bm . showString "\n"
1101 Nil -> showsBars bars . showString "|\n"
1102
1103 showBin :: Prefix -> Mask -> String
1104 showBin _ _
1105 = "*" -- ++ show (p,m)
1106
1107 showWide :: Bool -> [String] -> String -> String
1108 showWide wide bars
1109 | wide = showString (concat (reverse bars)) . showString "|\n"
1110 | otherwise = id
1111
1112 showsBars :: [String] -> ShowS
1113 showsBars bars
1114 = case bars of
1115 [] -> id
1116 _ -> showString (concat (reverse (tail bars))) . showString node
1117
1118 showsBitMap :: Word -> ShowS
1119 showsBitMap = showString . showBitMap
1120
1121 showBitMap :: Word -> String
1122 showBitMap w = show $ foldrBits 0 (:) [] w
1123
1124 node :: String
1125 node = "+--"
1126
1127 withBar, withEmpty :: [String] -> [String]
1128 withBar bars = "| ":bars
1129 withEmpty bars = " ":bars
1130
1131
1132 {--------------------------------------------------------------------
1133 Helpers
1134 --------------------------------------------------------------------}
1135 {--------------------------------------------------------------------
1136 Join
1137 --------------------------------------------------------------------}
1138 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
1139 join p1 t1 p2 t2
1140 | zero p1 m = Bin p m t1 t2
1141 | otherwise = Bin p m t2 t1
1142 where
1143 m = branchMask p1 p2
1144 p = mask p1 m
1145 {-# INLINE join #-}
1146
1147 {--------------------------------------------------------------------
1148 @bin@ assures that we never have empty trees within a tree.
1149 --------------------------------------------------------------------}
1150 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
1151 bin _ _ l Nil = l
1152 bin _ _ Nil r = r
1153 bin p m l r = Bin p m l r
1154 {-# INLINE bin #-}
1155
1156 {--------------------------------------------------------------------
1157 @tip@ assures that we never have empty bitmaps within a tree.
1158 --------------------------------------------------------------------}
1159 tip :: Prefix -> BitMap -> IntSet
1160 tip _ 0 = Nil
1161 tip kx bm = Tip kx bm
1162 {-# INLINE tip #-}
1163
1164
1165 {----------------------------------------------------------------------
1166 Functions that generate Prefix and BitMap of a Key or a Suffix.
1167 ----------------------------------------------------------------------}
1168
1169 suffixBitMask :: Int
1170 suffixBitMask = bitSize (undefined::Word) - 1
1171 {-# INLINE suffixBitMask #-}
1172
1173 prefixBitMask :: Int
1174 prefixBitMask = complement suffixBitMask
1175 {-# INLINE prefixBitMask #-}
1176
1177 prefixOf :: Int -> Prefix
1178 prefixOf x = x .&. prefixBitMask
1179 {-# INLINE prefixOf #-}
1180
1181 suffixOf :: Int -> Int
1182 suffixOf x = x .&. suffixBitMask
1183 {-# INLINE suffixOf #-}
1184
1185 bitmapOfSuffix :: Int -> BitMap
1186 bitmapOfSuffix s = 1 `shiftLL` s
1187 {-# INLINE bitmapOfSuffix #-}
1188
1189 bitmapOf :: Int -> BitMap
1190 bitmapOf x = bitmapOfSuffix (suffixOf x)
1191 {-# INLINE bitmapOf #-}
1192
1193
1194 {--------------------------------------------------------------------
1195 Endian independent bit twiddling
1196 --------------------------------------------------------------------}
1197 zero :: Int -> Mask -> Bool
1198 zero i m
1199 = (natFromInt i) .&. (natFromInt m) == 0
1200 {-# INLINE zero #-}
1201
1202 nomatch,match :: Int -> Prefix -> Mask -> Bool
1203 nomatch i p m
1204 = (mask i m) /= p
1205 {-# INLINE nomatch #-}
1206
1207 match i p m
1208 = (mask i m) == p
1209 {-# INLINE match #-}
1210
1211 -- Suppose a is largest such that 2^a divides 2*m.
1212 -- Then mask i m is i with the low a bits zeroed out.
1213 mask :: Int -> Mask -> Prefix
1214 mask i m
1215 = maskW (natFromInt i) (natFromInt m)
1216 {-# INLINE mask #-}
1217
1218 {--------------------------------------------------------------------
1219 Big endian operations
1220 --------------------------------------------------------------------}
1221 maskW :: Nat -> Nat -> Prefix
1222 maskW i m
1223 = intFromNat (i .&. (complement (m-1) `xor` m))
1224 {-# INLINE maskW #-}
1225
1226 shorter :: Mask -> Mask -> Bool
1227 shorter m1 m2
1228 = (natFromInt m1) > (natFromInt m2)
1229 {-# INLINE shorter #-}
1230
1231 branchMask :: Prefix -> Prefix -> Mask
1232 branchMask p1 p2
1233 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
1234 {-# INLINE branchMask #-}
1235
1236 {----------------------------------------------------------------------
1237 Finding the highest bit (mask) in a word [x] can be done efficiently in
1238 three ways:
1239 * convert to a floating point value and the mantissa tells us the
1240 [log2(x)] that corresponds with the highest bit position. The mantissa
1241 is retrieved either via the standard C function [frexp] or by some bit
1242 twiddling on IEEE compatible numbers (float). Note that one needs to
1243 use at least [double] precision for an accurate mantissa of 32 bit
1244 numbers.
1245 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
1246 * use processor specific assembler instruction (asm).
1247
1248 The most portable way would be [bit], but is it efficient enough?
1249 I have measured the cycle counts of the different methods on an AMD
1250 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
1251
1252 highestBitMask: method cycles
1253 --------------
1254 frexp 200
1255 float 33
1256 bit 11
1257 asm 12
1258
1259 highestBit: method cycles
1260 --------------
1261 frexp 195
1262 float 33
1263 bit 11
1264 asm 11
1265
1266 Wow, the bit twiddling is on today's RISC like machines even faster
1267 than a single CISC instruction (BSR)!
1268 ----------------------------------------------------------------------}
1269
1270 {----------------------------------------------------------------------
1271 [highestBitMask] returns a word where only the highest bit is set.
1272 It is found by first setting all bits in lower positions than the
1273 highest bit and than taking an exclusive or with the original value.
1274 Allthough the function may look expensive, GHC compiles this into
1275 excellent C code that subsequently compiled into highly efficient
1276 machine code. The algorithm is derived from Jorg Arndt's FXT library.
1277 ----------------------------------------------------------------------}
1278 highestBitMask :: Nat -> Nat
1279 highestBitMask x0
1280 = case (x0 .|. shiftRL x0 1) of
1281 x1 -> case (x1 .|. shiftRL x1 2) of
1282 x2 -> case (x2 .|. shiftRL x2 4) of
1283 x3 -> case (x3 .|. shiftRL x3 8) of
1284 x4 -> case (x4 .|. shiftRL x4 16) of
1285 #if !(defined(__GLASGOW_HASKELL__) && WORD_SIZE_IN_BITS==32)
1286 x5 -> case (x5 .|. shiftRL x5 32) of -- for 64 bit platforms
1287 #endif
1288 x6 -> (x6 `xor` (shiftRL x6 1))
1289 {-# INLINE highestBitMask #-}
1290
1291 {----------------------------------------------------------------------
1292 To get best performance, we provide fast implementations of
1293 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC.
1294 If the intel bsf and bsr instructions ever become GHC primops,
1295 this code should be reimplemented using these.
1296
1297 Performance of this code is crucial for folds, toList, filter, partition.
1298
1299 The signatures of methods in question are placed after this comment.
1300 ----------------------------------------------------------------------}
1301
1302 lowestBitSet :: Nat -> Int
1303 highestBitSet :: Nat -> Int
1304 foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1305 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1306 foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1307 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1308
1309 {-# INLINE lowestBitSet #-}
1310 {-# INLINE highestBitSet #-}
1311 {-# INLINE foldlBits #-}
1312 {-# INLINE foldl'Bits #-}
1313 {-# INLINE foldrBits #-}
1314 {-# INLINE foldr'Bits #-}
1315
1316 #if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64)
1317 {----------------------------------------------------------------------
1318 For lowestBitSet we use wordsize-dependant implementation based on
1319 multiplication and DeBrujn indeces, which was proposed by Edward Kmett
1320 <http://haskell.org/pipermail/libraries/2011-September/016749.html>
1321
1322 The core of this implementation is fast indexOfTheOnlyBit,
1323 which is given a Nat with exactly one bit set, and returns
1324 its index.
1325
1326 Lot of effort was put in these implementations, please benchmark carefully
1327 before changing this code.
1328 ----------------------------------------------------------------------}
1329
1330 indexOfTheOnlyBit :: Nat -> Int
1331 {-# INLINE indexOfTheOnlyBit #-}
1332 indexOfTheOnlyBit bitmask =
1333 I# (lsbArray `indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))
1334 where unboxInt (I# i) = i
1335 #if WORD_SIZE_IN_BITS==32
1336 magic = 0x077CB531
1337 offset = 27
1338 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"#
1339 #else
1340 magic = 0x07EDD5E59A4E28C2
1341 offset = 58
1342 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"#
1343 #endif
1344 -- The lsbArray gets inlined to every call site of indexOfTheOnlyBit.
1345 -- That cannot be easily avoided, as GHC forbids top-level Addr# literal.
1346 -- One could go around that by supplying getLsbArray :: () -> Addr# marked
1347 -- as NOINLINE. But the code size of calling it and processing the result
1348 -- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array
1349 -- is actually improvement on 32-bit and only a 8B size increase on 64-bit.
1350
1351 lowestBitMask :: Nat -> Nat
1352 lowestBitMask x = x .&. negate x
1353 {-# INLINE lowestBitMask #-}
1354
1355 -- Reverse the order of bits in the Nat.
1356 revNat :: Nat -> Nat
1357 #if WORD_SIZE_IN_BITS==32
1358 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of
1359 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of
1360 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of
1361 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of
1362 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16);
1363 #else
1364 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of
1365 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of
1366 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of
1367 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of
1368 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of
1369 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
1370 #endif
1371
1372 lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x)
1373
1374 highestBitSet x = indexOfTheOnlyBit (highestBitMask x)
1375
1376 foldlBits prefix f z bitmap = go bitmap z
1377 where go bm acc | bm == 0 = acc
1378 | otherwise = case lowestBitMask bm of
1379 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1380 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1381
1382 foldl'Bits prefix f z bitmap = go bitmap z
1383 where STRICT_2_OF_2(go)
1384 go bm acc | bm == 0 = acc
1385 | otherwise = case lowestBitMask bm of
1386 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1387 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1388
1389 foldrBits prefix f z bitmap = go (revNat bitmap) z
1390 where go bm acc | bm == 0 = acc
1391 | otherwise = case lowestBitMask bm of
1392 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1393 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1394
1395 foldr'Bits prefix f z bitmap = go (revNat bitmap) z
1396 where STRICT_2_OF_2(go)
1397 go bm acc | bm == 0 = acc
1398 | otherwise = case lowestBitMask bm of
1399 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1400 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1401
1402 #else
1403 {----------------------------------------------------------------------
1404 In general case we use logarithmic implementation of
1405 lowestBitSet and highestBitSet, which works up to bit sizes of 64.
1406
1407 Folds are linear scans.
1408 ----------------------------------------------------------------------}
1409
1410 lowestBitSet n0 =
1411 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32)
1412 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1)
1413 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2)
1414 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3)
1415 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4)
1416 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5
1417 in b6
1418
1419 highestBitSet n0 =
1420 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0)
1421 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1)
1422 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2)
1423 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3)
1424 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4)
1425 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5
1426 in b6
1427
1428 foldlBits prefix f z bm = let lb = lowestBitSet bm
1429 in go (prefix+lb) z (bm `shiftRL` lb)
1430 where STRICT_1_OF_3(go)
1431 go _ acc 0 = acc
1432 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1433 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1434
1435 foldl'Bits prefix f z bm = let lb = lowestBitSet bm
1436 in go (prefix+lb) z (bm `shiftRL` lb)
1437 where STRICT_1_OF_3(go)
1438 STRICT_2_OF_3(go)
1439 go _ acc 0 = acc
1440 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1441 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1442
1443 foldrBits prefix f z bm = let lb = lowestBitSet bm
1444 in go (prefix+lb) (bm `shiftRL` lb)
1445 where STRICT_1_OF_2(go)
1446 go _ 0 = z
1447 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1))
1448 | otherwise = go (bi + 1) (n `shiftRL` 1)
1449
1450 foldr'Bits prefix f z bm = let lb = lowestBitSet bm
1451 in go (prefix+lb) (bm `shiftRL` lb)
1452 where STRICT_1_OF_2(go)
1453 go _ 0 = z
1454 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
1455 | otherwise = go (bi + 1) (n `shiftRL` 1)
1456
1457 #endif
1458
1459 {----------------------------------------------------------------------
1460 [bitcount] as posted by David F. Place to haskell-cafe on April 11, 2006,
1461 based on the code on
1462 http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan,
1463 where the following source is given:
1464 Published in 1988, the C Programming Language 2nd Ed. (by Brian W.
1465 Kernighan and Dennis M. Ritchie) mentions this in exercise 2-9. On April
1466 19, 2006 Don Knuth pointed out to me that this method "was first published
1467 by Peter Wegner in CACM 3 (1960), 322. (Also discovered independently by
1468 Derrick Lehmer and published in 1964 in a book edited by Beckenbach.)"
1469 ----------------------------------------------------------------------}
1470 bitcount :: Int -> Word -> Int
1471 bitcount a0 x0 = go a0 x0
1472 where go a 0 = a
1473 go a x = go (a + 1) (x .&. (x-1))
1474 {-# INLINE bitcount #-}
1475
1476
1477 {--------------------------------------------------------------------
1478 Utilities
1479 --------------------------------------------------------------------}
1480 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1481 foldlStrict f = go
1482 where
1483 go z [] = z
1484 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1485 {-# INLINE foldlStrict #-}