Switch to in-order splitRoot. Provide the same for IntSet/IntMap.
[packages/containers.git] / Data / IntSet / Base.hs
1 {-# LANGUAGE CPP #-}
2 #if __GLASGOW_HASKELL__
3 {-# LANGUAGE MagicHash, BangPatterns, DeriveDataTypeable, StandaloneDeriving #-}
4 #endif
5 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
6 {-# LANGUAGE Trustworthy #-}
7 #endif
8 -----------------------------------------------------------------------------
9 -- |
10 -- Module : Data.IntSet.Base
11 -- Copyright : (c) Daan Leijen 2002
12 -- (c) Joachim Breitner 2011
13 -- License : BSD-style
14 -- Maintainer : libraries@haskell.org
15 -- Stability : provisional
16 -- Portability : portable
17 --
18 -- An efficient implementation of integer sets.
19 --
20 -- These modules are intended to be imported qualified, to avoid name
21 -- clashes with Prelude functions, e.g.
22 --
23 -- > import Data.IntSet (IntSet)
24 -- > import qualified Data.IntSet as IntSet
25 --
26 -- The implementation is based on /big-endian patricia trees/. This data
27 -- structure performs especially well on binary operations like 'union'
28 -- and 'intersection'. However, my benchmarks show that it is also
29 -- (much) faster on insertions and deletions when compared to a generic
30 -- size-balanced set implementation (see "Data.Set").
31 --
32 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
33 -- Workshop on ML, September 1998, pages 77-86,
34 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
35 --
36 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
37 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
38 -- October 1968, pages 514-534.
39 --
40 -- Additionally, this implementation places bitmaps in the leaves of the tree.
41 -- Their size is the natural size of a machine word (32 or 64 bits) and greatly
42 -- reduce memory footprint and execution times for dense sets, e.g. sets where
43 -- it is likely that many values lie close to each other. The asymptotics are
44 -- not affected by this optimization.
45 --
46 -- Many operations have a worst-case complexity of /O(min(n,W))/.
47 -- This means that the operation can become linear in the number of
48 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
49 -- (32 or 64).
50 -----------------------------------------------------------------------------
51
52 -- [Note: INLINE bit fiddling]
53 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
54 -- It is essential that the bit fiddling functions like mask, zero, branchMask
55 -- etc are inlined. If they do not, the memory allocation skyrockets. The GHC
56 -- usually gets it right, but it is disastrous if it does not. Therefore we
57 -- explicitly mark these functions INLINE.
58
59
60 -- [Note: Local 'go' functions and capturing]
61 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
62 -- Care must be taken when using 'go' function which captures an argument.
63 -- Sometimes (for example when the argument is passed to a data constructor,
64 -- as in insert), GHC heap-allocates more than necessary. Therefore C-- code
65 -- must be checked for increased allocation when creating and modifying such
66 -- functions.
67
68
69 -- [Note: Order of constructors]
70 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
71 -- The order of constructors of IntSet matters when considering performance.
72 -- Currently in GHC 7.0, when type has 3 constructors, they are matched from
73 -- the first to the last -- the best performance is achieved when the
74 -- constructors are ordered by frequency.
75 -- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil
76 -- improves the benchmark by circa 10%.
77
78 module Data.IntSet.Base (
79 -- * Set type
80 IntSet(..), Key -- instance Eq,Show
81
82 -- * Operators
83 , (\\)
84
85 -- * Query
86 , null
87 , size
88 , member
89 , notMember
90 , lookupLT
91 , lookupGT
92 , lookupLE
93 , lookupGE
94 , isSubsetOf
95 , isProperSubsetOf
96
97 -- * Construction
98 , empty
99 , singleton
100 , insert
101 , delete
102
103 -- * Combine
104 , union
105 , unions
106 , difference
107 , intersection
108
109 -- * Filter
110 , filter
111 , partition
112 , split
113 , splitMember
114 , splitRoot
115
116 -- * Map
117 , map
118
119 -- * Folds
120 , foldr
121 , foldl
122 -- ** Strict folds
123 , foldr'
124 , foldl'
125 -- ** Legacy folds
126 , fold
127
128 -- * Min\/Max
129 , findMin
130 , findMax
131 , deleteMin
132 , deleteMax
133 , deleteFindMin
134 , deleteFindMax
135 , maxView
136 , minView
137
138 -- * Conversion
139
140 -- ** List
141 , elems
142 , toList
143 , fromList
144
145 -- ** Ordered list
146 , toAscList
147 , toDescList
148 , fromAscList
149 , fromDistinctAscList
150
151 -- * Debugging
152 , showTree
153 , showTreeWith
154
155 -- * Internals
156 , match
157 , suffixBitMask
158 , prefixBitMask
159 , bitmapOf
160 ) where
161
162 -- We want to be able to compile without cabal. Nevertheless
163 -- #if defined(MIN_VERSION_base) && MIN_VERSION_base(4,5,0)
164 -- does not work, because if MIN_VERSION_base is undefined,
165 -- the last condition is syntactically wrong.
166 #define MIN_VERSION_base_4_5_0 0
167 #ifdef MIN_VERSION_base
168 #if MIN_VERSION_base(4,5,0)
169 #undef MIN_VERSION_base_4_5_0
170 #define MIN_VERSION_base_4_5_0 1
171 #endif
172 #endif
173
174 #define MIN_VERSION_base_4_7_0 0
175 #ifdef MIN_VERSION_base
176 #if MIN_VERSION_base(4,7,0)
177 #undef MIN_VERSION_base_4_7_0
178 #define MIN_VERSION_base_4_7_0 1
179 #endif
180 #endif
181
182 import Control.DeepSeq (NFData)
183 import Data.Bits
184 import qualified Data.List as List
185 import Data.Maybe (fromMaybe)
186 import Data.Monoid (Monoid(..))
187 import Data.Typeable
188 import Data.Word (Word)
189 import Prelude hiding (filter, foldr, foldl, null, map)
190
191 import Data.BitUtil
192 import Data.StrictPair
193
194 #if __GLASGOW_HASKELL__
195 import Data.Data (Data(..), Constr, mkConstr, constrIndex, Fixity(Prefix), DataType, mkDataType)
196 import Text.Read
197 #endif
198
199 #if __GLASGOW_HASKELL__
200 import GHC.Exts (Int(..), build)
201 import GHC.Prim (indexInt8OffAddr#)
202 #endif
203
204 -- On GHC, include MachDeps.h to get WORD_SIZE_IN_BITS macro.
205 #if defined(__GLASGOW_HASKELL__)
206 # include "MachDeps.h"
207 #endif
208
209 -- Use macros to define strictness of functions.
210 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
211 -- We do not use BangPatterns, because they are not in any standard and we
212 -- want the compilers to be compiled by as many compilers as possible.
213 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
214 #define STRICT_2_OF_2(fn) fn _ arg | arg `seq` False = undefined
215 #define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
216 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
217
218 infixl 9 \\{-This comment teaches CPP correct behaviour -}
219
220 -- A "Nat" is a natural machine word (an unsigned Int)
221 type Nat = Word
222
223 natFromInt :: Int -> Nat
224 natFromInt i = fromIntegral i
225 {-# INLINE natFromInt #-}
226
227 intFromNat :: Nat -> Int
228 intFromNat w = fromIntegral w
229 {-# INLINE intFromNat #-}
230
231 {--------------------------------------------------------------------
232 Operators
233 --------------------------------------------------------------------}
234 -- | /O(n+m)/. See 'difference'.
235 (\\) :: IntSet -> IntSet -> IntSet
236 m1 \\ m2 = difference m1 m2
237
238 {--------------------------------------------------------------------
239 Types
240 --------------------------------------------------------------------}
241
242 -- | A set of integers.
243
244 -- See Note: Order of constructors
245 data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
246 -- Invariant: Nil is never found as a child of Bin.
247 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
248 -- two elements of the set differ.
249 -- Invariant: Prefix is the common high-order bits that all elements share to
250 -- the left of the Mask bit.
251 -- Invariant: In Bin prefix mask left right, left consists of the elements that
252 -- don't have the mask bit set; right is all the elements that do.
253 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
254 -- Invariant: The Prefix is zero for all but the last 5 (on 32 bit arches) or 6
255 -- bits (on 64 bit arches). The values of the map represented by a tip
256 -- are the prefix plus the indices of the set bits in the bit map.
257 | Nil
258
259 -- A number stored in a set is stored as
260 -- * Prefix (all but last 5-6 bits) and
261 -- * BitMap (last 5-6 bits stored as a bitmask)
262 -- Last 5-6 bits are called a Suffix.
263
264 type Prefix = Int
265 type Mask = Int
266 type BitMap = Word
267 type Key = Int
268
269 instance Monoid IntSet where
270 mempty = empty
271 mappend = union
272 mconcat = unions
273
274 #if __GLASGOW_HASKELL__
275
276 {--------------------------------------------------------------------
277 A Data instance
278 --------------------------------------------------------------------}
279
280 -- This instance preserves data abstraction at the cost of inefficiency.
281 -- We provide limited reflection services for the sake of data abstraction.
282
283 instance Data IntSet where
284 gfoldl f z is = z fromList `f` (toList is)
285 toConstr _ = fromListConstr
286 gunfold k z c = case constrIndex c of
287 1 -> k (z fromList)
288 _ -> error "gunfold"
289 dataTypeOf _ = intSetDataType
290
291 fromListConstr :: Constr
292 fromListConstr = mkConstr intSetDataType "fromList" [] Prefix
293
294 intSetDataType :: DataType
295 intSetDataType = mkDataType "Data.IntSet.Base.IntSet" [fromListConstr]
296
297 #endif
298
299 {--------------------------------------------------------------------
300 Query
301 --------------------------------------------------------------------}
302 -- | /O(1)/. Is the set empty?
303 null :: IntSet -> Bool
304 null Nil = True
305 null _ = False
306 {-# INLINE null #-}
307
308 -- | /O(n)/. Cardinality of the set.
309 size :: IntSet -> Int
310 size t
311 = case t of
312 Bin _ _ l r -> size l + size r
313 Tip _ bm -> bitcount 0 bm
314 Nil -> 0
315
316 -- | /O(min(n,W))/. Is the value a member of the set?
317
318 -- See Note: Local 'go' functions and capturing]
319 member :: Key -> IntSet -> Bool
320 member x = x `seq` go
321 where
322 go (Bin p m l r)
323 | nomatch x p m = False
324 | zero x m = go l
325 | otherwise = go r
326 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0
327 go Nil = False
328
329 -- | /O(min(n,W))/. Is the element not in the set?
330 notMember :: Key -> IntSet -> Bool
331 notMember k = not . member k
332
333 -- | /O(log n)/. Find largest element smaller than the given one.
334 --
335 -- > lookupLT 3 (fromList [3, 5]) == Nothing
336 -- > lookupLT 5 (fromList [3, 5]) == Just 3
337
338 -- See Note: Local 'go' functions and capturing.
339 lookupLT :: Key -> IntSet -> Maybe Key
340 lookupLT x t = x `seq` case t of
341 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
342 _ -> go Nil t
343 where
344 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
345 | zero x m = go def l
346 | otherwise = go l r
347 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
348 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT
349 | otherwise = unsafeFindMax def
350 where maskLT = (bitmapOf x - 1) .&. bm
351 go def Nil = unsafeFindMax def
352
353
354 -- | /O(log n)/. Find smallest element greater than the given one.
355 --
356 -- > lookupGT 4 (fromList [3, 5]) == Just 5
357 -- > lookupGT 5 (fromList [3, 5]) == Nothing
358
359 -- See Note: Local 'go' functions and capturing.
360 lookupGT :: Key -> IntSet -> Maybe Key
361 lookupGT x t = x `seq` case t of
362 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
363 _ -> go Nil t
364 where
365 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
366 | zero x m = go r l
367 | otherwise = go def r
368 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
369 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT
370 | otherwise = unsafeFindMin def
371 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm
372 go def Nil = unsafeFindMin def
373
374
375 -- | /O(log n)/. Find largest element smaller or equal to the given one.
376 --
377 -- > lookupLE 2 (fromList [3, 5]) == Nothing
378 -- > lookupLE 4 (fromList [3, 5]) == Just 3
379 -- > lookupLE 5 (fromList [3, 5]) == Just 5
380
381 -- See Note: Local 'go' functions and capturing.
382 lookupLE :: Key -> IntSet -> Maybe Key
383 lookupLE x t = x `seq` case t of
384 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
385 _ -> go Nil t
386 where
387 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
388 | zero x m = go def l
389 | otherwise = go l r
390 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
391 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE
392 | otherwise = unsafeFindMax def
393 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm
394 go def Nil = unsafeFindMax def
395
396
397 -- | /O(log n)/. Find smallest element greater or equal to the given one.
398 --
399 -- > lookupGE 3 (fromList [3, 5]) == Just 3
400 -- > lookupGE 4 (fromList [3, 5]) == Just 5
401 -- > lookupGE 6 (fromList [3, 5]) == Nothing
402
403 -- See Note: Local 'go' functions and capturing.
404 lookupGE :: Key -> IntSet -> Maybe Key
405 lookupGE x t = x `seq` case t of
406 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
407 _ -> go Nil t
408 where
409 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
410 | zero x m = go r l
411 | otherwise = go def r
412 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
413 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE
414 | otherwise = unsafeFindMin def
415 where maskGE = (- (bitmapOf x)) .&. bm
416 go def Nil = unsafeFindMin def
417
418
419
420 -- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is
421 -- given, it has m > 0.
422 unsafeFindMin :: IntSet -> Maybe Key
423 unsafeFindMin Nil = Nothing
424 unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm
425 unsafeFindMin (Bin _ _ l _) = unsafeFindMin l
426
427 -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is
428 -- given, it has m > 0.
429 unsafeFindMax :: IntSet -> Maybe Key
430 unsafeFindMax Nil = Nothing
431 unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm
432 unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r
433
434 {--------------------------------------------------------------------
435 Construction
436 --------------------------------------------------------------------}
437 -- | /O(1)/. The empty set.
438 empty :: IntSet
439 empty
440 = Nil
441 {-# INLINE empty #-}
442
443 -- | /O(1)/. A set of one element.
444 singleton :: Key -> IntSet
445 singleton x
446 = Tip (prefixOf x) (bitmapOf x)
447 {-# INLINE singleton #-}
448
449 {--------------------------------------------------------------------
450 Insert
451 --------------------------------------------------------------------}
452 -- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for
453 -- IntSets.
454 insert :: Key -> IntSet -> IntSet
455 insert x = x `seq` insertBM (prefixOf x) (bitmapOf x)
456
457 -- Helper function for insert and union.
458 insertBM :: Prefix -> BitMap -> IntSet -> IntSet
459 insertBM kx bm t = kx `seq` bm `seq`
460 case t of
461 Bin p m l r
462 | nomatch kx p m -> link kx (Tip kx bm) p t
463 | zero kx m -> Bin p m (insertBM kx bm l) r
464 | otherwise -> Bin p m l (insertBM kx bm r)
465 Tip kx' bm'
466 | kx' == kx -> Tip kx' (bm .|. bm')
467 | otherwise -> link kx (Tip kx bm) kx' t
468 Nil -> Tip kx bm
469
470 -- | /O(min(n,W))/. Delete a value in the set. Returns the
471 -- original set when the value was not present.
472 delete :: Key -> IntSet -> IntSet
473 delete x = x `seq` deleteBM (prefixOf x) (bitmapOf x)
474
475 -- Deletes all values mentioned in the BitMap from the set.
476 -- Helper function for delete and difference.
477 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
478 deleteBM kx bm t = kx `seq` bm `seq`
479 case t of
480 Bin p m l r
481 | nomatch kx p m -> t
482 | zero kx m -> bin p m (deleteBM kx bm l) r
483 | otherwise -> bin p m l (deleteBM kx bm r)
484 Tip kx' bm'
485 | kx' == kx -> tip kx (bm' .&. complement bm)
486 | otherwise -> t
487 Nil -> Nil
488
489
490 {--------------------------------------------------------------------
491 Union
492 --------------------------------------------------------------------}
493 -- | The union of a list of sets.
494 unions :: [IntSet] -> IntSet
495 unions xs
496 = foldlStrict union empty xs
497
498
499 -- | /O(n+m)/. The union of two sets.
500 union :: IntSet -> IntSet -> IntSet
501 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
502 | shorter m1 m2 = union1
503 | shorter m2 m1 = union2
504 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
505 | otherwise = link p1 t1 p2 t2
506 where
507 union1 | nomatch p2 p1 m1 = link p1 t1 p2 t2
508 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
509 | otherwise = Bin p1 m1 l1 (union r1 t2)
510
511 union2 | nomatch p1 p2 m2 = link p1 t1 p2 t2
512 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
513 | otherwise = Bin p2 m2 l2 (union t1 r2)
514
515 union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t
516 union t@(Bin _ _ _ _) Nil = t
517 union (Tip kx bm) t = insertBM kx bm t
518 union Nil t = t
519
520
521 {--------------------------------------------------------------------
522 Difference
523 --------------------------------------------------------------------}
524 -- | /O(n+m)/. Difference between two sets.
525 difference :: IntSet -> IntSet -> IntSet
526 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
527 | shorter m1 m2 = difference1
528 | shorter m2 m1 = difference2
529 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
530 | otherwise = t1
531 where
532 difference1 | nomatch p2 p1 m1 = t1
533 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
534 | otherwise = bin p1 m1 l1 (difference r1 t2)
535
536 difference2 | nomatch p1 p2 m2 = t1
537 | zero p1 m2 = difference t1 l2
538 | otherwise = difference t1 r2
539
540 difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t
541 difference t@(Bin _ _ _ _) Nil = t
542
543 difference t1@(Tip kx bm) t2 = differenceTip t2
544 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1
545 | zero kx m2 = differenceTip l2
546 | otherwise = differenceTip r2
547 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2)
548 | otherwise = t1
549 differenceTip Nil = t1
550
551 difference Nil _ = Nil
552
553
554
555 {--------------------------------------------------------------------
556 Intersection
557 --------------------------------------------------------------------}
558 -- | /O(n+m)/. The intersection of two sets.
559 intersection :: IntSet -> IntSet -> IntSet
560 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
561 | shorter m1 m2 = intersection1
562 | shorter m2 m1 = intersection2
563 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
564 | otherwise = Nil
565 where
566 intersection1 | nomatch p2 p1 m1 = Nil
567 | zero p2 m1 = intersection l1 t2
568 | otherwise = intersection r1 t2
569
570 intersection2 | nomatch p1 p2 m2 = Nil
571 | zero p1 m2 = intersection t1 l2
572 | otherwise = intersection t1 r2
573
574 intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1
575 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil
576 | zero kx2 m1 = intersectBM l1
577 | otherwise = intersectBM r1
578 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
579 | otherwise = Nil
580 intersectBM Nil = Nil
581
582 intersection (Bin _ _ _ _) Nil = Nil
583
584 intersection (Tip kx1 bm1) t2 = intersectBM t2
585 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil
586 | zero kx1 m2 = intersectBM l2
587 | otherwise = intersectBM r2
588 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
589 | otherwise = Nil
590 intersectBM Nil = Nil
591
592 intersection Nil _ = Nil
593
594 {--------------------------------------------------------------------
595 Subset
596 --------------------------------------------------------------------}
597 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
598 isProperSubsetOf :: IntSet -> IntSet -> Bool
599 isProperSubsetOf t1 t2
600 = case subsetCmp t1 t2 of
601 LT -> True
602 _ -> False
603
604 subsetCmp :: IntSet -> IntSet -> Ordering
605 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
606 | shorter m1 m2 = GT
607 | shorter m2 m1 = case subsetCmpLt of
608 GT -> GT
609 _ -> LT
610 | p1 == p2 = subsetCmpEq
611 | otherwise = GT -- disjoint
612 where
613 subsetCmpLt | nomatch p1 p2 m2 = GT
614 | zero p1 m2 = subsetCmp t1 l2
615 | otherwise = subsetCmp t1 r2
616 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
617 (GT,_ ) -> GT
618 (_ ,GT) -> GT
619 (EQ,EQ) -> EQ
620 _ -> LT
621
622 subsetCmp (Bin _ _ _ _) _ = GT
623 subsetCmp (Tip kx1 bm1) (Tip kx2 bm2)
624 | kx1 /= kx2 = GT -- disjoint
625 | bm1 == bm2 = EQ
626 | bm1 .&. complement bm2 == 0 = LT
627 | otherwise = GT
628 subsetCmp t1@(Tip kx _) (Bin p m l r)
629 | nomatch kx p m = GT
630 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT
631 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT
632 subsetCmp (Tip _ _) Nil = GT -- disjoint
633 subsetCmp Nil Nil = EQ
634 subsetCmp Nil _ = LT
635
636 -- | /O(n+m)/. Is this a subset?
637 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
638
639 isSubsetOf :: IntSet -> IntSet -> Bool
640 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
641 | shorter m1 m2 = False
642 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
643 else isSubsetOf t1 r2)
644 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
645 isSubsetOf (Bin _ _ _ _) _ = False
646 isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0
647 isSubsetOf t1@(Tip kx _) (Bin p m l r)
648 | nomatch kx p m = False
649 | zero kx m = isSubsetOf t1 l
650 | otherwise = isSubsetOf t1 r
651 isSubsetOf (Tip _ _) Nil = False
652 isSubsetOf Nil _ = True
653
654
655 {--------------------------------------------------------------------
656 Filter
657 --------------------------------------------------------------------}
658 -- | /O(n)/. Filter all elements that satisfy some predicate.
659 filter :: (Key -> Bool) -> IntSet -> IntSet
660 filter predicate t
661 = case t of
662 Bin p m l r
663 -> bin p m (filter predicate l) (filter predicate r)
664 Tip kx bm
665 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm)
666 Nil -> Nil
667 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
668 | otherwise = bm
669 {-# INLINE bitPred #-}
670
671 -- | /O(n)/. partition the set according to some predicate.
672 partition :: (Key -> Bool) -> IntSet -> (IntSet,IntSet)
673 partition predicate0 t0 = toPair $ go predicate0 t0
674 where
675 go predicate t
676 = case t of
677 Bin p m l r
678 -> let (l1 :*: l2) = go predicate l
679 (r1 :*: r2) = go predicate r
680 in bin p m l1 r1 :*: bin p m l2 r2
681 Tip kx bm
682 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm
683 in tip kx bm1 :*: tip kx (bm `xor` bm1)
684 Nil -> (Nil :*: Nil)
685 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
686 | otherwise = bm
687 {-# INLINE bitPred #-}
688
689
690 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
691 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
692 -- comprises the elements of @set@ greater than @x@.
693 --
694 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
695 split :: Key -> IntSet -> (IntSet,IntSet)
696 split x t =
697 case t of
698 Bin _ m l r
699 | m < 0 -> if x >= 0 -- handle negative numbers.
700 then case go x l of (lt :*: gt) -> let lt' = union lt r
701 in lt' `seq` (lt', gt)
702 else case go x r of (lt :*: gt) -> let gt' = union gt l
703 in gt' `seq` (lt, gt')
704 _ -> case go x t of
705 (lt :*: gt) -> (lt, gt)
706 where
707 go !x' t'@(Bin p m l r)
708 | match x' p m = if zero x' m
709 then case go x' l of
710 (lt :*: gt) -> lt :*: union gt r
711 else case go x' r of
712 (lt :*: gt) -> union lt l :*: gt
713 | otherwise = if x' < p then (Nil :*: t')
714 else (t' :*: Nil)
715 go x' t'@(Tip kx' bm)
716 | kx' > x' = (Nil :*: t')
717 -- equivalent to kx' > prefixOf x'
718 | kx' < prefixOf x' = (t' :*: Nil)
719 | otherwise = tip kx' (bm .&. lowerBitmap) :*: tip kx' (bm .&. higherBitmap)
720 where lowerBitmap = bitmapOf x' - 1
721 higherBitmap = complement (lowerBitmap + bitmapOf x')
722 go _ Nil = (Nil :*: Nil)
723
724 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
725 -- element was found in the original set.
726 splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet)
727 splitMember x t =
728 case t of
729 Bin _ m l r | m < 0 -> if x >= 0
730 then case go x l of
731 (lt, fnd, gt) -> let lt' = union lt r
732 in lt' `seq` (lt', fnd, gt)
733 else case go x r of
734 (lt, fnd, gt) -> let gt' = union gt l
735 in gt' `seq` (lt, fnd, gt')
736 _ -> go x t
737 where
738 go x' t'@(Bin p m l r)
739 | match x' p m = if zero x' m
740 then case go x' l of
741 (lt, fnd, gt) -> (lt, fnd, union gt r)
742 else case go x' r of
743 (lt, fnd, gt) -> (union lt l, fnd, gt)
744 | otherwise = if x' < p then (Nil, False, t') else (t', False, Nil)
745 go x' t'@(Tip kx' bm)
746 | kx' > x' = (Nil, False, t')
747 -- equivalent to kx' > prefixOf x'
748 | kx' < prefixOf x' = (t', False, Nil)
749 | otherwise = let lt = tip kx' (bm .&. lowerBitmap)
750 found = (bm .&. bitmapOfx') /= 0
751 gt = tip kx' (bm .&. higherBitmap)
752 in lt `seq` found `seq` gt `seq` (lt, found, gt)
753 where bitmapOfx' = bitmapOf x'
754 lowerBitmap = bitmapOfx' - 1
755 higherBitmap = complement (lowerBitmap + bitmapOfx')
756 go _ Nil = (Nil, False, Nil)
757
758
759 {----------------------------------------------------------------------
760 Min/Max
761 ----------------------------------------------------------------------}
762
763 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
764 -- stripped of that element, or 'Nothing' if passed an empty set.
765 maxView :: IntSet -> Maybe (Key, IntSet)
766 maxView t =
767 case t of Nil -> Nothing
768 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r)
769 _ -> Just (go t)
770 where
771 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r')
772 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
773 go Nil = error "maxView Nil"
774
775 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
776 -- stripped of that element, or 'Nothing' if passed an empty set.
777 minView :: IntSet -> Maybe (Key, IntSet)
778 minView t =
779 case t of Nil -> Nothing
780 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r')
781 _ -> Just (go t)
782 where
783 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r)
784 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
785 go Nil = error "minView Nil"
786
787 -- | /O(min(n,W))/. Delete and find the minimal element.
788 --
789 -- > deleteFindMin set = (findMin set, deleteMin set)
790 deleteFindMin :: IntSet -> (Key, IntSet)
791 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
792
793 -- | /O(min(n,W))/. Delete and find the maximal element.
794 --
795 -- > deleteFindMax set = (findMax set, deleteMax set)
796 deleteFindMax :: IntSet -> (Key, IntSet)
797 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
798
799
800 -- | /O(min(n,W))/. The minimal element of the set.
801 findMin :: IntSet -> Key
802 findMin Nil = error "findMin: empty set has no minimal element"
803 findMin (Tip kx bm) = kx + lowestBitSet bm
804 findMin (Bin _ m l r)
805 | m < 0 = find r
806 | otherwise = find l
807 where find (Tip kx bm) = kx + lowestBitSet bm
808 find (Bin _ _ l' _) = find l'
809 find Nil = error "findMin Nil"
810
811 -- | /O(min(n,W))/. The maximal element of a set.
812 findMax :: IntSet -> Key
813 findMax Nil = error "findMax: empty set has no maximal element"
814 findMax (Tip kx bm) = kx + highestBitSet bm
815 findMax (Bin _ m l r)
816 | m < 0 = find l
817 | otherwise = find r
818 where find (Tip kx bm) = kx + highestBitSet bm
819 find (Bin _ _ _ r') = find r'
820 find Nil = error "findMax Nil"
821
822
823 -- | /O(min(n,W))/. Delete the minimal element. Returns an empty set if the set is empty.
824 --
825 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
826 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
827 deleteMin :: IntSet -> IntSet
828 deleteMin = maybe Nil snd . minView
829
830 -- | /O(min(n,W))/. Delete the maximal element. Returns an empty set if the set is empty.
831 --
832 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
833 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
834 deleteMax :: IntSet -> IntSet
835 deleteMax = maybe Nil snd . maxView
836
837 {----------------------------------------------------------------------
838 Map
839 ----------------------------------------------------------------------}
840
841 -- | /O(n*min(n,W))/.
842 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
843 --
844 -- It's worth noting that the size of the result may be smaller if,
845 -- for some @(x,y)@, @x \/= y && f x == f y@
846
847 map :: (Key -> Key) -> IntSet -> IntSet
848 map f = fromList . List.map f . toList
849
850 {--------------------------------------------------------------------
851 Fold
852 --------------------------------------------------------------------}
853 -- | /O(n)/. Fold the elements in the set using the given right-associative
854 -- binary operator. This function is an equivalent of 'foldr' and is present
855 -- for compatibility only.
856 --
857 -- /Please note that fold will be deprecated in the future and removed./
858 fold :: (Key -> b -> b) -> b -> IntSet -> b
859 fold = foldr
860 {-# INLINE fold #-}
861
862 -- | /O(n)/. Fold the elements in the set using the given right-associative
863 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
864 --
865 -- For example,
866 --
867 -- > toAscList set = foldr (:) [] set
868 foldr :: (Key -> b -> b) -> b -> IntSet -> b
869 foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
870 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
871 | otherwise -> go (go z r) l
872 _ -> go z t
873 where
874 go z' Nil = z'
875 go z' (Tip kx bm) = foldrBits kx f z' bm
876 go z' (Bin _ _ l r) = go (go z' r) l
877 {-# INLINE foldr #-}
878
879 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
880 -- evaluated before using the result in the next application. This
881 -- function is strict in the starting value.
882 foldr' :: (Key -> b -> b) -> b -> IntSet -> b
883 foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
884 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
885 | otherwise -> go (go z r) l
886 _ -> go z t
887 where
888 STRICT_1_OF_2(go)
889 go z' Nil = z'
890 go z' (Tip kx bm) = foldr'Bits kx f z' bm
891 go z' (Bin _ _ l r) = go (go z' r) l
892 {-# INLINE foldr' #-}
893
894 -- | /O(n)/. Fold the elements in the set using the given left-associative
895 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
896 --
897 -- For example,
898 --
899 -- > toDescList set = foldl (flip (:)) [] set
900 foldl :: (a -> Key -> a) -> a -> IntSet -> a
901 foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
902 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
903 | otherwise -> go (go z l) r
904 _ -> go z t
905 where
906 STRICT_1_OF_2(go)
907 go z' Nil = z'
908 go z' (Tip kx bm) = foldlBits kx f z' bm
909 go z' (Bin _ _ l r) = go (go z' l) r
910 {-# INLINE foldl #-}
911
912 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
913 -- evaluated before using the result in the next application. This
914 -- function is strict in the starting value.
915 foldl' :: (a -> Key -> a) -> a -> IntSet -> a
916 foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
917 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
918 | otherwise -> go (go z l) r
919 _ -> go z t
920 where
921 STRICT_1_OF_2(go)
922 go z' Nil = z'
923 go z' (Tip kx bm) = foldl'Bits kx f z' bm
924 go z' (Bin _ _ l r) = go (go z' l) r
925 {-# INLINE foldl' #-}
926
927 {--------------------------------------------------------------------
928 List variations
929 --------------------------------------------------------------------}
930 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
931 -- Subject to list fusion.
932 elems :: IntSet -> [Key]
933 elems
934 = toAscList
935
936 {--------------------------------------------------------------------
937 Lists
938 --------------------------------------------------------------------}
939 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
940 toList :: IntSet -> [Key]
941 toList
942 = toAscList
943
944 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list
945 -- fusion.
946 toAscList :: IntSet -> [Key]
947 toAscList = foldr (:) []
948
949 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
950 -- fusion.
951 toDescList :: IntSet -> [Key]
952 toDescList = foldl (flip (:)) []
953
954 -- List fusion for the list generating functions.
955 #if __GLASGOW_HASKELL__
956 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
957 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
958 foldrFB :: (Key -> b -> b) -> b -> IntSet -> b
959 foldrFB = foldr
960 {-# INLINE[0] foldrFB #-}
961 foldlFB :: (a -> Key -> a) -> a -> IntSet -> a
962 foldlFB = foldl
963 {-# INLINE[0] foldlFB #-}
964
965 -- Inline elems and toList, so that we need to fuse only toAscList.
966 {-# INLINE elems #-}
967 {-# INLINE toList #-}
968
969 -- The fusion is enabled up to phase 2 included. If it does not succeed,
970 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
971 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
972 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
973 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
974 -- before phase 0, otherwise the fusion rules would not fire at all.
975 {-# NOINLINE[0] toAscList #-}
976 {-# NOINLINE[0] toDescList #-}
977 {-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
978 {-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
979 {-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
980 {-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
981 #endif
982
983
984 -- | /O(n*min(n,W))/. Create a set from a list of integers.
985 fromList :: [Key] -> IntSet
986 fromList xs
987 = foldlStrict ins empty xs
988 where
989 ins t x = insert x t
990
991 -- | /O(n)/. Build a set from an ascending list of elements.
992 -- /The precondition (input list is ascending) is not checked./
993 fromAscList :: [Key] -> IntSet
994 fromAscList [] = Nil
995 fromAscList (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
996 where
997 combineEq x' [] = [x']
998 combineEq x' (x:xs)
999 | x==x' = combineEq x' xs
1000 | otherwise = x' : combineEq x xs
1001
1002 -- | /O(n)/. Build a set from an ascending list of distinct elements.
1003 -- /The precondition (input list is strictly ascending) is not checked./
1004 fromDistinctAscList :: [Key] -> IntSet
1005 fromDistinctAscList [] = Nil
1006 fromDistinctAscList (z0 : zs0) = work (prefixOf z0) (bitmapOf z0) zs0 Nada
1007 where
1008 -- 'work' accumulates all values that go into one tip, before passing this Tip
1009 -- to 'reduce'
1010 work kx bm [] stk = finish kx (Tip kx bm) stk
1011 work kx bm (z:zs) stk | kx == prefixOf z = work kx (bm .|. bitmapOf z) zs stk
1012 work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
1013
1014 reduce z zs _ px tx Nada = work (prefixOf z) (bitmapOf z) zs (Push px tx Nada)
1015 reduce z zs m px tx stk@(Push py ty stk') =
1016 let mxy = branchMask px py
1017 pxy = mask px mxy
1018 in if shorter m mxy
1019 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
1020 else work (prefixOf z) (bitmapOf z) zs (Push px tx stk)
1021
1022 finish _ t Nada = t
1023 finish px tx (Push py ty stk) = finish p (link py ty px tx) stk
1024 where m = branchMask px py
1025 p = mask px m
1026
1027 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
1028
1029
1030 {--------------------------------------------------------------------
1031 Eq
1032 --------------------------------------------------------------------}
1033 instance Eq IntSet where
1034 t1 == t2 = equal t1 t2
1035 t1 /= t2 = nequal t1 t2
1036
1037 equal :: IntSet -> IntSet -> Bool
1038 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1039 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
1040 equal (Tip kx1 bm1) (Tip kx2 bm2)
1041 = kx1 == kx2 && bm1 == bm2
1042 equal Nil Nil = True
1043 equal _ _ = False
1044
1045 nequal :: IntSet -> IntSet -> Bool
1046 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1047 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
1048 nequal (Tip kx1 bm1) (Tip kx2 bm2)
1049 = kx1 /= kx2 || bm1 /= bm2
1050 nequal Nil Nil = False
1051 nequal _ _ = True
1052
1053 {--------------------------------------------------------------------
1054 Ord
1055 --------------------------------------------------------------------}
1056
1057 instance Ord IntSet where
1058 compare s1 s2 = compare (toAscList s1) (toAscList s2)
1059 -- tentative implementation. See if more efficient exists.
1060
1061 {--------------------------------------------------------------------
1062 Show
1063 --------------------------------------------------------------------}
1064 instance Show IntSet where
1065 showsPrec p xs = showParen (p > 10) $
1066 showString "fromList " . shows (toList xs)
1067
1068 {--------------------------------------------------------------------
1069 Read
1070 --------------------------------------------------------------------}
1071 instance Read IntSet where
1072 #ifdef __GLASGOW_HASKELL__
1073 readPrec = parens $ prec 10 $ do
1074 Ident "fromList" <- lexP
1075 xs <- readPrec
1076 return (fromList xs)
1077
1078 readListPrec = readListPrecDefault
1079 #else
1080 readsPrec p = readParen (p > 10) $ \ r -> do
1081 ("fromList",s) <- lex r
1082 (xs,t) <- reads s
1083 return (fromList xs,t)
1084 #endif
1085
1086 {--------------------------------------------------------------------
1087 Typeable
1088 --------------------------------------------------------------------}
1089
1090 #include "Typeable.h"
1091 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
1092
1093 {--------------------------------------------------------------------
1094 NFData
1095 --------------------------------------------------------------------}
1096
1097 -- The IntSet constructors consist only of strict fields of Ints and
1098 -- IntSets, thus the default NFData instance which evaluates to whnf
1099 -- should suffice
1100 instance NFData IntSet
1101
1102 {--------------------------------------------------------------------
1103 Debugging
1104 --------------------------------------------------------------------}
1105 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1106 -- in a compressed, hanging format.
1107 showTree :: IntSet -> String
1108 showTree s
1109 = showTreeWith True False s
1110
1111
1112 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
1113 the tree that implements the set. If @hang@ is
1114 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
1115 @wide@ is 'True', an extra wide version is shown.
1116 -}
1117 showTreeWith :: Bool -> Bool -> IntSet -> String
1118 showTreeWith hang wide t
1119 | hang = (showsTreeHang wide [] t) ""
1120 | otherwise = (showsTree wide [] [] t) ""
1121
1122 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
1123 showsTree wide lbars rbars t
1124 = case t of
1125 Bin p m l r
1126 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1127 showWide wide rbars .
1128 showsBars lbars . showString (showBin p m) . showString "\n" .
1129 showWide wide lbars .
1130 showsTree wide (withEmpty lbars) (withBar lbars) l
1131 Tip kx bm
1132 -> showsBars lbars . showString " " . shows kx . showString " + " .
1133 showsBitMap bm . showString "\n"
1134 Nil -> showsBars lbars . showString "|\n"
1135
1136 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
1137 showsTreeHang wide bars t
1138 = case t of
1139 Bin p m l r
1140 -> showsBars bars . showString (showBin p m) . showString "\n" .
1141 showWide wide bars .
1142 showsTreeHang wide (withBar bars) l .
1143 showWide wide bars .
1144 showsTreeHang wide (withEmpty bars) r
1145 Tip kx bm
1146 -> showsBars bars . showString " " . shows kx . showString " + " .
1147 showsBitMap bm . showString "\n"
1148 Nil -> showsBars bars . showString "|\n"
1149
1150 showBin :: Prefix -> Mask -> String
1151 showBin _ _
1152 = "*" -- ++ show (p,m)
1153
1154 showWide :: Bool -> [String] -> String -> String
1155 showWide wide bars
1156 | wide = showString (concat (reverse bars)) . showString "|\n"
1157 | otherwise = id
1158
1159 showsBars :: [String] -> ShowS
1160 showsBars bars
1161 = case bars of
1162 [] -> id
1163 _ -> showString (concat (reverse (tail bars))) . showString node
1164
1165 showsBitMap :: Word -> ShowS
1166 showsBitMap = showString . showBitMap
1167
1168 showBitMap :: Word -> String
1169 showBitMap w = show $ foldrBits 0 (:) [] w
1170
1171 node :: String
1172 node = "+--"
1173
1174 withBar, withEmpty :: [String] -> [String]
1175 withBar bars = "| ":bars
1176 withEmpty bars = " ":bars
1177
1178
1179 {--------------------------------------------------------------------
1180 Helpers
1181 --------------------------------------------------------------------}
1182 {--------------------------------------------------------------------
1183 Link
1184 --------------------------------------------------------------------}
1185 link :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
1186 link p1 t1 p2 t2
1187 | zero p1 m = Bin p m t1 t2
1188 | otherwise = Bin p m t2 t1
1189 where
1190 m = branchMask p1 p2
1191 p = mask p1 m
1192 {-# INLINE link #-}
1193
1194 {--------------------------------------------------------------------
1195 @bin@ assures that we never have empty trees within a tree.
1196 --------------------------------------------------------------------}
1197 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
1198 bin _ _ l Nil = l
1199 bin _ _ Nil r = r
1200 bin p m l r = Bin p m l r
1201 {-# INLINE bin #-}
1202
1203 {--------------------------------------------------------------------
1204 @tip@ assures that we never have empty bitmaps within a tree.
1205 --------------------------------------------------------------------}
1206 tip :: Prefix -> BitMap -> IntSet
1207 tip _ 0 = Nil
1208 tip kx bm = Tip kx bm
1209 {-# INLINE tip #-}
1210
1211
1212 {----------------------------------------------------------------------
1213 Functions that generate Prefix and BitMap of a Key or a Suffix.
1214 ----------------------------------------------------------------------}
1215
1216 suffixBitMask :: Int
1217 #if MIN_VERSION_base_4_7_0
1218 suffixBitMask = finiteBitSize (undefined::Word) - 1
1219 #else
1220 suffixBitMask = bitSize (undefined::Word) - 1
1221 #endif
1222 {-# INLINE suffixBitMask #-}
1223
1224 prefixBitMask :: Int
1225 prefixBitMask = complement suffixBitMask
1226 {-# INLINE prefixBitMask #-}
1227
1228 prefixOf :: Int -> Prefix
1229 prefixOf x = x .&. prefixBitMask
1230 {-# INLINE prefixOf #-}
1231
1232 suffixOf :: Int -> Int
1233 suffixOf x = x .&. suffixBitMask
1234 {-# INLINE suffixOf #-}
1235
1236 bitmapOfSuffix :: Int -> BitMap
1237 bitmapOfSuffix s = 1 `shiftLL` s
1238 {-# INLINE bitmapOfSuffix #-}
1239
1240 bitmapOf :: Int -> BitMap
1241 bitmapOf x = bitmapOfSuffix (suffixOf x)
1242 {-# INLINE bitmapOf #-}
1243
1244
1245 {--------------------------------------------------------------------
1246 Endian independent bit twiddling
1247 --------------------------------------------------------------------}
1248 zero :: Int -> Mask -> Bool
1249 zero i m
1250 = (natFromInt i) .&. (natFromInt m) == 0
1251 {-# INLINE zero #-}
1252
1253 nomatch,match :: Int -> Prefix -> Mask -> Bool
1254 nomatch i p m
1255 = (mask i m) /= p
1256 {-# INLINE nomatch #-}
1257
1258 match i p m
1259 = (mask i m) == p
1260 {-# INLINE match #-}
1261
1262 -- Suppose a is largest such that 2^a divides 2*m.
1263 -- Then mask i m is i with the low a bits zeroed out.
1264 mask :: Int -> Mask -> Prefix
1265 mask i m
1266 = maskW (natFromInt i) (natFromInt m)
1267 {-# INLINE mask #-}
1268
1269 {--------------------------------------------------------------------
1270 Big endian operations
1271 --------------------------------------------------------------------}
1272 maskW :: Nat -> Nat -> Prefix
1273 maskW i m
1274 = intFromNat (i .&. (complement (m-1) `xor` m))
1275 {-# INLINE maskW #-}
1276
1277 shorter :: Mask -> Mask -> Bool
1278 shorter m1 m2
1279 = (natFromInt m1) > (natFromInt m2)
1280 {-# INLINE shorter #-}
1281
1282 branchMask :: Prefix -> Prefix -> Mask
1283 branchMask p1 p2
1284 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
1285 {-# INLINE branchMask #-}
1286
1287 {----------------------------------------------------------------------
1288 To get best performance, we provide fast implementations of
1289 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC.
1290 If the intel bsf and bsr instructions ever become GHC primops,
1291 this code should be reimplemented using these.
1292
1293 Performance of this code is crucial for folds, toList, filter, partition.
1294
1295 The signatures of methods in question are placed after this comment.
1296 ----------------------------------------------------------------------}
1297
1298 lowestBitSet :: Nat -> Int
1299 highestBitSet :: Nat -> Int
1300 foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1301 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1302 foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1303 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1304
1305 {-# INLINE lowestBitSet #-}
1306 {-# INLINE highestBitSet #-}
1307 {-# INLINE foldlBits #-}
1308 {-# INLINE foldl'Bits #-}
1309 {-# INLINE foldrBits #-}
1310 {-# INLINE foldr'Bits #-}
1311
1312 #if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64)
1313 {----------------------------------------------------------------------
1314 For lowestBitSet we use wordsize-dependant implementation based on
1315 multiplication and DeBrujn indeces, which was proposed by Edward Kmett
1316 <http://haskell.org/pipermail/libraries/2011-September/016749.html>
1317
1318 The core of this implementation is fast indexOfTheOnlyBit,
1319 which is given a Nat with exactly one bit set, and returns
1320 its index.
1321
1322 Lot of effort was put in these implementations, please benchmark carefully
1323 before changing this code.
1324 ----------------------------------------------------------------------}
1325
1326 indexOfTheOnlyBit :: Nat -> Int
1327 {-# INLINE indexOfTheOnlyBit #-}
1328 indexOfTheOnlyBit bitmask =
1329 I# (lsbArray `indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))
1330 where unboxInt (I# i) = i
1331 #if WORD_SIZE_IN_BITS==32
1332 magic = 0x077CB531
1333 offset = 27
1334 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"#
1335 #else
1336 magic = 0x07EDD5E59A4E28C2
1337 offset = 58
1338 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"#
1339 #endif
1340 -- The lsbArray gets inlined to every call site of indexOfTheOnlyBit.
1341 -- That cannot be easily avoided, as GHC forbids top-level Addr# literal.
1342 -- One could go around that by supplying getLsbArray :: () -> Addr# marked
1343 -- as NOINLINE. But the code size of calling it and processing the result
1344 -- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array
1345 -- is actually improvement on 32-bit and only a 8B size increase on 64-bit.
1346
1347 lowestBitMask :: Nat -> Nat
1348 lowestBitMask x = x .&. negate x
1349 {-# INLINE lowestBitMask #-}
1350
1351 -- Reverse the order of bits in the Nat.
1352 revNat :: Nat -> Nat
1353 #if WORD_SIZE_IN_BITS==32
1354 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of
1355 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of
1356 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of
1357 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of
1358 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16);
1359 #else
1360 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of
1361 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of
1362 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of
1363 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of
1364 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of
1365 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
1366 #endif
1367
1368 lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x)
1369
1370 highestBitSet x = indexOfTheOnlyBit (highestBitMask x)
1371
1372 foldlBits prefix f z bitmap = go bitmap z
1373 where go bm acc | bm == 0 = acc
1374 | otherwise = case lowestBitMask bm of
1375 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1376 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1377
1378 foldl'Bits prefix f z bitmap = go bitmap z
1379 where STRICT_2_OF_2(go)
1380 go bm acc | bm == 0 = acc
1381 | otherwise = case lowestBitMask bm of
1382 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1383 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1384
1385 foldrBits prefix f z bitmap = go (revNat bitmap) z
1386 where go bm acc | bm == 0 = acc
1387 | otherwise = case lowestBitMask bm of
1388 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1389 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1390
1391 foldr'Bits prefix f z bitmap = go (revNat bitmap) z
1392 where STRICT_2_OF_2(go)
1393 go bm acc | bm == 0 = acc
1394 | otherwise = case lowestBitMask bm of
1395 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1396 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1397
1398 #else
1399 {----------------------------------------------------------------------
1400 In general case we use logarithmic implementation of
1401 lowestBitSet and highestBitSet, which works up to bit sizes of 64.
1402
1403 Folds are linear scans.
1404 ----------------------------------------------------------------------}
1405
1406 lowestBitSet n0 =
1407 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32)
1408 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1)
1409 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2)
1410 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3)
1411 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4)
1412 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5
1413 in b6
1414
1415 highestBitSet n0 =
1416 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0)
1417 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1)
1418 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2)
1419 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3)
1420 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4)
1421 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5
1422 in b6
1423
1424 foldlBits prefix f z bm = let lb = lowestBitSet bm
1425 in go (prefix+lb) z (bm `shiftRL` lb)
1426 where STRICT_1_OF_3(go)
1427 go _ acc 0 = acc
1428 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1429 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1430
1431 foldl'Bits prefix f z bm = let lb = lowestBitSet bm
1432 in go (prefix+lb) z (bm `shiftRL` lb)
1433 where STRICT_1_OF_3(go)
1434 STRICT_2_OF_3(go)
1435 go _ acc 0 = acc
1436 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1437 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1438
1439 foldrBits prefix f z bm = let lb = lowestBitSet bm
1440 in go (prefix+lb) (bm `shiftRL` lb)
1441 where STRICT_1_OF_2(go)
1442 go _ 0 = z
1443 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1))
1444 | otherwise = go (bi + 1) (n `shiftRL` 1)
1445
1446 foldr'Bits prefix f z bm = let lb = lowestBitSet bm
1447 in go (prefix+lb) (bm `shiftRL` lb)
1448 where STRICT_1_OF_2(go)
1449 go _ 0 = z
1450 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
1451 | otherwise = go (bi + 1) (n `shiftRL` 1)
1452
1453 #endif
1454
1455 {----------------------------------------------------------------------
1456 [bitcount] as posted by David F. Place to haskell-cafe on April 11, 2006,
1457 based on the code on
1458 http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan,
1459 where the following source is given:
1460 Published in 1988, the C Programming Language 2nd Ed. (by Brian W.
1461 Kernighan and Dennis M. Ritchie) mentions this in exercise 2-9. On April
1462 19, 2006 Don Knuth pointed out to me that this method "was first published
1463 by Peter Wegner in CACM 3 (1960), 322. (Also discovered independently by
1464 Derrick Lehmer and published in 1964 in a book edited by Beckenbach.)"
1465 ----------------------------------------------------------------------}
1466
1467 bitcount :: Int -> Word -> Int
1468 #if MIN_VERSION_base_4_5_0
1469 bitcount a x = a + popCount x
1470 #else
1471 bitcount a0 x0 = go a0 x0
1472 where go a 0 = a
1473 go a x = go (a + 1) (x .&. (x-1))
1474 #endif
1475 {-# INLINE bitcount #-}
1476
1477
1478 {--------------------------------------------------------------------
1479 Utilities
1480 --------------------------------------------------------------------}
1481 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1482 foldlStrict f = go
1483 where
1484 go z [] = z
1485 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1486 {-# INLINE foldlStrict #-}
1487
1488 -- | /O(1)/. Decompose a set into pieces based on the structure of the underlying
1489 -- tree. This function is useful for consuming a set in parallel.
1490 --
1491 -- No guarantee is made as to the sizes of the pieces; an internal, but
1492 -- deterministic process determines this. However, it is guaranteed that the pieces
1493 -- returned will be in ascending order (all elements in the first subset less than all
1494 -- elements in the second, and so on).
1495 --
1496 -- Examples:
1497 --
1498 -- > splitRoot (fromList [1..120]) == [fromList [1..63],fromList [64..120]]
1499 -- > splitRoot empty == []
1500 --
1501 -- Note that the current implementation will not return more than two subsets, but
1502 -- you should not depend on this remaining the case in future versions. Also, the
1503 -- current version will not continue splitting all the way to individual singleton
1504 -- sets -- it will stop before that.
1505 splitRoot :: IntSet -> [IntSet]
1506 splitRoot orig =
1507 case orig of
1508 Nil -> []
1509 -- NOTE: we don't currently split below Tip, but we could.
1510 x@(Tip _ _) -> [x]
1511 Bin _ _ l r -> [l, r]
1512 {-# INLINE splitRoot #-}