3bc157ba1a4e11436a9e0f31d580ca1daebd43ea
[packages/containers.git] / Data / IntSet / Internal.hs
1 {-# LANGUAGE CPP #-}
2 {-# LANGUAGE BangPatterns #-}
3 #if __GLASGOW_HASKELL__
4 {-# LANGUAGE MagicHash, DeriveDataTypeable, StandaloneDeriving #-}
5 #endif
6 #if !defined(TESTING) && defined(__GLASGOW_HASKELL__)
7 {-# LANGUAGE Trustworthy #-}
8 #endif
9 #if __GLASGOW_HASKELL__ >= 708
10 {-# LANGUAGE TypeFamilies #-}
11 #endif
12
13 {-# OPTIONS_HADDOCK not-home #-}
14
15 #include "containers.h"
16
17 -----------------------------------------------------------------------------
18 -- |
19 -- Module : Data.IntSet.Internal
20 -- Copyright : (c) Daan Leijen 2002
21 -- (c) Joachim Breitner 2011
22 -- License : BSD-style
23 -- Maintainer : libraries@haskell.org
24 -- Portability : portable
25 --
26 -- = WARNING
27 --
28 -- This module is considered __internal__.
29 --
30 -- The Package Versioning Policy __does not apply__.
31 --
32 -- The contents of this module may change __in any way whatsoever__
33 -- and __without any warning__ between minor versions of this package.
34 --
35 -- Authors importing this module are expected to track development
36 -- closely.
37 --
38 -- = Description
39 --
40 -- An efficient implementation of integer sets.
41 --
42 -- These modules are intended to be imported qualified, to avoid name
43 -- clashes with Prelude functions, e.g.
44 --
45 -- > import Data.IntSet (IntSet)
46 -- > import qualified Data.IntSet as IntSet
47 --
48 -- The implementation is based on /big-endian patricia trees/. This data
49 -- structure performs especially well on binary operations like 'union'
50 -- and 'intersection'. However, my benchmarks show that it is also
51 -- (much) faster on insertions and deletions when compared to a generic
52 -- size-balanced set implementation (see "Data.Set").
53 --
54 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
55 -- Workshop on ML, September 1998, pages 77-86,
56 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
57 --
58 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
59 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
60 -- October 1968, pages 514-534.
61 --
62 -- Additionally, this implementation places bitmaps in the leaves of the tree.
63 -- Their size is the natural size of a machine word (32 or 64 bits) and greatly
64 -- reduce memory footprint and execution times for dense sets, e.g. sets where
65 -- it is likely that many values lie close to each other. The asymptotics are
66 -- not affected by this optimization.
67 --
68 -- Many operations have a worst-case complexity of /O(min(n,W))/.
69 -- This means that the operation can become linear in the number of
70 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
71 -- (32 or 64).
72 --
73 -- @since 0.5.9
74 -----------------------------------------------------------------------------
75
76 -- [Note: INLINE bit fiddling]
77 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
78 -- It is essential that the bit fiddling functions like mask, zero, branchMask
79 -- etc are inlined. If they do not, the memory allocation skyrockets. The GHC
80 -- usually gets it right, but it is disastrous if it does not. Therefore we
81 -- explicitly mark these functions INLINE.
82
83
84 -- [Note: Local 'go' functions and capturing]
85 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
86 -- Care must be taken when using 'go' function which captures an argument.
87 -- Sometimes (for example when the argument is passed to a data constructor,
88 -- as in insert), GHC heap-allocates more than necessary. Therefore C-- code
89 -- must be checked for increased allocation when creating and modifying such
90 -- functions.
91
92
93 -- [Note: Order of constructors]
94 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
95 -- The order of constructors of IntSet matters when considering performance.
96 -- Currently in GHC 7.0, when type has 3 constructors, they are matched from
97 -- the first to the last -- the best performance is achieved when the
98 -- constructors are ordered by frequency.
99 -- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil
100 -- improves the benchmark by circa 10%.
101
102 module Data.IntSet.Internal (
103 -- * Set type
104 IntSet(..), Key -- instance Eq,Show
105 , Prefix, Mask, BitMap
106
107 -- * Operators
108 , (\\)
109
110 -- * Query
111 , null
112 , size
113 , member
114 , notMember
115 , lookupLT
116 , lookupGT
117 , lookupLE
118 , lookupGE
119 , isSubsetOf
120 , isProperSubsetOf
121 , disjoint
122
123 -- * Construction
124 , empty
125 , singleton
126 , insert
127 , delete
128
129 -- * Combine
130 , union
131 , unions
132 , difference
133 , intersection
134
135 -- * Filter
136 , filter
137 , partition
138 , split
139 , splitMember
140 , splitRoot
141
142 -- * Map
143 , map
144
145 -- * Folds
146 , foldr
147 , foldl
148 -- ** Strict folds
149 , foldr'
150 , foldl'
151 -- ** Legacy folds
152 , fold
153
154 -- * Min\/Max
155 , findMin
156 , findMax
157 , deleteMin
158 , deleteMax
159 , deleteFindMin
160 , deleteFindMax
161 , maxView
162 , minView
163
164 -- * Conversion
165
166 -- ** List
167 , elems
168 , toList
169 , fromList
170
171 -- ** Ordered list
172 , toAscList
173 , toDescList
174 , fromAscList
175 , fromDistinctAscList
176
177 -- * Debugging
178 , showTree
179 , showTreeWith
180
181 -- * Internals
182 , match
183 , suffixBitMask
184 , prefixBitMask
185 , bitmapOf
186 , zero
187 ) where
188
189 import Control.DeepSeq (NFData(rnf))
190 import Data.Bits
191 import qualified Data.List as List
192 import Data.Maybe (fromMaybe)
193 #if !MIN_VERSION_base(4,8,0)
194 import Data.Monoid (Monoid(..))
195 import Data.Word (Word)
196 #endif
197 #if MIN_VERSION_base(4,9,0)
198 import Data.Semigroup (Semigroup(stimes))
199 #endif
200 #if !(MIN_VERSION_base(4,11,0)) && MIN_VERSION_base(4,9,0)
201 import Data.Semigroup (Semigroup((<>)))
202 #endif
203 #if MIN_VERSION_base(4,9,0)
204 import Data.Semigroup (stimesIdempotentMonoid)
205 #endif
206 import Data.Typeable
207 import Prelude hiding (filter, foldr, foldl, null, map)
208
209 import Utils.Containers.Internal.BitUtil
210 import Utils.Containers.Internal.StrictPair
211
212 #if __GLASGOW_HASKELL__
213 import Data.Data (Data(..), Constr, mkConstr, constrIndex, Fixity(Prefix), DataType, mkDataType)
214 import Text.Read
215 #endif
216
217 #if __GLASGOW_HASKELL__
218 import GHC.Exts (Int(..), build)
219 #if __GLASGOW_HASKELL__ >= 708
220 import qualified GHC.Exts as GHCExts
221 #endif
222 import GHC.Exts (indexInt8OffAddr#)
223 #endif
224
225 import qualified Data.Foldable as Foldable
226 #if !MIN_VERSION_base(4,8,0)
227 import Data.Foldable (Foldable())
228 #endif
229
230 infixl 9 \\{-This comment teaches CPP correct behaviour -}
231
232 -- A "Nat" is a natural machine word (an unsigned Int)
233 type Nat = Word
234
235 natFromInt :: Int -> Nat
236 natFromInt i = fromIntegral i
237 {-# INLINE natFromInt #-}
238
239 intFromNat :: Nat -> Int
240 intFromNat w = fromIntegral w
241 {-# INLINE intFromNat #-}
242
243 {--------------------------------------------------------------------
244 Operators
245 --------------------------------------------------------------------}
246 -- | /O(n+m)/. See 'difference'.
247 (\\) :: IntSet -> IntSet -> IntSet
248 m1 \\ m2 = difference m1 m2
249
250 {--------------------------------------------------------------------
251 Types
252 --------------------------------------------------------------------}
253
254 -- | A set of integers.
255
256 -- See Note: Order of constructors
257 data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
258 -- Invariant: Nil is never found as a child of Bin.
259 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
260 -- two elements of the set differ.
261 -- Invariant: Prefix is the common high-order bits that all elements share to
262 -- the left of the Mask bit.
263 -- Invariant: In Bin prefix mask left right, left consists of the elements that
264 -- don't have the mask bit set; right is all the elements that do.
265 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
266 -- Invariant: The Prefix is zero for the last 5 (on 32 bit arches) or 6 bits
267 -- (on 64 bit arches). The values of the set represented by a tip
268 -- are the prefix plus the indices of the set bits in the bit map.
269 | Nil
270
271 -- A number stored in a set is stored as
272 -- * Prefix (all but last 5-6 bits) and
273 -- * BitMap (last 5-6 bits stored as a bitmask)
274 -- Last 5-6 bits are called a Suffix.
275
276 type Prefix = Int
277 type Mask = Int
278 type BitMap = Word
279 type Key = Int
280
281 instance Monoid IntSet where
282 mempty = empty
283 mconcat = unions
284 #if !(MIN_VERSION_base(4,9,0))
285 mappend = union
286 #else
287 mappend = (<>)
288
289 -- | @since 0.5.7
290 instance Semigroup IntSet where
291 (<>) = union
292 stimes = stimesIdempotentMonoid
293 #endif
294
295 #if __GLASGOW_HASKELL__
296
297 {--------------------------------------------------------------------
298 A Data instance
299 --------------------------------------------------------------------}
300
301 -- This instance preserves data abstraction at the cost of inefficiency.
302 -- We provide limited reflection services for the sake of data abstraction.
303
304 instance Data IntSet where
305 gfoldl f z is = z fromList `f` (toList is)
306 toConstr _ = fromListConstr
307 gunfold k z c = case constrIndex c of
308 1 -> k (z fromList)
309 _ -> error "gunfold"
310 dataTypeOf _ = intSetDataType
311
312 fromListConstr :: Constr
313 fromListConstr = mkConstr intSetDataType "fromList" [] Prefix
314
315 intSetDataType :: DataType
316 intSetDataType = mkDataType "Data.IntSet.Internal.IntSet" [fromListConstr]
317
318 #endif
319
320 {--------------------------------------------------------------------
321 Query
322 --------------------------------------------------------------------}
323 -- | /O(1)/. Is the set empty?
324 null :: IntSet -> Bool
325 null Nil = True
326 null _ = False
327 {-# INLINE null #-}
328
329 -- | /O(n)/. Cardinality of the set.
330 size :: IntSet -> Int
331 size = go 0
332 where
333 go !acc (Bin _ _ l r) = go (go acc l) r
334 go acc (Tip _ bm) = acc + bitcount 0 bm
335 go acc Nil = acc
336
337 -- | /O(min(n,W))/. Is the value a member of the set?
338
339 -- See Note: Local 'go' functions and capturing.
340 member :: Key -> IntSet -> Bool
341 member !x = go
342 where
343 go (Bin p m l r)
344 | nomatch x p m = False
345 | zero x m = go l
346 | otherwise = go r
347 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0
348 go Nil = False
349
350 -- | /O(min(n,W))/. Is the element not in the set?
351 notMember :: Key -> IntSet -> Bool
352 notMember k = not . member k
353
354 -- | /O(log n)/. Find largest element smaller than the given one.
355 --
356 -- > lookupLT 3 (fromList [3, 5]) == Nothing
357 -- > lookupLT 5 (fromList [3, 5]) == Just 3
358
359 -- See Note: Local 'go' functions and capturing.
360 lookupLT :: Key -> IntSet -> Maybe Key
361 lookupLT !x t = case t of
362 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
363 _ -> go Nil t
364 where
365 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
366 | zero x m = go def l
367 | otherwise = go l r
368 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
369 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT
370 | otherwise = unsafeFindMax def
371 where maskLT = (bitmapOf x - 1) .&. bm
372 go def Nil = unsafeFindMax def
373
374
375 -- | /O(log n)/. Find smallest element greater than the given one.
376 --
377 -- > lookupGT 4 (fromList [3, 5]) == Just 5
378 -- > lookupGT 5 (fromList [3, 5]) == Nothing
379
380 -- See Note: Local 'go' functions and capturing.
381 lookupGT :: Key -> IntSet -> Maybe Key
382 lookupGT !x t = case t of
383 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
384 _ -> go Nil t
385 where
386 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
387 | zero x m = go r l
388 | otherwise = go def r
389 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
390 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT
391 | otherwise = unsafeFindMin def
392 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm
393 go def Nil = unsafeFindMin def
394
395
396 -- | /O(log n)/. Find largest element smaller or equal to the given one.
397 --
398 -- > lookupLE 2 (fromList [3, 5]) == Nothing
399 -- > lookupLE 4 (fromList [3, 5]) == Just 3
400 -- > lookupLE 5 (fromList [3, 5]) == Just 5
401
402 -- See Note: Local 'go' functions and capturing.
403 lookupLE :: Key -> IntSet -> Maybe Key
404 lookupLE !x t = case t of
405 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
406 _ -> go Nil t
407 where
408 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
409 | zero x m = go def l
410 | otherwise = go l r
411 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
412 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE
413 | otherwise = unsafeFindMax def
414 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm
415 go def Nil = unsafeFindMax def
416
417
418 -- | /O(log n)/. Find smallest element greater or equal to the given one.
419 --
420 -- > lookupGE 3 (fromList [3, 5]) == Just 3
421 -- > lookupGE 4 (fromList [3, 5]) == Just 5
422 -- > lookupGE 6 (fromList [3, 5]) == Nothing
423
424 -- See Note: Local 'go' functions and capturing.
425 lookupGE :: Key -> IntSet -> Maybe Key
426 lookupGE !x t = case t of
427 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
428 _ -> go Nil t
429 where
430 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
431 | zero x m = go r l
432 | otherwise = go def r
433 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
434 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE
435 | otherwise = unsafeFindMin def
436 where maskGE = (- (bitmapOf x)) .&. bm
437 go def Nil = unsafeFindMin def
438
439
440
441 -- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is
442 -- given, it has m > 0.
443 unsafeFindMin :: IntSet -> Maybe Key
444 unsafeFindMin Nil = Nothing
445 unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm
446 unsafeFindMin (Bin _ _ l _) = unsafeFindMin l
447
448 -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is
449 -- given, it has m > 0.
450 unsafeFindMax :: IntSet -> Maybe Key
451 unsafeFindMax Nil = Nothing
452 unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm
453 unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r
454
455 {--------------------------------------------------------------------
456 Construction
457 --------------------------------------------------------------------}
458 -- | /O(1)/. The empty set.
459 empty :: IntSet
460 empty
461 = Nil
462 {-# INLINE empty #-}
463
464 -- | /O(1)/. A set of one element.
465 singleton :: Key -> IntSet
466 singleton x
467 = Tip (prefixOf x) (bitmapOf x)
468 {-# INLINE singleton #-}
469
470 {--------------------------------------------------------------------
471 Insert
472 --------------------------------------------------------------------}
473 -- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for
474 -- IntSets.
475 insert :: Key -> IntSet -> IntSet
476 insert !x = insertBM (prefixOf x) (bitmapOf x)
477
478 -- Helper function for insert and union.
479 insertBM :: Prefix -> BitMap -> IntSet -> IntSet
480 insertBM !kx !bm t@(Bin p m l r)
481 | nomatch kx p m = link kx (Tip kx bm) p t
482 | zero kx m = Bin p m (insertBM kx bm l) r
483 | otherwise = Bin p m l (insertBM kx bm r)
484 insertBM kx bm t@(Tip kx' bm')
485 | kx' == kx = Tip kx' (bm .|. bm')
486 | otherwise = link kx (Tip kx bm) kx' t
487 insertBM kx bm Nil = Tip kx bm
488
489 -- | /O(min(n,W))/. Delete a value in the set. Returns the
490 -- original set when the value was not present.
491 delete :: Key -> IntSet -> IntSet
492 delete !x = deleteBM (prefixOf x) (bitmapOf x)
493
494 -- Deletes all values mentioned in the BitMap from the set.
495 -- Helper function for delete and difference.
496 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
497 deleteBM !kx !bm t@(Bin p m l r)
498 | nomatch kx p m = t
499 | zero kx m = bin p m (deleteBM kx bm l) r
500 | otherwise = bin p m l (deleteBM kx bm r)
501 deleteBM kx bm t@(Tip kx' bm')
502 | kx' == kx = tip kx (bm' .&. complement bm)
503 | otherwise = t
504 deleteBM _ _ Nil = Nil
505
506
507 {--------------------------------------------------------------------
508 Union
509 --------------------------------------------------------------------}
510 -- | The union of a list of sets.
511 unions :: Foldable f => f IntSet -> IntSet
512 unions xs
513 = Foldable.foldl' union empty xs
514
515
516 -- | /O(n+m)/. The union of two sets.
517 union :: IntSet -> IntSet -> IntSet
518 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
519 | shorter m1 m2 = union1
520 | shorter m2 m1 = union2
521 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
522 | otherwise = link p1 t1 p2 t2
523 where
524 union1 | nomatch p2 p1 m1 = link p1 t1 p2 t2
525 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
526 | otherwise = Bin p1 m1 l1 (union r1 t2)
527
528 union2 | nomatch p1 p2 m2 = link p1 t1 p2 t2
529 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
530 | otherwise = Bin p2 m2 l2 (union t1 r2)
531
532 union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t
533 union t@(Bin _ _ _ _) Nil = t
534 union (Tip kx bm) t = insertBM kx bm t
535 union Nil t = t
536
537
538 {--------------------------------------------------------------------
539 Difference
540 --------------------------------------------------------------------}
541 -- | /O(n+m)/. Difference between two sets.
542 difference :: IntSet -> IntSet -> IntSet
543 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
544 | shorter m1 m2 = difference1
545 | shorter m2 m1 = difference2
546 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
547 | otherwise = t1
548 where
549 difference1 | nomatch p2 p1 m1 = t1
550 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
551 | otherwise = bin p1 m1 l1 (difference r1 t2)
552
553 difference2 | nomatch p1 p2 m2 = t1
554 | zero p1 m2 = difference t1 l2
555 | otherwise = difference t1 r2
556
557 difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t
558 difference t@(Bin _ _ _ _) Nil = t
559
560 difference t1@(Tip kx bm) t2 = differenceTip t2
561 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1
562 | zero kx m2 = differenceTip l2
563 | otherwise = differenceTip r2
564 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2)
565 | otherwise = t1
566 differenceTip Nil = t1
567
568 difference Nil _ = Nil
569
570
571
572 {--------------------------------------------------------------------
573 Intersection
574 --------------------------------------------------------------------}
575 -- | /O(n+m)/. The intersection of two sets.
576 intersection :: IntSet -> IntSet -> IntSet
577 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
578 | shorter m1 m2 = intersection1
579 | shorter m2 m1 = intersection2
580 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
581 | otherwise = Nil
582 where
583 intersection1 | nomatch p2 p1 m1 = Nil
584 | zero p2 m1 = intersection l1 t2
585 | otherwise = intersection r1 t2
586
587 intersection2 | nomatch p1 p2 m2 = Nil
588 | zero p1 m2 = intersection t1 l2
589 | otherwise = intersection t1 r2
590
591 intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1
592 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil
593 | zero kx2 m1 = intersectBM l1
594 | otherwise = intersectBM r1
595 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
596 | otherwise = Nil
597 intersectBM Nil = Nil
598
599 intersection (Bin _ _ _ _) Nil = Nil
600
601 intersection (Tip kx1 bm1) t2 = intersectBM t2
602 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil
603 | zero kx1 m2 = intersectBM l2
604 | otherwise = intersectBM r2
605 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
606 | otherwise = Nil
607 intersectBM Nil = Nil
608
609 intersection Nil _ = Nil
610
611 {--------------------------------------------------------------------
612 Subset
613 --------------------------------------------------------------------}
614 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
615 isProperSubsetOf :: IntSet -> IntSet -> Bool
616 isProperSubsetOf t1 t2
617 = case subsetCmp t1 t2 of
618 LT -> True
619 _ -> False
620
621 subsetCmp :: IntSet -> IntSet -> Ordering
622 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
623 | shorter m1 m2 = GT
624 | shorter m2 m1 = case subsetCmpLt of
625 GT -> GT
626 _ -> LT
627 | p1 == p2 = subsetCmpEq
628 | otherwise = GT -- disjoint
629 where
630 subsetCmpLt | nomatch p1 p2 m2 = GT
631 | zero p1 m2 = subsetCmp t1 l2
632 | otherwise = subsetCmp t1 r2
633 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
634 (GT,_ ) -> GT
635 (_ ,GT) -> GT
636 (EQ,EQ) -> EQ
637 _ -> LT
638
639 subsetCmp (Bin _ _ _ _) _ = GT
640 subsetCmp (Tip kx1 bm1) (Tip kx2 bm2)
641 | kx1 /= kx2 = GT -- disjoint
642 | bm1 == bm2 = EQ
643 | bm1 .&. complement bm2 == 0 = LT
644 | otherwise = GT
645 subsetCmp t1@(Tip kx _) (Bin p m l r)
646 | nomatch kx p m = GT
647 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT
648 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT
649 subsetCmp (Tip _ _) Nil = GT -- disjoint
650 subsetCmp Nil Nil = EQ
651 subsetCmp Nil _ = LT
652
653 -- | /O(n+m)/. Is this a subset?
654 -- @(s1 \`isSubsetOf\` s2)@ tells whether @s1@ is a subset of @s2@.
655
656 isSubsetOf :: IntSet -> IntSet -> Bool
657 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
658 | shorter m1 m2 = False
659 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
660 else isSubsetOf t1 r2)
661 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
662 isSubsetOf (Bin _ _ _ _) _ = False
663 isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0
664 isSubsetOf t1@(Tip kx _) (Bin p m l r)
665 | nomatch kx p m = False
666 | zero kx m = isSubsetOf t1 l
667 | otherwise = isSubsetOf t1 r
668 isSubsetOf (Tip _ _) Nil = False
669 isSubsetOf Nil _ = True
670
671
672 {--------------------------------------------------------------------
673 Disjoint
674 --------------------------------------------------------------------}
675 -- | /O(n+m)/. Check whether two sets are disjoint (i.e. their intersection
676 -- is empty).
677 --
678 -- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True
679 -- > disjoint (fromList [2,4,6,8]) (fromList [2,3,5,7]) == False
680 -- > disjoint (fromList [1,2]) (fromList [1,2,3,4]) == False
681 -- > disjoint (fromList []) (fromList []) == True
682 --
683 -- @since 0.5.11
684 disjoint :: IntSet -> IntSet -> Bool
685 disjoint t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
686 | shorter m1 m2 = disjoint1
687 | shorter m2 m1 = disjoint2
688 | p1 == p2 = disjoint l1 l2 && disjoint r1 r2
689 | otherwise = True
690 where
691 disjoint1 | nomatch p2 p1 m1 = True
692 | zero p2 m1 = disjoint l1 t2
693 | otherwise = disjoint r1 t2
694
695 disjoint2 | nomatch p1 p2 m2 = True
696 | zero p1 m2 = disjoint t1 l2
697 | otherwise = disjoint t1 r2
698
699 disjoint t1@(Bin _ _ _ _) (Tip kx2 bm2) = disjointBM t1
700 where disjointBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = True
701 | zero kx2 m1 = disjointBM l1
702 | otherwise = disjointBM r1
703 disjointBM (Tip kx1 bm1) | kx1 == kx2 = (bm1 .&. bm2) == 0
704 | otherwise = True
705 disjointBM Nil = True
706
707 disjoint (Bin _ _ _ _) Nil = True
708
709 disjoint (Tip kx1 bm1) t2 = disjointBM t2
710 where disjointBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = True
711 | zero kx1 m2 = disjointBM l2
712 | otherwise = disjointBM r2
713 disjointBM (Tip kx2 bm2) | kx1 == kx2 = (bm1 .&. bm2) == 0
714 | otherwise = True
715 disjointBM Nil = True
716
717 disjoint Nil _ = True
718
719
720 {--------------------------------------------------------------------
721 Filter
722 --------------------------------------------------------------------}
723 -- | /O(n)/. Filter all elements that satisfy some predicate.
724 filter :: (Key -> Bool) -> IntSet -> IntSet
725 filter predicate t
726 = case t of
727 Bin p m l r
728 -> bin p m (filter predicate l) (filter predicate r)
729 Tip kx bm
730 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm)
731 Nil -> Nil
732 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
733 | otherwise = bm
734 {-# INLINE bitPred #-}
735
736 -- | /O(n)/. partition the set according to some predicate.
737 partition :: (Key -> Bool) -> IntSet -> (IntSet,IntSet)
738 partition predicate0 t0 = toPair $ go predicate0 t0
739 where
740 go predicate t
741 = case t of
742 Bin p m l r
743 -> let (l1 :*: l2) = go predicate l
744 (r1 :*: r2) = go predicate r
745 in bin p m l1 r1 :*: bin p m l2 r2
746 Tip kx bm
747 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm
748 in tip kx bm1 :*: tip kx (bm `xor` bm1)
749 Nil -> (Nil :*: Nil)
750 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
751 | otherwise = bm
752 {-# INLINE bitPred #-}
753
754
755 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
756 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
757 -- comprises the elements of @set@ greater than @x@.
758 --
759 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
760 split :: Key -> IntSet -> (IntSet,IntSet)
761 split x t =
762 case t of
763 Bin _ m l r
764 | m < 0 -> if x >= 0 -- handle negative numbers.
765 then case go x l of (lt :*: gt) -> let !lt' = union lt r
766 in (lt', gt)
767 else case go x r of (lt :*: gt) -> let !gt' = union gt l
768 in (lt, gt')
769 _ -> case go x t of
770 (lt :*: gt) -> (lt, gt)
771 where
772 go !x' t'@(Bin p m l r)
773 | match x' p m = if zero x' m
774 then case go x' l of
775 (lt :*: gt) -> lt :*: union gt r
776 else case go x' r of
777 (lt :*: gt) -> union lt l :*: gt
778 | otherwise = if x' < p then (Nil :*: t')
779 else (t' :*: Nil)
780 go x' t'@(Tip kx' bm)
781 | kx' > x' = (Nil :*: t')
782 -- equivalent to kx' > prefixOf x'
783 | kx' < prefixOf x' = (t' :*: Nil)
784 | otherwise = tip kx' (bm .&. lowerBitmap) :*: tip kx' (bm .&. higherBitmap)
785 where lowerBitmap = bitmapOf x' - 1
786 higherBitmap = complement (lowerBitmap + bitmapOf x')
787 go _ Nil = (Nil :*: Nil)
788
789 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
790 -- element was found in the original set.
791 splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet)
792 splitMember x t =
793 case t of
794 Bin _ m l r | m < 0 -> if x >= 0
795 then case go x l of
796 (lt, fnd, gt) -> let !lt' = union lt r
797 in (lt', fnd, gt)
798 else case go x r of
799 (lt, fnd, gt) -> let !gt' = union gt l
800 in (lt, fnd, gt')
801 _ -> go x t
802 where
803 go x' t'@(Bin p m l r)
804 | match x' p m = if zero x' m
805 then case go x' l of
806 (lt, fnd, gt) -> (lt, fnd, union gt r)
807 else case go x' r of
808 (lt, fnd, gt) -> (union lt l, fnd, gt)
809 | otherwise = if x' < p then (Nil, False, t') else (t', False, Nil)
810 go x' t'@(Tip kx' bm)
811 | kx' > x' = (Nil, False, t')
812 -- equivalent to kx' > prefixOf x'
813 | kx' < prefixOf x' = (t', False, Nil)
814 | otherwise = let !lt = tip kx' (bm .&. lowerBitmap)
815 !found = (bm .&. bitmapOfx') /= 0
816 !gt = tip kx' (bm .&. higherBitmap)
817 in (lt, found, gt)
818 where bitmapOfx' = bitmapOf x'
819 lowerBitmap = bitmapOfx' - 1
820 higherBitmap = complement (lowerBitmap + bitmapOfx')
821 go _ Nil = (Nil, False, Nil)
822
823 {----------------------------------------------------------------------
824 Min/Max
825 ----------------------------------------------------------------------}
826
827 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
828 -- stripped of that element, or 'Nothing' if passed an empty set.
829 maxView :: IntSet -> Maybe (Key, IntSet)
830 maxView t =
831 case t of Nil -> Nothing
832 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r)
833 _ -> Just (go t)
834 where
835 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r')
836 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
837 go Nil = error "maxView Nil"
838
839 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
840 -- stripped of that element, or 'Nothing' if passed an empty set.
841 minView :: IntSet -> Maybe (Key, IntSet)
842 minView t =
843 case t of Nil -> Nothing
844 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r')
845 _ -> Just (go t)
846 where
847 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r)
848 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
849 go Nil = error "minView Nil"
850
851 -- | /O(min(n,W))/. Delete and find the minimal element.
852 --
853 -- > deleteFindMin set = (findMin set, deleteMin set)
854 deleteFindMin :: IntSet -> (Key, IntSet)
855 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
856
857 -- | /O(min(n,W))/. Delete and find the maximal element.
858 --
859 -- > deleteFindMax set = (findMax set, deleteMax set)
860 deleteFindMax :: IntSet -> (Key, IntSet)
861 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
862
863
864 -- | /O(min(n,W))/. The minimal element of the set.
865 findMin :: IntSet -> Key
866 findMin Nil = error "findMin: empty set has no minimal element"
867 findMin (Tip kx bm) = kx + lowestBitSet bm
868 findMin (Bin _ m l r)
869 | m < 0 = find r
870 | otherwise = find l
871 where find (Tip kx bm) = kx + lowestBitSet bm
872 find (Bin _ _ l' _) = find l'
873 find Nil = error "findMin Nil"
874
875 -- | /O(min(n,W))/. The maximal element of a set.
876 findMax :: IntSet -> Key
877 findMax Nil = error "findMax: empty set has no maximal element"
878 findMax (Tip kx bm) = kx + highestBitSet bm
879 findMax (Bin _ m l r)
880 | m < 0 = find l
881 | otherwise = find r
882 where find (Tip kx bm) = kx + highestBitSet bm
883 find (Bin _ _ _ r') = find r'
884 find Nil = error "findMax Nil"
885
886
887 -- | /O(min(n,W))/. Delete the minimal element. Returns an empty set if the set is empty.
888 --
889 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
890 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
891 deleteMin :: IntSet -> IntSet
892 deleteMin = maybe Nil snd . minView
893
894 -- | /O(min(n,W))/. Delete the maximal element. Returns an empty set if the set is empty.
895 --
896 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
897 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
898 deleteMax :: IntSet -> IntSet
899 deleteMax = maybe Nil snd . maxView
900
901 {----------------------------------------------------------------------
902 Map
903 ----------------------------------------------------------------------}
904
905 -- | /O(n*min(n,W))/.
906 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
907 --
908 -- It's worth noting that the size of the result may be smaller if,
909 -- for some @(x,y)@, @x \/= y && f x == f y@
910
911 map :: (Key -> Key) -> IntSet -> IntSet
912 map f = fromList . List.map f . toList
913
914 {--------------------------------------------------------------------
915 Fold
916 --------------------------------------------------------------------}
917 -- | /O(n)/. Fold the elements in the set using the given right-associative
918 -- binary operator. This function is an equivalent of 'foldr' and is present
919 -- for compatibility only.
920 --
921 -- /Please note that fold will be deprecated in the future and removed./
922 fold :: (Key -> b -> b) -> b -> IntSet -> b
923 fold = foldr
924 {-# INLINE fold #-}
925
926 -- | /O(n)/. Fold the elements in the set using the given right-associative
927 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
928 --
929 -- For example,
930 --
931 -- > toAscList set = foldr (:) [] set
932 foldr :: (Key -> b -> b) -> b -> IntSet -> b
933 foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
934 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
935 | otherwise -> go (go z r) l
936 _ -> go z t
937 where
938 go z' Nil = z'
939 go z' (Tip kx bm) = foldrBits kx f z' bm
940 go z' (Bin _ _ l r) = go (go z' r) l
941 {-# INLINE foldr #-}
942
943 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
944 -- evaluated before using the result in the next application. This
945 -- function is strict in the starting value.
946 foldr' :: (Key -> b -> b) -> b -> IntSet -> b
947 foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
948 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
949 | otherwise -> go (go z r) l
950 _ -> go z t
951 where
952 go !z' Nil = z'
953 go z' (Tip kx bm) = foldr'Bits kx f z' bm
954 go z' (Bin _ _ l r) = go (go z' r) l
955 {-# INLINE foldr' #-}
956
957 -- | /O(n)/. Fold the elements in the set using the given left-associative
958 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
959 --
960 -- For example,
961 --
962 -- > toDescList set = foldl (flip (:)) [] set
963 foldl :: (a -> Key -> a) -> a -> IntSet -> a
964 foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
965 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
966 | otherwise -> go (go z l) r
967 _ -> go z t
968 where
969 go z' Nil = z'
970 go z' (Tip kx bm) = foldlBits kx f z' bm
971 go z' (Bin _ _ l r) = go (go z' l) r
972 {-# INLINE foldl #-}
973
974 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
975 -- evaluated before using the result in the next application. This
976 -- function is strict in the starting value.
977 foldl' :: (a -> Key -> a) -> a -> IntSet -> a
978 foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
979 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
980 | otherwise -> go (go z l) r
981 _ -> go z t
982 where
983 go !z' Nil = z'
984 go z' (Tip kx bm) = foldl'Bits kx f z' bm
985 go z' (Bin _ _ l r) = go (go z' l) r
986 {-# INLINE foldl' #-}
987
988 {--------------------------------------------------------------------
989 List variations
990 --------------------------------------------------------------------}
991 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
992 -- Subject to list fusion.
993 elems :: IntSet -> [Key]
994 elems
995 = toAscList
996
997 {--------------------------------------------------------------------
998 Lists
999 --------------------------------------------------------------------}
1000 #if __GLASGOW_HASKELL__ >= 708
1001 -- | @since 0.5.6.2
1002 instance GHCExts.IsList IntSet where
1003 type Item IntSet = Key
1004 fromList = fromList
1005 toList = toList
1006 #endif
1007
1008 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
1009 toList :: IntSet -> [Key]
1010 toList
1011 = toAscList
1012
1013 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list
1014 -- fusion.
1015 toAscList :: IntSet -> [Key]
1016 toAscList = foldr (:) []
1017
1018 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
1019 -- fusion.
1020 toDescList :: IntSet -> [Key]
1021 toDescList = foldl (flip (:)) []
1022
1023 -- List fusion for the list generating functions.
1024 #if __GLASGOW_HASKELL__
1025 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
1026 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
1027 foldrFB :: (Key -> b -> b) -> b -> IntSet -> b
1028 foldrFB = foldr
1029 {-# INLINE[0] foldrFB #-}
1030 foldlFB :: (a -> Key -> a) -> a -> IntSet -> a
1031 foldlFB = foldl
1032 {-# INLINE[0] foldlFB #-}
1033
1034 -- Inline elems and toList, so that we need to fuse only toAscList.
1035 {-# INLINE elems #-}
1036 {-# INLINE toList #-}
1037
1038 -- The fusion is enabled up to phase 2 included. If it does not succeed,
1039 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
1040 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
1041 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
1042 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
1043 -- before phase 0, otherwise the fusion rules would not fire at all.
1044 {-# NOINLINE[0] toAscList #-}
1045 {-# NOINLINE[0] toDescList #-}
1046 {-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
1047 {-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
1048 {-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
1049 {-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
1050 #endif
1051
1052
1053 -- | /O(n*min(n,W))/. Create a set from a list of integers.
1054 fromList :: [Key] -> IntSet
1055 fromList xs
1056 = Foldable.foldl' ins empty xs
1057 where
1058 ins t x = insert x t
1059
1060 -- | /O(n)/. Build a set from an ascending list of elements.
1061 -- /The precondition (input list is ascending) is not checked./
1062 fromAscList :: [Key] -> IntSet
1063 fromAscList [] = Nil
1064 fromAscList (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
1065 where
1066 combineEq x' [] = [x']
1067 combineEq x' (x:xs)
1068 | x==x' = combineEq x' xs
1069 | otherwise = x' : combineEq x xs
1070
1071 -- | /O(n)/. Build a set from an ascending list of distinct elements.
1072 -- /The precondition (input list is strictly ascending) is not checked./
1073 fromDistinctAscList :: [Key] -> IntSet
1074 fromDistinctAscList [] = Nil
1075 fromDistinctAscList (z0 : zs0) = work (prefixOf z0) (bitmapOf z0) zs0 Nada
1076 where
1077 -- 'work' accumulates all values that go into one tip, before passing this Tip
1078 -- to 'reduce'
1079 work kx bm [] stk = finish kx (Tip kx bm) stk
1080 work kx bm (z:zs) stk | kx == prefixOf z = work kx (bm .|. bitmapOf z) zs stk
1081 work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
1082
1083 reduce z zs _ px tx Nada = work (prefixOf z) (bitmapOf z) zs (Push px tx Nada)
1084 reduce z zs m px tx stk@(Push py ty stk') =
1085 let mxy = branchMask px py
1086 pxy = mask px mxy
1087 in if shorter m mxy
1088 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
1089 else work (prefixOf z) (bitmapOf z) zs (Push px tx stk)
1090
1091 finish _ t Nada = t
1092 finish px tx (Push py ty stk) = finish p (link py ty px tx) stk
1093 where m = branchMask px py
1094 p = mask px m
1095
1096 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
1097
1098
1099 {--------------------------------------------------------------------
1100 Eq
1101 --------------------------------------------------------------------}
1102 instance Eq IntSet where
1103 t1 == t2 = equal t1 t2
1104 t1 /= t2 = nequal t1 t2
1105
1106 equal :: IntSet -> IntSet -> Bool
1107 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1108 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
1109 equal (Tip kx1 bm1) (Tip kx2 bm2)
1110 = kx1 == kx2 && bm1 == bm2
1111 equal Nil Nil = True
1112 equal _ _ = False
1113
1114 nequal :: IntSet -> IntSet -> Bool
1115 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1116 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
1117 nequal (Tip kx1 bm1) (Tip kx2 bm2)
1118 = kx1 /= kx2 || bm1 /= bm2
1119 nequal Nil Nil = False
1120 nequal _ _ = True
1121
1122 {--------------------------------------------------------------------
1123 Ord
1124 --------------------------------------------------------------------}
1125
1126 instance Ord IntSet where
1127 compare s1 s2 = compare (toAscList s1) (toAscList s2)
1128 -- tentative implementation. See if more efficient exists.
1129
1130 {--------------------------------------------------------------------
1131 Show
1132 --------------------------------------------------------------------}
1133 instance Show IntSet where
1134 showsPrec p xs = showParen (p > 10) $
1135 showString "fromList " . shows (toList xs)
1136
1137 {--------------------------------------------------------------------
1138 Read
1139 --------------------------------------------------------------------}
1140 instance Read IntSet where
1141 #ifdef __GLASGOW_HASKELL__
1142 readPrec = parens $ prec 10 $ do
1143 Ident "fromList" <- lexP
1144 xs <- readPrec
1145 return (fromList xs)
1146
1147 readListPrec = readListPrecDefault
1148 #else
1149 readsPrec p = readParen (p > 10) $ \ r -> do
1150 ("fromList",s) <- lex r
1151 (xs,t) <- reads s
1152 return (fromList xs,t)
1153 #endif
1154
1155 {--------------------------------------------------------------------
1156 Typeable
1157 --------------------------------------------------------------------}
1158
1159 INSTANCE_TYPEABLE0(IntSet)
1160
1161 {--------------------------------------------------------------------
1162 NFData
1163 --------------------------------------------------------------------}
1164
1165 -- The IntSet constructors consist only of strict fields of Ints and
1166 -- IntSets, thus the default NFData instance which evaluates to whnf
1167 -- should suffice
1168 instance NFData IntSet where rnf x = seq x ()
1169
1170 {--------------------------------------------------------------------
1171 Debugging
1172 --------------------------------------------------------------------}
1173 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1174 -- in a compressed, hanging format.
1175 showTree :: IntSet -> String
1176 showTree s
1177 = showTreeWith True False s
1178
1179
1180 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
1181 the tree that implements the set. If @hang@ is
1182 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
1183 @wide@ is 'True', an extra wide version is shown.
1184 -}
1185 showTreeWith :: Bool -> Bool -> IntSet -> String
1186 showTreeWith hang wide t
1187 | hang = (showsTreeHang wide [] t) ""
1188 | otherwise = (showsTree wide [] [] t) ""
1189
1190 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
1191 showsTree wide lbars rbars t
1192 = case t of
1193 Bin p m l r
1194 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1195 showWide wide rbars .
1196 showsBars lbars . showString (showBin p m) . showString "\n" .
1197 showWide wide lbars .
1198 showsTree wide (withEmpty lbars) (withBar lbars) l
1199 Tip kx bm
1200 -> showsBars lbars . showString " " . shows kx . showString " + " .
1201 showsBitMap bm . showString "\n"
1202 Nil -> showsBars lbars . showString "|\n"
1203
1204 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
1205 showsTreeHang wide bars t
1206 = case t of
1207 Bin p m l r
1208 -> showsBars bars . showString (showBin p m) . showString "\n" .
1209 showWide wide bars .
1210 showsTreeHang wide (withBar bars) l .
1211 showWide wide bars .
1212 showsTreeHang wide (withEmpty bars) r
1213 Tip kx bm
1214 -> showsBars bars . showString " " . shows kx . showString " + " .
1215 showsBitMap bm . showString "\n"
1216 Nil -> showsBars bars . showString "|\n"
1217
1218 showBin :: Prefix -> Mask -> String
1219 showBin _ _
1220 = "*" -- ++ show (p,m)
1221
1222 showWide :: Bool -> [String] -> String -> String
1223 showWide wide bars
1224 | wide = showString (concat (reverse bars)) . showString "|\n"
1225 | otherwise = id
1226
1227 showsBars :: [String] -> ShowS
1228 showsBars [] = id
1229 showsBars bars = showString (concat (reverse (tail bars))) . showString node
1230
1231 showsBitMap :: Word -> ShowS
1232 showsBitMap = showString . showBitMap
1233
1234 showBitMap :: Word -> String
1235 showBitMap w = show $ foldrBits 0 (:) [] w
1236
1237 node :: String
1238 node = "+--"
1239
1240 withBar, withEmpty :: [String] -> [String]
1241 withBar bars = "| ":bars
1242 withEmpty bars = " ":bars
1243
1244
1245 {--------------------------------------------------------------------
1246 Helpers
1247 --------------------------------------------------------------------}
1248 {--------------------------------------------------------------------
1249 Link
1250 --------------------------------------------------------------------}
1251 link :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
1252 link p1 t1 p2 t2
1253 | zero p1 m = Bin p m t1 t2
1254 | otherwise = Bin p m t2 t1
1255 where
1256 m = branchMask p1 p2
1257 p = mask p1 m
1258 {-# INLINE link #-}
1259
1260 {--------------------------------------------------------------------
1261 @bin@ assures that we never have empty trees within a tree.
1262 --------------------------------------------------------------------}
1263 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
1264 bin _ _ l Nil = l
1265 bin _ _ Nil r = r
1266 bin p m l r = Bin p m l r
1267 {-# INLINE bin #-}
1268
1269 {--------------------------------------------------------------------
1270 @tip@ assures that we never have empty bitmaps within a tree.
1271 --------------------------------------------------------------------}
1272 tip :: Prefix -> BitMap -> IntSet
1273 tip _ 0 = Nil
1274 tip kx bm = Tip kx bm
1275 {-# INLINE tip #-}
1276
1277
1278 {----------------------------------------------------------------------
1279 Functions that generate Prefix and BitMap of a Key or a Suffix.
1280 ----------------------------------------------------------------------}
1281
1282 suffixBitMask :: Int
1283 #if MIN_VERSION_base(4,7,0)
1284 suffixBitMask = finiteBitSize (undefined::Word) - 1
1285 #else
1286 suffixBitMask = bitSize (undefined::Word) - 1
1287 #endif
1288 {-# INLINE suffixBitMask #-}
1289
1290 prefixBitMask :: Int
1291 prefixBitMask = complement suffixBitMask
1292 {-# INLINE prefixBitMask #-}
1293
1294 prefixOf :: Int -> Prefix
1295 prefixOf x = x .&. prefixBitMask
1296 {-# INLINE prefixOf #-}
1297
1298 suffixOf :: Int -> Int
1299 suffixOf x = x .&. suffixBitMask
1300 {-# INLINE suffixOf #-}
1301
1302 bitmapOfSuffix :: Int -> BitMap
1303 bitmapOfSuffix s = 1 `shiftLL` s
1304 {-# INLINE bitmapOfSuffix #-}
1305
1306 bitmapOf :: Int -> BitMap
1307 bitmapOf x = bitmapOfSuffix (suffixOf x)
1308 {-# INLINE bitmapOf #-}
1309
1310
1311 {--------------------------------------------------------------------
1312 Endian independent bit twiddling
1313 --------------------------------------------------------------------}
1314 -- Returns True iff the bits set in i and the Mask m are disjoint.
1315 zero :: Int -> Mask -> Bool
1316 zero i m
1317 = (natFromInt i) .&. (natFromInt m) == 0
1318 {-# INLINE zero #-}
1319
1320 nomatch,match :: Int -> Prefix -> Mask -> Bool
1321 nomatch i p m
1322 = (mask i m) /= p
1323 {-# INLINE nomatch #-}
1324
1325 match i p m
1326 = (mask i m) == p
1327 {-# INLINE match #-}
1328
1329 -- Suppose a is largest such that 2^a divides 2*m.
1330 -- Then mask i m is i with the low a bits zeroed out.
1331 mask :: Int -> Mask -> Prefix
1332 mask i m
1333 = maskW (natFromInt i) (natFromInt m)
1334 {-# INLINE mask #-}
1335
1336 {--------------------------------------------------------------------
1337 Big endian operations
1338 --------------------------------------------------------------------}
1339 maskW :: Nat -> Nat -> Prefix
1340 maskW i m
1341 = intFromNat (i .&. (complement (m-1) `xor` m))
1342 {-# INLINE maskW #-}
1343
1344 shorter :: Mask -> Mask -> Bool
1345 shorter m1 m2
1346 = (natFromInt m1) > (natFromInt m2)
1347 {-# INLINE shorter #-}
1348
1349 branchMask :: Prefix -> Prefix -> Mask
1350 branchMask p1 p2
1351 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
1352 {-# INLINE branchMask #-}
1353
1354 {----------------------------------------------------------------------
1355 To get best performance, we provide fast implementations of
1356 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC.
1357 If the intel bsf and bsr instructions ever become GHC primops,
1358 this code should be reimplemented using these.
1359
1360 Performance of this code is crucial for folds, toList, filter, partition.
1361
1362 The signatures of methods in question are placed after this comment.
1363 ----------------------------------------------------------------------}
1364
1365 lowestBitSet :: Nat -> Int
1366 highestBitSet :: Nat -> Int
1367 foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1368 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1369 foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1370 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1371
1372 {-# INLINE lowestBitSet #-}
1373 {-# INLINE highestBitSet #-}
1374 {-# INLINE foldlBits #-}
1375 {-# INLINE foldl'Bits #-}
1376 {-# INLINE foldrBits #-}
1377 {-# INLINE foldr'Bits #-}
1378
1379 #if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64)
1380 {----------------------------------------------------------------------
1381 For lowestBitSet we use wordsize-dependant implementation based on
1382 multiplication and DeBrujn indeces, which was proposed by Edward Kmett
1383 <http://haskell.org/pipermail/libraries/2011-September/016749.html>
1384
1385 The core of this implementation is fast indexOfTheOnlyBit,
1386 which is given a Nat with exactly one bit set, and returns
1387 its index.
1388
1389 Lot of effort was put in these implementations, please benchmark carefully
1390 before changing this code.
1391 ----------------------------------------------------------------------}
1392
1393 indexOfTheOnlyBit :: Nat -> Int
1394 {-# INLINE indexOfTheOnlyBit #-}
1395 indexOfTheOnlyBit bitmask =
1396 I# (lsbArray `indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))
1397 where unboxInt (I# i) = i
1398 #if WORD_SIZE_IN_BITS==32
1399 magic = 0x077CB531
1400 offset = 27
1401 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"#
1402 #else
1403 magic = 0x07EDD5E59A4E28C2
1404 offset = 58
1405 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"#
1406 #endif
1407 -- The lsbArray gets inlined to every call site of indexOfTheOnlyBit.
1408 -- That cannot be easily avoided, as GHC forbids top-level Addr# literal.
1409 -- One could go around that by supplying getLsbArray :: () -> Addr# marked
1410 -- as NOINLINE. But the code size of calling it and processing the result
1411 -- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array
1412 -- is actually improvement on 32-bit and only a 8B size increase on 64-bit.
1413
1414 lowestBitMask :: Nat -> Nat
1415 lowestBitMask x = x .&. negate x
1416 {-# INLINE lowestBitMask #-}
1417
1418 -- Reverse the order of bits in the Nat.
1419 revNat :: Nat -> Nat
1420 #if WORD_SIZE_IN_BITS==32
1421 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of
1422 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of
1423 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of
1424 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of
1425 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16);
1426 #else
1427 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of
1428 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of
1429 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of
1430 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of
1431 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of
1432 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
1433 #endif
1434
1435 lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x)
1436
1437 highestBitSet x = indexOfTheOnlyBit (highestBitMask x)
1438
1439 foldlBits prefix f z bitmap = go bitmap z
1440 where go 0 acc = acc
1441 go bm acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1442 where
1443 !bitmask = lowestBitMask bm
1444 !bi = indexOfTheOnlyBit bitmask
1445
1446 foldl'Bits prefix f z bitmap = go bitmap z
1447 where go 0 acc = acc
1448 go bm !acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1449 where !bitmask = lowestBitMask bm
1450 !bi = indexOfTheOnlyBit bitmask
1451
1452 foldrBits prefix f z bitmap = go (revNat bitmap) z
1453 where go 0 acc = acc
1454 go bm acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1455 where !bitmask = lowestBitMask bm
1456 !bi = indexOfTheOnlyBit bitmask
1457
1458
1459 foldr'Bits prefix f z bitmap = go (revNat bitmap) z
1460 where go 0 acc = acc
1461 go bm !acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1462 where !bitmask = lowestBitMask bm
1463 !bi = indexOfTheOnlyBit bitmask
1464
1465 #else
1466 {----------------------------------------------------------------------
1467 In general case we use logarithmic implementation of
1468 lowestBitSet and highestBitSet, which works up to bit sizes of 64.
1469
1470 Folds are linear scans.
1471 ----------------------------------------------------------------------}
1472
1473 lowestBitSet n0 =
1474 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32)
1475 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1)
1476 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2)
1477 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3)
1478 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4)
1479 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5
1480 in b6
1481
1482 highestBitSet n0 =
1483 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0)
1484 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1)
1485 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2)
1486 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3)
1487 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4)
1488 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5
1489 in b6
1490
1491 foldlBits prefix f z bm = let lb = lowestBitSet bm
1492 in go (prefix+lb) z (bm `shiftRL` lb)
1493 where go !_ acc 0 = acc
1494 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1495 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1496
1497 foldl'Bits prefix f z bm = let lb = lowestBitSet bm
1498 in go (prefix+lb) z (bm `shiftRL` lb)
1499 where go !_ !acc 0 = acc
1500 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1501 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1502
1503 foldrBits prefix f z bm = let lb = lowestBitSet bm
1504 in go (prefix+lb) (bm `shiftRL` lb)
1505 where go !_ 0 = z
1506 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1))
1507 | otherwise = go (bi + 1) (n `shiftRL` 1)
1508
1509 foldr'Bits prefix f z bm = let lb = lowestBitSet bm
1510 in go (prefix+lb) (bm `shiftRL` lb)
1511 where
1512 go !_ 0 = z
1513 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
1514 | otherwise = go (bi + 1) (n `shiftRL` 1)
1515
1516 #endif
1517
1518
1519 {--------------------------------------------------------------------
1520 Utilities
1521 --------------------------------------------------------------------}
1522
1523 -- | /O(1)/. Decompose a set into pieces based on the structure of the underlying
1524 -- tree. This function is useful for consuming a set in parallel.
1525 --
1526 -- No guarantee is made as to the sizes of the pieces; an internal, but
1527 -- deterministic process determines this. However, it is guaranteed that the
1528 -- pieces returned will be in ascending order (all elements in the first submap
1529 -- less than all elements in the second, and so on).
1530 --
1531 -- Examples:
1532 --
1533 -- > splitRoot (fromList [1..120]) == [fromList [1..63],fromList [64..120]]
1534 -- > splitRoot empty == []
1535 --
1536 -- Note that the current implementation does not return more than two subsets,
1537 -- but you should not depend on this behaviour because it can change in the
1538 -- future without notice. Also, the current version does not continue
1539 -- splitting all the way to individual singleton sets -- it stops at some
1540 -- point.
1541 splitRoot :: IntSet -> [IntSet]
1542 splitRoot Nil = []
1543 -- NOTE: we don't currently split below Tip, but we could.
1544 splitRoot x@(Tip _ _) = [x]
1545 splitRoot (Bin _ m l r) | m < 0 = [r, l]
1546 | otherwise = [l, r]
1547 {-# INLINE splitRoot #-}