remove foldlStrict, generalize type of unions, see #520 (#524)
[packages/containers.git] / Data / IntSet / Internal.hs
1 {-# LANGUAGE CPP #-}
2 {-# LANGUAGE BangPatterns #-}
3 #if __GLASGOW_HASKELL__
4 {-# LANGUAGE MagicHash, DeriveDataTypeable, StandaloneDeriving #-}
5 #endif
6 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
7 {-# LANGUAGE Trustworthy #-}
8 #endif
9 #if __GLASGOW_HASKELL__ >= 708
10 {-# LANGUAGE TypeFamilies #-}
11 #endif
12
13 {-# OPTIONS_HADDOCK not-home #-}
14
15 #include "containers.h"
16
17 -----------------------------------------------------------------------------
18 -- |
19 -- Module : Data.IntSet.Internal
20 -- Copyright : (c) Daan Leijen 2002
21 -- (c) Joachim Breitner 2011
22 -- License : BSD-style
23 -- Maintainer : libraries@haskell.org
24 -- Portability : portable
25 --
26 -- = WARNING
27 --
28 -- This module is considered __internal__.
29 --
30 -- The Package Versioning Policy __does not apply__.
31 --
32 -- This contents of this module may change __in any way whatsoever__
33 -- and __without any warning__ between minor versions of this package.
34 --
35 -- Authors importing this module are expected to track development
36 -- closely.
37 --
38 -- = Description
39 --
40 -- An efficient implementation of integer sets.
41 --
42 -- These modules are intended to be imported qualified, to avoid name
43 -- clashes with Prelude functions, e.g.
44 --
45 -- > import Data.IntSet (IntSet)
46 -- > import qualified Data.IntSet as IntSet
47 --
48 -- The implementation is based on /big-endian patricia trees/. This data
49 -- structure performs especially well on binary operations like 'union'
50 -- and 'intersection'. However, my benchmarks show that it is also
51 -- (much) faster on insertions and deletions when compared to a generic
52 -- size-balanced set implementation (see "Data.Set").
53 --
54 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
55 -- Workshop on ML, September 1998, pages 77-86,
56 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
57 --
58 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
59 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
60 -- October 1968, pages 514-534.
61 --
62 -- Additionally, this implementation places bitmaps in the leaves of the tree.
63 -- Their size is the natural size of a machine word (32 or 64 bits) and greatly
64 -- reduce memory footprint and execution times for dense sets, e.g. sets where
65 -- it is likely that many values lie close to each other. The asymptotics are
66 -- not affected by this optimization.
67 --
68 -- Many operations have a worst-case complexity of /O(min(n,W))/.
69 -- This means that the operation can become linear in the number of
70 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
71 -- (32 or 64).
72 --
73 -- @since 0.5.9
74 -----------------------------------------------------------------------------
75
76 -- [Note: INLINE bit fiddling]
77 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
78 -- It is essential that the bit fiddling functions like mask, zero, branchMask
79 -- etc are inlined. If they do not, the memory allocation skyrockets. The GHC
80 -- usually gets it right, but it is disastrous if it does not. Therefore we
81 -- explicitly mark these functions INLINE.
82
83
84 -- [Note: Local 'go' functions and capturing]
85 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
86 -- Care must be taken when using 'go' function which captures an argument.
87 -- Sometimes (for example when the argument is passed to a data constructor,
88 -- as in insert), GHC heap-allocates more than necessary. Therefore C-- code
89 -- must be checked for increased allocation when creating and modifying such
90 -- functions.
91
92
93 -- [Note: Order of constructors]
94 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
95 -- The order of constructors of IntSet matters when considering performance.
96 -- Currently in GHC 7.0, when type has 3 constructors, they are matched from
97 -- the first to the last -- the best performance is achieved when the
98 -- constructors are ordered by frequency.
99 -- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil
100 -- improves the benchmark by circa 10%.
101
102 module Data.IntSet.Internal (
103 -- * Set type
104 IntSet(..), Key -- instance Eq,Show
105 , Prefix, Mask, BitMap
106
107 -- * Operators
108 , (\\)
109
110 -- * Query
111 , null
112 , size
113 , member
114 , notMember
115 , lookupLT
116 , lookupGT
117 , lookupLE
118 , lookupGE
119 , isSubsetOf
120 , isProperSubsetOf
121 , disjoint
122
123 -- * Construction
124 , empty
125 , singleton
126 , insert
127 , delete
128
129 -- * Combine
130 , union
131 , unions
132 , difference
133 , intersection
134
135 -- * Filter
136 , filter
137 , partition
138 , split
139 , splitMember
140 , splitRoot
141
142 -- * Map
143 , map
144
145 -- * Folds
146 , foldr
147 , foldl
148 -- ** Strict folds
149 , foldr'
150 , foldl'
151 -- ** Legacy folds
152 , fold
153
154 -- * Min\/Max
155 , findMin
156 , findMax
157 , deleteMin
158 , deleteMax
159 , deleteFindMin
160 , deleteFindMax
161 , maxView
162 , minView
163
164 -- * Conversion
165
166 -- ** List
167 , elems
168 , toList
169 , fromList
170
171 -- ** Ordered list
172 , toAscList
173 , toDescList
174 , fromAscList
175 , fromDistinctAscList
176
177 -- * Debugging
178 , showTree
179 , showTreeWith
180
181 -- * Internals
182 , match
183 , suffixBitMask
184 , prefixBitMask
185 , bitmapOf
186 , zero
187 ) where
188
189 import Control.DeepSeq (NFData(rnf))
190 import Data.Bits
191 import qualified Data.List as List
192 import Data.Maybe (fromMaybe)
193 #if !MIN_VERSION_base(4,8,0)
194 import Data.Monoid (Monoid(..))
195 import Data.Word (Word)
196 #endif
197 #if MIN_VERSION_base(4,9,0)
198 import Data.Semigroup (Semigroup((<>), stimes), stimesIdempotentMonoid)
199 #endif
200 import Data.Typeable
201 import Prelude hiding (filter, foldr, foldl, null, map)
202
203 import Utils.Containers.Internal.BitUtil
204 import Utils.Containers.Internal.StrictPair
205
206 #if __GLASGOW_HASKELL__
207 import Data.Data (Data(..), Constr, mkConstr, constrIndex, Fixity(Prefix), DataType, mkDataType)
208 import Text.Read
209 #endif
210
211 #if __GLASGOW_HASKELL__
212 import GHC.Exts (Int(..), build)
213 #if __GLASGOW_HASKELL__ >= 708
214 import qualified GHC.Exts as GHCExts
215 #endif
216 import GHC.Prim (indexInt8OffAddr#)
217 #endif
218
219 import qualified Data.Foldable as Foldable
220 import Data.Foldable (Foldable())
221
222 infixl 9 \\{-This comment teaches CPP correct behaviour -}
223
224 -- A "Nat" is a natural machine word (an unsigned Int)
225 type Nat = Word
226
227 natFromInt :: Int -> Nat
228 natFromInt i = fromIntegral i
229 {-# INLINE natFromInt #-}
230
231 intFromNat :: Nat -> Int
232 intFromNat w = fromIntegral w
233 {-# INLINE intFromNat #-}
234
235 {--------------------------------------------------------------------
236 Operators
237 --------------------------------------------------------------------}
238 -- | /O(n+m)/. See 'difference'.
239 (\\) :: IntSet -> IntSet -> IntSet
240 m1 \\ m2 = difference m1 m2
241
242 {--------------------------------------------------------------------
243 Types
244 --------------------------------------------------------------------}
245
246 -- | A set of integers.
247
248 -- See Note: Order of constructors
249 data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
250 -- Invariant: Nil is never found as a child of Bin.
251 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
252 -- two elements of the set differ.
253 -- Invariant: Prefix is the common high-order bits that all elements share to
254 -- the left of the Mask bit.
255 -- Invariant: In Bin prefix mask left right, left consists of the elements that
256 -- don't have the mask bit set; right is all the elements that do.
257 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
258 -- Invariant: The Prefix is zero for the last 5 (on 32 bit arches) or 6 bits
259 -- (on 64 bit arches). The values of the set represented by a tip
260 -- are the prefix plus the indices of the set bits in the bit map.
261 | Nil
262
263 -- A number stored in a set is stored as
264 -- * Prefix (all but last 5-6 bits) and
265 -- * BitMap (last 5-6 bits stored as a bitmask)
266 -- Last 5-6 bits are called a Suffix.
267
268 type Prefix = Int
269 type Mask = Int
270 type BitMap = Word
271 type Key = Int
272
273 instance Monoid IntSet where
274 mempty = empty
275 mconcat = unions
276 #if !(MIN_VERSION_base(4,9,0))
277 mappend = union
278 #else
279 mappend = (<>)
280
281 -- | @since 0.5.7
282 instance Semigroup IntSet where
283 (<>) = union
284 stimes = stimesIdempotentMonoid
285 #endif
286
287 #if __GLASGOW_HASKELL__
288
289 {--------------------------------------------------------------------
290 A Data instance
291 --------------------------------------------------------------------}
292
293 -- This instance preserves data abstraction at the cost of inefficiency.
294 -- We provide limited reflection services for the sake of data abstraction.
295
296 instance Data IntSet where
297 gfoldl f z is = z fromList `f` (toList is)
298 toConstr _ = fromListConstr
299 gunfold k z c = case constrIndex c of
300 1 -> k (z fromList)
301 _ -> error "gunfold"
302 dataTypeOf _ = intSetDataType
303
304 fromListConstr :: Constr
305 fromListConstr = mkConstr intSetDataType "fromList" [] Prefix
306
307 intSetDataType :: DataType
308 intSetDataType = mkDataType "Data.IntSet.Internal.IntSet" [fromListConstr]
309
310 #endif
311
312 {--------------------------------------------------------------------
313 Query
314 --------------------------------------------------------------------}
315 -- | /O(1)/. Is the set empty?
316 null :: IntSet -> Bool
317 null Nil = True
318 null _ = False
319 {-# INLINE null #-}
320
321 -- | /O(n)/. Cardinality of the set.
322 size :: IntSet -> Int
323 size = go 0
324 where
325 go !acc (Bin _ _ l r) = go (go acc l) r
326 go acc (Tip _ bm) = acc + bitcount 0 bm
327 go acc Nil = acc
328
329 -- | /O(min(n,W))/. Is the value a member of the set?
330
331 -- See Note: Local 'go' functions and capturing.
332 member :: Key -> IntSet -> Bool
333 member !x = go
334 where
335 go (Bin p m l r)
336 | nomatch x p m = False
337 | zero x m = go l
338 | otherwise = go r
339 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0
340 go Nil = False
341
342 -- | /O(min(n,W))/. Is the element not in the set?
343 notMember :: Key -> IntSet -> Bool
344 notMember k = not . member k
345
346 -- | /O(log n)/. Find largest element smaller than the given one.
347 --
348 -- > lookupLT 3 (fromList [3, 5]) == Nothing
349 -- > lookupLT 5 (fromList [3, 5]) == Just 3
350
351 -- See Note: Local 'go' functions and capturing.
352 lookupLT :: Key -> IntSet -> Maybe Key
353 lookupLT !x t = case t of
354 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
355 _ -> go Nil t
356 where
357 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
358 | zero x m = go def l
359 | otherwise = go l r
360 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
361 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT
362 | otherwise = unsafeFindMax def
363 where maskLT = (bitmapOf x - 1) .&. bm
364 go def Nil = unsafeFindMax def
365
366
367 -- | /O(log n)/. Find smallest element greater than the given one.
368 --
369 -- > lookupGT 4 (fromList [3, 5]) == Just 5
370 -- > lookupGT 5 (fromList [3, 5]) == Nothing
371
372 -- See Note: Local 'go' functions and capturing.
373 lookupGT :: Key -> IntSet -> Maybe Key
374 lookupGT !x t = case t of
375 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
376 _ -> go Nil t
377 where
378 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
379 | zero x m = go r l
380 | otherwise = go def r
381 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
382 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT
383 | otherwise = unsafeFindMin def
384 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm
385 go def Nil = unsafeFindMin def
386
387
388 -- | /O(log n)/. Find largest element smaller or equal to the given one.
389 --
390 -- > lookupLE 2 (fromList [3, 5]) == Nothing
391 -- > lookupLE 4 (fromList [3, 5]) == Just 3
392 -- > lookupLE 5 (fromList [3, 5]) == Just 5
393
394 -- See Note: Local 'go' functions and capturing.
395 lookupLE :: Key -> IntSet -> Maybe Key
396 lookupLE !x t = case t of
397 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
398 _ -> go Nil t
399 where
400 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
401 | zero x m = go def l
402 | otherwise = go l r
403 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
404 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE
405 | otherwise = unsafeFindMax def
406 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm
407 go def Nil = unsafeFindMax def
408
409
410 -- | /O(log n)/. Find smallest element greater or equal to the given one.
411 --
412 -- > lookupGE 3 (fromList [3, 5]) == Just 3
413 -- > lookupGE 4 (fromList [3, 5]) == Just 5
414 -- > lookupGE 6 (fromList [3, 5]) == Nothing
415
416 -- See Note: Local 'go' functions and capturing.
417 lookupGE :: Key -> IntSet -> Maybe Key
418 lookupGE !x t = case t of
419 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
420 _ -> go Nil t
421 where
422 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
423 | zero x m = go r l
424 | otherwise = go def r
425 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
426 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE
427 | otherwise = unsafeFindMin def
428 where maskGE = (- (bitmapOf x)) .&. bm
429 go def Nil = unsafeFindMin def
430
431
432
433 -- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is
434 -- given, it has m > 0.
435 unsafeFindMin :: IntSet -> Maybe Key
436 unsafeFindMin Nil = Nothing
437 unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm
438 unsafeFindMin (Bin _ _ l _) = unsafeFindMin l
439
440 -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is
441 -- given, it has m > 0.
442 unsafeFindMax :: IntSet -> Maybe Key
443 unsafeFindMax Nil = Nothing
444 unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm
445 unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r
446
447 {--------------------------------------------------------------------
448 Construction
449 --------------------------------------------------------------------}
450 -- | /O(1)/. The empty set.
451 empty :: IntSet
452 empty
453 = Nil
454 {-# INLINE empty #-}
455
456 -- | /O(1)/. A set of one element.
457 singleton :: Key -> IntSet
458 singleton x
459 = Tip (prefixOf x) (bitmapOf x)
460 {-# INLINE singleton #-}
461
462 {--------------------------------------------------------------------
463 Insert
464 --------------------------------------------------------------------}
465 -- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for
466 -- IntSets.
467 insert :: Key -> IntSet -> IntSet
468 insert !x = insertBM (prefixOf x) (bitmapOf x)
469
470 -- Helper function for insert and union.
471 insertBM :: Prefix -> BitMap -> IntSet -> IntSet
472 insertBM !kx !bm t@(Bin p m l r)
473 | nomatch kx p m = link kx (Tip kx bm) p t
474 | zero kx m = Bin p m (insertBM kx bm l) r
475 | otherwise = Bin p m l (insertBM kx bm r)
476 insertBM kx bm t@(Tip kx' bm')
477 | kx' == kx = Tip kx' (bm .|. bm')
478 | otherwise = link kx (Tip kx bm) kx' t
479 insertBM kx bm Nil = Tip kx bm
480
481 -- | /O(min(n,W))/. Delete a value in the set. Returns the
482 -- original set when the value was not present.
483 delete :: Key -> IntSet -> IntSet
484 delete !x = deleteBM (prefixOf x) (bitmapOf x)
485
486 -- Deletes all values mentioned in the BitMap from the set.
487 -- Helper function for delete and difference.
488 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
489 deleteBM !kx !bm t@(Bin p m l r)
490 | nomatch kx p m = t
491 | zero kx m = bin p m (deleteBM kx bm l) r
492 | otherwise = bin p m l (deleteBM kx bm r)
493 deleteBM kx bm t@(Tip kx' bm')
494 | kx' == kx = tip kx (bm' .&. complement bm)
495 | otherwise = t
496 deleteBM _ _ Nil = Nil
497
498
499 {--------------------------------------------------------------------
500 Union
501 --------------------------------------------------------------------}
502 -- | The union of a list of sets.
503 unions :: Foldable f => f IntSet -> IntSet
504 unions xs
505 = Foldable.foldl' union empty xs
506
507
508 -- | /O(n+m)/. The union of two sets.
509 union :: IntSet -> IntSet -> IntSet
510 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
511 | shorter m1 m2 = union1
512 | shorter m2 m1 = union2
513 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
514 | otherwise = link p1 t1 p2 t2
515 where
516 union1 | nomatch p2 p1 m1 = link p1 t1 p2 t2
517 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
518 | otherwise = Bin p1 m1 l1 (union r1 t2)
519
520 union2 | nomatch p1 p2 m2 = link p1 t1 p2 t2
521 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
522 | otherwise = Bin p2 m2 l2 (union t1 r2)
523
524 union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t
525 union t@(Bin _ _ _ _) Nil = t
526 union (Tip kx bm) t = insertBM kx bm t
527 union Nil t = t
528
529
530 {--------------------------------------------------------------------
531 Difference
532 --------------------------------------------------------------------}
533 -- | /O(n+m)/. Difference between two sets.
534 difference :: IntSet -> IntSet -> IntSet
535 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
536 | shorter m1 m2 = difference1
537 | shorter m2 m1 = difference2
538 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
539 | otherwise = t1
540 where
541 difference1 | nomatch p2 p1 m1 = t1
542 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
543 | otherwise = bin p1 m1 l1 (difference r1 t2)
544
545 difference2 | nomatch p1 p2 m2 = t1
546 | zero p1 m2 = difference t1 l2
547 | otherwise = difference t1 r2
548
549 difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t
550 difference t@(Bin _ _ _ _) Nil = t
551
552 difference t1@(Tip kx bm) t2 = differenceTip t2
553 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1
554 | zero kx m2 = differenceTip l2
555 | otherwise = differenceTip r2
556 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2)
557 | otherwise = t1
558 differenceTip Nil = t1
559
560 difference Nil _ = Nil
561
562
563
564 {--------------------------------------------------------------------
565 Intersection
566 --------------------------------------------------------------------}
567 -- | /O(n+m)/. The intersection of two sets.
568 intersection :: IntSet -> IntSet -> IntSet
569 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
570 | shorter m1 m2 = intersection1
571 | shorter m2 m1 = intersection2
572 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
573 | otherwise = Nil
574 where
575 intersection1 | nomatch p2 p1 m1 = Nil
576 | zero p2 m1 = intersection l1 t2
577 | otherwise = intersection r1 t2
578
579 intersection2 | nomatch p1 p2 m2 = Nil
580 | zero p1 m2 = intersection t1 l2
581 | otherwise = intersection t1 r2
582
583 intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1
584 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil
585 | zero kx2 m1 = intersectBM l1
586 | otherwise = intersectBM r1
587 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
588 | otherwise = Nil
589 intersectBM Nil = Nil
590
591 intersection (Bin _ _ _ _) Nil = Nil
592
593 intersection (Tip kx1 bm1) t2 = intersectBM t2
594 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil
595 | zero kx1 m2 = intersectBM l2
596 | otherwise = intersectBM r2
597 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
598 | otherwise = Nil
599 intersectBM Nil = Nil
600
601 intersection Nil _ = Nil
602
603 {--------------------------------------------------------------------
604 Subset
605 --------------------------------------------------------------------}
606 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
607 isProperSubsetOf :: IntSet -> IntSet -> Bool
608 isProperSubsetOf t1 t2
609 = case subsetCmp t1 t2 of
610 LT -> True
611 _ -> False
612
613 subsetCmp :: IntSet -> IntSet -> Ordering
614 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
615 | shorter m1 m2 = GT
616 | shorter m2 m1 = case subsetCmpLt of
617 GT -> GT
618 _ -> LT
619 | p1 == p2 = subsetCmpEq
620 | otherwise = GT -- disjoint
621 where
622 subsetCmpLt | nomatch p1 p2 m2 = GT
623 | zero p1 m2 = subsetCmp t1 l2
624 | otherwise = subsetCmp t1 r2
625 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
626 (GT,_ ) -> GT
627 (_ ,GT) -> GT
628 (EQ,EQ) -> EQ
629 _ -> LT
630
631 subsetCmp (Bin _ _ _ _) _ = GT
632 subsetCmp (Tip kx1 bm1) (Tip kx2 bm2)
633 | kx1 /= kx2 = GT -- disjoint
634 | bm1 == bm2 = EQ
635 | bm1 .&. complement bm2 == 0 = LT
636 | otherwise = GT
637 subsetCmp t1@(Tip kx _) (Bin p m l r)
638 | nomatch kx p m = GT
639 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT
640 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT
641 subsetCmp (Tip _ _) Nil = GT -- disjoint
642 subsetCmp Nil Nil = EQ
643 subsetCmp Nil _ = LT
644
645 -- | /O(n+m)/. Is this a subset?
646 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
647
648 isSubsetOf :: IntSet -> IntSet -> Bool
649 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
650 | shorter m1 m2 = False
651 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
652 else isSubsetOf t1 r2)
653 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
654 isSubsetOf (Bin _ _ _ _) _ = False
655 isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0
656 isSubsetOf t1@(Tip kx _) (Bin p m l r)
657 | nomatch kx p m = False
658 | zero kx m = isSubsetOf t1 l
659 | otherwise = isSubsetOf t1 r
660 isSubsetOf (Tip _ _) Nil = False
661 isSubsetOf Nil _ = True
662
663
664 {--------------------------------------------------------------------
665 Disjoint
666 --------------------------------------------------------------------}
667 -- | /O(n+m)/. Check whether two sets are disjoint (i.e. their intersection
668 -- is empty).
669 --
670 -- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True
671 -- > disjoint (fromList [2,4,6,8]) (fromList [2,3,5,7]) == False
672 -- > disjoint (fromList [1,2]) (fromList [1,2,3,4]) == False
673 -- > disjoint (fromList []) (fromList []) == True
674 --
675 -- @since 0.5.11
676 disjoint :: IntSet -> IntSet -> Bool
677 disjoint t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
678 | shorter m1 m2 = disjoint1
679 | shorter m2 m1 = disjoint2
680 | p1 == p2 = disjoint l1 l2 && disjoint r1 r2
681 | otherwise = True
682 where
683 disjoint1 | nomatch p2 p1 m1 = True
684 | zero p2 m1 = disjoint l1 t2
685 | otherwise = disjoint r1 t2
686
687 disjoint2 | nomatch p1 p2 m2 = True
688 | zero p1 m2 = disjoint t1 l2
689 | otherwise = disjoint t1 r2
690
691 disjoint t1@(Bin _ _ _ _) (Tip kx2 bm2) = disjointBM t1
692 where disjointBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = True
693 | zero kx2 m1 = disjointBM l1
694 | otherwise = disjointBM r1
695 disjointBM (Tip kx1 bm1) | kx1 == kx2 = (bm1 .&. bm2) == 0
696 | otherwise = True
697 disjointBM Nil = True
698
699 disjoint (Bin _ _ _ _) Nil = True
700
701 disjoint (Tip kx1 bm1) t2 = disjointBM t2
702 where disjointBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = True
703 | zero kx1 m2 = disjointBM l2
704 | otherwise = disjointBM r2
705 disjointBM (Tip kx2 bm2) | kx1 == kx2 = (bm1 .&. bm2) == 0
706 | otherwise = True
707 disjointBM Nil = True
708
709 disjoint Nil _ = True
710
711
712 {--------------------------------------------------------------------
713 Filter
714 --------------------------------------------------------------------}
715 -- | /O(n)/. Filter all elements that satisfy some predicate.
716 filter :: (Key -> Bool) -> IntSet -> IntSet
717 filter predicate t
718 = case t of
719 Bin p m l r
720 -> bin p m (filter predicate l) (filter predicate r)
721 Tip kx bm
722 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm)
723 Nil -> Nil
724 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
725 | otherwise = bm
726 {-# INLINE bitPred #-}
727
728 -- | /O(n)/. partition the set according to some predicate.
729 partition :: (Key -> Bool) -> IntSet -> (IntSet,IntSet)
730 partition predicate0 t0 = toPair $ go predicate0 t0
731 where
732 go predicate t
733 = case t of
734 Bin p m l r
735 -> let (l1 :*: l2) = go predicate l
736 (r1 :*: r2) = go predicate r
737 in bin p m l1 r1 :*: bin p m l2 r2
738 Tip kx bm
739 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm
740 in tip kx bm1 :*: tip kx (bm `xor` bm1)
741 Nil -> (Nil :*: Nil)
742 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
743 | otherwise = bm
744 {-# INLINE bitPred #-}
745
746
747 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
748 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
749 -- comprises the elements of @set@ greater than @x@.
750 --
751 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
752 split :: Key -> IntSet -> (IntSet,IntSet)
753 split x t =
754 case t of
755 Bin _ m l r
756 | m < 0 -> if x >= 0 -- handle negative numbers.
757 then case go x l of (lt :*: gt) -> let !lt' = union lt r
758 in (lt', gt)
759 else case go x r of (lt :*: gt) -> let !gt' = union gt l
760 in (lt, gt')
761 _ -> case go x t of
762 (lt :*: gt) -> (lt, gt)
763 where
764 go !x' t'@(Bin p m l r)
765 | match x' p m = if zero x' m
766 then case go x' l of
767 (lt :*: gt) -> lt :*: union gt r
768 else case go x' r of
769 (lt :*: gt) -> union lt l :*: gt
770 | otherwise = if x' < p then (Nil :*: t')
771 else (t' :*: Nil)
772 go x' t'@(Tip kx' bm)
773 | kx' > x' = (Nil :*: t')
774 -- equivalent to kx' > prefixOf x'
775 | kx' < prefixOf x' = (t' :*: Nil)
776 | otherwise = tip kx' (bm .&. lowerBitmap) :*: tip kx' (bm .&. higherBitmap)
777 where lowerBitmap = bitmapOf x' - 1
778 higherBitmap = complement (lowerBitmap + bitmapOf x')
779 go _ Nil = (Nil :*: Nil)
780
781 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
782 -- element was found in the original set.
783 splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet)
784 splitMember x t =
785 case t of
786 Bin _ m l r | m < 0 -> if x >= 0
787 then case go x l of
788 (lt, fnd, gt) -> let !lt' = union lt r
789 in (lt', fnd, gt)
790 else case go x r of
791 (lt, fnd, gt) -> let !gt' = union gt l
792 in (lt, fnd, gt')
793 _ -> go x t
794 where
795 go x' t'@(Bin p m l r)
796 | match x' p m = if zero x' m
797 then case go x' l of
798 (lt, fnd, gt) -> (lt, fnd, union gt r)
799 else case go x' r of
800 (lt, fnd, gt) -> (union lt l, fnd, gt)
801 | otherwise = if x' < p then (Nil, False, t') else (t', False, Nil)
802 go x' t'@(Tip kx' bm)
803 | kx' > x' = (Nil, False, t')
804 -- equivalent to kx' > prefixOf x'
805 | kx' < prefixOf x' = (t', False, Nil)
806 | otherwise = let !lt = tip kx' (bm .&. lowerBitmap)
807 !found = (bm .&. bitmapOfx') /= 0
808 !gt = tip kx' (bm .&. higherBitmap)
809 in (lt, found, gt)
810 where bitmapOfx' = bitmapOf x'
811 lowerBitmap = bitmapOfx' - 1
812 higherBitmap = complement (lowerBitmap + bitmapOfx')
813 go _ Nil = (Nil, False, Nil)
814
815 {----------------------------------------------------------------------
816 Min/Max
817 ----------------------------------------------------------------------}
818
819 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
820 -- stripped of that element, or 'Nothing' if passed an empty set.
821 maxView :: IntSet -> Maybe (Key, IntSet)
822 maxView t =
823 case t of Nil -> Nothing
824 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r)
825 _ -> Just (go t)
826 where
827 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r')
828 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
829 go Nil = error "maxView Nil"
830
831 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
832 -- stripped of that element, or 'Nothing' if passed an empty set.
833 minView :: IntSet -> Maybe (Key, IntSet)
834 minView t =
835 case t of Nil -> Nothing
836 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r')
837 _ -> Just (go t)
838 where
839 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r)
840 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
841 go Nil = error "minView Nil"
842
843 -- | /O(min(n,W))/. Delete and find the minimal element.
844 --
845 -- > deleteFindMin set = (findMin set, deleteMin set)
846 deleteFindMin :: IntSet -> (Key, IntSet)
847 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
848
849 -- | /O(min(n,W))/. Delete and find the maximal element.
850 --
851 -- > deleteFindMax set = (findMax set, deleteMax set)
852 deleteFindMax :: IntSet -> (Key, IntSet)
853 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
854
855
856 -- | /O(min(n,W))/. The minimal element of the set.
857 findMin :: IntSet -> Key
858 findMin Nil = error "findMin: empty set has no minimal element"
859 findMin (Tip kx bm) = kx + lowestBitSet bm
860 findMin (Bin _ m l r)
861 | m < 0 = find r
862 | otherwise = find l
863 where find (Tip kx bm) = kx + lowestBitSet bm
864 find (Bin _ _ l' _) = find l'
865 find Nil = error "findMin Nil"
866
867 -- | /O(min(n,W))/. The maximal element of a set.
868 findMax :: IntSet -> Key
869 findMax Nil = error "findMax: empty set has no maximal element"
870 findMax (Tip kx bm) = kx + highestBitSet bm
871 findMax (Bin _ m l r)
872 | m < 0 = find l
873 | otherwise = find r
874 where find (Tip kx bm) = kx + highestBitSet bm
875 find (Bin _ _ _ r') = find r'
876 find Nil = error "findMax Nil"
877
878
879 -- | /O(min(n,W))/. Delete the minimal element. Returns an empty set if the set is empty.
880 --
881 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
882 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
883 deleteMin :: IntSet -> IntSet
884 deleteMin = maybe Nil snd . minView
885
886 -- | /O(min(n,W))/. Delete the maximal element. Returns an empty set if the set is empty.
887 --
888 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
889 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
890 deleteMax :: IntSet -> IntSet
891 deleteMax = maybe Nil snd . maxView
892
893 {----------------------------------------------------------------------
894 Map
895 ----------------------------------------------------------------------}
896
897 -- | /O(n*min(n,W))/.
898 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
899 --
900 -- It's worth noting that the size of the result may be smaller if,
901 -- for some @(x,y)@, @x \/= y && f x == f y@
902
903 map :: (Key -> Key) -> IntSet -> IntSet
904 map f = fromList . List.map f . toList
905
906 {--------------------------------------------------------------------
907 Fold
908 --------------------------------------------------------------------}
909 -- | /O(n)/. Fold the elements in the set using the given right-associative
910 -- binary operator. This function is an equivalent of 'foldr' and is present
911 -- for compatibility only.
912 --
913 -- /Please note that fold will be deprecated in the future and removed./
914 fold :: (Key -> b -> b) -> b -> IntSet -> b
915 fold = foldr
916 {-# INLINE fold #-}
917
918 -- | /O(n)/. Fold the elements in the set using the given right-associative
919 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
920 --
921 -- For example,
922 --
923 -- > toAscList set = foldr (:) [] set
924 foldr :: (Key -> b -> b) -> b -> IntSet -> b
925 foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
926 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
927 | otherwise -> go (go z r) l
928 _ -> go z t
929 where
930 go z' Nil = z'
931 go z' (Tip kx bm) = foldrBits kx f z' bm
932 go z' (Bin _ _ l r) = go (go z' r) l
933 {-# INLINE foldr #-}
934
935 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
936 -- evaluated before using the result in the next application. This
937 -- function is strict in the starting value.
938 foldr' :: (Key -> b -> b) -> b -> IntSet -> b
939 foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
940 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
941 | otherwise -> go (go z r) l
942 _ -> go z t
943 where
944 go !z' Nil = z'
945 go z' (Tip kx bm) = foldr'Bits kx f z' bm
946 go z' (Bin _ _ l r) = go (go z' r) l
947 {-# INLINE foldr' #-}
948
949 -- | /O(n)/. Fold the elements in the set using the given left-associative
950 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
951 --
952 -- For example,
953 --
954 -- > toDescList set = foldl (flip (:)) [] set
955 foldl :: (a -> Key -> a) -> a -> IntSet -> a
956 foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
957 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
958 | otherwise -> go (go z l) r
959 _ -> go z t
960 where
961 go z' Nil = z'
962 go z' (Tip kx bm) = foldlBits kx f z' bm
963 go z' (Bin _ _ l r) = go (go z' l) r
964 {-# INLINE foldl #-}
965
966 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
967 -- evaluated before using the result in the next application. This
968 -- function is strict in the starting value.
969 foldl' :: (a -> Key -> a) -> a -> IntSet -> a
970 foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
971 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
972 | otherwise -> go (go z l) r
973 _ -> go z t
974 where
975 go !z' Nil = z'
976 go z' (Tip kx bm) = foldl'Bits kx f z' bm
977 go z' (Bin _ _ l r) = go (go z' l) r
978 {-# INLINE foldl' #-}
979
980 {--------------------------------------------------------------------
981 List variations
982 --------------------------------------------------------------------}
983 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
984 -- Subject to list fusion.
985 elems :: IntSet -> [Key]
986 elems
987 = toAscList
988
989 {--------------------------------------------------------------------
990 Lists
991 --------------------------------------------------------------------}
992 #if __GLASGOW_HASKELL__ >= 708
993 -- | @since 0.5.6.2
994 instance GHCExts.IsList IntSet where
995 type Item IntSet = Key
996 fromList = fromList
997 toList = toList
998 #endif
999
1000 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
1001 toList :: IntSet -> [Key]
1002 toList
1003 = toAscList
1004
1005 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list
1006 -- fusion.
1007 toAscList :: IntSet -> [Key]
1008 toAscList = foldr (:) []
1009
1010 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
1011 -- fusion.
1012 toDescList :: IntSet -> [Key]
1013 toDescList = foldl (flip (:)) []
1014
1015 -- List fusion for the list generating functions.
1016 #if __GLASGOW_HASKELL__
1017 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
1018 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
1019 foldrFB :: (Key -> b -> b) -> b -> IntSet -> b
1020 foldrFB = foldr
1021 {-# INLINE[0] foldrFB #-}
1022 foldlFB :: (a -> Key -> a) -> a -> IntSet -> a
1023 foldlFB = foldl
1024 {-# INLINE[0] foldlFB #-}
1025
1026 -- Inline elems and toList, so that we need to fuse only toAscList.
1027 {-# INLINE elems #-}
1028 {-# INLINE toList #-}
1029
1030 -- The fusion is enabled up to phase 2 included. If it does not succeed,
1031 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
1032 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
1033 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
1034 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
1035 -- before phase 0, otherwise the fusion rules would not fire at all.
1036 {-# NOINLINE[0] toAscList #-}
1037 {-# NOINLINE[0] toDescList #-}
1038 {-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
1039 {-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
1040 {-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
1041 {-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
1042 #endif
1043
1044
1045 -- | /O(n*min(n,W))/. Create a set from a list of integers.
1046 fromList :: [Key] -> IntSet
1047 fromList xs
1048 = Foldable.foldl' ins empty xs
1049 where
1050 ins t x = insert x t
1051
1052 -- | /O(n)/. Build a set from an ascending list of elements.
1053 -- /The precondition (input list is ascending) is not checked./
1054 fromAscList :: [Key] -> IntSet
1055 fromAscList [] = Nil
1056 fromAscList (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
1057 where
1058 combineEq x' [] = [x']
1059 combineEq x' (x:xs)
1060 | x==x' = combineEq x' xs
1061 | otherwise = x' : combineEq x xs
1062
1063 -- | /O(n)/. Build a set from an ascending list of distinct elements.
1064 -- /The precondition (input list is strictly ascending) is not checked./
1065 fromDistinctAscList :: [Key] -> IntSet
1066 fromDistinctAscList [] = Nil
1067 fromDistinctAscList (z0 : zs0) = work (prefixOf z0) (bitmapOf z0) zs0 Nada
1068 where
1069 -- 'work' accumulates all values that go into one tip, before passing this Tip
1070 -- to 'reduce'
1071 work kx bm [] stk = finish kx (Tip kx bm) stk
1072 work kx bm (z:zs) stk | kx == prefixOf z = work kx (bm .|. bitmapOf z) zs stk
1073 work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
1074
1075 reduce z zs _ px tx Nada = work (prefixOf z) (bitmapOf z) zs (Push px tx Nada)
1076 reduce z zs m px tx stk@(Push py ty stk') =
1077 let mxy = branchMask px py
1078 pxy = mask px mxy
1079 in if shorter m mxy
1080 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
1081 else work (prefixOf z) (bitmapOf z) zs (Push px tx stk)
1082
1083 finish _ t Nada = t
1084 finish px tx (Push py ty stk) = finish p (link py ty px tx) stk
1085 where m = branchMask px py
1086 p = mask px m
1087
1088 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
1089
1090
1091 {--------------------------------------------------------------------
1092 Eq
1093 --------------------------------------------------------------------}
1094 instance Eq IntSet where
1095 t1 == t2 = equal t1 t2
1096 t1 /= t2 = nequal t1 t2
1097
1098 equal :: IntSet -> IntSet -> Bool
1099 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1100 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
1101 equal (Tip kx1 bm1) (Tip kx2 bm2)
1102 = kx1 == kx2 && bm1 == bm2
1103 equal Nil Nil = True
1104 equal _ _ = False
1105
1106 nequal :: IntSet -> IntSet -> Bool
1107 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1108 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
1109 nequal (Tip kx1 bm1) (Tip kx2 bm2)
1110 = kx1 /= kx2 || bm1 /= bm2
1111 nequal Nil Nil = False
1112 nequal _ _ = True
1113
1114 {--------------------------------------------------------------------
1115 Ord
1116 --------------------------------------------------------------------}
1117
1118 instance Ord IntSet where
1119 compare s1 s2 = compare (toAscList s1) (toAscList s2)
1120 -- tentative implementation. See if more efficient exists.
1121
1122 {--------------------------------------------------------------------
1123 Show
1124 --------------------------------------------------------------------}
1125 instance Show IntSet where
1126 showsPrec p xs = showParen (p > 10) $
1127 showString "fromList " . shows (toList xs)
1128
1129 {--------------------------------------------------------------------
1130 Read
1131 --------------------------------------------------------------------}
1132 instance Read IntSet where
1133 #ifdef __GLASGOW_HASKELL__
1134 readPrec = parens $ prec 10 $ do
1135 Ident "fromList" <- lexP
1136 xs <- readPrec
1137 return (fromList xs)
1138
1139 readListPrec = readListPrecDefault
1140 #else
1141 readsPrec p = readParen (p > 10) $ \ r -> do
1142 ("fromList",s) <- lex r
1143 (xs,t) <- reads s
1144 return (fromList xs,t)
1145 #endif
1146
1147 {--------------------------------------------------------------------
1148 Typeable
1149 --------------------------------------------------------------------}
1150
1151 INSTANCE_TYPEABLE0(IntSet)
1152
1153 {--------------------------------------------------------------------
1154 NFData
1155 --------------------------------------------------------------------}
1156
1157 -- The IntSet constructors consist only of strict fields of Ints and
1158 -- IntSets, thus the default NFData instance which evaluates to whnf
1159 -- should suffice
1160 instance NFData IntSet where rnf x = seq x ()
1161
1162 {--------------------------------------------------------------------
1163 Debugging
1164 --------------------------------------------------------------------}
1165 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1166 -- in a compressed, hanging format.
1167 showTree :: IntSet -> String
1168 showTree s
1169 = showTreeWith True False s
1170
1171
1172 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
1173 the tree that implements the set. If @hang@ is
1174 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
1175 @wide@ is 'True', an extra wide version is shown.
1176 -}
1177 showTreeWith :: Bool -> Bool -> IntSet -> String
1178 showTreeWith hang wide t
1179 | hang = (showsTreeHang wide [] t) ""
1180 | otherwise = (showsTree wide [] [] t) ""
1181
1182 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
1183 showsTree wide lbars rbars t
1184 = case t of
1185 Bin p m l r
1186 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1187 showWide wide rbars .
1188 showsBars lbars . showString (showBin p m) . showString "\n" .
1189 showWide wide lbars .
1190 showsTree wide (withEmpty lbars) (withBar lbars) l
1191 Tip kx bm
1192 -> showsBars lbars . showString " " . shows kx . showString " + " .
1193 showsBitMap bm . showString "\n"
1194 Nil -> showsBars lbars . showString "|\n"
1195
1196 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
1197 showsTreeHang wide bars t
1198 = case t of
1199 Bin p m l r
1200 -> showsBars bars . showString (showBin p m) . showString "\n" .
1201 showWide wide bars .
1202 showsTreeHang wide (withBar bars) l .
1203 showWide wide bars .
1204 showsTreeHang wide (withEmpty bars) r
1205 Tip kx bm
1206 -> showsBars bars . showString " " . shows kx . showString " + " .
1207 showsBitMap bm . showString "\n"
1208 Nil -> showsBars bars . showString "|\n"
1209
1210 showBin :: Prefix -> Mask -> String
1211 showBin _ _
1212 = "*" -- ++ show (p,m)
1213
1214 showWide :: Bool -> [String] -> String -> String
1215 showWide wide bars
1216 | wide = showString (concat (reverse bars)) . showString "|\n"
1217 | otherwise = id
1218
1219 showsBars :: [String] -> ShowS
1220 showsBars [] = id
1221 showsBars bars = showString (concat (reverse (tail bars))) . showString node
1222
1223 showsBitMap :: Word -> ShowS
1224 showsBitMap = showString . showBitMap
1225
1226 showBitMap :: Word -> String
1227 showBitMap w = show $ foldrBits 0 (:) [] w
1228
1229 node :: String
1230 node = "+--"
1231
1232 withBar, withEmpty :: [String] -> [String]
1233 withBar bars = "| ":bars
1234 withEmpty bars = " ":bars
1235
1236
1237 {--------------------------------------------------------------------
1238 Helpers
1239 --------------------------------------------------------------------}
1240 {--------------------------------------------------------------------
1241 Link
1242 --------------------------------------------------------------------}
1243 link :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
1244 link p1 t1 p2 t2
1245 | zero p1 m = Bin p m t1 t2
1246 | otherwise = Bin p m t2 t1
1247 where
1248 m = branchMask p1 p2
1249 p = mask p1 m
1250 {-# INLINE link #-}
1251
1252 {--------------------------------------------------------------------
1253 @bin@ assures that we never have empty trees within a tree.
1254 --------------------------------------------------------------------}
1255 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
1256 bin _ _ l Nil = l
1257 bin _ _ Nil r = r
1258 bin p m l r = Bin p m l r
1259 {-# INLINE bin #-}
1260
1261 {--------------------------------------------------------------------
1262 @tip@ assures that we never have empty bitmaps within a tree.
1263 --------------------------------------------------------------------}
1264 tip :: Prefix -> BitMap -> IntSet
1265 tip _ 0 = Nil
1266 tip kx bm = Tip kx bm
1267 {-# INLINE tip #-}
1268
1269
1270 {----------------------------------------------------------------------
1271 Functions that generate Prefix and BitMap of a Key or a Suffix.
1272 ----------------------------------------------------------------------}
1273
1274 suffixBitMask :: Int
1275 #if MIN_VERSION_base(4,7,0)
1276 suffixBitMask = finiteBitSize (undefined::Word) - 1
1277 #else
1278 suffixBitMask = bitSize (undefined::Word) - 1
1279 #endif
1280 {-# INLINE suffixBitMask #-}
1281
1282 prefixBitMask :: Int
1283 prefixBitMask = complement suffixBitMask
1284 {-# INLINE prefixBitMask #-}
1285
1286 prefixOf :: Int -> Prefix
1287 prefixOf x = x .&. prefixBitMask
1288 {-# INLINE prefixOf #-}
1289
1290 suffixOf :: Int -> Int
1291 suffixOf x = x .&. suffixBitMask
1292 {-# INLINE suffixOf #-}
1293
1294 bitmapOfSuffix :: Int -> BitMap
1295 bitmapOfSuffix s = 1 `shiftLL` s
1296 {-# INLINE bitmapOfSuffix #-}
1297
1298 bitmapOf :: Int -> BitMap
1299 bitmapOf x = bitmapOfSuffix (suffixOf x)
1300 {-# INLINE bitmapOf #-}
1301
1302
1303 {--------------------------------------------------------------------
1304 Endian independent bit twiddling
1305 --------------------------------------------------------------------}
1306 -- Returns True iff the bits set in i and the Mask m are disjoint.
1307 zero :: Int -> Mask -> Bool
1308 zero i m
1309 = (natFromInt i) .&. (natFromInt m) == 0
1310 {-# INLINE zero #-}
1311
1312 nomatch,match :: Int -> Prefix -> Mask -> Bool
1313 nomatch i p m
1314 = (mask i m) /= p
1315 {-# INLINE nomatch #-}
1316
1317 match i p m
1318 = (mask i m) == p
1319 {-# INLINE match #-}
1320
1321 -- Suppose a is largest such that 2^a divides 2*m.
1322 -- Then mask i m is i with the low a bits zeroed out.
1323 mask :: Int -> Mask -> Prefix
1324 mask i m
1325 = maskW (natFromInt i) (natFromInt m)
1326 {-# INLINE mask #-}
1327
1328 {--------------------------------------------------------------------
1329 Big endian operations
1330 --------------------------------------------------------------------}
1331 maskW :: Nat -> Nat -> Prefix
1332 maskW i m
1333 = intFromNat (i .&. (complement (m-1) `xor` m))
1334 {-# INLINE maskW #-}
1335
1336 shorter :: Mask -> Mask -> Bool
1337 shorter m1 m2
1338 = (natFromInt m1) > (natFromInt m2)
1339 {-# INLINE shorter #-}
1340
1341 branchMask :: Prefix -> Prefix -> Mask
1342 branchMask p1 p2
1343 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
1344 {-# INLINE branchMask #-}
1345
1346 {----------------------------------------------------------------------
1347 To get best performance, we provide fast implementations of
1348 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC.
1349 If the intel bsf and bsr instructions ever become GHC primops,
1350 this code should be reimplemented using these.
1351
1352 Performance of this code is crucial for folds, toList, filter, partition.
1353
1354 The signatures of methods in question are placed after this comment.
1355 ----------------------------------------------------------------------}
1356
1357 lowestBitSet :: Nat -> Int
1358 highestBitSet :: Nat -> Int
1359 foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1360 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1361 foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1362 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1363
1364 {-# INLINE lowestBitSet #-}
1365 {-# INLINE highestBitSet #-}
1366 {-# INLINE foldlBits #-}
1367 {-# INLINE foldl'Bits #-}
1368 {-# INLINE foldrBits #-}
1369 {-# INLINE foldr'Bits #-}
1370
1371 #if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64)
1372 {----------------------------------------------------------------------
1373 For lowestBitSet we use wordsize-dependant implementation based on
1374 multiplication and DeBrujn indeces, which was proposed by Edward Kmett
1375 <http://haskell.org/pipermail/libraries/2011-September/016749.html>
1376
1377 The core of this implementation is fast indexOfTheOnlyBit,
1378 which is given a Nat with exactly one bit set, and returns
1379 its index.
1380
1381 Lot of effort was put in these implementations, please benchmark carefully
1382 before changing this code.
1383 ----------------------------------------------------------------------}
1384
1385 indexOfTheOnlyBit :: Nat -> Int
1386 {-# INLINE indexOfTheOnlyBit #-}
1387 indexOfTheOnlyBit bitmask =
1388 I# (lsbArray `indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))
1389 where unboxInt (I# i) = i
1390 #if WORD_SIZE_IN_BITS==32
1391 magic = 0x077CB531
1392 offset = 27
1393 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"#
1394 #else
1395 magic = 0x07EDD5E59A4E28C2
1396 offset = 58
1397 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"#
1398 #endif
1399 -- The lsbArray gets inlined to every call site of indexOfTheOnlyBit.
1400 -- That cannot be easily avoided, as GHC forbids top-level Addr# literal.
1401 -- One could go around that by supplying getLsbArray :: () -> Addr# marked
1402 -- as NOINLINE. But the code size of calling it and processing the result
1403 -- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array
1404 -- is actually improvement on 32-bit and only a 8B size increase on 64-bit.
1405
1406 lowestBitMask :: Nat -> Nat
1407 lowestBitMask x = x .&. negate x
1408 {-# INLINE lowestBitMask #-}
1409
1410 -- Reverse the order of bits in the Nat.
1411 revNat :: Nat -> Nat
1412 #if WORD_SIZE_IN_BITS==32
1413 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of
1414 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of
1415 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of
1416 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of
1417 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16);
1418 #else
1419 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of
1420 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of
1421 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of
1422 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of
1423 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of
1424 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
1425 #endif
1426
1427 lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x)
1428
1429 highestBitSet x = indexOfTheOnlyBit (highestBitMask x)
1430
1431 foldlBits prefix f z bitmap = go bitmap z
1432 where go 0 acc = acc
1433 go bm acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1434 where
1435 !bitmask = lowestBitMask bm
1436 !bi = indexOfTheOnlyBit bitmask
1437
1438 foldl'Bits prefix f z bitmap = go bitmap z
1439 where go 0 acc = acc
1440 go bm !acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1441 where !bitmask = lowestBitMask bm
1442 !bi = indexOfTheOnlyBit bitmask
1443
1444 foldrBits prefix f z bitmap = go (revNat bitmap) z
1445 where go 0 acc = acc
1446 go bm acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1447 where !bitmask = lowestBitMask bm
1448 !bi = indexOfTheOnlyBit bitmask
1449
1450
1451 foldr'Bits prefix f z bitmap = go (revNat bitmap) z
1452 where go 0 acc = acc
1453 go bm !acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1454 where !bitmask = lowestBitMask bm
1455 !bi = indexOfTheOnlyBit bitmask
1456
1457 #else
1458 {----------------------------------------------------------------------
1459 In general case we use logarithmic implementation of
1460 lowestBitSet and highestBitSet, which works up to bit sizes of 64.
1461
1462 Folds are linear scans.
1463 ----------------------------------------------------------------------}
1464
1465 lowestBitSet n0 =
1466 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32)
1467 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1)
1468 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2)
1469 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3)
1470 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4)
1471 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5
1472 in b6
1473
1474 highestBitSet n0 =
1475 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0)
1476 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1)
1477 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2)
1478 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3)
1479 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4)
1480 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5
1481 in b6
1482
1483 foldlBits prefix f z bm = let lb = lowestBitSet bm
1484 in go (prefix+lb) z (bm `shiftRL` lb)
1485 where go !_ acc 0 = acc
1486 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1487 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1488
1489 foldl'Bits prefix f z bm = let lb = lowestBitSet bm
1490 in go (prefix+lb) z (bm `shiftRL` lb)
1491 where go !_ !acc 0 = acc
1492 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1493 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1494
1495 foldrBits prefix f z bm = let lb = lowestBitSet bm
1496 in go (prefix+lb) (bm `shiftRL` lb)
1497 where go !_ 0 = z
1498 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1))
1499 | otherwise = go (bi + 1) (n `shiftRL` 1)
1500
1501 foldr'Bits prefix f z bm = let lb = lowestBitSet bm
1502 in go (prefix+lb) (bm `shiftRL` lb)
1503 where
1504 go !_ 0 = z
1505 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
1506 | otherwise = go (bi + 1) (n `shiftRL` 1)
1507
1508 #endif
1509
1510
1511 {--------------------------------------------------------------------
1512 Utilities
1513 --------------------------------------------------------------------}
1514
1515 -- | /O(1)/. Decompose a set into pieces based on the structure of the underlying
1516 -- tree. This function is useful for consuming a set in parallel.
1517 --
1518 -- No guarantee is made as to the sizes of the pieces; an internal, but
1519 -- deterministic process determines this. However, it is guaranteed that the
1520 -- pieces returned will be in ascending order (all elements in the first submap
1521 -- less than all elements in the second, and so on).
1522 --
1523 -- Examples:
1524 --
1525 -- > splitRoot (fromList [1..120]) == [fromList [1..63],fromList [64..120]]
1526 -- > splitRoot empty == []
1527 --
1528 -- Note that the current implementation does not return more than two subsets,
1529 -- but you should not depend on this behaviour because it can change in the
1530 -- future without notice. Also, the current version does not continue
1531 -- splitting all the way to individual singleton sets -- it stops at some
1532 -- point.
1533 splitRoot :: IntSet -> [IntSet]
1534 splitRoot Nil = []
1535 -- NOTE: we don't currently split below Tip, but we could.
1536 splitRoot x@(Tip _ _) = [x]
1537 splitRoot (Bin _ m l r) | m < 0 = [r, l]
1538 | otherwise = [l, r]
1539 {-# INLINE splitRoot #-}