Refactor internal modules (#324)
[packages/containers.git] / Data / IntSet / Internal.hs
1 {-# LANGUAGE CPP #-}
2 {-# LANGUAGE BangPatterns #-}
3 #if __GLASGOW_HASKELL__
4 {-# LANGUAGE MagicHash, DeriveDataTypeable, StandaloneDeriving #-}
5 #endif
6 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
7 {-# LANGUAGE Trustworthy #-}
8 #endif
9 #if __GLASGOW_HASKELL__ >= 708
10 {-# LANGUAGE TypeFamilies #-}
11 #endif
12
13 #include "containers.h"
14
15 -----------------------------------------------------------------------------
16 -- |
17 -- Module : Data.IntSet.Internal
18 -- Copyright : (c) Daan Leijen 2002
19 -- (c) Joachim Breitner 2011
20 -- License : BSD-style
21 -- Maintainer : libraries@haskell.org
22 -- Stability : provisional
23 -- Portability : portable
24 --
25 -- = WARNING
26 --
27 -- This module is considered __internal__.
28 --
29 -- The Package Versioning Policy __does not apply__.
30 --
31 -- This contents of this module may change __in any way whatsoever__
32 -- and __without any warning__ between minor versions of this package.
33 --
34 -- Authors importing this module are expected to track development
35 -- closely.
36 --
37 -- = Description
38 --
39 -- An efficient implementation of integer sets.
40 --
41 -- These modules are intended to be imported qualified, to avoid name
42 -- clashes with Prelude functions, e.g.
43 --
44 -- > import Data.IntSet (IntSet)
45 -- > import qualified Data.IntSet as IntSet
46 --
47 -- The implementation is based on /big-endian patricia trees/. This data
48 -- structure performs especially well on binary operations like 'union'
49 -- and 'intersection'. However, my benchmarks show that it is also
50 -- (much) faster on insertions and deletions when compared to a generic
51 -- size-balanced set implementation (see "Data.Set").
52 --
53 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
54 -- Workshop on ML, September 1998, pages 77-86,
55 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
56 --
57 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
58 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
59 -- October 1968, pages 514-534.
60 --
61 -- Additionally, this implementation places bitmaps in the leaves of the tree.
62 -- Their size is the natural size of a machine word (32 or 64 bits) and greatly
63 -- reduce memory footprint and execution times for dense sets, e.g. sets where
64 -- it is likely that many values lie close to each other. The asymptotics are
65 -- not affected by this optimization.
66 --
67 -- Many operations have a worst-case complexity of /O(min(n,W))/.
68 -- This means that the operation can become linear in the number of
69 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
70 -- (32 or 64).
71 -----------------------------------------------------------------------------
72
73 -- [Note: INLINE bit fiddling]
74 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
75 -- It is essential that the bit fiddling functions like mask, zero, branchMask
76 -- etc are inlined. If they do not, the memory allocation skyrockets. The GHC
77 -- usually gets it right, but it is disastrous if it does not. Therefore we
78 -- explicitly mark these functions INLINE.
79
80
81 -- [Note: Local 'go' functions and capturing]
82 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
83 -- Care must be taken when using 'go' function which captures an argument.
84 -- Sometimes (for example when the argument is passed to a data constructor,
85 -- as in insert), GHC heap-allocates more than necessary. Therefore C-- code
86 -- must be checked for increased allocation when creating and modifying such
87 -- functions.
88
89
90 -- [Note: Order of constructors]
91 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92 -- The order of constructors of IntSet matters when considering performance.
93 -- Currently in GHC 7.0, when type has 3 constructors, they are matched from
94 -- the first to the last -- the best performance is achieved when the
95 -- constructors are ordered by frequency.
96 -- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil
97 -- improves the benchmark by circa 10%.
98
99 module Data.IntSet.Internal (
100 -- * Set type
101 IntSet(..), Key -- instance Eq,Show
102
103 -- * Operators
104 , (\\)
105
106 -- * Query
107 , null
108 , size
109 , member
110 , notMember
111 , lookupLT
112 , lookupGT
113 , lookupLE
114 , lookupGE
115 , isSubsetOf
116 , isProperSubsetOf
117
118 -- * Construction
119 , empty
120 , singleton
121 , insert
122 , delete
123
124 -- * Combine
125 , union
126 , unions
127 , difference
128 , intersection
129
130 -- * Filter
131 , filter
132 , partition
133 , split
134 , splitMember
135 , splitRoot
136
137 -- * Map
138 , map
139
140 -- * Folds
141 , foldr
142 , foldl
143 -- ** Strict folds
144 , foldr'
145 , foldl'
146 -- ** Legacy folds
147 , fold
148
149 -- * Min\/Max
150 , findMin
151 , findMax
152 , deleteMin
153 , deleteMax
154 , deleteFindMin
155 , deleteFindMax
156 , maxView
157 , minView
158
159 -- * Conversion
160
161 -- ** List
162 , elems
163 , toList
164 , fromList
165
166 -- ** Ordered list
167 , toAscList
168 , toDescList
169 , fromAscList
170 , fromDistinctAscList
171
172 -- * Debugging
173 , showTree
174 , showTreeWith
175
176 -- * Internals
177 , match
178 , suffixBitMask
179 , prefixBitMask
180 , bitmapOf
181 ) where
182
183 import Control.DeepSeq (NFData(rnf))
184 import Data.Bits
185 import qualified Data.List as List
186 import Data.Maybe (fromMaybe)
187 #if !MIN_VERSION_base(4,8,0)
188 import Data.Monoid (Monoid(..))
189 import Data.Word (Word)
190 #endif
191 #if MIN_VERSION_base(4,9,0)
192 import Data.Semigroup (Semigroup((<>), stimes), stimesIdempotentMonoid)
193 #endif
194 import Data.Typeable
195 import Prelude hiding (filter, foldr, foldl, null, map)
196
197 import Data.Utils.BitUtil
198 import Data.Utils.StrictFold
199 import Data.Utils.StrictPair
200
201 #if __GLASGOW_HASKELL__
202 import Data.Data (Data(..), Constr, mkConstr, constrIndex, Fixity(Prefix), DataType, mkDataType)
203 import Text.Read
204 #endif
205
206 #if __GLASGOW_HASKELL__
207 import GHC.Exts (Int(..), build)
208 #if __GLASGOW_HASKELL__ >= 708
209 import qualified GHC.Exts as GHCExts
210 #endif
211 import GHC.Prim (indexInt8OffAddr#)
212 #endif
213
214
215 infixl 9 \\{-This comment teaches CPP correct behaviour -}
216
217 -- A "Nat" is a natural machine word (an unsigned Int)
218 type Nat = Word
219
220 natFromInt :: Int -> Nat
221 natFromInt i = fromIntegral i
222 {-# INLINE natFromInt #-}
223
224 intFromNat :: Nat -> Int
225 intFromNat w = fromIntegral w
226 {-# INLINE intFromNat #-}
227
228 {--------------------------------------------------------------------
229 Operators
230 --------------------------------------------------------------------}
231 -- | /O(n+m)/. See 'difference'.
232 (\\) :: IntSet -> IntSet -> IntSet
233 m1 \\ m2 = difference m1 m2
234
235 {--------------------------------------------------------------------
236 Types
237 --------------------------------------------------------------------}
238
239 -- | A set of integers.
240
241 -- See Note: Order of constructors
242 data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
243 -- Invariant: Nil is never found as a child of Bin.
244 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
245 -- two elements of the set differ.
246 -- Invariant: Prefix is the common high-order bits that all elements share to
247 -- the left of the Mask bit.
248 -- Invariant: In Bin prefix mask left right, left consists of the elements that
249 -- don't have the mask bit set; right is all the elements that do.
250 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
251 -- Invariant: The Prefix is zero for all but the last 5 (on 32 bit arches) or 6
252 -- bits (on 64 bit arches). The values of the map represented by a tip
253 -- are the prefix plus the indices of the set bits in the bit map.
254 | Nil
255
256 -- A number stored in a set is stored as
257 -- * Prefix (all but last 5-6 bits) and
258 -- * BitMap (last 5-6 bits stored as a bitmask)
259 -- Last 5-6 bits are called a Suffix.
260
261 type Prefix = Int
262 type Mask = Int
263 type BitMap = Word
264 type Key = Int
265
266 instance Monoid IntSet where
267 mempty = empty
268 mconcat = unions
269 #if !(MIN_VERSION_base(4,9,0))
270 mappend = union
271 #else
272 mappend = (<>)
273
274 instance Semigroup IntSet where
275 (<>) = union
276 stimes = stimesIdempotentMonoid
277 #endif
278
279 #if __GLASGOW_HASKELL__
280
281 {--------------------------------------------------------------------
282 A Data instance
283 --------------------------------------------------------------------}
284
285 -- This instance preserves data abstraction at the cost of inefficiency.
286 -- We provide limited reflection services for the sake of data abstraction.
287
288 instance Data IntSet where
289 gfoldl f z is = z fromList `f` (toList is)
290 toConstr _ = fromListConstr
291 gunfold k z c = case constrIndex c of
292 1 -> k (z fromList)
293 _ -> error "gunfold"
294 dataTypeOf _ = intSetDataType
295
296 fromListConstr :: Constr
297 fromListConstr = mkConstr intSetDataType "fromList" [] Prefix
298
299 intSetDataType :: DataType
300 intSetDataType = mkDataType "Data.IntSet.Internal.IntSet" [fromListConstr]
301
302 #endif
303
304 {--------------------------------------------------------------------
305 Query
306 --------------------------------------------------------------------}
307 -- | /O(1)/. Is the set empty?
308 null :: IntSet -> Bool
309 null Nil = True
310 null _ = False
311 {-# INLINE null #-}
312
313 -- | /O(n)/. Cardinality of the set.
314 size :: IntSet -> Int
315 size (Bin _ _ l r) = size l + size r
316 size (Tip _ bm) = bitcount 0 bm
317 size Nil = 0
318
319 -- | /O(min(n,W))/. Is the value a member of the set?
320
321 -- See Note: Local 'go' functions and capturing]
322 member :: Key -> IntSet -> Bool
323 member !x = go
324 where
325 go (Bin p m l r)
326 | nomatch x p m = False
327 | zero x m = go l
328 | otherwise = go r
329 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0
330 go Nil = False
331
332 -- | /O(min(n,W))/. Is the element not in the set?
333 notMember :: Key -> IntSet -> Bool
334 notMember k = not . member k
335
336 -- | /O(log n)/. Find largest element smaller than the given one.
337 --
338 -- > lookupLT 3 (fromList [3, 5]) == Nothing
339 -- > lookupLT 5 (fromList [3, 5]) == Just 3
340
341 -- See Note: Local 'go' functions and capturing.
342 lookupLT :: Key -> IntSet -> Maybe Key
343 lookupLT !x t = case t of
344 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
345 _ -> go Nil t
346 where
347 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
348 | zero x m = go def l
349 | otherwise = go l r
350 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
351 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT
352 | otherwise = unsafeFindMax def
353 where maskLT = (bitmapOf x - 1) .&. bm
354 go def Nil = unsafeFindMax def
355
356
357 -- | /O(log n)/. Find smallest element greater than the given one.
358 --
359 -- > lookupGT 4 (fromList [3, 5]) == Just 5
360 -- > lookupGT 5 (fromList [3, 5]) == Nothing
361
362 -- See Note: Local 'go' functions and capturing.
363 lookupGT :: Key -> IntSet -> Maybe Key
364 lookupGT !x t = case t of
365 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
366 _ -> go Nil t
367 where
368 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
369 | zero x m = go r l
370 | otherwise = go def r
371 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
372 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT
373 | otherwise = unsafeFindMin def
374 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm
375 go def Nil = unsafeFindMin def
376
377
378 -- | /O(log n)/. Find largest element smaller or equal to the given one.
379 --
380 -- > lookupLE 2 (fromList [3, 5]) == Nothing
381 -- > lookupLE 4 (fromList [3, 5]) == Just 3
382 -- > lookupLE 5 (fromList [3, 5]) == Just 5
383
384 -- See Note: Local 'go' functions and capturing.
385 lookupLE :: Key -> IntSet -> Maybe Key
386 lookupLE !x t = case t of
387 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
388 _ -> go Nil t
389 where
390 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
391 | zero x m = go def l
392 | otherwise = go l r
393 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
394 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE
395 | otherwise = unsafeFindMax def
396 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm
397 go def Nil = unsafeFindMax def
398
399
400 -- | /O(log n)/. Find smallest element greater or equal to the given one.
401 --
402 -- > lookupGE 3 (fromList [3, 5]) == Just 3
403 -- > lookupGE 4 (fromList [3, 5]) == Just 5
404 -- > lookupGE 6 (fromList [3, 5]) == Nothing
405
406 -- See Note: Local 'go' functions and capturing.
407 lookupGE :: Key -> IntSet -> Maybe Key
408 lookupGE !x t = case t of
409 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
410 _ -> go Nil t
411 where
412 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
413 | zero x m = go r l
414 | otherwise = go def r
415 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
416 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE
417 | otherwise = unsafeFindMin def
418 where maskGE = (- (bitmapOf x)) .&. bm
419 go def Nil = unsafeFindMin def
420
421
422
423 -- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is
424 -- given, it has m > 0.
425 unsafeFindMin :: IntSet -> Maybe Key
426 unsafeFindMin Nil = Nothing
427 unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm
428 unsafeFindMin (Bin _ _ l _) = unsafeFindMin l
429
430 -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is
431 -- given, it has m > 0.
432 unsafeFindMax :: IntSet -> Maybe Key
433 unsafeFindMax Nil = Nothing
434 unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm
435 unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r
436
437 {--------------------------------------------------------------------
438 Construction
439 --------------------------------------------------------------------}
440 -- | /O(1)/. The empty set.
441 empty :: IntSet
442 empty
443 = Nil
444 {-# INLINE empty #-}
445
446 -- | /O(1)/. A set of one element.
447 singleton :: Key -> IntSet
448 singleton x
449 = Tip (prefixOf x) (bitmapOf x)
450 {-# INLINE singleton #-}
451
452 {--------------------------------------------------------------------
453 Insert
454 --------------------------------------------------------------------}
455 -- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for
456 -- IntSets.
457 insert :: Key -> IntSet -> IntSet
458 insert !x = insertBM (prefixOf x) (bitmapOf x)
459
460 -- Helper function for insert and union.
461 insertBM :: Prefix -> BitMap -> IntSet -> IntSet
462 insertBM !kx !bm t@(Bin p m l r)
463 | nomatch kx p m = link kx (Tip kx bm) p t
464 | zero kx m = Bin p m (insertBM kx bm l) r
465 | otherwise = Bin p m l (insertBM kx bm r)
466 insertBM kx bm t@(Tip kx' bm')
467 | kx' == kx = Tip kx' (bm .|. bm')
468 | otherwise = link kx (Tip kx bm) kx' t
469 insertBM kx bm Nil = Tip kx bm
470
471 -- | /O(min(n,W))/. Delete a value in the set. Returns the
472 -- original set when the value was not present.
473 delete :: Key -> IntSet -> IntSet
474 delete !x = deleteBM (prefixOf x) (bitmapOf x)
475
476 -- Deletes all values mentioned in the BitMap from the set.
477 -- Helper function for delete and difference.
478 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
479 deleteBM !kx !bm t@(Bin p m l r)
480 | nomatch kx p m = t
481 | zero kx m = bin p m (deleteBM kx bm l) r
482 | otherwise = bin p m l (deleteBM kx bm r)
483 deleteBM kx bm t@(Tip kx' bm')
484 | kx' == kx = tip kx (bm' .&. complement bm)
485 | otherwise = t
486 deleteBM _ _ Nil = Nil
487
488
489 {--------------------------------------------------------------------
490 Union
491 --------------------------------------------------------------------}
492 -- | The union of a list of sets.
493 unions :: [IntSet] -> IntSet
494 unions xs
495 = foldlStrict union empty xs
496
497
498 -- | /O(n+m)/. The union of two sets.
499 union :: IntSet -> IntSet -> IntSet
500 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
501 | shorter m1 m2 = union1
502 | shorter m2 m1 = union2
503 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
504 | otherwise = link p1 t1 p2 t2
505 where
506 union1 | nomatch p2 p1 m1 = link p1 t1 p2 t2
507 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
508 | otherwise = Bin p1 m1 l1 (union r1 t2)
509
510 union2 | nomatch p1 p2 m2 = link p1 t1 p2 t2
511 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
512 | otherwise = Bin p2 m2 l2 (union t1 r2)
513
514 union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t
515 union t@(Bin _ _ _ _) Nil = t
516 union (Tip kx bm) t = insertBM kx bm t
517 union Nil t = t
518
519
520 {--------------------------------------------------------------------
521 Difference
522 --------------------------------------------------------------------}
523 -- | /O(n+m)/. Difference between two sets.
524 difference :: IntSet -> IntSet -> IntSet
525 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
526 | shorter m1 m2 = difference1
527 | shorter m2 m1 = difference2
528 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
529 | otherwise = t1
530 where
531 difference1 | nomatch p2 p1 m1 = t1
532 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
533 | otherwise = bin p1 m1 l1 (difference r1 t2)
534
535 difference2 | nomatch p1 p2 m2 = t1
536 | zero p1 m2 = difference t1 l2
537 | otherwise = difference t1 r2
538
539 difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t
540 difference t@(Bin _ _ _ _) Nil = t
541
542 difference t1@(Tip kx bm) t2 = differenceTip t2
543 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1
544 | zero kx m2 = differenceTip l2
545 | otherwise = differenceTip r2
546 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2)
547 | otherwise = t1
548 differenceTip Nil = t1
549
550 difference Nil _ = Nil
551
552
553
554 {--------------------------------------------------------------------
555 Intersection
556 --------------------------------------------------------------------}
557 -- | /O(n+m)/. The intersection of two sets.
558 intersection :: IntSet -> IntSet -> IntSet
559 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
560 | shorter m1 m2 = intersection1
561 | shorter m2 m1 = intersection2
562 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
563 | otherwise = Nil
564 where
565 intersection1 | nomatch p2 p1 m1 = Nil
566 | zero p2 m1 = intersection l1 t2
567 | otherwise = intersection r1 t2
568
569 intersection2 | nomatch p1 p2 m2 = Nil
570 | zero p1 m2 = intersection t1 l2
571 | otherwise = intersection t1 r2
572
573 intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1
574 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil
575 | zero kx2 m1 = intersectBM l1
576 | otherwise = intersectBM r1
577 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
578 | otherwise = Nil
579 intersectBM Nil = Nil
580
581 intersection (Bin _ _ _ _) Nil = Nil
582
583 intersection (Tip kx1 bm1) t2 = intersectBM t2
584 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil
585 | zero kx1 m2 = intersectBM l2
586 | otherwise = intersectBM r2
587 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
588 | otherwise = Nil
589 intersectBM Nil = Nil
590
591 intersection Nil _ = Nil
592
593 {--------------------------------------------------------------------
594 Subset
595 --------------------------------------------------------------------}
596 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
597 isProperSubsetOf :: IntSet -> IntSet -> Bool
598 isProperSubsetOf t1 t2
599 = case subsetCmp t1 t2 of
600 LT -> True
601 _ -> False
602
603 subsetCmp :: IntSet -> IntSet -> Ordering
604 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
605 | shorter m1 m2 = GT
606 | shorter m2 m1 = case subsetCmpLt of
607 GT -> GT
608 _ -> LT
609 | p1 == p2 = subsetCmpEq
610 | otherwise = GT -- disjoint
611 where
612 subsetCmpLt | nomatch p1 p2 m2 = GT
613 | zero p1 m2 = subsetCmp t1 l2
614 | otherwise = subsetCmp t1 r2
615 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
616 (GT,_ ) -> GT
617 (_ ,GT) -> GT
618 (EQ,EQ) -> EQ
619 _ -> LT
620
621 subsetCmp (Bin _ _ _ _) _ = GT
622 subsetCmp (Tip kx1 bm1) (Tip kx2 bm2)
623 | kx1 /= kx2 = GT -- disjoint
624 | bm1 == bm2 = EQ
625 | bm1 .&. complement bm2 == 0 = LT
626 | otherwise = GT
627 subsetCmp t1@(Tip kx _) (Bin p m l r)
628 | nomatch kx p m = GT
629 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT
630 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT
631 subsetCmp (Tip _ _) Nil = GT -- disjoint
632 subsetCmp Nil Nil = EQ
633 subsetCmp Nil _ = LT
634
635 -- | /O(n+m)/. Is this a subset?
636 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
637
638 isSubsetOf :: IntSet -> IntSet -> Bool
639 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
640 | shorter m1 m2 = False
641 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
642 else isSubsetOf t1 r2)
643 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
644 isSubsetOf (Bin _ _ _ _) _ = False
645 isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0
646 isSubsetOf t1@(Tip kx _) (Bin p m l r)
647 | nomatch kx p m = False
648 | zero kx m = isSubsetOf t1 l
649 | otherwise = isSubsetOf t1 r
650 isSubsetOf (Tip _ _) Nil = False
651 isSubsetOf Nil _ = True
652
653
654 {--------------------------------------------------------------------
655 Filter
656 --------------------------------------------------------------------}
657 -- | /O(n)/. Filter all elements that satisfy some predicate.
658 filter :: (Key -> Bool) -> IntSet -> IntSet
659 filter predicate t
660 = case t of
661 Bin p m l r
662 -> bin p m (filter predicate l) (filter predicate r)
663 Tip kx bm
664 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm)
665 Nil -> Nil
666 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
667 | otherwise = bm
668 {-# INLINE bitPred #-}
669
670 -- | /O(n)/. partition the set according to some predicate.
671 partition :: (Key -> Bool) -> IntSet -> (IntSet,IntSet)
672 partition predicate0 t0 = toPair $ go predicate0 t0
673 where
674 go predicate t
675 = case t of
676 Bin p m l r
677 -> let (l1 :*: l2) = go predicate l
678 (r1 :*: r2) = go predicate r
679 in bin p m l1 r1 :*: bin p m l2 r2
680 Tip kx bm
681 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm
682 in tip kx bm1 :*: tip kx (bm `xor` bm1)
683 Nil -> (Nil :*: Nil)
684 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
685 | otherwise = bm
686 {-# INLINE bitPred #-}
687
688
689 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
690 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
691 -- comprises the elements of @set@ greater than @x@.
692 --
693 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
694 split :: Key -> IntSet -> (IntSet,IntSet)
695 split x t =
696 case t of
697 Bin _ m l r
698 | m < 0 -> if x >= 0 -- handle negative numbers.
699 then case go x l of (lt :*: gt) -> let !lt' = union lt r
700 in (lt', gt)
701 else case go x r of (lt :*: gt) -> let !gt' = union gt l
702 in (lt, gt')
703 _ -> case go x t of
704 (lt :*: gt) -> (lt, gt)
705 where
706 go !x' t'@(Bin p m l r)
707 | match x' p m = if zero x' m
708 then case go x' l of
709 (lt :*: gt) -> lt :*: union gt r
710 else case go x' r of
711 (lt :*: gt) -> union lt l :*: gt
712 | otherwise = if x' < p then (Nil :*: t')
713 else (t' :*: Nil)
714 go x' t'@(Tip kx' bm)
715 | kx' > x' = (Nil :*: t')
716 -- equivalent to kx' > prefixOf x'
717 | kx' < prefixOf x' = (t' :*: Nil)
718 | otherwise = tip kx' (bm .&. lowerBitmap) :*: tip kx' (bm .&. higherBitmap)
719 where lowerBitmap = bitmapOf x' - 1
720 higherBitmap = complement (lowerBitmap + bitmapOf x')
721 go _ Nil = (Nil :*: Nil)
722
723 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
724 -- element was found in the original set.
725 splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet)
726 splitMember x t =
727 case t of
728 Bin _ m l r | m < 0 -> if x >= 0
729 then case go x l of
730 (lt, fnd, gt) -> let !lt' = union lt r
731 in (lt', fnd, gt)
732 else case go x r of
733 (lt, fnd, gt) -> let !gt' = union gt l
734 in (lt, fnd, gt')
735 _ -> go x t
736 where
737 go x' t'@(Bin p m l r)
738 | match x' p m = if zero x' m
739 then case go x' l of
740 (lt, fnd, gt) -> (lt, fnd, union gt r)
741 else case go x' r of
742 (lt, fnd, gt) -> (union lt l, fnd, gt)
743 | otherwise = if x' < p then (Nil, False, t') else (t', False, Nil)
744 go x' t'@(Tip kx' bm)
745 | kx' > x' = (Nil, False, t')
746 -- equivalent to kx' > prefixOf x'
747 | kx' < prefixOf x' = (t', False, Nil)
748 | otherwise = let !lt = tip kx' (bm .&. lowerBitmap)
749 !found = (bm .&. bitmapOfx') /= 0
750 !gt = tip kx' (bm .&. higherBitmap)
751 in (lt, found, gt)
752 where bitmapOfx' = bitmapOf x'
753 lowerBitmap = bitmapOfx' - 1
754 higherBitmap = complement (lowerBitmap + bitmapOfx')
755 go _ Nil = (Nil, False, Nil)
756
757 {----------------------------------------------------------------------
758 Min/Max
759 ----------------------------------------------------------------------}
760
761 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
762 -- stripped of that element, or 'Nothing' if passed an empty set.
763 maxView :: IntSet -> Maybe (Key, IntSet)
764 maxView t =
765 case t of Nil -> Nothing
766 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r)
767 _ -> Just (go t)
768 where
769 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r')
770 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
771 go Nil = error "maxView Nil"
772
773 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
774 -- stripped of that element, or 'Nothing' if passed an empty set.
775 minView :: IntSet -> Maybe (Key, IntSet)
776 minView t =
777 case t of Nil -> Nothing
778 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r')
779 _ -> Just (go t)
780 where
781 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r)
782 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
783 go Nil = error "minView Nil"
784
785 -- | /O(min(n,W))/. Delete and find the minimal element.
786 --
787 -- > deleteFindMin set = (findMin set, deleteMin set)
788 deleteFindMin :: IntSet -> (Key, IntSet)
789 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
790
791 -- | /O(min(n,W))/. Delete and find the maximal element.
792 --
793 -- > deleteFindMax set = (findMax set, deleteMax set)
794 deleteFindMax :: IntSet -> (Key, IntSet)
795 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
796
797
798 -- | /O(min(n,W))/. The minimal element of the set.
799 findMin :: IntSet -> Key
800 findMin Nil = error "findMin: empty set has no minimal element"
801 findMin (Tip kx bm) = kx + lowestBitSet bm
802 findMin (Bin _ m l r)
803 | m < 0 = find r
804 | otherwise = find l
805 where find (Tip kx bm) = kx + lowestBitSet bm
806 find (Bin _ _ l' _) = find l'
807 find Nil = error "findMin Nil"
808
809 -- | /O(min(n,W))/. The maximal element of a set.
810 findMax :: IntSet -> Key
811 findMax Nil = error "findMax: empty set has no maximal element"
812 findMax (Tip kx bm) = kx + highestBitSet bm
813 findMax (Bin _ m l r)
814 | m < 0 = find l
815 | otherwise = find r
816 where find (Tip kx bm) = kx + highestBitSet bm
817 find (Bin _ _ _ r') = find r'
818 find Nil = error "findMax Nil"
819
820
821 -- | /O(min(n,W))/. Delete the minimal element. Returns an empty set if the set is empty.
822 --
823 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
824 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
825 deleteMin :: IntSet -> IntSet
826 deleteMin = maybe Nil snd . minView
827
828 -- | /O(min(n,W))/. Delete the maximal element. Returns an empty set if the set is empty.
829 --
830 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
831 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
832 deleteMax :: IntSet -> IntSet
833 deleteMax = maybe Nil snd . maxView
834
835 {----------------------------------------------------------------------
836 Map
837 ----------------------------------------------------------------------}
838
839 -- | /O(n*min(n,W))/.
840 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
841 --
842 -- It's worth noting that the size of the result may be smaller if,
843 -- for some @(x,y)@, @x \/= y && f x == f y@
844
845 map :: (Key -> Key) -> IntSet -> IntSet
846 map f = fromList . List.map f . toList
847
848 {--------------------------------------------------------------------
849 Fold
850 --------------------------------------------------------------------}
851 -- | /O(n)/. Fold the elements in the set using the given right-associative
852 -- binary operator. This function is an equivalent of 'foldr' and is present
853 -- for compatibility only.
854 --
855 -- /Please note that fold will be deprecated in the future and removed./
856 fold :: (Key -> b -> b) -> b -> IntSet -> b
857 fold = foldr
858 {-# INLINE fold #-}
859
860 -- | /O(n)/. Fold the elements in the set using the given right-associative
861 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
862 --
863 -- For example,
864 --
865 -- > toAscList set = foldr (:) [] set
866 foldr :: (Key -> b -> b) -> b -> IntSet -> b
867 foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
868 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
869 | otherwise -> go (go z r) l
870 _ -> go z t
871 where
872 go z' Nil = z'
873 go z' (Tip kx bm) = foldrBits kx f z' bm
874 go z' (Bin _ _ l r) = go (go z' r) l
875 {-# INLINE foldr #-}
876
877 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
878 -- evaluated before using the result in the next application. This
879 -- function is strict in the starting value.
880 foldr' :: (Key -> b -> b) -> b -> IntSet -> b
881 foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
882 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
883 | otherwise -> go (go z r) l
884 _ -> go z t
885 where
886 go !z' Nil = z'
887 go z' (Tip kx bm) = foldr'Bits kx f z' bm
888 go z' (Bin _ _ l r) = go (go z' r) l
889 {-# INLINE foldr' #-}
890
891 -- | /O(n)/. Fold the elements in the set using the given left-associative
892 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
893 --
894 -- For example,
895 --
896 -- > toDescList set = foldl (flip (:)) [] set
897 foldl :: (a -> Key -> a) -> a -> IntSet -> a
898 foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
899 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
900 | otherwise -> go (go z l) r
901 _ -> go z t
902 where
903 go z' Nil = z'
904 go z' (Tip kx bm) = foldlBits kx f z' bm
905 go z' (Bin _ _ l r) = go (go z' l) r
906 {-# INLINE foldl #-}
907
908 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
909 -- evaluated before using the result in the next application. This
910 -- function is strict in the starting value.
911 foldl' :: (a -> Key -> a) -> a -> IntSet -> a
912 foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
913 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
914 | otherwise -> go (go z l) r
915 _ -> go z t
916 where
917 go !z' Nil = z'
918 go z' (Tip kx bm) = foldl'Bits kx f z' bm
919 go z' (Bin _ _ l r) = go (go z' l) r
920 {-# INLINE foldl' #-}
921
922 {--------------------------------------------------------------------
923 List variations
924 --------------------------------------------------------------------}
925 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
926 -- Subject to list fusion.
927 elems :: IntSet -> [Key]
928 elems
929 = toAscList
930
931 {--------------------------------------------------------------------
932 Lists
933 --------------------------------------------------------------------}
934 #if __GLASGOW_HASKELL__ >= 708
935 instance GHCExts.IsList IntSet where
936 type Item IntSet = Key
937 fromList = fromList
938 toList = toList
939 #endif
940
941 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
942 toList :: IntSet -> [Key]
943 toList
944 = toAscList
945
946 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list
947 -- fusion.
948 toAscList :: IntSet -> [Key]
949 toAscList = foldr (:) []
950
951 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
952 -- fusion.
953 toDescList :: IntSet -> [Key]
954 toDescList = foldl (flip (:)) []
955
956 -- List fusion for the list generating functions.
957 #if __GLASGOW_HASKELL__
958 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
959 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
960 foldrFB :: (Key -> b -> b) -> b -> IntSet -> b
961 foldrFB = foldr
962 {-# INLINE[0] foldrFB #-}
963 foldlFB :: (a -> Key -> a) -> a -> IntSet -> a
964 foldlFB = foldl
965 {-# INLINE[0] foldlFB #-}
966
967 -- Inline elems and toList, so that we need to fuse only toAscList.
968 {-# INLINE elems #-}
969 {-# INLINE toList #-}
970
971 -- The fusion is enabled up to phase 2 included. If it does not succeed,
972 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
973 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
974 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
975 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
976 -- before phase 0, otherwise the fusion rules would not fire at all.
977 {-# NOINLINE[0] toAscList #-}
978 {-# NOINLINE[0] toDescList #-}
979 {-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
980 {-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
981 {-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
982 {-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
983 #endif
984
985
986 -- | /O(n*min(n,W))/. Create a set from a list of integers.
987 fromList :: [Key] -> IntSet
988 fromList xs
989 = foldlStrict ins empty xs
990 where
991 ins t x = insert x t
992
993 -- | /O(n)/. Build a set from an ascending list of elements.
994 -- /The precondition (input list is ascending) is not checked./
995 fromAscList :: [Key] -> IntSet
996 fromAscList [] = Nil
997 fromAscList (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
998 where
999 combineEq x' [] = [x']
1000 combineEq x' (x:xs)
1001 | x==x' = combineEq x' xs
1002 | otherwise = x' : combineEq x xs
1003
1004 -- | /O(n)/. Build a set from an ascending list of distinct elements.
1005 -- /The precondition (input list is strictly ascending) is not checked./
1006 fromDistinctAscList :: [Key] -> IntSet
1007 fromDistinctAscList [] = Nil
1008 fromDistinctAscList (z0 : zs0) = work (prefixOf z0) (bitmapOf z0) zs0 Nada
1009 where
1010 -- 'work' accumulates all values that go into one tip, before passing this Tip
1011 -- to 'reduce'
1012 work kx bm [] stk = finish kx (Tip kx bm) stk
1013 work kx bm (z:zs) stk | kx == prefixOf z = work kx (bm .|. bitmapOf z) zs stk
1014 work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
1015
1016 reduce z zs _ px tx Nada = work (prefixOf z) (bitmapOf z) zs (Push px tx Nada)
1017 reduce z zs m px tx stk@(Push py ty stk') =
1018 let mxy = branchMask px py
1019 pxy = mask px mxy
1020 in if shorter m mxy
1021 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
1022 else work (prefixOf z) (bitmapOf z) zs (Push px tx stk)
1023
1024 finish _ t Nada = t
1025 finish px tx (Push py ty stk) = finish p (link py ty px tx) stk
1026 where m = branchMask px py
1027 p = mask px m
1028
1029 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
1030
1031
1032 {--------------------------------------------------------------------
1033 Eq
1034 --------------------------------------------------------------------}
1035 instance Eq IntSet where
1036 t1 == t2 = equal t1 t2
1037 t1 /= t2 = nequal t1 t2
1038
1039 equal :: IntSet -> IntSet -> Bool
1040 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1041 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
1042 equal (Tip kx1 bm1) (Tip kx2 bm2)
1043 = kx1 == kx2 && bm1 == bm2
1044 equal Nil Nil = True
1045 equal _ _ = False
1046
1047 nequal :: IntSet -> IntSet -> Bool
1048 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1049 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
1050 nequal (Tip kx1 bm1) (Tip kx2 bm2)
1051 = kx1 /= kx2 || bm1 /= bm2
1052 nequal Nil Nil = False
1053 nequal _ _ = True
1054
1055 {--------------------------------------------------------------------
1056 Ord
1057 --------------------------------------------------------------------}
1058
1059 instance Ord IntSet where
1060 compare s1 s2 = compare (toAscList s1) (toAscList s2)
1061 -- tentative implementation. See if more efficient exists.
1062
1063 {--------------------------------------------------------------------
1064 Show
1065 --------------------------------------------------------------------}
1066 instance Show IntSet where
1067 showsPrec p xs = showParen (p > 10) $
1068 showString "fromList " . shows (toList xs)
1069
1070 {--------------------------------------------------------------------
1071 Read
1072 --------------------------------------------------------------------}
1073 instance Read IntSet where
1074 #ifdef __GLASGOW_HASKELL__
1075 readPrec = parens $ prec 10 $ do
1076 Ident "fromList" <- lexP
1077 xs <- readPrec
1078 return (fromList xs)
1079
1080 readListPrec = readListPrecDefault
1081 #else
1082 readsPrec p = readParen (p > 10) $ \ r -> do
1083 ("fromList",s) <- lex r
1084 (xs,t) <- reads s
1085 return (fromList xs,t)
1086 #endif
1087
1088 {--------------------------------------------------------------------
1089 Typeable
1090 --------------------------------------------------------------------}
1091
1092 INSTANCE_TYPEABLE0(IntSet)
1093
1094 {--------------------------------------------------------------------
1095 NFData
1096 --------------------------------------------------------------------}
1097
1098 -- The IntSet constructors consist only of strict fields of Ints and
1099 -- IntSets, thus the default NFData instance which evaluates to whnf
1100 -- should suffice
1101 instance NFData IntSet where rnf x = seq x ()
1102
1103 {--------------------------------------------------------------------
1104 Debugging
1105 --------------------------------------------------------------------}
1106 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1107 -- in a compressed, hanging format.
1108 showTree :: IntSet -> String
1109 showTree s
1110 = showTreeWith True False s
1111
1112
1113 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
1114 the tree that implements the set. If @hang@ is
1115 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
1116 @wide@ is 'True', an extra wide version is shown.
1117 -}
1118 showTreeWith :: Bool -> Bool -> IntSet -> String
1119 showTreeWith hang wide t
1120 | hang = (showsTreeHang wide [] t) ""
1121 | otherwise = (showsTree wide [] [] t) ""
1122
1123 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
1124 showsTree wide lbars rbars t
1125 = case t of
1126 Bin p m l r
1127 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1128 showWide wide rbars .
1129 showsBars lbars . showString (showBin p m) . showString "\n" .
1130 showWide wide lbars .
1131 showsTree wide (withEmpty lbars) (withBar lbars) l
1132 Tip kx bm
1133 -> showsBars lbars . showString " " . shows kx . showString " + " .
1134 showsBitMap bm . showString "\n"
1135 Nil -> showsBars lbars . showString "|\n"
1136
1137 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
1138 showsTreeHang wide bars t
1139 = case t of
1140 Bin p m l r
1141 -> showsBars bars . showString (showBin p m) . showString "\n" .
1142 showWide wide bars .
1143 showsTreeHang wide (withBar bars) l .
1144 showWide wide bars .
1145 showsTreeHang wide (withEmpty bars) r
1146 Tip kx bm
1147 -> showsBars bars . showString " " . shows kx . showString " + " .
1148 showsBitMap bm . showString "\n"
1149 Nil -> showsBars bars . showString "|\n"
1150
1151 showBin :: Prefix -> Mask -> String
1152 showBin _ _
1153 = "*" -- ++ show (p,m)
1154
1155 showWide :: Bool -> [String] -> String -> String
1156 showWide wide bars
1157 | wide = showString (concat (reverse bars)) . showString "|\n"
1158 | otherwise = id
1159
1160 showsBars :: [String] -> ShowS
1161 showsBars [] = id
1162 showsBars bars = showString (concat (reverse (tail bars))) . showString node
1163
1164 showsBitMap :: Word -> ShowS
1165 showsBitMap = showString . showBitMap
1166
1167 showBitMap :: Word -> String
1168 showBitMap w = show $ foldrBits 0 (:) [] w
1169
1170 node :: String
1171 node = "+--"
1172
1173 withBar, withEmpty :: [String] -> [String]
1174 withBar bars = "| ":bars
1175 withEmpty bars = " ":bars
1176
1177
1178 {--------------------------------------------------------------------
1179 Helpers
1180 --------------------------------------------------------------------}
1181 {--------------------------------------------------------------------
1182 Link
1183 --------------------------------------------------------------------}
1184 link :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
1185 link p1 t1 p2 t2
1186 | zero p1 m = Bin p m t1 t2
1187 | otherwise = Bin p m t2 t1
1188 where
1189 m = branchMask p1 p2
1190 p = mask p1 m
1191 {-# INLINE link #-}
1192
1193 {--------------------------------------------------------------------
1194 @bin@ assures that we never have empty trees within a tree.
1195 --------------------------------------------------------------------}
1196 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
1197 bin _ _ l Nil = l
1198 bin _ _ Nil r = r
1199 bin p m l r = Bin p m l r
1200 {-# INLINE bin #-}
1201
1202 {--------------------------------------------------------------------
1203 @tip@ assures that we never have empty bitmaps within a tree.
1204 --------------------------------------------------------------------}
1205 tip :: Prefix -> BitMap -> IntSet
1206 tip _ 0 = Nil
1207 tip kx bm = Tip kx bm
1208 {-# INLINE tip #-}
1209
1210
1211 {----------------------------------------------------------------------
1212 Functions that generate Prefix and BitMap of a Key or a Suffix.
1213 ----------------------------------------------------------------------}
1214
1215 suffixBitMask :: Int
1216 #if MIN_VERSION_base(4,7,0)
1217 suffixBitMask = finiteBitSize (undefined::Word) - 1
1218 #else
1219 suffixBitMask = bitSize (undefined::Word) - 1
1220 #endif
1221 {-# INLINE suffixBitMask #-}
1222
1223 prefixBitMask :: Int
1224 prefixBitMask = complement suffixBitMask
1225 {-# INLINE prefixBitMask #-}
1226
1227 prefixOf :: Int -> Prefix
1228 prefixOf x = x .&. prefixBitMask
1229 {-# INLINE prefixOf #-}
1230
1231 suffixOf :: Int -> Int
1232 suffixOf x = x .&. suffixBitMask
1233 {-# INLINE suffixOf #-}
1234
1235 bitmapOfSuffix :: Int -> BitMap
1236 bitmapOfSuffix s = 1 `shiftLL` s
1237 {-# INLINE bitmapOfSuffix #-}
1238
1239 bitmapOf :: Int -> BitMap
1240 bitmapOf x = bitmapOfSuffix (suffixOf x)
1241 {-# INLINE bitmapOf #-}
1242
1243
1244 {--------------------------------------------------------------------
1245 Endian independent bit twiddling
1246 --------------------------------------------------------------------}
1247 zero :: Int -> Mask -> Bool
1248 zero i m
1249 = (natFromInt i) .&. (natFromInt m) == 0
1250 {-# INLINE zero #-}
1251
1252 nomatch,match :: Int -> Prefix -> Mask -> Bool
1253 nomatch i p m
1254 = (mask i m) /= p
1255 {-# INLINE nomatch #-}
1256
1257 match i p m
1258 = (mask i m) == p
1259 {-# INLINE match #-}
1260
1261 -- Suppose a is largest such that 2^a divides 2*m.
1262 -- Then mask i m is i with the low a bits zeroed out.
1263 mask :: Int -> Mask -> Prefix
1264 mask i m
1265 = maskW (natFromInt i) (natFromInt m)
1266 {-# INLINE mask #-}
1267
1268 {--------------------------------------------------------------------
1269 Big endian operations
1270 --------------------------------------------------------------------}
1271 maskW :: Nat -> Nat -> Prefix
1272 maskW i m
1273 = intFromNat (i .&. (complement (m-1) `xor` m))
1274 {-# INLINE maskW #-}
1275
1276 shorter :: Mask -> Mask -> Bool
1277 shorter m1 m2
1278 = (natFromInt m1) > (natFromInt m2)
1279 {-# INLINE shorter #-}
1280
1281 branchMask :: Prefix -> Prefix -> Mask
1282 branchMask p1 p2
1283 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
1284 {-# INLINE branchMask #-}
1285
1286 {----------------------------------------------------------------------
1287 To get best performance, we provide fast implementations of
1288 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC.
1289 If the intel bsf and bsr instructions ever become GHC primops,
1290 this code should be reimplemented using these.
1291
1292 Performance of this code is crucial for folds, toList, filter, partition.
1293
1294 The signatures of methods in question are placed after this comment.
1295 ----------------------------------------------------------------------}
1296
1297 lowestBitSet :: Nat -> Int
1298 highestBitSet :: Nat -> Int
1299 foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1300 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1301 foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1302 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1303
1304 {-# INLINE lowestBitSet #-}
1305 {-# INLINE highestBitSet #-}
1306 {-# INLINE foldlBits #-}
1307 {-# INLINE foldl'Bits #-}
1308 {-# INLINE foldrBits #-}
1309 {-# INLINE foldr'Bits #-}
1310
1311 #if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64)
1312 {----------------------------------------------------------------------
1313 For lowestBitSet we use wordsize-dependant implementation based on
1314 multiplication and DeBrujn indeces, which was proposed by Edward Kmett
1315 <http://haskell.org/pipermail/libraries/2011-September/016749.html>
1316
1317 The core of this implementation is fast indexOfTheOnlyBit,
1318 which is given a Nat with exactly one bit set, and returns
1319 its index.
1320
1321 Lot of effort was put in these implementations, please benchmark carefully
1322 before changing this code.
1323 ----------------------------------------------------------------------}
1324
1325 indexOfTheOnlyBit :: Nat -> Int
1326 {-# INLINE indexOfTheOnlyBit #-}
1327 indexOfTheOnlyBit bitmask =
1328 I# (lsbArray `indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))
1329 where unboxInt (I# i) = i
1330 #if WORD_SIZE_IN_BITS==32
1331 magic = 0x077CB531
1332 offset = 27
1333 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"#
1334 #else
1335 magic = 0x07EDD5E59A4E28C2
1336 offset = 58
1337 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"#
1338 #endif
1339 -- The lsbArray gets inlined to every call site of indexOfTheOnlyBit.
1340 -- That cannot be easily avoided, as GHC forbids top-level Addr# literal.
1341 -- One could go around that by supplying getLsbArray :: () -> Addr# marked
1342 -- as NOINLINE. But the code size of calling it and processing the result
1343 -- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array
1344 -- is actually improvement on 32-bit and only a 8B size increase on 64-bit.
1345
1346 lowestBitMask :: Nat -> Nat
1347 lowestBitMask x = x .&. negate x
1348 {-# INLINE lowestBitMask #-}
1349
1350 -- Reverse the order of bits in the Nat.
1351 revNat :: Nat -> Nat
1352 #if WORD_SIZE_IN_BITS==32
1353 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of
1354 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of
1355 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of
1356 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of
1357 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16);
1358 #else
1359 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of
1360 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of
1361 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of
1362 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of
1363 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of
1364 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
1365 #endif
1366
1367 lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x)
1368
1369 highestBitSet x = indexOfTheOnlyBit (highestBitMask x)
1370
1371 foldlBits prefix f z bitmap = go bitmap z
1372 where go 0 acc = acc
1373 go bm acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1374 where
1375 !bitmask = lowestBitMask bm
1376 !bi = indexOfTheOnlyBit bitmask
1377
1378 foldl'Bits prefix f z bitmap = go bitmap z
1379 where go 0 acc = acc
1380 go bm !acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1381 where !bitmask = lowestBitMask bm
1382 !bi = indexOfTheOnlyBit bitmask
1383
1384 foldrBits prefix f z bitmap = go (revNat bitmap) z
1385 where go 0 acc = acc
1386 go bm acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1387 where !bitmask = lowestBitMask bm
1388 !bi = indexOfTheOnlyBit bitmask
1389
1390
1391 foldr'Bits prefix f z bitmap = go (revNat bitmap) z
1392 where go 0 acc = acc
1393 go bm !acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1394 where !bitmask = lowestBitMask bm
1395 !bi = indexOfTheOnlyBit bitmask
1396
1397 #else
1398 {----------------------------------------------------------------------
1399 In general case we use logarithmic implementation of
1400 lowestBitSet and highestBitSet, which works up to bit sizes of 64.
1401
1402 Folds are linear scans.
1403 ----------------------------------------------------------------------}
1404
1405 lowestBitSet n0 =
1406 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32)
1407 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1)
1408 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2)
1409 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3)
1410 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4)
1411 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5
1412 in b6
1413
1414 highestBitSet n0 =
1415 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0)
1416 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1)
1417 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2)
1418 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3)
1419 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4)
1420 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5
1421 in b6
1422
1423 foldlBits prefix f z bm = let lb = lowestBitSet bm
1424 in go (prefix+lb) z (bm `shiftRL` lb)
1425 where go !_ acc 0 = acc
1426 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1427 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1428
1429 foldl'Bits prefix f z bm = let lb = lowestBitSet bm
1430 in go (prefix+lb) z (bm `shiftRL` lb)
1431 where go !_ !acc 0 = acc
1432 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1433 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1434
1435 foldrBits prefix f z bm = let lb = lowestBitSet bm
1436 in go (prefix+lb) (bm `shiftRL` lb)
1437 where go !_ 0 = z
1438 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1))
1439 | otherwise = go (bi + 1) (n `shiftRL` 1)
1440
1441 foldr'Bits prefix f z bm = let lb = lowestBitSet bm
1442 in go (prefix+lb) (bm `shiftRL` lb)
1443 where
1444 go !_ 0 = z
1445 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
1446 | otherwise = go (bi + 1) (n `shiftRL` 1)
1447
1448 #endif
1449
1450 {----------------------------------------------------------------------
1451 [bitcount] as posted by David F. Place to haskell-cafe on April 11, 2006,
1452 based on the code on
1453 http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan,
1454 where the following source is given:
1455 Published in 1988, the C Programming Language 2nd Ed. (by Brian W.
1456 Kernighan and Dennis M. Ritchie) mentions this in exercise 2-9. On April
1457 19, 2006 Don Knuth pointed out to me that this method "was first published
1458 by Peter Wegner in CACM 3 (1960), 322. (Also discovered independently by
1459 Derrick Lehmer and published in 1964 in a book edited by Beckenbach.)"
1460 ----------------------------------------------------------------------}
1461
1462 bitcount :: Int -> Word -> Int
1463 #if MIN_VERSION_base(4,5,0)
1464 bitcount a x = a + popCount x
1465 #else
1466 bitcount a0 x0 = go a0 x0
1467 where go a 0 = a
1468 go a x = go (a + 1) (x .&. (x-1))
1469 #endif
1470 {-# INLINE bitcount #-}
1471
1472
1473 {--------------------------------------------------------------------
1474 Utilities
1475 --------------------------------------------------------------------}
1476
1477 -- | /O(1)/. Decompose a set into pieces based on the structure of the underlying
1478 -- tree. This function is useful for consuming a set in parallel.
1479 --
1480 -- No guarantee is made as to the sizes of the pieces; an internal, but
1481 -- deterministic process determines this. However, it is guaranteed that the
1482 -- pieces returned will be in ascending order (all elements in the first submap
1483 -- less than all elements in the second, and so on).
1484 --
1485 -- Examples:
1486 --
1487 -- > splitRoot (fromList [1..120]) == [fromList [1..63],fromList [64..120]]
1488 -- > splitRoot empty == []
1489 --
1490 -- Note that the current implementation does not return more than two subsets,
1491 -- but you should not depend on this behaviour because it can change in the
1492 -- future without notice. Also, the current version does not continue
1493 -- splitting all the way to individual singleton sets -- it stops at some
1494 -- point.
1495 splitRoot :: IntSet -> [IntSet]
1496 splitRoot Nil = []
1497 -- NOTE: we don't currently split below Tip, but we could.
1498 splitRoot x@(Tip _ _) = [x]
1499 splitRoot (Bin _ m l r) | m < 0 = [r, l]
1500 | otherwise = [l, r]
1501 {-# INLINE splitRoot #-}