0063c3fc971ecff63c21e7ee6e2e97341a0d44e1
[packages/containers.git] / Data / IntSet / Base.hs
1 {-# LANGUAGE CPP #-}
2 #if __GLASGOW_HASKELL__
3 {-# LANGUAGE MagicHash, BangPatterns, DeriveDataTypeable, StandaloneDeriving #-}
4 #endif
5 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
6 {-# LANGUAGE Trustworthy #-}
7 #endif
8 #if __GLASGOW_HASKELL__ >= 708
9 {-# LANGUAGE TypeFamilies #-}
10 #endif
11 -----------------------------------------------------------------------------
12 -- |
13 -- Module : Data.IntSet.Base
14 -- Copyright : (c) Daan Leijen 2002
15 -- (c) Joachim Breitner 2011
16 -- License : BSD-style
17 -- Maintainer : libraries@haskell.org
18 -- Stability : provisional
19 -- Portability : portable
20 --
21 -- An efficient implementation of integer sets.
22 --
23 -- These modules are intended to be imported qualified, to avoid name
24 -- clashes with Prelude functions, e.g.
25 --
26 -- > import Data.IntSet (IntSet)
27 -- > import qualified Data.IntSet as IntSet
28 --
29 -- The implementation is based on /big-endian patricia trees/. This data
30 -- structure performs especially well on binary operations like 'union'
31 -- and 'intersection'. However, my benchmarks show that it is also
32 -- (much) faster on insertions and deletions when compared to a generic
33 -- size-balanced set implementation (see "Data.Set").
34 --
35 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
36 -- Workshop on ML, September 1998, pages 77-86,
37 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
38 --
39 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
40 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
41 -- October 1968, pages 514-534.
42 --
43 -- Additionally, this implementation places bitmaps in the leaves of the tree.
44 -- Their size is the natural size of a machine word (32 or 64 bits) and greatly
45 -- reduce memory footprint and execution times for dense sets, e.g. sets where
46 -- it is likely that many values lie close to each other. The asymptotics are
47 -- not affected by this optimization.
48 --
49 -- Many operations have a worst-case complexity of /O(min(n,W))/.
50 -- This means that the operation can become linear in the number of
51 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
52 -- (32 or 64).
53 -----------------------------------------------------------------------------
54
55 -- [Note: INLINE bit fiddling]
56 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
57 -- It is essential that the bit fiddling functions like mask, zero, branchMask
58 -- etc are inlined. If they do not, the memory allocation skyrockets. The GHC
59 -- usually gets it right, but it is disastrous if it does not. Therefore we
60 -- explicitly mark these functions INLINE.
61
62
63 -- [Note: Local 'go' functions and capturing]
64 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
65 -- Care must be taken when using 'go' function which captures an argument.
66 -- Sometimes (for example when the argument is passed to a data constructor,
67 -- as in insert), GHC heap-allocates more than necessary. Therefore C-- code
68 -- must be checked for increased allocation when creating and modifying such
69 -- functions.
70
71
72 -- [Note: Order of constructors]
73 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
74 -- The order of constructors of IntSet matters when considering performance.
75 -- Currently in GHC 7.0, when type has 3 constructors, they are matched from
76 -- the first to the last -- the best performance is achieved when the
77 -- constructors are ordered by frequency.
78 -- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil
79 -- improves the benchmark by circa 10%.
80
81 module Data.IntSet.Base (
82 -- * Set type
83 IntSet(..), Key -- instance Eq,Show
84
85 -- * Operators
86 , (\\)
87
88 -- * Query
89 , null
90 , size
91 , member
92 , notMember
93 , lookupLT
94 , lookupGT
95 , lookupLE
96 , lookupGE
97 , isSubsetOf
98 , isProperSubsetOf
99
100 -- * Construction
101 , empty
102 , singleton
103 , insert
104 , delete
105
106 -- * Combine
107 , union
108 , unions
109 , difference
110 , intersection
111
112 -- * Filter
113 , filter
114 , partition
115 , split
116 , splitMember
117 , splitRoot
118
119 -- * Map
120 , map
121
122 -- * Folds
123 , foldr
124 , foldl
125 -- ** Strict folds
126 , foldr'
127 , foldl'
128 -- ** Legacy folds
129 , fold
130
131 -- * Min\/Max
132 , findMin
133 , findMax
134 , deleteMin
135 , deleteMax
136 , deleteFindMin
137 , deleteFindMax
138 , maxView
139 , minView
140
141 -- * Conversion
142
143 -- ** List
144 , elems
145 , toList
146 , fromList
147
148 -- ** Ordered list
149 , toAscList
150 , toDescList
151 , fromAscList
152 , fromDistinctAscList
153
154 -- * Debugging
155 , showTree
156 , showTreeWith
157
158 -- * Internals
159 , match
160 , suffixBitMask
161 , prefixBitMask
162 , bitmapOf
163 ) where
164
165 -- We want to be able to compile without cabal. Nevertheless
166 -- #if defined(MIN_VERSION_base) && MIN_VERSION_base(4,5,0)
167 -- does not work, because if MIN_VERSION_base is undefined,
168 -- the last condition is syntactically wrong.
169 #define MIN_VERSION_base_4_5_0 0
170 #ifdef MIN_VERSION_base
171 #if MIN_VERSION_base(4,5,0)
172 #undef MIN_VERSION_base_4_5_0
173 #define MIN_VERSION_base_4_5_0 1
174 #endif
175 #endif
176
177 #define MIN_VERSION_base_4_7_0 0
178 #ifdef MIN_VERSION_base
179 #if MIN_VERSION_base(4,7,0)
180 #undef MIN_VERSION_base_4_7_0
181 #define MIN_VERSION_base_4_7_0 1
182 #endif
183 #endif
184
185 import Control.DeepSeq (NFData)
186 import Data.Bits
187 import qualified Data.List as List
188 import Data.Maybe (fromMaybe)
189 import Data.Monoid (Monoid(..))
190 import Data.Typeable
191 import Data.Word (Word)
192 import Prelude hiding (filter, foldr, foldl, null, map)
193
194 import Data.BitUtil
195 import Data.StrictPair
196
197 #if __GLASGOW_HASKELL__
198 import Data.Data (Data(..), Constr, mkConstr, constrIndex, Fixity(Prefix), DataType, mkDataType)
199 import Text.Read
200 #endif
201
202 #if __GLASGOW_HASKELL__
203 import GHC.Exts (Int(..), build)
204 #if __GLASGOW_HASKELL__ >= 708
205 import qualified GHC.Exts as GHCExts
206 #endif
207 import GHC.Prim (indexInt8OffAddr#)
208 #endif
209
210 -- On GHC, include MachDeps.h to get WORD_SIZE_IN_BITS macro.
211 #if defined(__GLASGOW_HASKELL__)
212 # include "MachDeps.h"
213 #endif
214
215 -- Use macros to define strictness of functions.
216 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
217 -- We do not use BangPatterns, because they are not in any standard and we
218 -- want the compilers to be compiled by as many compilers as possible.
219 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
220 #define STRICT_2_OF_2(fn) fn _ arg | arg `seq` False = undefined
221 #define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
222 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
223
224 infixl 9 \\{-This comment teaches CPP correct behaviour -}
225
226 -- A "Nat" is a natural machine word (an unsigned Int)
227 type Nat = Word
228
229 natFromInt :: Int -> Nat
230 natFromInt i = fromIntegral i
231 {-# INLINE natFromInt #-}
232
233 intFromNat :: Nat -> Int
234 intFromNat w = fromIntegral w
235 {-# INLINE intFromNat #-}
236
237 {--------------------------------------------------------------------
238 Operators
239 --------------------------------------------------------------------}
240 -- | /O(n+m)/. See 'difference'.
241 (\\) :: IntSet -> IntSet -> IntSet
242 m1 \\ m2 = difference m1 m2
243
244 {--------------------------------------------------------------------
245 Types
246 --------------------------------------------------------------------}
247
248 -- | A set of integers.
249
250 -- See Note: Order of constructors
251 data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
252 -- Invariant: Nil is never found as a child of Bin.
253 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
254 -- two elements of the set differ.
255 -- Invariant: Prefix is the common high-order bits that all elements share to
256 -- the left of the Mask bit.
257 -- Invariant: In Bin prefix mask left right, left consists of the elements that
258 -- don't have the mask bit set; right is all the elements that do.
259 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
260 -- Invariant: The Prefix is zero for all but the last 5 (on 32 bit arches) or 6
261 -- bits (on 64 bit arches). The values of the map represented by a tip
262 -- are the prefix plus the indices of the set bits in the bit map.
263 | Nil
264
265 -- A number stored in a set is stored as
266 -- * Prefix (all but last 5-6 bits) and
267 -- * BitMap (last 5-6 bits stored as a bitmask)
268 -- Last 5-6 bits are called a Suffix.
269
270 type Prefix = Int
271 type Mask = Int
272 type BitMap = Word
273 type Key = Int
274
275 instance Monoid IntSet where
276 mempty = empty
277 mappend = union
278 mconcat = unions
279
280 #if __GLASGOW_HASKELL__
281
282 {--------------------------------------------------------------------
283 A Data instance
284 --------------------------------------------------------------------}
285
286 -- This instance preserves data abstraction at the cost of inefficiency.
287 -- We provide limited reflection services for the sake of data abstraction.
288
289 instance Data IntSet where
290 gfoldl f z is = z fromList `f` (toList is)
291 toConstr _ = fromListConstr
292 gunfold k z c = case constrIndex c of
293 1 -> k (z fromList)
294 _ -> error "gunfold"
295 dataTypeOf _ = intSetDataType
296
297 fromListConstr :: Constr
298 fromListConstr = mkConstr intSetDataType "fromList" [] Prefix
299
300 intSetDataType :: DataType
301 intSetDataType = mkDataType "Data.IntSet.Base.IntSet" [fromListConstr]
302
303 #endif
304
305 {--------------------------------------------------------------------
306 Query
307 --------------------------------------------------------------------}
308 -- | /O(1)/. Is the set empty?
309 null :: IntSet -> Bool
310 null Nil = True
311 null _ = False
312 {-# INLINE null #-}
313
314 -- | /O(n)/. Cardinality of the set.
315 size :: IntSet -> Int
316 size t
317 = case t of
318 Bin _ _ l r -> size l + size r
319 Tip _ bm -> bitcount 0 bm
320 Nil -> 0
321
322 -- | /O(min(n,W))/. Is the value a member of the set?
323
324 -- See Note: Local 'go' functions and capturing]
325 member :: Key -> IntSet -> Bool
326 member x = x `seq` go
327 where
328 go (Bin p m l r)
329 | nomatch x p m = False
330 | zero x m = go l
331 | otherwise = go r
332 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0
333 go Nil = False
334
335 -- | /O(min(n,W))/. Is the element not in the set?
336 notMember :: Key -> IntSet -> Bool
337 notMember k = not . member k
338
339 -- | /O(log n)/. Find largest element smaller than the given one.
340 --
341 -- > lookupLT 3 (fromList [3, 5]) == Nothing
342 -- > lookupLT 5 (fromList [3, 5]) == Just 3
343
344 -- See Note: Local 'go' functions and capturing.
345 lookupLT :: Key -> IntSet -> Maybe Key
346 lookupLT x t = x `seq` case t of
347 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
348 _ -> go Nil t
349 where
350 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
351 | zero x m = go def l
352 | otherwise = go l r
353 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
354 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT
355 | otherwise = unsafeFindMax def
356 where maskLT = (bitmapOf x - 1) .&. bm
357 go def Nil = unsafeFindMax def
358
359
360 -- | /O(log n)/. Find smallest element greater than the given one.
361 --
362 -- > lookupGT 4 (fromList [3, 5]) == Just 5
363 -- > lookupGT 5 (fromList [3, 5]) == Nothing
364
365 -- See Note: Local 'go' functions and capturing.
366 lookupGT :: Key -> IntSet -> Maybe Key
367 lookupGT x t = x `seq` case t of
368 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
369 _ -> go Nil t
370 where
371 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
372 | zero x m = go r l
373 | otherwise = go def r
374 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
375 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT
376 | otherwise = unsafeFindMin def
377 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm
378 go def Nil = unsafeFindMin def
379
380
381 -- | /O(log n)/. Find largest element smaller or equal to the given one.
382 --
383 -- > lookupLE 2 (fromList [3, 5]) == Nothing
384 -- > lookupLE 4 (fromList [3, 5]) == Just 3
385 -- > lookupLE 5 (fromList [3, 5]) == Just 5
386
387 -- See Note: Local 'go' functions and capturing.
388 lookupLE :: Key -> IntSet -> Maybe Key
389 lookupLE x t = x `seq` case t of
390 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
391 _ -> go Nil t
392 where
393 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
394 | zero x m = go def l
395 | otherwise = go l r
396 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
397 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE
398 | otherwise = unsafeFindMax def
399 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm
400 go def Nil = unsafeFindMax def
401
402
403 -- | /O(log n)/. Find smallest element greater or equal to the given one.
404 --
405 -- > lookupGE 3 (fromList [3, 5]) == Just 3
406 -- > lookupGE 4 (fromList [3, 5]) == Just 5
407 -- > lookupGE 6 (fromList [3, 5]) == Nothing
408
409 -- See Note: Local 'go' functions and capturing.
410 lookupGE :: Key -> IntSet -> Maybe Key
411 lookupGE x t = x `seq` case t of
412 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
413 _ -> go Nil t
414 where
415 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
416 | zero x m = go r l
417 | otherwise = go def r
418 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
419 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE
420 | otherwise = unsafeFindMin def
421 where maskGE = (- (bitmapOf x)) .&. bm
422 go def Nil = unsafeFindMin def
423
424
425
426 -- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is
427 -- given, it has m > 0.
428 unsafeFindMin :: IntSet -> Maybe Key
429 unsafeFindMin Nil = Nothing
430 unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm
431 unsafeFindMin (Bin _ _ l _) = unsafeFindMin l
432
433 -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is
434 -- given, it has m > 0.
435 unsafeFindMax :: IntSet -> Maybe Key
436 unsafeFindMax Nil = Nothing
437 unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm
438 unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r
439
440 {--------------------------------------------------------------------
441 Construction
442 --------------------------------------------------------------------}
443 -- | /O(1)/. The empty set.
444 empty :: IntSet
445 empty
446 = Nil
447 {-# INLINE empty #-}
448
449 -- | /O(1)/. A set of one element.
450 singleton :: Key -> IntSet
451 singleton x
452 = Tip (prefixOf x) (bitmapOf x)
453 {-# INLINE singleton #-}
454
455 {--------------------------------------------------------------------
456 Insert
457 --------------------------------------------------------------------}
458 -- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for
459 -- IntSets.
460 insert :: Key -> IntSet -> IntSet
461 insert x = x `seq` insertBM (prefixOf x) (bitmapOf x)
462
463 -- Helper function for insert and union.
464 insertBM :: Prefix -> BitMap -> IntSet -> IntSet
465 insertBM kx bm t = kx `seq` bm `seq`
466 case t of
467 Bin p m l r
468 | nomatch kx p m -> link kx (Tip kx bm) p t
469 | zero kx m -> Bin p m (insertBM kx bm l) r
470 | otherwise -> Bin p m l (insertBM kx bm r)
471 Tip kx' bm'
472 | kx' == kx -> Tip kx' (bm .|. bm')
473 | otherwise -> link kx (Tip kx bm) kx' t
474 Nil -> Tip kx bm
475
476 -- | /O(min(n,W))/. Delete a value in the set. Returns the
477 -- original set when the value was not present.
478 delete :: Key -> IntSet -> IntSet
479 delete x = x `seq` deleteBM (prefixOf x) (bitmapOf x)
480
481 -- Deletes all values mentioned in the BitMap from the set.
482 -- Helper function for delete and difference.
483 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
484 deleteBM kx bm t = kx `seq` bm `seq`
485 case t of
486 Bin p m l r
487 | nomatch kx p m -> t
488 | zero kx m -> bin p m (deleteBM kx bm l) r
489 | otherwise -> bin p m l (deleteBM kx bm r)
490 Tip kx' bm'
491 | kx' == kx -> tip kx (bm' .&. complement bm)
492 | otherwise -> t
493 Nil -> Nil
494
495
496 {--------------------------------------------------------------------
497 Union
498 --------------------------------------------------------------------}
499 -- | The union of a list of sets.
500 unions :: [IntSet] -> IntSet
501 unions xs
502 = foldlStrict union empty xs
503
504
505 -- | /O(n+m)/. The union of two sets.
506 union :: IntSet -> IntSet -> IntSet
507 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
508 | shorter m1 m2 = union1
509 | shorter m2 m1 = union2
510 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
511 | otherwise = link p1 t1 p2 t2
512 where
513 union1 | nomatch p2 p1 m1 = link p1 t1 p2 t2
514 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
515 | otherwise = Bin p1 m1 l1 (union r1 t2)
516
517 union2 | nomatch p1 p2 m2 = link p1 t1 p2 t2
518 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
519 | otherwise = Bin p2 m2 l2 (union t1 r2)
520
521 union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t
522 union t@(Bin _ _ _ _) Nil = t
523 union (Tip kx bm) t = insertBM kx bm t
524 union Nil t = t
525
526
527 {--------------------------------------------------------------------
528 Difference
529 --------------------------------------------------------------------}
530 -- | /O(n+m)/. Difference between two sets.
531 difference :: IntSet -> IntSet -> IntSet
532 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
533 | shorter m1 m2 = difference1
534 | shorter m2 m1 = difference2
535 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
536 | otherwise = t1
537 where
538 difference1 | nomatch p2 p1 m1 = t1
539 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
540 | otherwise = bin p1 m1 l1 (difference r1 t2)
541
542 difference2 | nomatch p1 p2 m2 = t1
543 | zero p1 m2 = difference t1 l2
544 | otherwise = difference t1 r2
545
546 difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t
547 difference t@(Bin _ _ _ _) Nil = t
548
549 difference t1@(Tip kx bm) t2 = differenceTip t2
550 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1
551 | zero kx m2 = differenceTip l2
552 | otherwise = differenceTip r2
553 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2)
554 | otherwise = t1
555 differenceTip Nil = t1
556
557 difference Nil _ = Nil
558
559
560
561 {--------------------------------------------------------------------
562 Intersection
563 --------------------------------------------------------------------}
564 -- | /O(n+m)/. The intersection of two sets.
565 intersection :: IntSet -> IntSet -> IntSet
566 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
567 | shorter m1 m2 = intersection1
568 | shorter m2 m1 = intersection2
569 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
570 | otherwise = Nil
571 where
572 intersection1 | nomatch p2 p1 m1 = Nil
573 | zero p2 m1 = intersection l1 t2
574 | otherwise = intersection r1 t2
575
576 intersection2 | nomatch p1 p2 m2 = Nil
577 | zero p1 m2 = intersection t1 l2
578 | otherwise = intersection t1 r2
579
580 intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1
581 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil
582 | zero kx2 m1 = intersectBM l1
583 | otherwise = intersectBM r1
584 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
585 | otherwise = Nil
586 intersectBM Nil = Nil
587
588 intersection (Bin _ _ _ _) Nil = Nil
589
590 intersection (Tip kx1 bm1) t2 = intersectBM t2
591 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil
592 | zero kx1 m2 = intersectBM l2
593 | otherwise = intersectBM r2
594 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
595 | otherwise = Nil
596 intersectBM Nil = Nil
597
598 intersection Nil _ = Nil
599
600 {--------------------------------------------------------------------
601 Subset
602 --------------------------------------------------------------------}
603 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
604 isProperSubsetOf :: IntSet -> IntSet -> Bool
605 isProperSubsetOf t1 t2
606 = case subsetCmp t1 t2 of
607 LT -> True
608 _ -> False
609
610 subsetCmp :: IntSet -> IntSet -> Ordering
611 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
612 | shorter m1 m2 = GT
613 | shorter m2 m1 = case subsetCmpLt of
614 GT -> GT
615 _ -> LT
616 | p1 == p2 = subsetCmpEq
617 | otherwise = GT -- disjoint
618 where
619 subsetCmpLt | nomatch p1 p2 m2 = GT
620 | zero p1 m2 = subsetCmp t1 l2
621 | otherwise = subsetCmp t1 r2
622 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
623 (GT,_ ) -> GT
624 (_ ,GT) -> GT
625 (EQ,EQ) -> EQ
626 _ -> LT
627
628 subsetCmp (Bin _ _ _ _) _ = GT
629 subsetCmp (Tip kx1 bm1) (Tip kx2 bm2)
630 | kx1 /= kx2 = GT -- disjoint
631 | bm1 == bm2 = EQ
632 | bm1 .&. complement bm2 == 0 = LT
633 | otherwise = GT
634 subsetCmp t1@(Tip kx _) (Bin p m l r)
635 | nomatch kx p m = GT
636 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT
637 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT
638 subsetCmp (Tip _ _) Nil = GT -- disjoint
639 subsetCmp Nil Nil = EQ
640 subsetCmp Nil _ = LT
641
642 -- | /O(n+m)/. Is this a subset?
643 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
644
645 isSubsetOf :: IntSet -> IntSet -> Bool
646 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
647 | shorter m1 m2 = False
648 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
649 else isSubsetOf t1 r2)
650 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
651 isSubsetOf (Bin _ _ _ _) _ = False
652 isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0
653 isSubsetOf t1@(Tip kx _) (Bin p m l r)
654 | nomatch kx p m = False
655 | zero kx m = isSubsetOf t1 l
656 | otherwise = isSubsetOf t1 r
657 isSubsetOf (Tip _ _) Nil = False
658 isSubsetOf Nil _ = True
659
660
661 {--------------------------------------------------------------------
662 Filter
663 --------------------------------------------------------------------}
664 -- | /O(n)/. Filter all elements that satisfy some predicate.
665 filter :: (Key -> Bool) -> IntSet -> IntSet
666 filter predicate t
667 = case t of
668 Bin p m l r
669 -> bin p m (filter predicate l) (filter predicate r)
670 Tip kx bm
671 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm)
672 Nil -> Nil
673 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
674 | otherwise = bm
675 {-# INLINE bitPred #-}
676
677 -- | /O(n)/. partition the set according to some predicate.
678 partition :: (Key -> Bool) -> IntSet -> (IntSet,IntSet)
679 partition predicate0 t0 = toPair $ go predicate0 t0
680 where
681 go predicate t
682 = case t of
683 Bin p m l r
684 -> let (l1 :*: l2) = go predicate l
685 (r1 :*: r2) = go predicate r
686 in bin p m l1 r1 :*: bin p m l2 r2
687 Tip kx bm
688 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm
689 in tip kx bm1 :*: tip kx (bm `xor` bm1)
690 Nil -> (Nil :*: Nil)
691 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
692 | otherwise = bm
693 {-# INLINE bitPred #-}
694
695
696 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
697 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
698 -- comprises the elements of @set@ greater than @x@.
699 --
700 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
701 split :: Key -> IntSet -> (IntSet,IntSet)
702 split x t =
703 case t of
704 Bin _ m l r
705 | m < 0 -> if x >= 0 -- handle negative numbers.
706 then case go x l of (lt :*: gt) -> let lt' = union lt r
707 in lt' `seq` (lt', gt)
708 else case go x r of (lt :*: gt) -> let gt' = union gt l
709 in gt' `seq` (lt, gt')
710 _ -> case go x t of
711 (lt :*: gt) -> (lt, gt)
712 where
713 go !x' t'@(Bin p m l r)
714 | match x' p m = if zero x' m
715 then case go x' l of
716 (lt :*: gt) -> lt :*: union gt r
717 else case go x' r of
718 (lt :*: gt) -> union lt l :*: gt
719 | otherwise = if x' < p then (Nil :*: t')
720 else (t' :*: Nil)
721 go x' t'@(Tip kx' bm)
722 | kx' > x' = (Nil :*: t')
723 -- equivalent to kx' > prefixOf x'
724 | kx' < prefixOf x' = (t' :*: Nil)
725 | otherwise = tip kx' (bm .&. lowerBitmap) :*: tip kx' (bm .&. higherBitmap)
726 where lowerBitmap = bitmapOf x' - 1
727 higherBitmap = complement (lowerBitmap + bitmapOf x')
728 go _ Nil = (Nil :*: Nil)
729
730 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
731 -- element was found in the original set.
732 splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet)
733 splitMember x t =
734 case t of
735 Bin _ m l r | m < 0 -> if x >= 0
736 then case go x l of
737 (lt, fnd, gt) -> let lt' = union lt r
738 in lt' `seq` (lt', fnd, gt)
739 else case go x r of
740 (lt, fnd, gt) -> let gt' = union gt l
741 in gt' `seq` (lt, fnd, gt')
742 _ -> go x t
743 where
744 go x' t'@(Bin p m l r)
745 | match x' p m = if zero x' m
746 then case go x' l of
747 (lt, fnd, gt) -> (lt, fnd, union gt r)
748 else case go x' r of
749 (lt, fnd, gt) -> (union lt l, fnd, gt)
750 | otherwise = if x' < p then (Nil, False, t') else (t', False, Nil)
751 go x' t'@(Tip kx' bm)
752 | kx' > x' = (Nil, False, t')
753 -- equivalent to kx' > prefixOf x'
754 | kx' < prefixOf x' = (t', False, Nil)
755 | otherwise = let lt = tip kx' (bm .&. lowerBitmap)
756 found = (bm .&. bitmapOfx') /= 0
757 gt = tip kx' (bm .&. higherBitmap)
758 in lt `seq` found `seq` gt `seq` (lt, found, gt)
759 where bitmapOfx' = bitmapOf x'
760 lowerBitmap = bitmapOfx' - 1
761 higherBitmap = complement (lowerBitmap + bitmapOfx')
762 go _ Nil = (Nil, False, Nil)
763
764
765 {----------------------------------------------------------------------
766 Min/Max
767 ----------------------------------------------------------------------}
768
769 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
770 -- stripped of that element, or 'Nothing' if passed an empty set.
771 maxView :: IntSet -> Maybe (Key, IntSet)
772 maxView t =
773 case t of Nil -> Nothing
774 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r)
775 _ -> Just (go t)
776 where
777 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r')
778 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
779 go Nil = error "maxView Nil"
780
781 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
782 -- stripped of that element, or 'Nothing' if passed an empty set.
783 minView :: IntSet -> Maybe (Key, IntSet)
784 minView t =
785 case t of Nil -> Nothing
786 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r')
787 _ -> Just (go t)
788 where
789 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r)
790 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
791 go Nil = error "minView Nil"
792
793 -- | /O(min(n,W))/. Delete and find the minimal element.
794 --
795 -- > deleteFindMin set = (findMin set, deleteMin set)
796 deleteFindMin :: IntSet -> (Key, IntSet)
797 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
798
799 -- | /O(min(n,W))/. Delete and find the maximal element.
800 --
801 -- > deleteFindMax set = (findMax set, deleteMax set)
802 deleteFindMax :: IntSet -> (Key, IntSet)
803 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
804
805
806 -- | /O(min(n,W))/. The minimal element of the set.
807 findMin :: IntSet -> Key
808 findMin Nil = error "findMin: empty set has no minimal element"
809 findMin (Tip kx bm) = kx + lowestBitSet bm
810 findMin (Bin _ m l r)
811 | m < 0 = find r
812 | otherwise = find l
813 where find (Tip kx bm) = kx + lowestBitSet bm
814 find (Bin _ _ l' _) = find l'
815 find Nil = error "findMin Nil"
816
817 -- | /O(min(n,W))/. The maximal element of a set.
818 findMax :: IntSet -> Key
819 findMax Nil = error "findMax: empty set has no maximal element"
820 findMax (Tip kx bm) = kx + highestBitSet bm
821 findMax (Bin _ m l r)
822 | m < 0 = find l
823 | otherwise = find r
824 where find (Tip kx bm) = kx + highestBitSet bm
825 find (Bin _ _ _ r') = find r'
826 find Nil = error "findMax Nil"
827
828
829 -- | /O(min(n,W))/. Delete the minimal element. Returns an empty set if the set is empty.
830 --
831 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
832 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
833 deleteMin :: IntSet -> IntSet
834 deleteMin = maybe Nil snd . minView
835
836 -- | /O(min(n,W))/. Delete the maximal element. Returns an empty set if the set is empty.
837 --
838 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
839 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
840 deleteMax :: IntSet -> IntSet
841 deleteMax = maybe Nil snd . maxView
842
843 {----------------------------------------------------------------------
844 Map
845 ----------------------------------------------------------------------}
846
847 -- | /O(n*min(n,W))/.
848 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
849 --
850 -- It's worth noting that the size of the result may be smaller if,
851 -- for some @(x,y)@, @x \/= y && f x == f y@
852
853 map :: (Key -> Key) -> IntSet -> IntSet
854 map f = fromList . List.map f . toList
855
856 {--------------------------------------------------------------------
857 Fold
858 --------------------------------------------------------------------}
859 -- | /O(n)/. Fold the elements in the set using the given right-associative
860 -- binary operator. This function is an equivalent of 'foldr' and is present
861 -- for compatibility only.
862 --
863 -- /Please note that fold will be deprecated in the future and removed./
864 fold :: (Key -> b -> b) -> b -> IntSet -> b
865 fold = foldr
866 {-# INLINE fold #-}
867
868 -- | /O(n)/. Fold the elements in the set using the given right-associative
869 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
870 --
871 -- For example,
872 --
873 -- > toAscList set = foldr (:) [] set
874 foldr :: (Key -> b -> b) -> b -> IntSet -> b
875 foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
876 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
877 | otherwise -> go (go z r) l
878 _ -> go z t
879 where
880 go z' Nil = z'
881 go z' (Tip kx bm) = foldrBits kx f z' bm
882 go z' (Bin _ _ l r) = go (go z' r) l
883 {-# INLINE foldr #-}
884
885 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
886 -- evaluated before using the result in the next application. This
887 -- function is strict in the starting value.
888 foldr' :: (Key -> b -> b) -> b -> IntSet -> b
889 foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
890 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
891 | otherwise -> go (go z r) l
892 _ -> go z t
893 where
894 STRICT_1_OF_2(go)
895 go z' Nil = z'
896 go z' (Tip kx bm) = foldr'Bits kx f z' bm
897 go z' (Bin _ _ l r) = go (go z' r) l
898 {-# INLINE foldr' #-}
899
900 -- | /O(n)/. Fold the elements in the set using the given left-associative
901 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
902 --
903 -- For example,
904 --
905 -- > toDescList set = foldl (flip (:)) [] set
906 foldl :: (a -> Key -> a) -> a -> IntSet -> a
907 foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
908 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
909 | otherwise -> go (go z l) r
910 _ -> go z t
911 where
912 STRICT_1_OF_2(go)
913 go z' Nil = z'
914 go z' (Tip kx bm) = foldlBits kx f z' bm
915 go z' (Bin _ _ l r) = go (go z' l) r
916 {-# INLINE foldl #-}
917
918 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
919 -- evaluated before using the result in the next application. This
920 -- function is strict in the starting value.
921 foldl' :: (a -> Key -> a) -> a -> IntSet -> a
922 foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
923 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
924 | otherwise -> go (go z l) r
925 _ -> go z t
926 where
927 STRICT_1_OF_2(go)
928 go z' Nil = z'
929 go z' (Tip kx bm) = foldl'Bits kx f z' bm
930 go z' (Bin _ _ l r) = go (go z' l) r
931 {-# INLINE foldl' #-}
932
933 {--------------------------------------------------------------------
934 List variations
935 --------------------------------------------------------------------}
936 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
937 -- Subject to list fusion.
938 elems :: IntSet -> [Key]
939 elems
940 = toAscList
941
942 {--------------------------------------------------------------------
943 Lists
944 --------------------------------------------------------------------}
945 #if __GLASGOW_HASKELL__ >= 708
946 instance GHCExts.IsList IntSet where
947 type Item IntSet = Key
948 fromList = fromList
949 toList = toList
950 #endif
951
952 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
953 toList :: IntSet -> [Key]
954 toList
955 = toAscList
956
957 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list
958 -- fusion.
959 toAscList :: IntSet -> [Key]
960 toAscList = foldr (:) []
961
962 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
963 -- fusion.
964 toDescList :: IntSet -> [Key]
965 toDescList = foldl (flip (:)) []
966
967 -- List fusion for the list generating functions.
968 #if __GLASGOW_HASKELL__
969 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
970 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
971 foldrFB :: (Key -> b -> b) -> b -> IntSet -> b
972 foldrFB = foldr
973 {-# INLINE[0] foldrFB #-}
974 foldlFB :: (a -> Key -> a) -> a -> IntSet -> a
975 foldlFB = foldl
976 {-# INLINE[0] foldlFB #-}
977
978 -- Inline elems and toList, so that we need to fuse only toAscList.
979 {-# INLINE elems #-}
980 {-# INLINE toList #-}
981
982 -- The fusion is enabled up to phase 2 included. If it does not succeed,
983 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
984 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
985 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
986 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
987 -- before phase 0, otherwise the fusion rules would not fire at all.
988 {-# NOINLINE[0] toAscList #-}
989 {-# NOINLINE[0] toDescList #-}
990 {-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
991 {-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
992 {-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
993 {-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
994 #endif
995
996
997 -- | /O(n*min(n,W))/. Create a set from a list of integers.
998 fromList :: [Key] -> IntSet
999 fromList xs
1000 = foldlStrict ins empty xs
1001 where
1002 ins t x = insert x t
1003
1004 -- | /O(n)/. Build a set from an ascending list of elements.
1005 -- /The precondition (input list is ascending) is not checked./
1006 fromAscList :: [Key] -> IntSet
1007 fromAscList [] = Nil
1008 fromAscList (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
1009 where
1010 combineEq x' [] = [x']
1011 combineEq x' (x:xs)
1012 | x==x' = combineEq x' xs
1013 | otherwise = x' : combineEq x xs
1014
1015 -- | /O(n)/. Build a set from an ascending list of distinct elements.
1016 -- /The precondition (input list is strictly ascending) is not checked./
1017 fromDistinctAscList :: [Key] -> IntSet
1018 fromDistinctAscList [] = Nil
1019 fromDistinctAscList (z0 : zs0) = work (prefixOf z0) (bitmapOf z0) zs0 Nada
1020 where
1021 -- 'work' accumulates all values that go into one tip, before passing this Tip
1022 -- to 'reduce'
1023 work kx bm [] stk = finish kx (Tip kx bm) stk
1024 work kx bm (z:zs) stk | kx == prefixOf z = work kx (bm .|. bitmapOf z) zs stk
1025 work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
1026
1027 reduce z zs _ px tx Nada = work (prefixOf z) (bitmapOf z) zs (Push px tx Nada)
1028 reduce z zs m px tx stk@(Push py ty stk') =
1029 let mxy = branchMask px py
1030 pxy = mask px mxy
1031 in if shorter m mxy
1032 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
1033 else work (prefixOf z) (bitmapOf z) zs (Push px tx stk)
1034
1035 finish _ t Nada = t
1036 finish px tx (Push py ty stk) = finish p (link py ty px tx) stk
1037 where m = branchMask px py
1038 p = mask px m
1039
1040 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
1041
1042
1043 {--------------------------------------------------------------------
1044 Eq
1045 --------------------------------------------------------------------}
1046 instance Eq IntSet where
1047 t1 == t2 = equal t1 t2
1048 t1 /= t2 = nequal t1 t2
1049
1050 equal :: IntSet -> IntSet -> Bool
1051 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1052 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
1053 equal (Tip kx1 bm1) (Tip kx2 bm2)
1054 = kx1 == kx2 && bm1 == bm2
1055 equal Nil Nil = True
1056 equal _ _ = False
1057
1058 nequal :: IntSet -> IntSet -> Bool
1059 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1060 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
1061 nequal (Tip kx1 bm1) (Tip kx2 bm2)
1062 = kx1 /= kx2 || bm1 /= bm2
1063 nequal Nil Nil = False
1064 nequal _ _ = True
1065
1066 {--------------------------------------------------------------------
1067 Ord
1068 --------------------------------------------------------------------}
1069
1070 instance Ord IntSet where
1071 compare s1 s2 = compare (toAscList s1) (toAscList s2)
1072 -- tentative implementation. See if more efficient exists.
1073
1074 {--------------------------------------------------------------------
1075 Show
1076 --------------------------------------------------------------------}
1077 instance Show IntSet where
1078 showsPrec p xs = showParen (p > 10) $
1079 showString "fromList " . shows (toList xs)
1080
1081 {--------------------------------------------------------------------
1082 Read
1083 --------------------------------------------------------------------}
1084 instance Read IntSet where
1085 #ifdef __GLASGOW_HASKELL__
1086 readPrec = parens $ prec 10 $ do
1087 Ident "fromList" <- lexP
1088 xs <- readPrec
1089 return (fromList xs)
1090
1091 readListPrec = readListPrecDefault
1092 #else
1093 readsPrec p = readParen (p > 10) $ \ r -> do
1094 ("fromList",s) <- lex r
1095 (xs,t) <- reads s
1096 return (fromList xs,t)
1097 #endif
1098
1099 {--------------------------------------------------------------------
1100 Typeable
1101 --------------------------------------------------------------------}
1102
1103 #include "Typeable.h"
1104 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
1105
1106 {--------------------------------------------------------------------
1107 NFData
1108 --------------------------------------------------------------------}
1109
1110 -- The IntSet constructors consist only of strict fields of Ints and
1111 -- IntSets, thus the default NFData instance which evaluates to whnf
1112 -- should suffice
1113 instance NFData IntSet
1114
1115 {--------------------------------------------------------------------
1116 Debugging
1117 --------------------------------------------------------------------}
1118 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1119 -- in a compressed, hanging format.
1120 showTree :: IntSet -> String
1121 showTree s
1122 = showTreeWith True False s
1123
1124
1125 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
1126 the tree that implements the set. If @hang@ is
1127 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
1128 @wide@ is 'True', an extra wide version is shown.
1129 -}
1130 showTreeWith :: Bool -> Bool -> IntSet -> String
1131 showTreeWith hang wide t
1132 | hang = (showsTreeHang wide [] t) ""
1133 | otherwise = (showsTree wide [] [] t) ""
1134
1135 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
1136 showsTree wide lbars rbars t
1137 = case t of
1138 Bin p m l r
1139 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1140 showWide wide rbars .
1141 showsBars lbars . showString (showBin p m) . showString "\n" .
1142 showWide wide lbars .
1143 showsTree wide (withEmpty lbars) (withBar lbars) l
1144 Tip kx bm
1145 -> showsBars lbars . showString " " . shows kx . showString " + " .
1146 showsBitMap bm . showString "\n"
1147 Nil -> showsBars lbars . showString "|\n"
1148
1149 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
1150 showsTreeHang wide bars t
1151 = case t of
1152 Bin p m l r
1153 -> showsBars bars . showString (showBin p m) . showString "\n" .
1154 showWide wide bars .
1155 showsTreeHang wide (withBar bars) l .
1156 showWide wide bars .
1157 showsTreeHang wide (withEmpty bars) r
1158 Tip kx bm
1159 -> showsBars bars . showString " " . shows kx . showString " + " .
1160 showsBitMap bm . showString "\n"
1161 Nil -> showsBars bars . showString "|\n"
1162
1163 showBin :: Prefix -> Mask -> String
1164 showBin _ _
1165 = "*" -- ++ show (p,m)
1166
1167 showWide :: Bool -> [String] -> String -> String
1168 showWide wide bars
1169 | wide = showString (concat (reverse bars)) . showString "|\n"
1170 | otherwise = id
1171
1172 showsBars :: [String] -> ShowS
1173 showsBars bars
1174 = case bars of
1175 [] -> id
1176 _ -> showString (concat (reverse (tail bars))) . showString node
1177
1178 showsBitMap :: Word -> ShowS
1179 showsBitMap = showString . showBitMap
1180
1181 showBitMap :: Word -> String
1182 showBitMap w = show $ foldrBits 0 (:) [] w
1183
1184 node :: String
1185 node = "+--"
1186
1187 withBar, withEmpty :: [String] -> [String]
1188 withBar bars = "| ":bars
1189 withEmpty bars = " ":bars
1190
1191
1192 {--------------------------------------------------------------------
1193 Helpers
1194 --------------------------------------------------------------------}
1195 {--------------------------------------------------------------------
1196 Link
1197 --------------------------------------------------------------------}
1198 link :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
1199 link p1 t1 p2 t2
1200 | zero p1 m = Bin p m t1 t2
1201 | otherwise = Bin p m t2 t1
1202 where
1203 m = branchMask p1 p2
1204 p = mask p1 m
1205 {-# INLINE link #-}
1206
1207 {--------------------------------------------------------------------
1208 @bin@ assures that we never have empty trees within a tree.
1209 --------------------------------------------------------------------}
1210 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
1211 bin _ _ l Nil = l
1212 bin _ _ Nil r = r
1213 bin p m l r = Bin p m l r
1214 {-# INLINE bin #-}
1215
1216 {--------------------------------------------------------------------
1217 @tip@ assures that we never have empty bitmaps within a tree.
1218 --------------------------------------------------------------------}
1219 tip :: Prefix -> BitMap -> IntSet
1220 tip _ 0 = Nil
1221 tip kx bm = Tip kx bm
1222 {-# INLINE tip #-}
1223
1224
1225 {----------------------------------------------------------------------
1226 Functions that generate Prefix and BitMap of a Key or a Suffix.
1227 ----------------------------------------------------------------------}
1228
1229 suffixBitMask :: Int
1230 #if MIN_VERSION_base_4_7_0
1231 suffixBitMask = finiteBitSize (undefined::Word) - 1
1232 #else
1233 suffixBitMask = bitSize (undefined::Word) - 1
1234 #endif
1235 {-# INLINE suffixBitMask #-}
1236
1237 prefixBitMask :: Int
1238 prefixBitMask = complement suffixBitMask
1239 {-# INLINE prefixBitMask #-}
1240
1241 prefixOf :: Int -> Prefix
1242 prefixOf x = x .&. prefixBitMask
1243 {-# INLINE prefixOf #-}
1244
1245 suffixOf :: Int -> Int
1246 suffixOf x = x .&. suffixBitMask
1247 {-# INLINE suffixOf #-}
1248
1249 bitmapOfSuffix :: Int -> BitMap
1250 bitmapOfSuffix s = 1 `shiftLL` s
1251 {-# INLINE bitmapOfSuffix #-}
1252
1253 bitmapOf :: Int -> BitMap
1254 bitmapOf x = bitmapOfSuffix (suffixOf x)
1255 {-# INLINE bitmapOf #-}
1256
1257
1258 {--------------------------------------------------------------------
1259 Endian independent bit twiddling
1260 --------------------------------------------------------------------}
1261 zero :: Int -> Mask -> Bool
1262 zero i m
1263 = (natFromInt i) .&. (natFromInt m) == 0
1264 {-# INLINE zero #-}
1265
1266 nomatch,match :: Int -> Prefix -> Mask -> Bool
1267 nomatch i p m
1268 = (mask i m) /= p
1269 {-# INLINE nomatch #-}
1270
1271 match i p m
1272 = (mask i m) == p
1273 {-# INLINE match #-}
1274
1275 -- Suppose a is largest such that 2^a divides 2*m.
1276 -- Then mask i m is i with the low a bits zeroed out.
1277 mask :: Int -> Mask -> Prefix
1278 mask i m
1279 = maskW (natFromInt i) (natFromInt m)
1280 {-# INLINE mask #-}
1281
1282 {--------------------------------------------------------------------
1283 Big endian operations
1284 --------------------------------------------------------------------}
1285 maskW :: Nat -> Nat -> Prefix
1286 maskW i m
1287 = intFromNat (i .&. (complement (m-1) `xor` m))
1288 {-# INLINE maskW #-}
1289
1290 shorter :: Mask -> Mask -> Bool
1291 shorter m1 m2
1292 = (natFromInt m1) > (natFromInt m2)
1293 {-# INLINE shorter #-}
1294
1295 branchMask :: Prefix -> Prefix -> Mask
1296 branchMask p1 p2
1297 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
1298 {-# INLINE branchMask #-}
1299
1300 {----------------------------------------------------------------------
1301 To get best performance, we provide fast implementations of
1302 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC.
1303 If the intel bsf and bsr instructions ever become GHC primops,
1304 this code should be reimplemented using these.
1305
1306 Performance of this code is crucial for folds, toList, filter, partition.
1307
1308 The signatures of methods in question are placed after this comment.
1309 ----------------------------------------------------------------------}
1310
1311 lowestBitSet :: Nat -> Int
1312 highestBitSet :: Nat -> Int
1313 foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1314 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1315 foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1316 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1317
1318 {-# INLINE lowestBitSet #-}
1319 {-# INLINE highestBitSet #-}
1320 {-# INLINE foldlBits #-}
1321 {-# INLINE foldl'Bits #-}
1322 {-# INLINE foldrBits #-}
1323 {-# INLINE foldr'Bits #-}
1324
1325 #if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64)
1326 {----------------------------------------------------------------------
1327 For lowestBitSet we use wordsize-dependant implementation based on
1328 multiplication and DeBrujn indeces, which was proposed by Edward Kmett
1329 <http://haskell.org/pipermail/libraries/2011-September/016749.html>
1330
1331 The core of this implementation is fast indexOfTheOnlyBit,
1332 which is given a Nat with exactly one bit set, and returns
1333 its index.
1334
1335 Lot of effort was put in these implementations, please benchmark carefully
1336 before changing this code.
1337 ----------------------------------------------------------------------}
1338
1339 indexOfTheOnlyBit :: Nat -> Int
1340 {-# INLINE indexOfTheOnlyBit #-}
1341 indexOfTheOnlyBit bitmask =
1342 I# (lsbArray `indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))
1343 where unboxInt (I# i) = i
1344 #if WORD_SIZE_IN_BITS==32
1345 magic = 0x077CB531
1346 offset = 27
1347 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"#
1348 #else
1349 magic = 0x07EDD5E59A4E28C2
1350 offset = 58
1351 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"#
1352 #endif
1353 -- The lsbArray gets inlined to every call site of indexOfTheOnlyBit.
1354 -- That cannot be easily avoided, as GHC forbids top-level Addr# literal.
1355 -- One could go around that by supplying getLsbArray :: () -> Addr# marked
1356 -- as NOINLINE. But the code size of calling it and processing the result
1357 -- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array
1358 -- is actually improvement on 32-bit and only a 8B size increase on 64-bit.
1359
1360 lowestBitMask :: Nat -> Nat
1361 lowestBitMask x = x .&. negate x
1362 {-# INLINE lowestBitMask #-}
1363
1364 -- Reverse the order of bits in the Nat.
1365 revNat :: Nat -> Nat
1366 #if WORD_SIZE_IN_BITS==32
1367 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of
1368 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of
1369 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of
1370 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of
1371 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16);
1372 #else
1373 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of
1374 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of
1375 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of
1376 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of
1377 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of
1378 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
1379 #endif
1380
1381 lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x)
1382
1383 highestBitSet x = indexOfTheOnlyBit (highestBitMask x)
1384
1385 foldlBits prefix f z bitmap = go bitmap z
1386 where go bm acc | bm == 0 = acc
1387 | otherwise = case lowestBitMask bm of
1388 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1389 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1390
1391 foldl'Bits prefix f z bitmap = go bitmap z
1392 where STRICT_2_OF_2(go)
1393 go bm acc | bm == 0 = acc
1394 | otherwise = case lowestBitMask bm of
1395 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1396 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1397
1398 foldrBits prefix f z bitmap = go (revNat bitmap) z
1399 where go bm acc | bm == 0 = acc
1400 | otherwise = case lowestBitMask bm of
1401 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1402 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1403
1404 foldr'Bits prefix f z bitmap = go (revNat bitmap) z
1405 where STRICT_2_OF_2(go)
1406 go bm acc | bm == 0 = acc
1407 | otherwise = case lowestBitMask bm of
1408 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1409 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1410
1411 #else
1412 {----------------------------------------------------------------------
1413 In general case we use logarithmic implementation of
1414 lowestBitSet and highestBitSet, which works up to bit sizes of 64.
1415
1416 Folds are linear scans.
1417 ----------------------------------------------------------------------}
1418
1419 lowestBitSet n0 =
1420 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32)
1421 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1)
1422 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2)
1423 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3)
1424 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4)
1425 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5
1426 in b6
1427
1428 highestBitSet n0 =
1429 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0)
1430 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1)
1431 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2)
1432 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3)
1433 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4)
1434 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5
1435 in b6
1436
1437 foldlBits prefix f z bm = let lb = lowestBitSet bm
1438 in go (prefix+lb) z (bm `shiftRL` lb)
1439 where STRICT_1_OF_3(go)
1440 go _ acc 0 = acc
1441 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1442 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1443
1444 foldl'Bits prefix f z bm = let lb = lowestBitSet bm
1445 in go (prefix+lb) z (bm `shiftRL` lb)
1446 where STRICT_1_OF_3(go)
1447 STRICT_2_OF_3(go)
1448 go _ acc 0 = acc
1449 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1450 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1451
1452 foldrBits prefix f z bm = let lb = lowestBitSet bm
1453 in go (prefix+lb) (bm `shiftRL` lb)
1454 where STRICT_1_OF_2(go)
1455 go _ 0 = z
1456 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1))
1457 | otherwise = go (bi + 1) (n `shiftRL` 1)
1458
1459 foldr'Bits prefix f z bm = let lb = lowestBitSet bm
1460 in go (prefix+lb) (bm `shiftRL` lb)
1461 where STRICT_1_OF_2(go)
1462 go _ 0 = z
1463 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
1464 | otherwise = go (bi + 1) (n `shiftRL` 1)
1465
1466 #endif
1467
1468 {----------------------------------------------------------------------
1469 [bitcount] as posted by David F. Place to haskell-cafe on April 11, 2006,
1470 based on the code on
1471 http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan,
1472 where the following source is given:
1473 Published in 1988, the C Programming Language 2nd Ed. (by Brian W.
1474 Kernighan and Dennis M. Ritchie) mentions this in exercise 2-9. On April
1475 19, 2006 Don Knuth pointed out to me that this method "was first published
1476 by Peter Wegner in CACM 3 (1960), 322. (Also discovered independently by
1477 Derrick Lehmer and published in 1964 in a book edited by Beckenbach.)"
1478 ----------------------------------------------------------------------}
1479
1480 bitcount :: Int -> Word -> Int
1481 #if MIN_VERSION_base_4_5_0
1482 bitcount a x = a + popCount x
1483 #else
1484 bitcount a0 x0 = go a0 x0
1485 where go a 0 = a
1486 go a x = go (a + 1) (x .&. (x-1))
1487 #endif
1488 {-# INLINE bitcount #-}
1489
1490
1491 {--------------------------------------------------------------------
1492 Utilities
1493 --------------------------------------------------------------------}
1494 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1495 foldlStrict f = go
1496 where
1497 go z [] = z
1498 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1499 {-# INLINE foldlStrict #-}
1500
1501 -- | /O(1)/. Decompose a set into pieces based on the structure of the underlying
1502 -- tree. This function is useful for consuming a set in parallel.
1503 --
1504 -- No guarantee is made as to the sizes of the pieces; an internal, but
1505 -- deterministic process determines this. However, it is guaranteed that the
1506 -- pieces returned will be in ascending order (all elements in the first submap
1507 -- less than all elements in the second, and so on).
1508 --
1509 -- Examples:
1510 --
1511 -- > splitRoot (fromList [1..120]) == [fromList [1..63],fromList [64..120]]
1512 -- > splitRoot empty == []
1513 --
1514 -- Note that the current implementation does not return more than two subsets,
1515 -- but you should not depend on this behaviour because it can change in the
1516 -- future without notice. Also, the current version does not continue
1517 -- splitting all the way to individual singleton sets -- it stops at some
1518 -- point.
1519 splitRoot :: IntSet -> [IntSet]
1520 splitRoot orig =
1521 case orig of
1522 Nil -> []
1523 -- NOTE: we don't currently split below Tip, but we could.
1524 x@(Tip _ _) -> [x]
1525 Bin _ m l r | m < 0 -> [r, l]
1526 | otherwise -> [l, r]
1527 {-# INLINE splitRoot #-}