Annotate IntSet.* with Key; move type Key = Int to IntSet.Base
[packages/containers.git] / Data / IntSet / Base.hs
1 {-# LANGUAGE CPP #-}
2 #if __GLASGOW_HASKELL__
3 {-# LANGUAGE MagicHash, BangPatterns, DeriveDataTypeable, StandaloneDeriving #-}
4 #endif
5 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
6 {-# LANGUAGE Trustworthy #-}
7 #endif
8 -----------------------------------------------------------------------------
9 -- |
10 -- Module : Data.IntSet.Base
11 -- Copyright : (c) Daan Leijen 2002
12 -- (c) Joachim Breitner 2011
13 -- License : BSD-style
14 -- Maintainer : libraries@haskell.org
15 -- Stability : provisional
16 -- Portability : portable
17 --
18 -- An efficient implementation of integer sets.
19 --
20 -- These modules are intended to be imported qualified, to avoid name
21 -- clashes with Prelude functions, e.g.
22 --
23 -- > import Data.IntSet (IntSet)
24 -- > import qualified Data.IntSet as IntSet
25 --
26 -- The implementation is based on /big-endian patricia trees/. This data
27 -- structure performs especially well on binary operations like 'union'
28 -- and 'intersection'. However, my benchmarks show that it is also
29 -- (much) faster on insertions and deletions when compared to a generic
30 -- size-balanced set implementation (see "Data.Set").
31 --
32 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
33 -- Workshop on ML, September 1998, pages 77-86,
34 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
35 --
36 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
37 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
38 -- October 1968, pages 514-534.
39 --
40 -- Additionally, this implementation places bitmaps in the leaves of the tree.
41 -- Their size is the natural size of a machine word (32 or 64 bits) and greatly
42 -- reduce memory footprint and execution times for dense sets, e.g. sets where
43 -- it is likely that many values lie close to each other. The asymptotics are
44 -- not affected by this optimization.
45 --
46 -- Many operations have a worst-case complexity of /O(min(n,W))/.
47 -- This means that the operation can become linear in the number of
48 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
49 -- (32 or 64).
50 -----------------------------------------------------------------------------
51
52 -- [Note: INLINE bit fiddling]
53 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
54 -- It is essential that the bit fiddling functions like mask, zero, branchMask
55 -- etc are inlined. If they do not, the memory allocation skyrockets. The GHC
56 -- usually gets it right, but it is disastrous if it does not. Therefore we
57 -- explicitly mark these functions INLINE.
58
59
60 -- [Note: Local 'go' functions and capturing]
61 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
62 -- Care must be taken when using 'go' function which captures an argument.
63 -- Sometimes (for example when the argument is passed to a data constructor,
64 -- as in insert), GHC heap-allocates more than necessary. Therefore C-- code
65 -- must be checked for increased allocation when creating and modifying such
66 -- functions.
67
68
69 -- [Note: Order of constructors]
70 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
71 -- The order of constructors of IntSet matters when considering performance.
72 -- Currently in GHC 7.0, when type has 3 constructors, they are matched from
73 -- the first to the last -- the best performance is achieved when the
74 -- constructors are ordered by frequency.
75 -- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil
76 -- improves the benchmark by circa 10%.
77
78 module Data.IntSet.Base (
79 -- * Set type
80 IntSet(..), Key -- instance Eq,Show
81
82 -- * Operators
83 , (\\)
84
85 -- * Query
86 , null
87 , size
88 , member
89 , notMember
90 , lookupLT
91 , lookupGT
92 , lookupLE
93 , lookupGE
94 , isSubsetOf
95 , isProperSubsetOf
96
97 -- * Construction
98 , empty
99 , singleton
100 , insert
101 , delete
102
103 -- * Combine
104 , union
105 , unions
106 , difference
107 , intersection
108
109 -- * Filter
110 , filter
111 , partition
112 , split
113 , splitMember
114
115 -- * Map
116 , map
117
118 -- * Folds
119 , foldr
120 , foldl
121 -- ** Strict folds
122 , foldr'
123 , foldl'
124 -- ** Legacy folds
125 , fold
126
127 -- * Min\/Max
128 , findMin
129 , findMax
130 , deleteMin
131 , deleteMax
132 , deleteFindMin
133 , deleteFindMax
134 , maxView
135 , minView
136
137 -- * Conversion
138
139 -- ** List
140 , elems
141 , toList
142 , fromList
143
144 -- ** Ordered list
145 , toAscList
146 , toDescList
147 , fromAscList
148 , fromDistinctAscList
149
150 -- * Debugging
151 , showTree
152 , showTreeWith
153
154 -- * Internals
155 , match
156 , suffixBitMask
157 , prefixBitMask
158 , bitmapOf
159 ) where
160
161
162 import Prelude hiding (filter,foldr,foldl,null,map)
163 import Data.Bits
164
165 import qualified Data.List as List
166 import Data.Monoid (Monoid(..))
167 import Data.Maybe (fromMaybe)
168 import Data.Typeable
169 import Control.DeepSeq (NFData)
170
171 import Data.StrictPair
172
173 #if __GLASGOW_HASKELL__
174 import Text.Read
175 import Data.Data (Data(..), mkNoRepType)
176 #endif
177
178 #if __GLASGOW_HASKELL__
179 import GHC.Exts ( Word(..), Int(..), build )
180 import GHC.Prim ( uncheckedShiftL#, uncheckedShiftRL#, indexInt8OffAddr# )
181 #else
182 import Data.Word
183 #endif
184
185 -- On GHC, include MachDeps.h to get WORD_SIZE_IN_BITS macro.
186 #if defined(__GLASGOW_HASKELL__)
187 #include "MachDeps.h"
188 #endif
189
190 -- Use macros to define strictness of functions.
191 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
192 -- We do not use BangPatterns, because they are not in any standard and we
193 -- want the compilers to be compiled by as many compilers as possible.
194 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
195 #define STRICT_2_OF_2(fn) fn _ arg | arg `seq` False = undefined
196 #define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
197 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
198
199 infixl 9 \\{-This comment teaches CPP correct behaviour -}
200
201 -- A "Nat" is a natural machine word (an unsigned Int)
202 type Nat = Word
203
204 natFromInt :: Int -> Nat
205 natFromInt i = fromIntegral i
206 {-# INLINE natFromInt #-}
207
208 intFromNat :: Nat -> Int
209 intFromNat w = fromIntegral w
210 {-# INLINE intFromNat #-}
211
212 -- Right and left logical shifts.
213 shiftRL, shiftLL :: Nat -> Int -> Nat
214 #if __GLASGOW_HASKELL__
215 {--------------------------------------------------------------------
216 GHC: use unboxing to get @shiftRL@ and @shiftLL@ inlined.
217 --------------------------------------------------------------------}
218 shiftRL (W# x) (I# i) = W# (uncheckedShiftRL# x i)
219 shiftLL (W# x) (I# i) = W# (uncheckedShiftL# x i)
220 #else
221 shiftRL x i = shiftR x i
222 shiftLL x i = shiftL x i
223 #endif
224 {-# INLINE shiftRL #-}
225 {-# INLINE shiftLL #-}
226
227 {--------------------------------------------------------------------
228 Operators
229 --------------------------------------------------------------------}
230 -- | /O(n+m)/. See 'difference'.
231 (\\) :: IntSet -> IntSet -> IntSet
232 m1 \\ m2 = difference m1 m2
233
234 {--------------------------------------------------------------------
235 Types
236 --------------------------------------------------------------------}
237
238 -- | A set of integers.
239
240 -- See Note: Order of constructors
241 data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
242 -- Invariant: Nil is never found as a child of Bin.
243 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
244 -- two elements of the set differ.
245 -- Invariant: Prefix is the common high-order bits that all elements share to
246 -- the left of the Mask bit.
247 -- Invariant: In Bin prefix mask left right, left consists of the elements that
248 -- don't have the mask bit set; right is all the elements that do.
249 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
250 -- Invariant: The Prefix is zero for all but the last 5 (on 32 bit arches) or 6
251 -- bits (on 64 bit arches). The values of the map represented by a tip
252 -- are the prefix plus the indices of the set bits in the bit map.
253 | Nil
254
255 -- A number stored in a set is stored as
256 -- * Prefix (all but last 5-6 bits) and
257 -- * BitMap (last 5-6 bits stored as a bitmask)
258 -- Last 5-6 bits are called a Suffix.
259
260 type Prefix = Int
261 type Mask = Int
262 type BitMap = Word
263 type Key = Int
264
265 instance Monoid IntSet where
266 mempty = empty
267 mappend = union
268 mconcat = unions
269
270 #if __GLASGOW_HASKELL__
271
272 {--------------------------------------------------------------------
273 A Data instance
274 --------------------------------------------------------------------}
275
276 -- This instance preserves data abstraction at the cost of inefficiency.
277 -- We omit reflection services for the sake of data abstraction.
278
279 instance Data IntSet where
280 gfoldl f z is = z fromList `f` (toList is)
281 toConstr _ = error "toConstr"
282 gunfold _ _ = error "gunfold"
283 dataTypeOf _ = mkNoRepType "Data.IntSet.IntSet"
284
285 #endif
286
287 {--------------------------------------------------------------------
288 Query
289 --------------------------------------------------------------------}
290 -- | /O(1)/. Is the set empty?
291 null :: IntSet -> Bool
292 null Nil = True
293 null _ = False
294 {-# INLINE null #-}
295
296 -- | /O(n)/. Cardinality of the set.
297 size :: IntSet -> Int
298 size t
299 = case t of
300 Bin _ _ l r -> size l + size r
301 Tip _ bm -> bitcount 0 bm
302 Nil -> 0
303
304 -- | /O(min(n,W))/. Is the value a member of the set?
305
306 -- See Note: Local 'go' functions and capturing]
307 member :: Key -> IntSet -> Bool
308 member x = x `seq` go
309 where
310 go (Bin p m l r)
311 | nomatch x p m = False
312 | zero x m = go l
313 | otherwise = go r
314 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0
315 go Nil = False
316
317 -- | /O(min(n,W))/. Is the element not in the set?
318 notMember :: Key -> IntSet -> Bool
319 notMember k = not . member k
320
321 -- | /O(log n)/. Find largest element smaller than the given one.
322 --
323 -- > lookupLT 3 (fromList [3, 5]) == Nothing
324 -- > lookupLT 5 (fromList [3, 5]) == Just 3
325
326 -- See Note: Local 'go' functions and capturing.
327 lookupLT :: Key -> IntSet -> Maybe Key
328 lookupLT x t = x `seq` case t of
329 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
330 _ -> go Nil t
331 where
332 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
333 | zero x m = go def l
334 | otherwise = go l r
335 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
336 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT
337 | otherwise = unsafeFindMax def
338 where maskLT = (bitmapOf x - 1) .&. bm
339 go def Nil = unsafeFindMax def
340
341
342 -- | /O(log n)/. Find smallest element greater than the given one.
343 --
344 -- > lookupGT 4 (fromList [3, 5]) == Just 5
345 -- > lookupGT 5 (fromList [3, 5]) == Nothing
346
347 -- See Note: Local 'go' functions and capturing.
348 lookupGT :: Key -> IntSet -> Maybe Key
349 lookupGT x t = x `seq` case t of
350 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
351 _ -> go Nil t
352 where
353 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
354 | zero x m = go r l
355 | otherwise = go def r
356 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
357 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT
358 | otherwise = unsafeFindMin def
359 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm
360 go def Nil = unsafeFindMin def
361
362
363 -- | /O(log n)/. Find largest element smaller or equal to the given one.
364 --
365 -- > lookupLE 2 (fromList [3, 5]) == Nothing
366 -- > lookupLE 4 (fromList [3, 5]) == Just 3
367 -- > lookupLE 5 (fromList [3, 5]) == Just 5
368
369 -- See Note: Local 'go' functions and capturing.
370 lookupLE :: Key -> IntSet -> Maybe Key
371 lookupLE x t = x `seq` case t of
372 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
373 _ -> go Nil t
374 where
375 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
376 | zero x m = go def l
377 | otherwise = go l r
378 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
379 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE
380 | otherwise = unsafeFindMax def
381 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm
382 go def Nil = unsafeFindMax def
383
384
385 -- | /O(log n)/. Find smallest element greater or equal to the given one.
386 --
387 -- > lookupGE 3 (fromList [3, 5]) == Just 3
388 -- > lookupGE 4 (fromList [3, 5]) == Just 5
389 -- > lookupGE 6 (fromList [3, 5]) == Nothing
390
391 -- See Note: Local 'go' functions and capturing.
392 lookupGE :: Key -> IntSet -> Maybe Key
393 lookupGE x t = x `seq` case t of
394 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
395 _ -> go Nil t
396 where
397 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
398 | zero x m = go r l
399 | otherwise = go def r
400 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
401 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE
402 | otherwise = unsafeFindMin def
403 where maskGE = (- (bitmapOf x)) .&. bm
404 go def Nil = unsafeFindMin def
405
406
407
408 -- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is
409 -- given, it has m > 0.
410 unsafeFindMin :: IntSet -> Maybe Key
411 unsafeFindMin Nil = Nothing
412 unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm
413 unsafeFindMin (Bin _ _ l _) = unsafeFindMin l
414
415 -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is
416 -- given, it has m > 0.
417 unsafeFindMax :: IntSet -> Maybe Key
418 unsafeFindMax Nil = Nothing
419 unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm
420 unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r
421
422 {--------------------------------------------------------------------
423 Construction
424 --------------------------------------------------------------------}
425 -- | /O(1)/. The empty set.
426 empty :: IntSet
427 empty
428 = Nil
429 {-# INLINE empty #-}
430
431 -- | /O(1)/. A set of one element.
432 singleton :: Key -> IntSet
433 singleton x
434 = Tip (prefixOf x) (bitmapOf x)
435 {-# INLINE singleton #-}
436
437 {--------------------------------------------------------------------
438 Insert
439 --------------------------------------------------------------------}
440 -- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for
441 -- IntSets.
442 insert :: Key -> IntSet -> IntSet
443 insert x = x `seq` insertBM (prefixOf x) (bitmapOf x)
444
445 -- Helper function for insert and union.
446 insertBM :: Prefix -> BitMap -> IntSet -> IntSet
447 insertBM kx bm t = kx `seq` bm `seq`
448 case t of
449 Bin p m l r
450 | nomatch kx p m -> join kx (Tip kx bm) p t
451 | zero kx m -> Bin p m (insertBM kx bm l) r
452 | otherwise -> Bin p m l (insertBM kx bm r)
453 Tip kx' bm'
454 | kx' == kx -> Tip kx' (bm .|. bm')
455 | otherwise -> join kx (Tip kx bm) kx' t
456 Nil -> Tip kx bm
457
458 -- | /O(min(n,W))/. Delete a value in the set. Returns the
459 -- original set when the value was not present.
460 delete :: Key -> IntSet -> IntSet
461 delete x = x `seq` deleteBM (prefixOf x) (bitmapOf x)
462
463 -- Deletes all values mentioned in the BitMap from the set.
464 -- Helper function for delete and difference.
465 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
466 deleteBM kx bm t = kx `seq` bm `seq`
467 case t of
468 Bin p m l r
469 | nomatch kx p m -> t
470 | zero kx m -> bin p m (deleteBM kx bm l) r
471 | otherwise -> bin p m l (deleteBM kx bm r)
472 Tip kx' bm'
473 | kx' == kx -> tip kx (bm' .&. complement bm)
474 | otherwise -> t
475 Nil -> Nil
476
477
478 {--------------------------------------------------------------------
479 Union
480 --------------------------------------------------------------------}
481 -- | The union of a list of sets.
482 unions :: [IntSet] -> IntSet
483 unions xs
484 = foldlStrict union empty xs
485
486
487 -- | /O(n+m)/. The union of two sets.
488 union :: IntSet -> IntSet -> IntSet
489 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
490 | shorter m1 m2 = union1
491 | shorter m2 m1 = union2
492 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
493 | otherwise = join p1 t1 p2 t2
494 where
495 union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
496 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
497 | otherwise = Bin p1 m1 l1 (union r1 t2)
498
499 union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
500 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
501 | otherwise = Bin p2 m2 l2 (union t1 r2)
502
503 union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t
504 union t@(Bin _ _ _ _) Nil = t
505 union (Tip kx bm) t = insertBM kx bm t
506 union Nil t = t
507
508
509 {--------------------------------------------------------------------
510 Difference
511 --------------------------------------------------------------------}
512 -- | /O(n+m)/. Difference between two sets.
513 difference :: IntSet -> IntSet -> IntSet
514 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
515 | shorter m1 m2 = difference1
516 | shorter m2 m1 = difference2
517 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
518 | otherwise = t1
519 where
520 difference1 | nomatch p2 p1 m1 = t1
521 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
522 | otherwise = bin p1 m1 l1 (difference r1 t2)
523
524 difference2 | nomatch p1 p2 m2 = t1
525 | zero p1 m2 = difference t1 l2
526 | otherwise = difference t1 r2
527
528 difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t
529 difference t@(Bin _ _ _ _) Nil = t
530
531 difference t1@(Tip kx bm) t2 = differenceTip t2
532 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1
533 | zero kx m2 = differenceTip l2
534 | otherwise = differenceTip r2
535 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2)
536 | otherwise = t1
537 differenceTip Nil = t1
538
539 difference Nil _ = Nil
540
541
542
543 {--------------------------------------------------------------------
544 Intersection
545 --------------------------------------------------------------------}
546 -- | /O(n+m)/. The intersection of two sets.
547 intersection :: IntSet -> IntSet -> IntSet
548 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
549 | shorter m1 m2 = intersection1
550 | shorter m2 m1 = intersection2
551 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
552 | otherwise = Nil
553 where
554 intersection1 | nomatch p2 p1 m1 = Nil
555 | zero p2 m1 = intersection l1 t2
556 | otherwise = intersection r1 t2
557
558 intersection2 | nomatch p1 p2 m2 = Nil
559 | zero p1 m2 = intersection t1 l2
560 | otherwise = intersection t1 r2
561
562 intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1
563 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil
564 | zero kx2 m1 = intersectBM l1
565 | otherwise = intersectBM r1
566 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
567 | otherwise = Nil
568 intersectBM Nil = Nil
569
570 intersection (Bin _ _ _ _) Nil = Nil
571
572 intersection (Tip kx1 bm1) t2 = intersectBM t2
573 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil
574 | zero kx1 m2 = intersectBM l2
575 | otherwise = intersectBM r2
576 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
577 | otherwise = Nil
578 intersectBM Nil = Nil
579
580 intersection Nil _ = Nil
581
582 {--------------------------------------------------------------------
583 Subset
584 --------------------------------------------------------------------}
585 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
586 isProperSubsetOf :: IntSet -> IntSet -> Bool
587 isProperSubsetOf t1 t2
588 = case subsetCmp t1 t2 of
589 LT -> True
590 _ -> False
591
592 subsetCmp :: IntSet -> IntSet -> Ordering
593 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
594 | shorter m1 m2 = GT
595 | shorter m2 m1 = case subsetCmpLt of
596 GT -> GT
597 _ -> LT
598 | p1 == p2 = subsetCmpEq
599 | otherwise = GT -- disjoint
600 where
601 subsetCmpLt | nomatch p1 p2 m2 = GT
602 | zero p1 m2 = subsetCmp t1 l2
603 | otherwise = subsetCmp t1 r2
604 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
605 (GT,_ ) -> GT
606 (_ ,GT) -> GT
607 (EQ,EQ) -> EQ
608 _ -> LT
609
610 subsetCmp (Bin _ _ _ _) _ = GT
611 subsetCmp (Tip kx1 bm1) (Tip kx2 bm2)
612 | kx1 /= kx2 = GT -- disjoint
613 | bm1 == bm2 = EQ
614 | bm1 .&. complement bm2 == 0 = LT
615 | otherwise = GT
616 subsetCmp t1@(Tip kx _) (Bin p m l r)
617 | nomatch kx p m = GT
618 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT
619 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT
620 subsetCmp (Tip _ _) Nil = GT -- disjoint
621 subsetCmp Nil Nil = EQ
622 subsetCmp Nil _ = LT
623
624 -- | /O(n+m)/. Is this a subset?
625 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
626
627 isSubsetOf :: IntSet -> IntSet -> Bool
628 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
629 | shorter m1 m2 = False
630 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
631 else isSubsetOf t1 r2)
632 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
633 isSubsetOf (Bin _ _ _ _) _ = False
634 isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0
635 isSubsetOf t1@(Tip kx _) (Bin p m l r)
636 | nomatch kx p m = False
637 | zero kx m = isSubsetOf t1 l
638 | otherwise = isSubsetOf t1 r
639 isSubsetOf (Tip _ _) Nil = False
640 isSubsetOf Nil _ = True
641
642
643 {--------------------------------------------------------------------
644 Filter
645 --------------------------------------------------------------------}
646 -- | /O(n)/. Filter all elements that satisfy some predicate.
647 filter :: (Key -> Bool) -> IntSet -> IntSet
648 filter predicate t
649 = case t of
650 Bin p m l r
651 -> bin p m (filter predicate l) (filter predicate r)
652 Tip kx bm
653 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm)
654 Nil -> Nil
655 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
656 | otherwise = bm
657 {-# INLINE bitPred #-}
658
659 -- | /O(n)/. partition the set according to some predicate.
660 partition :: (Key -> Bool) -> IntSet -> (IntSet,IntSet)
661 partition predicate0 t0 = toPair $ go predicate0 t0
662 where
663 go predicate t
664 = case t of
665 Bin p m l r
666 -> let (l1 :*: l2) = go predicate l
667 (r1 :*: r2) = go predicate r
668 in bin p m l1 r1 :*: bin p m l2 r2
669 Tip kx bm
670 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm
671 in tip kx bm1 :*: tip kx (bm `xor` bm1)
672 Nil -> (Nil :*: Nil)
673 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
674 | otherwise = bm
675 {-# INLINE bitPred #-}
676
677
678 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
679 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
680 -- comprises the elements of @set@ greater than @x@.
681 --
682 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
683 split :: Key -> IntSet -> (IntSet,IntSet)
684 split x t =
685 case t of
686 Bin _ m l r
687 | m < 0 -> if x >= 0 -- handle negative numbers.
688 then case go x l of (lt :*: gt) -> let lt' = union lt r
689 in lt' `seq` (lt', gt)
690 else case go x r of (lt :*: gt) -> let gt' = union gt l
691 in gt' `seq` (lt, gt')
692 _ -> case go x t of
693 (lt :*: gt) -> (lt, gt)
694 where
695 go !x' t'@(Bin p m l r)
696 | match x' p m = if zero x' m
697 then case go x' l of
698 (lt :*: gt) -> lt :*: union gt r
699 else case go x' r of
700 (lt :*: gt) -> union lt l :*: gt
701 | otherwise = if x' < p then (Nil :*: t')
702 else (t' :*: Nil)
703 go x' t'@(Tip kx' bm)
704 | kx' > x' = (Nil :*: t')
705 -- equivalent to kx' > prefixOf x'
706 | kx' < prefixOf x' = (t' :*: Nil)
707 | otherwise = tip kx' (bm .&. lowerBitmap) :*: tip kx' (bm .&. higherBitmap)
708 where lowerBitmap = bitmapOf x' - 1
709 higherBitmap = complement (lowerBitmap + bitmapOf x')
710 go _ Nil = (Nil :*: Nil)
711
712 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
713 -- element was found in the original set.
714 splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet)
715 splitMember x t =
716 case t of
717 Bin _ m l r | m < 0 -> if x >= 0
718 then case go x l of
719 (lt, fnd, gt) -> let lt' = union lt r
720 in lt' `seq` (lt', fnd, gt)
721 else case go x r of
722 (lt, fnd, gt) -> let gt' = union gt l
723 in gt' `seq` (lt, fnd, gt')
724 _ -> go x t
725 where
726 go x' t'@(Bin p m l r)
727 | match x' p m = if zero x' m
728 then case go x' l of
729 (lt, fnd, gt) -> (lt, fnd, union gt r)
730 else case go x' r of
731 (lt, fnd, gt) -> (union lt l, fnd, gt)
732 | otherwise = if x' < p then (Nil, False, t') else (t', False, Nil)
733 go x' t'@(Tip kx' bm)
734 | kx' > x' = (Nil, False, t')
735 -- equivalent to kx' > prefixOf x'
736 | kx' < prefixOf x' = (t', False, Nil)
737 | otherwise = let lt = tip kx' (bm .&. lowerBitmap)
738 found = (bm .&. bitmapOfx') /= 0
739 gt = tip kx' (bm .&. higherBitmap)
740 in lt `seq` found `seq` gt `seq` (lt, found, gt)
741 where bitmapOfx' = bitmapOf x'
742 lowerBitmap = bitmapOfx' - 1
743 higherBitmap = complement (lowerBitmap + bitmapOfx')
744 go _ Nil = (Nil, False, Nil)
745
746
747 {----------------------------------------------------------------------
748 Min/Max
749 ----------------------------------------------------------------------}
750
751 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
752 -- stripped of that element, or 'Nothing' if passed an empty set.
753 maxView :: IntSet -> Maybe (Key, IntSet)
754 maxView t =
755 case t of Nil -> Nothing
756 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r)
757 _ -> Just (go t)
758 where
759 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r')
760 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
761 go Nil = error "maxView Nil"
762
763 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
764 -- stripped of that element, or 'Nothing' if passed an empty set.
765 minView :: IntSet -> Maybe (Key, IntSet)
766 minView t =
767 case t of Nil -> Nothing
768 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r')
769 _ -> Just (go t)
770 where
771 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r)
772 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
773 go Nil = error "minView Nil"
774
775 -- | /O(min(n,W))/. Delete and find the minimal element.
776 --
777 -- > deleteFindMin set = (findMin set, deleteMin set)
778 deleteFindMin :: IntSet -> (Key, IntSet)
779 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
780
781 -- | /O(min(n,W))/. Delete and find the maximal element.
782 --
783 -- > deleteFindMax set = (findMax set, deleteMax set)
784 deleteFindMax :: IntSet -> (Key, IntSet)
785 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
786
787
788 -- | /O(min(n,W))/. The minimal element of the set.
789 findMin :: IntSet -> Key
790 findMin Nil = error "findMin: empty set has no minimal element"
791 findMin (Tip kx bm) = kx + lowestBitSet bm
792 findMin (Bin _ m l r)
793 | m < 0 = find r
794 | otherwise = find l
795 where find (Tip kx bm) = kx + lowestBitSet bm
796 find (Bin _ _ l' _) = find l'
797 find Nil = error "findMin Nil"
798
799 -- | /O(min(n,W))/. The maximal element of a set.
800 findMax :: IntSet -> Key
801 findMax Nil = error "findMax: empty set has no maximal element"
802 findMax (Tip kx bm) = kx + highestBitSet bm
803 findMax (Bin _ m l r)
804 | m < 0 = find l
805 | otherwise = find r
806 where find (Tip kx bm) = kx + highestBitSet bm
807 find (Bin _ _ _ r') = find r'
808 find Nil = error "findMax Nil"
809
810
811 -- | /O(min(n,W))/. Delete the minimal element.
812 deleteMin :: IntSet -> IntSet
813 deleteMin = maybe Nil snd . minView
814
815 -- | /O(min(n,W))/. Delete the maximal element.
816 deleteMax :: IntSet -> IntSet
817 deleteMax = maybe Nil snd . maxView
818
819 {----------------------------------------------------------------------
820 Map
821 ----------------------------------------------------------------------}
822
823 -- | /O(n*min(n,W))/.
824 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
825 --
826 -- It's worth noting that the size of the result may be smaller if,
827 -- for some @(x,y)@, @x \/= y && f x == f y@
828
829 map :: (Key -> Key) -> IntSet -> IntSet
830 map f = fromList . List.map f . toList
831
832 {--------------------------------------------------------------------
833 Fold
834 --------------------------------------------------------------------}
835 -- | /O(n)/. Fold the elements in the set using the given right-associative
836 -- binary operator. This function is an equivalent of 'foldr' and is present
837 -- for compatibility only.
838 --
839 -- /Please note that fold will be deprecated in the future and removed./
840 fold :: (Key -> b -> b) -> b -> IntSet -> b
841 fold = foldr
842 {-# INLINE fold #-}
843
844 -- | /O(n)/. Fold the elements in the set using the given right-associative
845 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
846 --
847 -- For example,
848 --
849 -- > toAscList set = foldr (:) [] set
850 foldr :: (Key -> b -> b) -> b -> IntSet -> b
851 foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
852 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
853 | otherwise -> go (go z r) l
854 _ -> go z t
855 where
856 go z' Nil = z'
857 go z' (Tip kx bm) = foldrBits kx f z' bm
858 go z' (Bin _ _ l r) = go (go z' r) l
859 {-# INLINE foldr #-}
860
861 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
862 -- evaluated before using the result in the next application. This
863 -- function is strict in the starting value.
864 foldr' :: (Key -> b -> b) -> b -> IntSet -> b
865 foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
866 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
867 | otherwise -> go (go z r) l
868 _ -> go z t
869 where
870 STRICT_1_OF_2(go)
871 go z' Nil = z'
872 go z' (Tip kx bm) = foldr'Bits kx f z' bm
873 go z' (Bin _ _ l r) = go (go z' r) l
874 {-# INLINE foldr' #-}
875
876 -- | /O(n)/. Fold the elements in the set using the given left-associative
877 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
878 --
879 -- For example,
880 --
881 -- > toDescList set = foldl (flip (:)) [] set
882 foldl :: (a -> Key -> a) -> a -> IntSet -> a
883 foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
884 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
885 | otherwise -> go (go z l) r
886 _ -> go z t
887 where
888 STRICT_1_OF_2(go)
889 go z' Nil = z'
890 go z' (Tip kx bm) = foldlBits kx f z' bm
891 go z' (Bin _ _ l r) = go (go z' l) r
892 {-# INLINE foldl #-}
893
894 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
895 -- evaluated before using the result in the next application. This
896 -- function is strict in the starting value.
897 foldl' :: (a -> Key -> a) -> a -> IntSet -> a
898 foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
899 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
900 | otherwise -> go (go z l) r
901 _ -> go z t
902 where
903 STRICT_1_OF_2(go)
904 go z' Nil = z'
905 go z' (Tip kx bm) = foldl'Bits kx f z' bm
906 go z' (Bin _ _ l r) = go (go z' l) r
907 {-# INLINE foldl' #-}
908
909 {--------------------------------------------------------------------
910 List variations
911 --------------------------------------------------------------------}
912 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
913 -- Subject to list fusion.
914 elems :: IntSet -> [Key]
915 elems
916 = toAscList
917
918 {--------------------------------------------------------------------
919 Lists
920 --------------------------------------------------------------------}
921 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
922 toList :: IntSet -> [Key]
923 toList
924 = toAscList
925
926 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list
927 -- fusion.
928 toAscList :: IntSet -> [Key]
929 toAscList = foldr (:) []
930
931 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
932 -- fusion.
933 toDescList :: IntSet -> [Key]
934 toDescList = foldl (flip (:)) []
935
936 -- List fusion for the list generating functions.
937 #if __GLASGOW_HASKELL__
938 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
939 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
940 foldrFB :: (Key -> b -> b) -> b -> IntSet -> b
941 foldrFB = foldr
942 {-# INLINE[0] foldrFB #-}
943 foldlFB :: (a -> Key -> a) -> a -> IntSet -> a
944 foldlFB = foldl
945 {-# INLINE[0] foldlFB #-}
946
947 -- Inline elems and toList, so that we need to fuse only toAscList.
948 {-# INLINE elems #-}
949 {-# INLINE toList #-}
950
951 -- The fusion is enabled up to phase 2 included. If it does not succeed,
952 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
953 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
954 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
955 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
956 -- before phase 0, otherwise the fusion rules would not fire at all.
957 {-# NOINLINE[0] toAscList #-}
958 {-# NOINLINE[0] toDescList #-}
959 {-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
960 {-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
961 {-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
962 {-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
963 #endif
964
965
966 -- | /O(n*min(n,W))/. Create a set from a list of integers.
967 fromList :: [Key] -> IntSet
968 fromList xs
969 = foldlStrict ins empty xs
970 where
971 ins t x = insert x t
972
973 -- | /O(n)/. Build a set from an ascending list of elements.
974 -- /The precondition (input list is ascending) is not checked./
975 fromAscList :: [Key] -> IntSet
976 fromAscList [] = Nil
977 fromAscList (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
978 where
979 combineEq x' [] = [x']
980 combineEq x' (x:xs)
981 | x==x' = combineEq x' xs
982 | otherwise = x' : combineEq x xs
983
984 -- | /O(n)/. Build a set from an ascending list of distinct elements.
985 -- /The precondition (input list is strictly ascending) is not checked./
986 fromDistinctAscList :: [Key] -> IntSet
987 fromDistinctAscList [] = Nil
988 fromDistinctAscList (z0 : zs0) = work (prefixOf z0) (bitmapOf z0) zs0 Nada
989 where
990 -- 'work' accumulates all values that go into one tip, before passing this Tip
991 -- to 'reduce'
992 work kx bm [] stk = finish kx (Tip kx bm) stk
993 work kx bm (z:zs) stk | kx == prefixOf z = work kx (bm .|. bitmapOf z) zs stk
994 work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
995
996 reduce z zs _ px tx Nada = work (prefixOf z) (bitmapOf z) zs (Push px tx Nada)
997 reduce z zs m px tx stk@(Push py ty stk') =
998 let mxy = branchMask px py
999 pxy = mask px mxy
1000 in if shorter m mxy
1001 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
1002 else work (prefixOf z) (bitmapOf z) zs (Push px tx stk)
1003
1004 finish _ t Nada = t
1005 finish px tx (Push py ty stk) = finish p (join py ty px tx) stk
1006 where m = branchMask px py
1007 p = mask px m
1008
1009 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
1010
1011
1012 {--------------------------------------------------------------------
1013 Eq
1014 --------------------------------------------------------------------}
1015 instance Eq IntSet where
1016 t1 == t2 = equal t1 t2
1017 t1 /= t2 = nequal t1 t2
1018
1019 equal :: IntSet -> IntSet -> Bool
1020 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1021 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
1022 equal (Tip kx1 bm1) (Tip kx2 bm2)
1023 = kx1 == kx2 && bm1 == bm2
1024 equal Nil Nil = True
1025 equal _ _ = False
1026
1027 nequal :: IntSet -> IntSet -> Bool
1028 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1029 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
1030 nequal (Tip kx1 bm1) (Tip kx2 bm2)
1031 = kx1 /= kx2 || bm1 /= bm2
1032 nequal Nil Nil = False
1033 nequal _ _ = True
1034
1035 {--------------------------------------------------------------------
1036 Ord
1037 --------------------------------------------------------------------}
1038
1039 instance Ord IntSet where
1040 compare s1 s2 = compare (toAscList s1) (toAscList s2)
1041 -- tentative implementation. See if more efficient exists.
1042
1043 {--------------------------------------------------------------------
1044 Show
1045 --------------------------------------------------------------------}
1046 instance Show IntSet where
1047 showsPrec p xs = showParen (p > 10) $
1048 showString "fromList " . shows (toList xs)
1049
1050 {--------------------------------------------------------------------
1051 Read
1052 --------------------------------------------------------------------}
1053 instance Read IntSet where
1054 #ifdef __GLASGOW_HASKELL__
1055 readPrec = parens $ prec 10 $ do
1056 Ident "fromList" <- lexP
1057 xs <- readPrec
1058 return (fromList xs)
1059
1060 readListPrec = readListPrecDefault
1061 #else
1062 readsPrec p = readParen (p > 10) $ \ r -> do
1063 ("fromList",s) <- lex r
1064 (xs,t) <- reads s
1065 return (fromList xs,t)
1066 #endif
1067
1068 {--------------------------------------------------------------------
1069 Typeable
1070 --------------------------------------------------------------------}
1071
1072 #include "Typeable.h"
1073 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
1074
1075 {--------------------------------------------------------------------
1076 NFData
1077 --------------------------------------------------------------------}
1078
1079 -- The IntSet constructors consist only of strict fields of Ints and
1080 -- IntSets, thus the default NFData instance which evaluates to whnf
1081 -- should suffice
1082 instance NFData IntSet
1083
1084 {--------------------------------------------------------------------
1085 Debugging
1086 --------------------------------------------------------------------}
1087 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1088 -- in a compressed, hanging format.
1089 showTree :: IntSet -> String
1090 showTree s
1091 = showTreeWith True False s
1092
1093
1094 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
1095 the tree that implements the set. If @hang@ is
1096 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
1097 @wide@ is 'True', an extra wide version is shown.
1098 -}
1099 showTreeWith :: Bool -> Bool -> IntSet -> String
1100 showTreeWith hang wide t
1101 | hang = (showsTreeHang wide [] t) ""
1102 | otherwise = (showsTree wide [] [] t) ""
1103
1104 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
1105 showsTree wide lbars rbars t
1106 = case t of
1107 Bin p m l r
1108 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1109 showWide wide rbars .
1110 showsBars lbars . showString (showBin p m) . showString "\n" .
1111 showWide wide lbars .
1112 showsTree wide (withEmpty lbars) (withBar lbars) l
1113 Tip kx bm
1114 -> showsBars lbars . showString " " . shows kx . showString " + " .
1115 showsBitMap bm . showString "\n"
1116 Nil -> showsBars lbars . showString "|\n"
1117
1118 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
1119 showsTreeHang wide bars t
1120 = case t of
1121 Bin p m l r
1122 -> showsBars bars . showString (showBin p m) . showString "\n" .
1123 showWide wide bars .
1124 showsTreeHang wide (withBar bars) l .
1125 showWide wide bars .
1126 showsTreeHang wide (withEmpty bars) r
1127 Tip kx bm
1128 -> showsBars bars . showString " " . shows kx . showString " + " .
1129 showsBitMap bm . showString "\n"
1130 Nil -> showsBars bars . showString "|\n"
1131
1132 showBin :: Prefix -> Mask -> String
1133 showBin _ _
1134 = "*" -- ++ show (p,m)
1135
1136 showWide :: Bool -> [String] -> String -> String
1137 showWide wide bars
1138 | wide = showString (concat (reverse bars)) . showString "|\n"
1139 | otherwise = id
1140
1141 showsBars :: [String] -> ShowS
1142 showsBars bars
1143 = case bars of
1144 [] -> id
1145 _ -> showString (concat (reverse (tail bars))) . showString node
1146
1147 showsBitMap :: Word -> ShowS
1148 showsBitMap = showString . showBitMap
1149
1150 showBitMap :: Word -> String
1151 showBitMap w = show $ foldrBits 0 (:) [] w
1152
1153 node :: String
1154 node = "+--"
1155
1156 withBar, withEmpty :: [String] -> [String]
1157 withBar bars = "| ":bars
1158 withEmpty bars = " ":bars
1159
1160
1161 {--------------------------------------------------------------------
1162 Helpers
1163 --------------------------------------------------------------------}
1164 {--------------------------------------------------------------------
1165 Join
1166 --------------------------------------------------------------------}
1167 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
1168 join p1 t1 p2 t2
1169 | zero p1 m = Bin p m t1 t2
1170 | otherwise = Bin p m t2 t1
1171 where
1172 m = branchMask p1 p2
1173 p = mask p1 m
1174 {-# INLINE join #-}
1175
1176 {--------------------------------------------------------------------
1177 @bin@ assures that we never have empty trees within a tree.
1178 --------------------------------------------------------------------}
1179 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
1180 bin _ _ l Nil = l
1181 bin _ _ Nil r = r
1182 bin p m l r = Bin p m l r
1183 {-# INLINE bin #-}
1184
1185 {--------------------------------------------------------------------
1186 @tip@ assures that we never have empty bitmaps within a tree.
1187 --------------------------------------------------------------------}
1188 tip :: Prefix -> BitMap -> IntSet
1189 tip _ 0 = Nil
1190 tip kx bm = Tip kx bm
1191 {-# INLINE tip #-}
1192
1193
1194 {----------------------------------------------------------------------
1195 Functions that generate Prefix and BitMap of a Key or a Suffix.
1196 ----------------------------------------------------------------------}
1197
1198 suffixBitMask :: Int
1199 suffixBitMask = bitSize (undefined::Word) - 1
1200 {-# INLINE suffixBitMask #-}
1201
1202 prefixBitMask :: Int
1203 prefixBitMask = complement suffixBitMask
1204 {-# INLINE prefixBitMask #-}
1205
1206 prefixOf :: Int -> Prefix
1207 prefixOf x = x .&. prefixBitMask
1208 {-# INLINE prefixOf #-}
1209
1210 suffixOf :: Int -> Int
1211 suffixOf x = x .&. suffixBitMask
1212 {-# INLINE suffixOf #-}
1213
1214 bitmapOfSuffix :: Int -> BitMap
1215 bitmapOfSuffix s = 1 `shiftLL` s
1216 {-# INLINE bitmapOfSuffix #-}
1217
1218 bitmapOf :: Int -> BitMap
1219 bitmapOf x = bitmapOfSuffix (suffixOf x)
1220 {-# INLINE bitmapOf #-}
1221
1222
1223 {--------------------------------------------------------------------
1224 Endian independent bit twiddling
1225 --------------------------------------------------------------------}
1226 zero :: Int -> Mask -> Bool
1227 zero i m
1228 = (natFromInt i) .&. (natFromInt m) == 0
1229 {-# INLINE zero #-}
1230
1231 nomatch,match :: Int -> Prefix -> Mask -> Bool
1232 nomatch i p m
1233 = (mask i m) /= p
1234 {-# INLINE nomatch #-}
1235
1236 match i p m
1237 = (mask i m) == p
1238 {-# INLINE match #-}
1239
1240 -- Suppose a is largest such that 2^a divides 2*m.
1241 -- Then mask i m is i with the low a bits zeroed out.
1242 mask :: Int -> Mask -> Prefix
1243 mask i m
1244 = maskW (natFromInt i) (natFromInt m)
1245 {-# INLINE mask #-}
1246
1247 {--------------------------------------------------------------------
1248 Big endian operations
1249 --------------------------------------------------------------------}
1250 maskW :: Nat -> Nat -> Prefix
1251 maskW i m
1252 = intFromNat (i .&. (complement (m-1) `xor` m))
1253 {-# INLINE maskW #-}
1254
1255 shorter :: Mask -> Mask -> Bool
1256 shorter m1 m2
1257 = (natFromInt m1) > (natFromInt m2)
1258 {-# INLINE shorter #-}
1259
1260 branchMask :: Prefix -> Prefix -> Mask
1261 branchMask p1 p2
1262 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
1263 {-# INLINE branchMask #-}
1264
1265 {----------------------------------------------------------------------
1266 Finding the highest bit (mask) in a word [x] can be done efficiently in
1267 three ways:
1268 * convert to a floating point value and the mantissa tells us the
1269 [log2(x)] that corresponds with the highest bit position. The mantissa
1270 is retrieved either via the standard C function [frexp] or by some bit
1271 twiddling on IEEE compatible numbers (float). Note that one needs to
1272 use at least [double] precision for an accurate mantissa of 32 bit
1273 numbers.
1274 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
1275 * use processor specific assembler instruction (asm).
1276
1277 The most portable way would be [bit], but is it efficient enough?
1278 I have measured the cycle counts of the different methods on an AMD
1279 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
1280
1281 highestBitMask: method cycles
1282 --------------
1283 frexp 200
1284 float 33
1285 bit 11
1286 asm 12
1287
1288 highestBit: method cycles
1289 --------------
1290 frexp 195
1291 float 33
1292 bit 11
1293 asm 11
1294
1295 Wow, the bit twiddling is on today's RISC like machines even faster
1296 than a single CISC instruction (BSR)!
1297 ----------------------------------------------------------------------}
1298
1299 {----------------------------------------------------------------------
1300 [highestBitMask] returns a word where only the highest bit is set.
1301 It is found by first setting all bits in lower positions than the
1302 highest bit and than taking an exclusive or with the original value.
1303 Allthough the function may look expensive, GHC compiles this into
1304 excellent C code that subsequently compiled into highly efficient
1305 machine code. The algorithm is derived from Jorg Arndt's FXT library.
1306 ----------------------------------------------------------------------}
1307 highestBitMask :: Nat -> Nat
1308 highestBitMask x0
1309 = case (x0 .|. shiftRL x0 1) of
1310 x1 -> case (x1 .|. shiftRL x1 2) of
1311 x2 -> case (x2 .|. shiftRL x2 4) of
1312 x3 -> case (x3 .|. shiftRL x3 8) of
1313 x4 -> case (x4 .|. shiftRL x4 16) of
1314 #if !(defined(__GLASGOW_HASKELL__) && WORD_SIZE_IN_BITS==32)
1315 x5 -> case (x5 .|. shiftRL x5 32) of -- for 64 bit platforms
1316 #endif
1317 x6 -> (x6 `xor` (shiftRL x6 1))
1318 {-# INLINE highestBitMask #-}
1319
1320 {----------------------------------------------------------------------
1321 To get best performance, we provide fast implementations of
1322 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC.
1323 If the intel bsf and bsr instructions ever become GHC primops,
1324 this code should be reimplemented using these.
1325
1326 Performance of this code is crucial for folds, toList, filter, partition.
1327
1328 The signatures of methods in question are placed after this comment.
1329 ----------------------------------------------------------------------}
1330
1331 lowestBitSet :: Nat -> Int
1332 highestBitSet :: Nat -> Int
1333 foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1334 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1335 foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1336 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1337
1338 {-# INLINE lowestBitSet #-}
1339 {-# INLINE highestBitSet #-}
1340 {-# INLINE foldlBits #-}
1341 {-# INLINE foldl'Bits #-}
1342 {-# INLINE foldrBits #-}
1343 {-# INLINE foldr'Bits #-}
1344
1345 #if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64)
1346 {----------------------------------------------------------------------
1347 For lowestBitSet we use wordsize-dependant implementation based on
1348 multiplication and DeBrujn indeces, which was proposed by Edward Kmett
1349 <http://haskell.org/pipermail/libraries/2011-September/016749.html>
1350
1351 The core of this implementation is fast indexOfTheOnlyBit,
1352 which is given a Nat with exactly one bit set, and returns
1353 its index.
1354
1355 Lot of effort was put in these implementations, please benchmark carefully
1356 before changing this code.
1357 ----------------------------------------------------------------------}
1358
1359 indexOfTheOnlyBit :: Nat -> Int
1360 {-# INLINE indexOfTheOnlyBit #-}
1361 indexOfTheOnlyBit bitmask =
1362 I# (lsbArray `indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))
1363 where unboxInt (I# i) = i
1364 #if WORD_SIZE_IN_BITS==32
1365 magic = 0x077CB531
1366 offset = 27
1367 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"#
1368 #else
1369 magic = 0x07EDD5E59A4E28C2
1370 offset = 58
1371 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"#
1372 #endif
1373 -- The lsbArray gets inlined to every call site of indexOfTheOnlyBit.
1374 -- That cannot be easily avoided, as GHC forbids top-level Addr# literal.
1375 -- One could go around that by supplying getLsbArray :: () -> Addr# marked
1376 -- as NOINLINE. But the code size of calling it and processing the result
1377 -- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array
1378 -- is actually improvement on 32-bit and only a 8B size increase on 64-bit.
1379
1380 lowestBitMask :: Nat -> Nat
1381 lowestBitMask x = x .&. negate x
1382 {-# INLINE lowestBitMask #-}
1383
1384 -- Reverse the order of bits in the Nat.
1385 revNat :: Nat -> Nat
1386 #if WORD_SIZE_IN_BITS==32
1387 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of
1388 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of
1389 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of
1390 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of
1391 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16);
1392 #else
1393 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of
1394 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of
1395 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of
1396 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of
1397 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of
1398 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
1399 #endif
1400
1401 lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x)
1402
1403 highestBitSet x = indexOfTheOnlyBit (highestBitMask x)
1404
1405 foldlBits prefix f z bitmap = go bitmap z
1406 where go bm acc | bm == 0 = acc
1407 | otherwise = case lowestBitMask bm of
1408 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1409 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1410
1411 foldl'Bits prefix f z bitmap = go bitmap z
1412 where STRICT_2_OF_2(go)
1413 go bm acc | bm == 0 = acc
1414 | otherwise = case lowestBitMask bm of
1415 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1416 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1417
1418 foldrBits prefix f z bitmap = go (revNat bitmap) z
1419 where go bm acc | bm == 0 = acc
1420 | otherwise = case lowestBitMask bm of
1421 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1422 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1423
1424 foldr'Bits prefix f z bitmap = go (revNat bitmap) z
1425 where STRICT_2_OF_2(go)
1426 go bm acc | bm == 0 = acc
1427 | otherwise = case lowestBitMask bm of
1428 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1429 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1430
1431 #else
1432 {----------------------------------------------------------------------
1433 In general case we use logarithmic implementation of
1434 lowestBitSet and highestBitSet, which works up to bit sizes of 64.
1435
1436 Folds are linear scans.
1437 ----------------------------------------------------------------------}
1438
1439 lowestBitSet n0 =
1440 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32)
1441 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1)
1442 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2)
1443 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3)
1444 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4)
1445 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5
1446 in b6
1447
1448 highestBitSet n0 =
1449 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0)
1450 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1)
1451 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2)
1452 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3)
1453 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4)
1454 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5
1455 in b6
1456
1457 foldlBits prefix f z bm = let lb = lowestBitSet bm
1458 in go (prefix+lb) z (bm `shiftRL` lb)
1459 where STRICT_1_OF_3(go)
1460 go _ acc 0 = acc
1461 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1462 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1463
1464 foldl'Bits prefix f z bm = let lb = lowestBitSet bm
1465 in go (prefix+lb) z (bm `shiftRL` lb)
1466 where STRICT_1_OF_3(go)
1467 STRICT_2_OF_3(go)
1468 go _ acc 0 = acc
1469 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1470 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1471
1472 foldrBits prefix f z bm = let lb = lowestBitSet bm
1473 in go (prefix+lb) (bm `shiftRL` lb)
1474 where STRICT_1_OF_2(go)
1475 go _ 0 = z
1476 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1))
1477 | otherwise = go (bi + 1) (n `shiftRL` 1)
1478
1479 foldr'Bits prefix f z bm = let lb = lowestBitSet bm
1480 in go (prefix+lb) (bm `shiftRL` lb)
1481 where STRICT_1_OF_2(go)
1482 go _ 0 = z
1483 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
1484 | otherwise = go (bi + 1) (n `shiftRL` 1)
1485
1486 #endif
1487
1488 {----------------------------------------------------------------------
1489 [bitcount] as posted by David F. Place to haskell-cafe on April 11, 2006,
1490 based on the code on
1491 http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan,
1492 where the following source is given:
1493 Published in 1988, the C Programming Language 2nd Ed. (by Brian W.
1494 Kernighan and Dennis M. Ritchie) mentions this in exercise 2-9. On April
1495 19, 2006 Don Knuth pointed out to me that this method "was first published
1496 by Peter Wegner in CACM 3 (1960), 322. (Also discovered independently by
1497 Derrick Lehmer and published in 1964 in a book edited by Beckenbach.)"
1498 ----------------------------------------------------------------------}
1499 bitcount :: Int -> Word -> Int
1500 #if MIN_VERSION_base(4,5,0)
1501 bitcount a x = a + popCount x
1502 #else
1503 bitcount a0 x0 = go a0 x0
1504 where go a 0 = a
1505 go a x = go (a + 1) (x .&. (x-1))
1506 #endif
1507 {-# INLINE bitcount #-}
1508
1509
1510 {--------------------------------------------------------------------
1511 Utilities
1512 --------------------------------------------------------------------}
1513 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1514 foldlStrict f = go
1515 where
1516 go z [] = z
1517 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1518 {-# INLINE foldlStrict #-}