Merge pull request #23 from ekmett/master
[packages/containers.git] / Data / IntSet / Base.hs
1 {-# LANGUAGE CPP #-}
2 #if __GLASGOW_HASKELL__
3 {-# LANGUAGE MagicHash, BangPatterns, DeriveDataTypeable, StandaloneDeriving #-}
4 #endif
5 #if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
6 {-# LANGUAGE Trustworthy #-}
7 #endif
8 -----------------------------------------------------------------------------
9 -- |
10 -- Module : Data.IntSet.Base
11 -- Copyright : (c) Daan Leijen 2002
12 -- (c) Joachim Breitner 2011
13 -- License : BSD-style
14 -- Maintainer : libraries@haskell.org
15 -- Stability : provisional
16 -- Portability : portable
17 --
18 -- An efficient implementation of integer sets.
19 --
20 -- These modules are intended to be imported qualified, to avoid name
21 -- clashes with Prelude functions, e.g.
22 --
23 -- > import Data.IntSet (IntSet)
24 -- > import qualified Data.IntSet as IntSet
25 --
26 -- The implementation is based on /big-endian patricia trees/. This data
27 -- structure performs especially well on binary operations like 'union'
28 -- and 'intersection'. However, my benchmarks show that it is also
29 -- (much) faster on insertions and deletions when compared to a generic
30 -- size-balanced set implementation (see "Data.Set").
31 --
32 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
33 -- Workshop on ML, September 1998, pages 77-86,
34 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
35 --
36 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
37 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
38 -- October 1968, pages 514-534.
39 --
40 -- Additionally, this implementation places bitmaps in the leaves of the tree.
41 -- Their size is the natural size of a machine word (32 or 64 bits) and greatly
42 -- reduce memory footprint and execution times for dense sets, e.g. sets where
43 -- it is likely that many values lie close to each other. The asymptotics are
44 -- not affected by this optimization.
45 --
46 -- Many operations have a worst-case complexity of /O(min(n,W))/.
47 -- This means that the operation can become linear in the number of
48 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
49 -- (32 or 64).
50 -----------------------------------------------------------------------------
51
52 -- [Note: INLINE bit fiddling]
53 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
54 -- It is essential that the bit fiddling functions like mask, zero, branchMask
55 -- etc are inlined. If they do not, the memory allocation skyrockets. The GHC
56 -- usually gets it right, but it is disastrous if it does not. Therefore we
57 -- explicitly mark these functions INLINE.
58
59
60 -- [Note: Local 'go' functions and capturing]
61 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
62 -- Care must be taken when using 'go' function which captures an argument.
63 -- Sometimes (for example when the argument is passed to a data constructor,
64 -- as in insert), GHC heap-allocates more than necessary. Therefore C-- code
65 -- must be checked for increased allocation when creating and modifying such
66 -- functions.
67
68
69 -- [Note: Order of constructors]
70 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
71 -- The order of constructors of IntSet matters when considering performance.
72 -- Currently in GHC 7.0, when type has 3 constructors, they are matched from
73 -- the first to the last -- the best performance is achieved when the
74 -- constructors are ordered by frequency.
75 -- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil
76 -- improves the benchmark by circa 10%.
77
78 module Data.IntSet.Base (
79 -- * Set type
80 IntSet(..), Key -- instance Eq,Show
81
82 -- * Operators
83 , (\\)
84
85 -- * Query
86 , null
87 , size
88 , member
89 , notMember
90 , lookupLT
91 , lookupGT
92 , lookupLE
93 , lookupGE
94 , isSubsetOf
95 , isProperSubsetOf
96
97 -- * Construction
98 , empty
99 , singleton
100 , insert
101 , delete
102
103 -- * Combine
104 , union
105 , unions
106 , difference
107 , intersection
108
109 -- * Filter
110 , filter
111 , partition
112 , split
113 , splitMember
114
115 -- * Map
116 , map
117
118 -- * Folds
119 , foldr
120 , foldl
121 -- ** Strict folds
122 , foldr'
123 , foldl'
124 -- ** Legacy folds
125 , fold
126
127 -- * Min\/Max
128 , findMin
129 , findMax
130 , deleteMin
131 , deleteMax
132 , deleteFindMin
133 , deleteFindMax
134 , maxView
135 , minView
136
137 -- * Conversion
138
139 -- ** List
140 , elems
141 , toList
142 , fromList
143
144 -- ** Ordered list
145 , toAscList
146 , toDescList
147 , fromAscList
148 , fromDistinctAscList
149
150 -- * Debugging
151 , showTree
152 , showTreeWith
153
154 -- * Internals
155 , match
156 , suffixBitMask
157 , prefixBitMask
158 , bitmapOf
159 ) where
160
161
162 import Prelude hiding (filter,foldr,foldl,null,map)
163 import Data.Bits
164
165 import qualified Data.List as List
166 import Data.Monoid (Monoid(..))
167 import Data.Maybe (fromMaybe)
168 import Data.Typeable
169 import Control.DeepSeq (NFData)
170
171 import Data.StrictPair
172
173 #if __GLASGOW_HASKELL__
174 import Text.Read
175 import Data.Data (Data(..), Constr, mkConstr, constrIndex, Fixity(Prefix), DataType, mkDataType)
176 #endif
177
178 #if __GLASGOW_HASKELL__
179 import GHC.Exts ( Word(..), Int(..), build )
180 import GHC.Prim ( uncheckedShiftL#, uncheckedShiftRL#, indexInt8OffAddr# )
181 #else
182 import Data.Word
183 #endif
184
185 -- On GHC, include MachDeps.h to get WORD_SIZE_IN_BITS macro.
186 #if defined(__GLASGOW_HASKELL__)
187 #include "MachDeps.h"
188 #endif
189
190 -- Use macros to define strictness of functions.
191 -- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
192 -- We do not use BangPatterns, because they are not in any standard and we
193 -- want the compilers to be compiled by as many compilers as possible.
194 #define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
195 #define STRICT_2_OF_2(fn) fn _ arg | arg `seq` False = undefined
196 #define STRICT_1_OF_3(fn) fn arg _ _ | arg `seq` False = undefined
197 #define STRICT_2_OF_3(fn) fn _ arg _ | arg `seq` False = undefined
198
199 infixl 9 \\{-This comment teaches CPP correct behaviour -}
200
201 -- A "Nat" is a natural machine word (an unsigned Int)
202 type Nat = Word
203
204 natFromInt :: Int -> Nat
205 natFromInt i = fromIntegral i
206 {-# INLINE natFromInt #-}
207
208 intFromNat :: Nat -> Int
209 intFromNat w = fromIntegral w
210 {-# INLINE intFromNat #-}
211
212 -- Right and left logical shifts.
213 shiftRL, shiftLL :: Nat -> Int -> Nat
214 #if __GLASGOW_HASKELL__
215 {--------------------------------------------------------------------
216 GHC: use unboxing to get @shiftRL@ and @shiftLL@ inlined.
217 --------------------------------------------------------------------}
218 shiftRL (W# x) (I# i) = W# (uncheckedShiftRL# x i)
219 shiftLL (W# x) (I# i) = W# (uncheckedShiftL# x i)
220 #else
221 shiftRL x i = shiftR x i
222 shiftLL x i = shiftL x i
223 #endif
224 {-# INLINE shiftRL #-}
225 {-# INLINE shiftLL #-}
226
227 {--------------------------------------------------------------------
228 Operators
229 --------------------------------------------------------------------}
230 -- | /O(n+m)/. See 'difference'.
231 (\\) :: IntSet -> IntSet -> IntSet
232 m1 \\ m2 = difference m1 m2
233
234 {--------------------------------------------------------------------
235 Types
236 --------------------------------------------------------------------}
237
238 -- | A set of integers.
239
240 -- See Note: Order of constructors
241 data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet
242 -- Invariant: Nil is never found as a child of Bin.
243 -- Invariant: The Mask is a power of 2. It is the largest bit position at which
244 -- two elements of the set differ.
245 -- Invariant: Prefix is the common high-order bits that all elements share to
246 -- the left of the Mask bit.
247 -- Invariant: In Bin prefix mask left right, left consists of the elements that
248 -- don't have the mask bit set; right is all the elements that do.
249 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
250 -- Invariant: The Prefix is zero for all but the last 5 (on 32 bit arches) or 6
251 -- bits (on 64 bit arches). The values of the map represented by a tip
252 -- are the prefix plus the indices of the set bits in the bit map.
253 | Nil
254
255 -- A number stored in a set is stored as
256 -- * Prefix (all but last 5-6 bits) and
257 -- * BitMap (last 5-6 bits stored as a bitmask)
258 -- Last 5-6 bits are called a Suffix.
259
260 type Prefix = Int
261 type Mask = Int
262 type BitMap = Word
263 type Key = Int
264
265 instance Monoid IntSet where
266 mempty = empty
267 mappend = union
268 mconcat = unions
269
270 #if __GLASGOW_HASKELL__
271
272 {--------------------------------------------------------------------
273 A Data instance
274 --------------------------------------------------------------------}
275
276 -- This instance preserves data abstraction at the cost of inefficiency.
277 -- We provide limited reflection services for the sake of data abstraction.
278
279 instance Data IntSet where
280 gfoldl f z is = z fromList `f` (toList is)
281 toConstr _ = fromListConstr
282 gunfold k z c = case constrIndex c of
283 1 -> k (z fromList)
284 _ -> error "gunfold"
285 dataTypeOf _ = intSetDataType
286
287 fromListConstr :: Constr
288 fromListConstr = mkConstr intSetDataType "fromList" [] Prefix
289
290 intSetDataType :: DataType
291 intSetDataType = mkDataType "Data.IntSet.Base.IntSet" [fromListConstr]
292
293 #endif
294
295 {--------------------------------------------------------------------
296 Query
297 --------------------------------------------------------------------}
298 -- | /O(1)/. Is the set empty?
299 null :: IntSet -> Bool
300 null Nil = True
301 null _ = False
302 {-# INLINE null #-}
303
304 -- | /O(n)/. Cardinality of the set.
305 size :: IntSet -> Int
306 size t
307 = case t of
308 Bin _ _ l r -> size l + size r
309 Tip _ bm -> bitcount 0 bm
310 Nil -> 0
311
312 -- | /O(min(n,W))/. Is the value a member of the set?
313
314 -- See Note: Local 'go' functions and capturing]
315 member :: Key -> IntSet -> Bool
316 member x = x `seq` go
317 where
318 go (Bin p m l r)
319 | nomatch x p m = False
320 | zero x m = go l
321 | otherwise = go r
322 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0
323 go Nil = False
324
325 -- | /O(min(n,W))/. Is the element not in the set?
326 notMember :: Key -> IntSet -> Bool
327 notMember k = not . member k
328
329 -- | /O(log n)/. Find largest element smaller than the given one.
330 --
331 -- > lookupLT 3 (fromList [3, 5]) == Nothing
332 -- > lookupLT 5 (fromList [3, 5]) == Just 3
333
334 -- See Note: Local 'go' functions and capturing.
335 lookupLT :: Key -> IntSet -> Maybe Key
336 lookupLT x t = x `seq` case t of
337 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
338 _ -> go Nil t
339 where
340 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
341 | zero x m = go def l
342 | otherwise = go l r
343 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
344 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT
345 | otherwise = unsafeFindMax def
346 where maskLT = (bitmapOf x - 1) .&. bm
347 go def Nil = unsafeFindMax def
348
349
350 -- | /O(log n)/. Find smallest element greater than the given one.
351 --
352 -- > lookupGT 4 (fromList [3, 5]) == Just 5
353 -- > lookupGT 5 (fromList [3, 5]) == Nothing
354
355 -- See Note: Local 'go' functions and capturing.
356 lookupGT :: Key -> IntSet -> Maybe Key
357 lookupGT x t = x `seq` case t of
358 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
359 _ -> go Nil t
360 where
361 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
362 | zero x m = go r l
363 | otherwise = go def r
364 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
365 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT
366 | otherwise = unsafeFindMin def
367 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm
368 go def Nil = unsafeFindMin def
369
370
371 -- | /O(log n)/. Find largest element smaller or equal to the given one.
372 --
373 -- > lookupLE 2 (fromList [3, 5]) == Nothing
374 -- > lookupLE 4 (fromList [3, 5]) == Just 3
375 -- > lookupLE 5 (fromList [3, 5]) == Just 5
376
377 -- See Note: Local 'go' functions and capturing.
378 lookupLE :: Key -> IntSet -> Maybe Key
379 lookupLE x t = x `seq` case t of
380 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r
381 _ -> go Nil t
382 where
383 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r
384 | zero x m = go def l
385 | otherwise = go l r
386 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm
387 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE
388 | otherwise = unsafeFindMax def
389 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm
390 go def Nil = unsafeFindMax def
391
392
393 -- | /O(log n)/. Find smallest element greater or equal to the given one.
394 --
395 -- > lookupGE 3 (fromList [3, 5]) == Just 3
396 -- > lookupGE 4 (fromList [3, 5]) == Just 5
397 -- > lookupGE 6 (fromList [3, 5]) == Nothing
398
399 -- See Note: Local 'go' functions and capturing.
400 lookupGE :: Key -> IntSet -> Maybe Key
401 lookupGE x t = x `seq` case t of
402 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r
403 _ -> go Nil t
404 where
405 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def
406 | zero x m = go r l
407 | otherwise = go def r
408 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm
409 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE
410 | otherwise = unsafeFindMin def
411 where maskGE = (- (bitmapOf x)) .&. bm
412 go def Nil = unsafeFindMin def
413
414
415
416 -- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is
417 -- given, it has m > 0.
418 unsafeFindMin :: IntSet -> Maybe Key
419 unsafeFindMin Nil = Nothing
420 unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm
421 unsafeFindMin (Bin _ _ l _) = unsafeFindMin l
422
423 -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is
424 -- given, it has m > 0.
425 unsafeFindMax :: IntSet -> Maybe Key
426 unsafeFindMax Nil = Nothing
427 unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm
428 unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r
429
430 {--------------------------------------------------------------------
431 Construction
432 --------------------------------------------------------------------}
433 -- | /O(1)/. The empty set.
434 empty :: IntSet
435 empty
436 = Nil
437 {-# INLINE empty #-}
438
439 -- | /O(1)/. A set of one element.
440 singleton :: Key -> IntSet
441 singleton x
442 = Tip (prefixOf x) (bitmapOf x)
443 {-# INLINE singleton #-}
444
445 {--------------------------------------------------------------------
446 Insert
447 --------------------------------------------------------------------}
448 -- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for
449 -- IntSets.
450 insert :: Key -> IntSet -> IntSet
451 insert x = x `seq` insertBM (prefixOf x) (bitmapOf x)
452
453 -- Helper function for insert and union.
454 insertBM :: Prefix -> BitMap -> IntSet -> IntSet
455 insertBM kx bm t = kx `seq` bm `seq`
456 case t of
457 Bin p m l r
458 | nomatch kx p m -> join kx (Tip kx bm) p t
459 | zero kx m -> Bin p m (insertBM kx bm l) r
460 | otherwise -> Bin p m l (insertBM kx bm r)
461 Tip kx' bm'
462 | kx' == kx -> Tip kx' (bm .|. bm')
463 | otherwise -> join kx (Tip kx bm) kx' t
464 Nil -> Tip kx bm
465
466 -- | /O(min(n,W))/. Delete a value in the set. Returns the
467 -- original set when the value was not present.
468 delete :: Key -> IntSet -> IntSet
469 delete x = x `seq` deleteBM (prefixOf x) (bitmapOf x)
470
471 -- Deletes all values mentioned in the BitMap from the set.
472 -- Helper function for delete and difference.
473 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
474 deleteBM kx bm t = kx `seq` bm `seq`
475 case t of
476 Bin p m l r
477 | nomatch kx p m -> t
478 | zero kx m -> bin p m (deleteBM kx bm l) r
479 | otherwise -> bin p m l (deleteBM kx bm r)
480 Tip kx' bm'
481 | kx' == kx -> tip kx (bm' .&. complement bm)
482 | otherwise -> t
483 Nil -> Nil
484
485
486 {--------------------------------------------------------------------
487 Union
488 --------------------------------------------------------------------}
489 -- | The union of a list of sets.
490 unions :: [IntSet] -> IntSet
491 unions xs
492 = foldlStrict union empty xs
493
494
495 -- | /O(n+m)/. The union of two sets.
496 union :: IntSet -> IntSet -> IntSet
497 union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
498 | shorter m1 m2 = union1
499 | shorter m2 m1 = union2
500 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
501 | otherwise = join p1 t1 p2 t2
502 where
503 union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
504 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1
505 | otherwise = Bin p1 m1 l1 (union r1 t2)
506
507 union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
508 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2
509 | otherwise = Bin p2 m2 l2 (union t1 r2)
510
511 union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t
512 union t@(Bin _ _ _ _) Nil = t
513 union (Tip kx bm) t = insertBM kx bm t
514 union Nil t = t
515
516
517 {--------------------------------------------------------------------
518 Difference
519 --------------------------------------------------------------------}
520 -- | /O(n+m)/. Difference between two sets.
521 difference :: IntSet -> IntSet -> IntSet
522 difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
523 | shorter m1 m2 = difference1
524 | shorter m2 m1 = difference2
525 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
526 | otherwise = t1
527 where
528 difference1 | nomatch p2 p1 m1 = t1
529 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1
530 | otherwise = bin p1 m1 l1 (difference r1 t2)
531
532 difference2 | nomatch p1 p2 m2 = t1
533 | zero p1 m2 = difference t1 l2
534 | otherwise = difference t1 r2
535
536 difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t
537 difference t@(Bin _ _ _ _) Nil = t
538
539 difference t1@(Tip kx bm) t2 = differenceTip t2
540 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1
541 | zero kx m2 = differenceTip l2
542 | otherwise = differenceTip r2
543 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2)
544 | otherwise = t1
545 differenceTip Nil = t1
546
547 difference Nil _ = Nil
548
549
550
551 {--------------------------------------------------------------------
552 Intersection
553 --------------------------------------------------------------------}
554 -- | /O(n+m)/. The intersection of two sets.
555 intersection :: IntSet -> IntSet -> IntSet
556 intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
557 | shorter m1 m2 = intersection1
558 | shorter m2 m1 = intersection2
559 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
560 | otherwise = Nil
561 where
562 intersection1 | nomatch p2 p1 m1 = Nil
563 | zero p2 m1 = intersection l1 t2
564 | otherwise = intersection r1 t2
565
566 intersection2 | nomatch p1 p2 m2 = Nil
567 | zero p1 m2 = intersection t1 l2
568 | otherwise = intersection t1 r2
569
570 intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1
571 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil
572 | zero kx2 m1 = intersectBM l1
573 | otherwise = intersectBM r1
574 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
575 | otherwise = Nil
576 intersectBM Nil = Nil
577
578 intersection (Bin _ _ _ _) Nil = Nil
579
580 intersection (Tip kx1 bm1) t2 = intersectBM t2
581 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil
582 | zero kx1 m2 = intersectBM l2
583 | otherwise = intersectBM r2
584 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2)
585 | otherwise = Nil
586 intersectBM Nil = Nil
587
588 intersection Nil _ = Nil
589
590 {--------------------------------------------------------------------
591 Subset
592 --------------------------------------------------------------------}
593 -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
594 isProperSubsetOf :: IntSet -> IntSet -> Bool
595 isProperSubsetOf t1 t2
596 = case subsetCmp t1 t2 of
597 LT -> True
598 _ -> False
599
600 subsetCmp :: IntSet -> IntSet -> Ordering
601 subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
602 | shorter m1 m2 = GT
603 | shorter m2 m1 = case subsetCmpLt of
604 GT -> GT
605 _ -> LT
606 | p1 == p2 = subsetCmpEq
607 | otherwise = GT -- disjoint
608 where
609 subsetCmpLt | nomatch p1 p2 m2 = GT
610 | zero p1 m2 = subsetCmp t1 l2
611 | otherwise = subsetCmp t1 r2
612 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
613 (GT,_ ) -> GT
614 (_ ,GT) -> GT
615 (EQ,EQ) -> EQ
616 _ -> LT
617
618 subsetCmp (Bin _ _ _ _) _ = GT
619 subsetCmp (Tip kx1 bm1) (Tip kx2 bm2)
620 | kx1 /= kx2 = GT -- disjoint
621 | bm1 == bm2 = EQ
622 | bm1 .&. complement bm2 == 0 = LT
623 | otherwise = GT
624 subsetCmp t1@(Tip kx _) (Bin p m l r)
625 | nomatch kx p m = GT
626 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT
627 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT
628 subsetCmp (Tip _ _) Nil = GT -- disjoint
629 subsetCmp Nil Nil = EQ
630 subsetCmp Nil _ = LT
631
632 -- | /O(n+m)/. Is this a subset?
633 -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
634
635 isSubsetOf :: IntSet -> IntSet -> Bool
636 isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
637 | shorter m1 m2 = False
638 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2
639 else isSubsetOf t1 r2)
640 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2
641 isSubsetOf (Bin _ _ _ _) _ = False
642 isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0
643 isSubsetOf t1@(Tip kx _) (Bin p m l r)
644 | nomatch kx p m = False
645 | zero kx m = isSubsetOf t1 l
646 | otherwise = isSubsetOf t1 r
647 isSubsetOf (Tip _ _) Nil = False
648 isSubsetOf Nil _ = True
649
650
651 {--------------------------------------------------------------------
652 Filter
653 --------------------------------------------------------------------}
654 -- | /O(n)/. Filter all elements that satisfy some predicate.
655 filter :: (Key -> Bool) -> IntSet -> IntSet
656 filter predicate t
657 = case t of
658 Bin p m l r
659 -> bin p m (filter predicate l) (filter predicate r)
660 Tip kx bm
661 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm)
662 Nil -> Nil
663 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
664 | otherwise = bm
665 {-# INLINE bitPred #-}
666
667 -- | /O(n)/. partition the set according to some predicate.
668 partition :: (Key -> Bool) -> IntSet -> (IntSet,IntSet)
669 partition predicate0 t0 = toPair $ go predicate0 t0
670 where
671 go predicate t
672 = case t of
673 Bin p m l r
674 -> let (l1 :*: l2) = go predicate l
675 (r1 :*: r2) = go predicate r
676 in bin p m l1 r1 :*: bin p m l2 r2
677 Tip kx bm
678 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm
679 in tip kx bm1 :*: tip kx (bm `xor` bm1)
680 Nil -> (Nil :*: Nil)
681 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi
682 | otherwise = bm
683 {-# INLINE bitPred #-}
684
685
686 -- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@
687 -- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
688 -- comprises the elements of @set@ greater than @x@.
689 --
690 -- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5])
691 split :: Key -> IntSet -> (IntSet,IntSet)
692 split x t =
693 case t of
694 Bin _ m l r
695 | m < 0 -> if x >= 0 -- handle negative numbers.
696 then case go x l of (lt :*: gt) -> let lt' = union lt r
697 in lt' `seq` (lt', gt)
698 else case go x r of (lt :*: gt) -> let gt' = union gt l
699 in gt' `seq` (lt, gt')
700 _ -> case go x t of
701 (lt :*: gt) -> (lt, gt)
702 where
703 go !x' t'@(Bin p m l r)
704 | match x' p m = if zero x' m
705 then case go x' l of
706 (lt :*: gt) -> lt :*: union gt r
707 else case go x' r of
708 (lt :*: gt) -> union lt l :*: gt
709 | otherwise = if x' < p then (Nil :*: t')
710 else (t' :*: Nil)
711 go x' t'@(Tip kx' bm)
712 | kx' > x' = (Nil :*: t')
713 -- equivalent to kx' > prefixOf x'
714 | kx' < prefixOf x' = (t' :*: Nil)
715 | otherwise = tip kx' (bm .&. lowerBitmap) :*: tip kx' (bm .&. higherBitmap)
716 where lowerBitmap = bitmapOf x' - 1
717 higherBitmap = complement (lowerBitmap + bitmapOf x')
718 go _ Nil = (Nil :*: Nil)
719
720 -- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot
721 -- element was found in the original set.
722 splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet)
723 splitMember x t =
724 case t of
725 Bin _ m l r | m < 0 -> if x >= 0
726 then case go x l of
727 (lt, fnd, gt) -> let lt' = union lt r
728 in lt' `seq` (lt', fnd, gt)
729 else case go x r of
730 (lt, fnd, gt) -> let gt' = union gt l
731 in gt' `seq` (lt, fnd, gt')
732 _ -> go x t
733 where
734 go x' t'@(Bin p m l r)
735 | match x' p m = if zero x' m
736 then case go x' l of
737 (lt, fnd, gt) -> (lt, fnd, union gt r)
738 else case go x' r of
739 (lt, fnd, gt) -> (union lt l, fnd, gt)
740 | otherwise = if x' < p then (Nil, False, t') else (t', False, Nil)
741 go x' t'@(Tip kx' bm)
742 | kx' > x' = (Nil, False, t')
743 -- equivalent to kx' > prefixOf x'
744 | kx' < prefixOf x' = (t', False, Nil)
745 | otherwise = let lt = tip kx' (bm .&. lowerBitmap)
746 found = (bm .&. bitmapOfx') /= 0
747 gt = tip kx' (bm .&. higherBitmap)
748 in lt `seq` found `seq` gt `seq` (lt, found, gt)
749 where bitmapOfx' = bitmapOf x'
750 lowerBitmap = bitmapOfx' - 1
751 higherBitmap = complement (lowerBitmap + bitmapOfx')
752 go _ Nil = (Nil, False, Nil)
753
754
755 {----------------------------------------------------------------------
756 Min/Max
757 ----------------------------------------------------------------------}
758
759 -- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set
760 -- stripped of that element, or 'Nothing' if passed an empty set.
761 maxView :: IntSet -> Maybe (Key, IntSet)
762 maxView t =
763 case t of Nil -> Nothing
764 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r)
765 _ -> Just (go t)
766 where
767 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r')
768 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
769 go Nil = error "maxView Nil"
770
771 -- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set
772 -- stripped of that element, or 'Nothing' if passed an empty set.
773 minView :: IntSet -> Maybe (Key, IntSet)
774 minView t =
775 case t of Nil -> Nothing
776 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r')
777 _ -> Just (go t)
778 where
779 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r)
780 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi)))
781 go Nil = error "minView Nil"
782
783 -- | /O(min(n,W))/. Delete and find the minimal element.
784 --
785 -- > deleteFindMin set = (findMin set, deleteMin set)
786 deleteFindMin :: IntSet -> (Key, IntSet)
787 deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView
788
789 -- | /O(min(n,W))/. Delete and find the maximal element.
790 --
791 -- > deleteFindMax set = (findMax set, deleteMax set)
792 deleteFindMax :: IntSet -> (Key, IntSet)
793 deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView
794
795
796 -- | /O(min(n,W))/. The minimal element of the set.
797 findMin :: IntSet -> Key
798 findMin Nil = error "findMin: empty set has no minimal element"
799 findMin (Tip kx bm) = kx + lowestBitSet bm
800 findMin (Bin _ m l r)
801 | m < 0 = find r
802 | otherwise = find l
803 where find (Tip kx bm) = kx + lowestBitSet bm
804 find (Bin _ _ l' _) = find l'
805 find Nil = error "findMin Nil"
806
807 -- | /O(min(n,W))/. The maximal element of a set.
808 findMax :: IntSet -> Key
809 findMax Nil = error "findMax: empty set has no maximal element"
810 findMax (Tip kx bm) = kx + highestBitSet bm
811 findMax (Bin _ m l r)
812 | m < 0 = find l
813 | otherwise = find r
814 where find (Tip kx bm) = kx + highestBitSet bm
815 find (Bin _ _ _ r') = find r'
816 find Nil = error "findMax Nil"
817
818
819 -- | /O(min(n,W))/. Delete the minimal element. Returns an empty set if the set is empty.
820 --
821 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
822 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
823 deleteMin :: IntSet -> IntSet
824 deleteMin = maybe Nil snd . minView
825
826 -- | /O(min(n,W))/. Delete the maximal element. Returns an empty set if the set is empty.
827 --
828 -- Note that this is a change of behaviour for consistency with 'Data.Set.Set' &#8211;
829 -- versions prior to 0.5 threw an error if the 'IntSet' was already empty.
830 deleteMax :: IntSet -> IntSet
831 deleteMax = maybe Nil snd . maxView
832
833 {----------------------------------------------------------------------
834 Map
835 ----------------------------------------------------------------------}
836
837 -- | /O(n*min(n,W))/.
838 -- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
839 --
840 -- It's worth noting that the size of the result may be smaller if,
841 -- for some @(x,y)@, @x \/= y && f x == f y@
842
843 map :: (Key -> Key) -> IntSet -> IntSet
844 map f = fromList . List.map f . toList
845
846 {--------------------------------------------------------------------
847 Fold
848 --------------------------------------------------------------------}
849 -- | /O(n)/. Fold the elements in the set using the given right-associative
850 -- binary operator. This function is an equivalent of 'foldr' and is present
851 -- for compatibility only.
852 --
853 -- /Please note that fold will be deprecated in the future and removed./
854 fold :: (Key -> b -> b) -> b -> IntSet -> b
855 fold = foldr
856 {-# INLINE fold #-}
857
858 -- | /O(n)/. Fold the elements in the set using the given right-associative
859 -- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
860 --
861 -- For example,
862 --
863 -- > toAscList set = foldr (:) [] set
864 foldr :: (Key -> b -> b) -> b -> IntSet -> b
865 foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only.
866 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
867 | otherwise -> go (go z r) l
868 _ -> go z t
869 where
870 go z' Nil = z'
871 go z' (Tip kx bm) = foldrBits kx f z' bm
872 go z' (Bin _ _ l r) = go (go z' r) l
873 {-# INLINE foldr #-}
874
875 -- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
876 -- evaluated before using the result in the next application. This
877 -- function is strict in the starting value.
878 foldr' :: (Key -> b -> b) -> b -> IntSet -> b
879 foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
880 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before
881 | otherwise -> go (go z r) l
882 _ -> go z t
883 where
884 STRICT_1_OF_2(go)
885 go z' Nil = z'
886 go z' (Tip kx bm) = foldr'Bits kx f z' bm
887 go z' (Bin _ _ l r) = go (go z' r) l
888 {-# INLINE foldr' #-}
889
890 -- | /O(n)/. Fold the elements in the set using the given left-associative
891 -- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
892 --
893 -- For example,
894 --
895 -- > toDescList set = foldl (flip (:)) [] set
896 foldl :: (a -> Key -> a) -> a -> IntSet -> a
897 foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only.
898 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
899 | otherwise -> go (go z l) r
900 _ -> go z t
901 where
902 STRICT_1_OF_2(go)
903 go z' Nil = z'
904 go z' (Tip kx bm) = foldlBits kx f z' bm
905 go z' (Bin _ _ l r) = go (go z' l) r
906 {-# INLINE foldl #-}
907
908 -- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
909 -- evaluated before using the result in the next application. This
910 -- function is strict in the starting value.
911 foldl' :: (a -> Key -> a) -> a -> IntSet -> a
912 foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only.
913 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before
914 | otherwise -> go (go z l) r
915 _ -> go z t
916 where
917 STRICT_1_OF_2(go)
918 go z' Nil = z'
919 go z' (Tip kx bm) = foldl'Bits kx f z' bm
920 go z' (Bin _ _ l r) = go (go z' l) r
921 {-# INLINE foldl' #-}
922
923 {--------------------------------------------------------------------
924 List variations
925 --------------------------------------------------------------------}
926 -- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order.
927 -- Subject to list fusion.
928 elems :: IntSet -> [Key]
929 elems
930 = toAscList
931
932 {--------------------------------------------------------------------
933 Lists
934 --------------------------------------------------------------------}
935 -- | /O(n)/. Convert the set to a list of elements. Subject to list fusion.
936 toList :: IntSet -> [Key]
937 toList
938 = toAscList
939
940 -- | /O(n)/. Convert the set to an ascending list of elements. Subject to list
941 -- fusion.
942 toAscList :: IntSet -> [Key]
943 toAscList = foldr (:) []
944
945 -- | /O(n)/. Convert the set to a descending list of elements. Subject to list
946 -- fusion.
947 toDescList :: IntSet -> [Key]
948 toDescList = foldl (flip (:)) []
949
950 -- List fusion for the list generating functions.
951 #if __GLASGOW_HASKELL__
952 -- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion.
953 -- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude.
954 foldrFB :: (Key -> b -> b) -> b -> IntSet -> b
955 foldrFB = foldr
956 {-# INLINE[0] foldrFB #-}
957 foldlFB :: (a -> Key -> a) -> a -> IntSet -> a
958 foldlFB = foldl
959 {-# INLINE[0] foldlFB #-}
960
961 -- Inline elems and toList, so that we need to fuse only toAscList.
962 {-# INLINE elems #-}
963 {-# INLINE toList #-}
964
965 -- The fusion is enabled up to phase 2 included. If it does not succeed,
966 -- convert in phase 1 the expanded to{Asc,Desc}List calls back to
967 -- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in
968 -- a list fusion, otherwise it would go away in phase 1), and let compiler do
969 -- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it
970 -- before phase 0, otherwise the fusion rules would not fire at all.
971 {-# NOINLINE[0] toAscList #-}
972 {-# NOINLINE[0] toDescList #-}
973 {-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = build (\c n -> foldrFB c n s) #-}
974 {-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-}
975 {-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = build (\c n -> foldlFB (\xs x -> c x xs) n s) #-}
976 {-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-}
977 #endif
978
979
980 -- | /O(n*min(n,W))/. Create a set from a list of integers.
981 fromList :: [Key] -> IntSet
982 fromList xs
983 = foldlStrict ins empty xs
984 where
985 ins t x = insert x t
986
987 -- | /O(n)/. Build a set from an ascending list of elements.
988 -- /The precondition (input list is ascending) is not checked./
989 fromAscList :: [Key] -> IntSet
990 fromAscList [] = Nil
991 fromAscList (x0 : xs0) = fromDistinctAscList (combineEq x0 xs0)
992 where
993 combineEq x' [] = [x']
994 combineEq x' (x:xs)
995 | x==x' = combineEq x' xs
996 | otherwise = x' : combineEq x xs
997
998 -- | /O(n)/. Build a set from an ascending list of distinct elements.
999 -- /The precondition (input list is strictly ascending) is not checked./
1000 fromDistinctAscList :: [Key] -> IntSet
1001 fromDistinctAscList [] = Nil
1002 fromDistinctAscList (z0 : zs0) = work (prefixOf z0) (bitmapOf z0) zs0 Nada
1003 where
1004 -- 'work' accumulates all values that go into one tip, before passing this Tip
1005 -- to 'reduce'
1006 work kx bm [] stk = finish kx (Tip kx bm) stk
1007 work kx bm (z:zs) stk | kx == prefixOf z = work kx (bm .|. bitmapOf z) zs stk
1008 work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
1009
1010 reduce z zs _ px tx Nada = work (prefixOf z) (bitmapOf z) zs (Push px tx Nada)
1011 reduce z zs m px tx stk@(Push py ty stk') =
1012 let mxy = branchMask px py
1013 pxy = mask px mxy
1014 in if shorter m mxy
1015 then reduce z zs m pxy (Bin pxy mxy ty tx) stk'
1016 else work (prefixOf z) (bitmapOf z) zs (Push px tx stk)
1017
1018 finish _ t Nada = t
1019 finish px tx (Push py ty stk) = finish p (join py ty px tx) stk
1020 where m = branchMask px py
1021 p = mask px m
1022
1023 data Stack = Push {-# UNPACK #-} !Prefix !IntSet !Stack | Nada
1024
1025
1026 {--------------------------------------------------------------------
1027 Eq
1028 --------------------------------------------------------------------}
1029 instance Eq IntSet where
1030 t1 == t2 = equal t1 t2
1031 t1 /= t2 = nequal t1 t2
1032
1033 equal :: IntSet -> IntSet -> Bool
1034 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1035 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
1036 equal (Tip kx1 bm1) (Tip kx2 bm2)
1037 = kx1 == kx2 && bm1 == bm2
1038 equal Nil Nil = True
1039 equal _ _ = False
1040
1041 nequal :: IntSet -> IntSet -> Bool
1042 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
1043 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
1044 nequal (Tip kx1 bm1) (Tip kx2 bm2)
1045 = kx1 /= kx2 || bm1 /= bm2
1046 nequal Nil Nil = False
1047 nequal _ _ = True
1048
1049 {--------------------------------------------------------------------
1050 Ord
1051 --------------------------------------------------------------------}
1052
1053 instance Ord IntSet where
1054 compare s1 s2 = compare (toAscList s1) (toAscList s2)
1055 -- tentative implementation. See if more efficient exists.
1056
1057 {--------------------------------------------------------------------
1058 Show
1059 --------------------------------------------------------------------}
1060 instance Show IntSet where
1061 showsPrec p xs = showParen (p > 10) $
1062 showString "fromList " . shows (toList xs)
1063
1064 {--------------------------------------------------------------------
1065 Read
1066 --------------------------------------------------------------------}
1067 instance Read IntSet where
1068 #ifdef __GLASGOW_HASKELL__
1069 readPrec = parens $ prec 10 $ do
1070 Ident "fromList" <- lexP
1071 xs <- readPrec
1072 return (fromList xs)
1073
1074 readListPrec = readListPrecDefault
1075 #else
1076 readsPrec p = readParen (p > 10) $ \ r -> do
1077 ("fromList",s) <- lex r
1078 (xs,t) <- reads s
1079 return (fromList xs,t)
1080 #endif
1081
1082 {--------------------------------------------------------------------
1083 Typeable
1084 --------------------------------------------------------------------}
1085
1086 #include "Typeable.h"
1087 INSTANCE_TYPEABLE0(IntSet,intSetTc,"IntSet")
1088
1089 {--------------------------------------------------------------------
1090 NFData
1091 --------------------------------------------------------------------}
1092
1093 -- The IntSet constructors consist only of strict fields of Ints and
1094 -- IntSets, thus the default NFData instance which evaluates to whnf
1095 -- should suffice
1096 instance NFData IntSet
1097
1098 {--------------------------------------------------------------------
1099 Debugging
1100 --------------------------------------------------------------------}
1101 -- | /O(n)/. Show the tree that implements the set. The tree is shown
1102 -- in a compressed, hanging format.
1103 showTree :: IntSet -> String
1104 showTree s
1105 = showTreeWith True False s
1106
1107
1108 {- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows
1109 the tree that implements the set. If @hang@ is
1110 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If
1111 @wide@ is 'True', an extra wide version is shown.
1112 -}
1113 showTreeWith :: Bool -> Bool -> IntSet -> String
1114 showTreeWith hang wide t
1115 | hang = (showsTreeHang wide [] t) ""
1116 | otherwise = (showsTree wide [] [] t) ""
1117
1118 showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
1119 showsTree wide lbars rbars t
1120 = case t of
1121 Bin p m l r
1122 -> showsTree wide (withBar rbars) (withEmpty rbars) r .
1123 showWide wide rbars .
1124 showsBars lbars . showString (showBin p m) . showString "\n" .
1125 showWide wide lbars .
1126 showsTree wide (withEmpty lbars) (withBar lbars) l
1127 Tip kx bm
1128 -> showsBars lbars . showString " " . shows kx . showString " + " .
1129 showsBitMap bm . showString "\n"
1130 Nil -> showsBars lbars . showString "|\n"
1131
1132 showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
1133 showsTreeHang wide bars t
1134 = case t of
1135 Bin p m l r
1136 -> showsBars bars . showString (showBin p m) . showString "\n" .
1137 showWide wide bars .
1138 showsTreeHang wide (withBar bars) l .
1139 showWide wide bars .
1140 showsTreeHang wide (withEmpty bars) r
1141 Tip kx bm
1142 -> showsBars bars . showString " " . shows kx . showString " + " .
1143 showsBitMap bm . showString "\n"
1144 Nil -> showsBars bars . showString "|\n"
1145
1146 showBin :: Prefix -> Mask -> String
1147 showBin _ _
1148 = "*" -- ++ show (p,m)
1149
1150 showWide :: Bool -> [String] -> String -> String
1151 showWide wide bars
1152 | wide = showString (concat (reverse bars)) . showString "|\n"
1153 | otherwise = id
1154
1155 showsBars :: [String] -> ShowS
1156 showsBars bars
1157 = case bars of
1158 [] -> id
1159 _ -> showString (concat (reverse (tail bars))) . showString node
1160
1161 showsBitMap :: Word -> ShowS
1162 showsBitMap = showString . showBitMap
1163
1164 showBitMap :: Word -> String
1165 showBitMap w = show $ foldrBits 0 (:) [] w
1166
1167 node :: String
1168 node = "+--"
1169
1170 withBar, withEmpty :: [String] -> [String]
1171 withBar bars = "| ":bars
1172 withEmpty bars = " ":bars
1173
1174
1175 {--------------------------------------------------------------------
1176 Helpers
1177 --------------------------------------------------------------------}
1178 {--------------------------------------------------------------------
1179 Join
1180 --------------------------------------------------------------------}
1181 join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
1182 join p1 t1 p2 t2
1183 | zero p1 m = Bin p m t1 t2
1184 | otherwise = Bin p m t2 t1
1185 where
1186 m = branchMask p1 p2
1187 p = mask p1 m
1188 {-# INLINE join #-}
1189
1190 {--------------------------------------------------------------------
1191 @bin@ assures that we never have empty trees within a tree.
1192 --------------------------------------------------------------------}
1193 bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
1194 bin _ _ l Nil = l
1195 bin _ _ Nil r = r
1196 bin p m l r = Bin p m l r
1197 {-# INLINE bin #-}
1198
1199 {--------------------------------------------------------------------
1200 @tip@ assures that we never have empty bitmaps within a tree.
1201 --------------------------------------------------------------------}
1202 tip :: Prefix -> BitMap -> IntSet
1203 tip _ 0 = Nil
1204 tip kx bm = Tip kx bm
1205 {-# INLINE tip #-}
1206
1207
1208 {----------------------------------------------------------------------
1209 Functions that generate Prefix and BitMap of a Key or a Suffix.
1210 ----------------------------------------------------------------------}
1211
1212 suffixBitMask :: Int
1213 suffixBitMask = bitSize (undefined::Word) - 1
1214 {-# INLINE suffixBitMask #-}
1215
1216 prefixBitMask :: Int
1217 prefixBitMask = complement suffixBitMask
1218 {-# INLINE prefixBitMask #-}
1219
1220 prefixOf :: Int -> Prefix
1221 prefixOf x = x .&. prefixBitMask
1222 {-# INLINE prefixOf #-}
1223
1224 suffixOf :: Int -> Int
1225 suffixOf x = x .&. suffixBitMask
1226 {-# INLINE suffixOf #-}
1227
1228 bitmapOfSuffix :: Int -> BitMap
1229 bitmapOfSuffix s = 1 `shiftLL` s
1230 {-# INLINE bitmapOfSuffix #-}
1231
1232 bitmapOf :: Int -> BitMap
1233 bitmapOf x = bitmapOfSuffix (suffixOf x)
1234 {-# INLINE bitmapOf #-}
1235
1236
1237 {--------------------------------------------------------------------
1238 Endian independent bit twiddling
1239 --------------------------------------------------------------------}
1240 zero :: Int -> Mask -> Bool
1241 zero i m
1242 = (natFromInt i) .&. (natFromInt m) == 0
1243 {-# INLINE zero #-}
1244
1245 nomatch,match :: Int -> Prefix -> Mask -> Bool
1246 nomatch i p m
1247 = (mask i m) /= p
1248 {-# INLINE nomatch #-}
1249
1250 match i p m
1251 = (mask i m) == p
1252 {-# INLINE match #-}
1253
1254 -- Suppose a is largest such that 2^a divides 2*m.
1255 -- Then mask i m is i with the low a bits zeroed out.
1256 mask :: Int -> Mask -> Prefix
1257 mask i m
1258 = maskW (natFromInt i) (natFromInt m)
1259 {-# INLINE mask #-}
1260
1261 {--------------------------------------------------------------------
1262 Big endian operations
1263 --------------------------------------------------------------------}
1264 maskW :: Nat -> Nat -> Prefix
1265 maskW i m
1266 = intFromNat (i .&. (complement (m-1) `xor` m))
1267 {-# INLINE maskW #-}
1268
1269 shorter :: Mask -> Mask -> Bool
1270 shorter m1 m2
1271 = (natFromInt m1) > (natFromInt m2)
1272 {-# INLINE shorter #-}
1273
1274 branchMask :: Prefix -> Prefix -> Mask
1275 branchMask p1 p2
1276 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
1277 {-# INLINE branchMask #-}
1278
1279 {----------------------------------------------------------------------
1280 Finding the highest bit (mask) in a word [x] can be done efficiently in
1281 three ways:
1282 * convert to a floating point value and the mantissa tells us the
1283 [log2(x)] that corresponds with the highest bit position. The mantissa
1284 is retrieved either via the standard C function [frexp] or by some bit
1285 twiddling on IEEE compatible numbers (float). Note that one needs to
1286 use at least [double] precision for an accurate mantissa of 32 bit
1287 numbers.
1288 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
1289 * use processor specific assembler instruction (asm).
1290
1291 The most portable way would be [bit], but is it efficient enough?
1292 I have measured the cycle counts of the different methods on an AMD
1293 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
1294
1295 highestBitMask: method cycles
1296 --------------
1297 frexp 200
1298 float 33
1299 bit 11
1300 asm 12
1301
1302 highestBit: method cycles
1303 --------------
1304 frexp 195
1305 float 33
1306 bit 11
1307 asm 11
1308
1309 Wow, the bit twiddling is on today's RISC like machines even faster
1310 than a single CISC instruction (BSR)!
1311 ----------------------------------------------------------------------}
1312
1313 {----------------------------------------------------------------------
1314 [highestBitMask] returns a word where only the highest bit is set.
1315 It is found by first setting all bits in lower positions than the
1316 highest bit and than taking an exclusive or with the original value.
1317 Allthough the function may look expensive, GHC compiles this into
1318 excellent C code that subsequently compiled into highly efficient
1319 machine code. The algorithm is derived from Jorg Arndt's FXT library.
1320 ----------------------------------------------------------------------}
1321 highestBitMask :: Nat -> Nat
1322 highestBitMask x0
1323 = case (x0 .|. shiftRL x0 1) of
1324 x1 -> case (x1 .|. shiftRL x1 2) of
1325 x2 -> case (x2 .|. shiftRL x2 4) of
1326 x3 -> case (x3 .|. shiftRL x3 8) of
1327 x4 -> case (x4 .|. shiftRL x4 16) of
1328 #if !(defined(__GLASGOW_HASKELL__) && WORD_SIZE_IN_BITS==32)
1329 x5 -> case (x5 .|. shiftRL x5 32) of -- for 64 bit platforms
1330 #endif
1331 x6 -> (x6 `xor` (shiftRL x6 1))
1332 {-# INLINE highestBitMask #-}
1333
1334 {----------------------------------------------------------------------
1335 To get best performance, we provide fast implementations of
1336 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC.
1337 If the intel bsf and bsr instructions ever become GHC primops,
1338 this code should be reimplemented using these.
1339
1340 Performance of this code is crucial for folds, toList, filter, partition.
1341
1342 The signatures of methods in question are placed after this comment.
1343 ----------------------------------------------------------------------}
1344
1345 lowestBitSet :: Nat -> Int
1346 highestBitSet :: Nat -> Int
1347 foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1348 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a
1349 foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1350 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a
1351
1352 {-# INLINE lowestBitSet #-}
1353 {-# INLINE highestBitSet #-}
1354 {-# INLINE foldlBits #-}
1355 {-# INLINE foldl'Bits #-}
1356 {-# INLINE foldrBits #-}
1357 {-# INLINE foldr'Bits #-}
1358
1359 #if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64)
1360 {----------------------------------------------------------------------
1361 For lowestBitSet we use wordsize-dependant implementation based on
1362 multiplication and DeBrujn indeces, which was proposed by Edward Kmett
1363 <http://haskell.org/pipermail/libraries/2011-September/016749.html>
1364
1365 The core of this implementation is fast indexOfTheOnlyBit,
1366 which is given a Nat with exactly one bit set, and returns
1367 its index.
1368
1369 Lot of effort was put in these implementations, please benchmark carefully
1370 before changing this code.
1371 ----------------------------------------------------------------------}
1372
1373 indexOfTheOnlyBit :: Nat -> Int
1374 {-# INLINE indexOfTheOnlyBit #-}
1375 indexOfTheOnlyBit bitmask =
1376 I# (lsbArray `indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))
1377 where unboxInt (I# i) = i
1378 #if WORD_SIZE_IN_BITS==32
1379 magic = 0x077CB531
1380 offset = 27
1381 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"#
1382 #else
1383 magic = 0x07EDD5E59A4E28C2
1384 offset = 58
1385 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"#
1386 #endif
1387 -- The lsbArray gets inlined to every call site of indexOfTheOnlyBit.
1388 -- That cannot be easily avoided, as GHC forbids top-level Addr# literal.
1389 -- One could go around that by supplying getLsbArray :: () -> Addr# marked
1390 -- as NOINLINE. But the code size of calling it and processing the result
1391 -- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array
1392 -- is actually improvement on 32-bit and only a 8B size increase on 64-bit.
1393
1394 lowestBitMask :: Nat -> Nat
1395 lowestBitMask x = x .&. negate x
1396 {-# INLINE lowestBitMask #-}
1397
1398 -- Reverse the order of bits in the Nat.
1399 revNat :: Nat -> Nat
1400 #if WORD_SIZE_IN_BITS==32
1401 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of
1402 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of
1403 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of
1404 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of
1405 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16);
1406 #else
1407 revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of
1408 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of
1409 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of
1410 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of
1411 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of
1412 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32);
1413 #endif
1414
1415 lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x)
1416
1417 highestBitSet x = indexOfTheOnlyBit (highestBitMask x)
1418
1419 foldlBits prefix f z bitmap = go bitmap z
1420 where go bm acc | bm == 0 = acc
1421 | otherwise = case lowestBitMask bm of
1422 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1423 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1424
1425 foldl'Bits prefix f z bitmap = go bitmap z
1426 where STRICT_2_OF_2(go)
1427 go bm acc | bm == 0 = acc
1428 | otherwise = case lowestBitMask bm of
1429 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1430 bi -> bi `seq` go (bm `xor` bitmask) ((f acc) $! (prefix+bi))
1431
1432 foldrBits prefix f z bitmap = go (revNat bitmap) z
1433 where go bm acc | bm == 0 = acc
1434 | otherwise = case lowestBitMask bm of
1435 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1436 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1437
1438 foldr'Bits prefix f z bitmap = go (revNat bitmap) z
1439 where STRICT_2_OF_2(go)
1440 go bm acc | bm == 0 = acc
1441 | otherwise = case lowestBitMask bm of
1442 bitmask -> bitmask `seq` case indexOfTheOnlyBit bitmask of
1443 bi -> bi `seq` go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc)
1444
1445 #else
1446 {----------------------------------------------------------------------
1447 In general case we use logarithmic implementation of
1448 lowestBitSet and highestBitSet, which works up to bit sizes of 64.
1449
1450 Folds are linear scans.
1451 ----------------------------------------------------------------------}
1452
1453 lowestBitSet n0 =
1454 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32)
1455 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1)
1456 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2)
1457 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3)
1458 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4)
1459 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5
1460 in b6
1461
1462 highestBitSet n0 =
1463 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0)
1464 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1)
1465 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2)
1466 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3)
1467 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4)
1468 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5
1469 in b6
1470
1471 foldlBits prefix f z bm = let lb = lowestBitSet bm
1472 in go (prefix+lb) z (bm `shiftRL` lb)
1473 where STRICT_1_OF_3(go)
1474 go _ acc 0 = acc
1475 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1476 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1477
1478 foldl'Bits prefix f z bm = let lb = lowestBitSet bm
1479 in go (prefix+lb) z (bm `shiftRL` lb)
1480 where STRICT_1_OF_3(go)
1481 STRICT_2_OF_3(go)
1482 go _ acc 0 = acc
1483 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
1484 | otherwise = go (bi + 1) acc (n `shiftRL` 1)
1485
1486 foldrBits prefix f z bm = let lb = lowestBitSet bm
1487 in go (prefix+lb) (bm `shiftRL` lb)
1488 where STRICT_1_OF_2(go)
1489 go _ 0 = z
1490 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1))
1491 | otherwise = go (bi + 1) (n `shiftRL` 1)
1492
1493 foldr'Bits prefix f z bm = let lb = lowestBitSet bm
1494 in go (prefix+lb) (bm `shiftRL` lb)
1495 where STRICT_1_OF_2(go)
1496 go _ 0 = z
1497 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
1498 | otherwise = go (bi + 1) (n `shiftRL` 1)
1499
1500 #endif
1501
1502 {----------------------------------------------------------------------
1503 [bitcount] as posted by David F. Place to haskell-cafe on April 11, 2006,
1504 based on the code on
1505 http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan,
1506 where the following source is given:
1507 Published in 1988, the C Programming Language 2nd Ed. (by Brian W.
1508 Kernighan and Dennis M. Ritchie) mentions this in exercise 2-9. On April
1509 19, 2006 Don Knuth pointed out to me that this method "was first published
1510 by Peter Wegner in CACM 3 (1960), 322. (Also discovered independently by
1511 Derrick Lehmer and published in 1964 in a book edited by Beckenbach.)"
1512 ----------------------------------------------------------------------}
1513
1514 -- We want to be able to compile without cabal. Nevertheless
1515 -- #if defined(MIN_VERSION_base) && MIN_VERSION_base(4,5,0)
1516 -- does not work, because if MIN_VERSION_base is undefined,
1517 -- the last condition is syntactically wrong.
1518 #define MIN_VERSION_base_4_5_0 0
1519 #ifdef MIN_VERSION_base
1520 #if MIN_VERSION_base(4,5,0)
1521 #undef MIN_VERSION_base_4_5_0
1522 #define MIN_VERSION_base_4_5_0 1
1523 #endif
1524 #endif
1525
1526 bitcount :: Int -> Word -> Int
1527 #if MIN_VERSION_base_4_5_0
1528 bitcount a x = a + popCount x
1529 #else
1530 bitcount a0 x0 = go a0 x0
1531 where go a 0 = a
1532 go a x = go (a + 1) (x .&. (x-1))
1533 #endif
1534 {-# INLINE bitcount #-}
1535
1536
1537 {--------------------------------------------------------------------
1538 Utilities
1539 --------------------------------------------------------------------}
1540 foldlStrict :: (a -> b -> a) -> a -> [b] -> a
1541 foldlStrict f = go
1542 where
1543 go z [] = z
1544 go z (x:xs) = let z' = f z x in z' `seq` go z' xs
1545 {-# INLINE foldlStrict #-}