f02628bad49150176f8610faff184fb00ce5ec0f
[packages/base.git] / System / Event / IntMap.hs
1 {-# LANGUAGE CPP, MagicHash, NoImplicitPrelude #-}
2 -----------------------------------------------------------------------------
3 -- |
4 -- Module : System.Event.IntMap
5 -- Copyright : (c) Daan Leijen 2002
6 -- (c) Andriy Palamarchuk 2008
7 -- License : BSD-style
8 -- Maintainer : libraries@haskell.org
9 -- Stability : provisional
10 -- Portability : portable
11 --
12 -- An efficient implementation of maps from integer keys to values.
13 --
14 -- Since many function names (but not the type name) clash with
15 -- "Prelude" names, this module is usually imported @qualified@, e.g.
16 --
17 -- > import Data.IntMap (IntMap)
18 -- > import qualified Data.IntMap as IntMap
19 --
20 -- The implementation is based on /big-endian patricia trees/. This data
21 -- structure performs especially well on binary operations like 'union'
22 -- and 'intersection'. However, my benchmarks show that it is also
23 -- (much) faster on insertions and deletions when compared to a generic
24 -- size-balanced map implementation (see "Data.Map").
25 --
26 -- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
27 -- Workshop on ML, September 1998, pages 77-86,
28 -- <http://citeseer.ist.psu.edu/okasaki98fast.html>
29 --
30 -- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
31 -- Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
32 -- October 1968, pages 514-534.
33 --
34 -- Operation comments contain the operation time complexity in
35 -- the Big-O notation <http://en.wikipedia.org/wiki/Big_O_notation>.
36 -- Many operations have a worst-case complexity of /O(min(n,W))/.
37 -- This means that the operation can become linear in the number of
38 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
39 -- (32 or 64).
40 -----------------------------------------------------------------------------
41
42 module System.Event.IntMap
43 (
44 -- * Map type
45 IntMap
46 , Key
47
48 -- * Query
49 , lookup
50 , member
51
52 -- * Construction
53 , empty
54
55 -- * Insertion
56 , insertWith
57
58 -- * Delete\/Update
59 , delete
60 , updateWith
61
62 -- * Traversal
63 -- ** Fold
64 , foldWithKey
65
66 -- * Conversion
67 , keys
68 ) where
69
70 import Data.Bits
71
72 import Data.Maybe (Maybe(..))
73 import GHC.Base hiding (foldr)
74 import GHC.Num (Num(..))
75 import GHC.Real (fromIntegral)
76 import GHC.Show (Show(showsPrec), showParen, shows, showString)
77
78 #if __GLASGOW_HASKELL__
79 import GHC.Word (Word(..))
80 #else
81 import Data.Word
82 #endif
83
84 -- | A @Nat@ is a natural machine word (an unsigned Int)
85 type Nat = Word
86
87 natFromInt :: Key -> Nat
88 natFromInt i = fromIntegral i
89
90 intFromNat :: Nat -> Key
91 intFromNat w = fromIntegral w
92
93 shiftRL :: Nat -> Key -> Nat
94 #if __GLASGOW_HASKELL__
95 -- GHC: use unboxing to get @shiftRL@ inlined.
96 shiftRL (W# x) (I# i) = W# (shiftRL# x i)
97 #else
98 shiftRL x i = shiftR x i
99 #endif
100
101 ------------------------------------------------------------------------
102 -- Types
103
104 -- | A map of integers to values @a@.
105 data IntMap a = Nil
106 | Tip {-# UNPACK #-} !Key !a
107 | Bin {-# UNPACK #-} !Prefix
108 {-# UNPACK #-} !Mask
109 !(IntMap a)
110 !(IntMap a)
111
112 type Prefix = Int
113 type Mask = Int
114 type Key = Int
115
116 ------------------------------------------------------------------------
117 -- Query
118
119 -- | /O(min(n,W))/ Lookup the value at a key in the map. See also
120 -- 'Data.Map.lookup'.
121 lookup :: Key -> IntMap a -> Maybe a
122 lookup k t = let nk = natFromInt k in seq nk (lookupN nk t)
123
124 lookupN :: Nat -> IntMap a -> Maybe a
125 lookupN k t
126 = case t of
127 Bin _ m l r
128 | zeroN k (natFromInt m) -> lookupN k l
129 | otherwise -> lookupN k r
130 Tip kx x
131 | (k == natFromInt kx) -> Just x
132 | otherwise -> Nothing
133 Nil -> Nothing
134
135 -- | /O(min(n,W))/. Is the key a member of the map?
136 --
137 -- > member 5 (fromList [(5,'a'), (3,'b')]) == True
138 -- > member 1 (fromList [(5,'a'), (3,'b')]) == False
139
140 member :: Key -> IntMap a -> Bool
141 member k m
142 = case lookup k m of
143 Nothing -> False
144 Just _ -> True
145
146 ------------------------------------------------------------------------
147 -- Construction
148
149 -- | /O(1)/ The empty map.
150 --
151 -- > empty == fromList []
152 -- > size empty == 0
153 empty :: IntMap a
154 empty = Nil
155
156 ------------------------------------------------------------------------
157 -- Insert
158
159 -- | /O(min(n,W))/ Insert with a function, combining new value and old
160 -- value. @insertWith f key value mp@ will insert the pair (key,
161 -- value) into @mp@ if key does not exist in the map. If the key does
162 -- exist, the function will insert the pair (key, f new_value
163 -- old_value). The result is a pair where the first element is the
164 -- old value, if one was present, and the second is the modified map.
165 insertWith :: (a -> a -> a) -> Key -> a -> IntMap a -> (Maybe a, IntMap a)
166 insertWith f k x t = case t of
167 Bin p m l r
168 | nomatch k p m -> (Nothing, join k (Tip k x) p t)
169 | zero k m -> let (found, l') = insertWith f k x l
170 in (found, Bin p m l' r)
171 | otherwise -> let (found, r') = insertWith f k x r
172 in (found, Bin p m l r')
173 Tip ky y
174 | k == ky -> (Just y, Tip k (f x y))
175 | otherwise -> (Nothing, join k (Tip k x) ky t)
176 Nil -> (Nothing, Tip k x)
177
178
179 ------------------------------------------------------------------------
180 -- Delete/Update
181
182 -- | /O(min(n,W))/. Delete a key and its value from the map. When the
183 -- key is not a member of the map, the original map is returned. The
184 -- result is a pair where the first element is the value associated
185 -- with the deleted key, if one existed, and the second element is the
186 -- modified map.
187 delete :: Key -> IntMap a -> (Maybe a, IntMap a)
188 delete k t = case t of
189 Bin p m l r
190 | nomatch k p m -> (Nothing, t)
191 | zero k m -> let (found, l') = delete k l
192 in (found, bin p m l' r)
193 | otherwise -> let (found, r') = delete k r
194 in (found, bin p m l r')
195 Tip ky y
196 | k == ky -> (Just y, Nil)
197 | otherwise -> (Nothing, t)
198 Nil -> (Nothing, Nil)
199
200 updateWith :: (a -> Maybe a) -> Key -> IntMap a -> (Maybe a, IntMap a)
201 updateWith f k t = case t of
202 Bin p m l r
203 | nomatch k p m -> (Nothing, t)
204 | zero k m -> let (found, l') = updateWith f k l
205 in (found, bin p m l' r)
206 | otherwise -> let (found, r') = updateWith f k r
207 in (found, bin p m l r')
208 Tip ky y
209 | k == ky -> case (f y) of
210 Just y' -> (Just y, Tip ky y')
211 Nothing -> (Just y, Nil)
212 | otherwise -> (Nothing, t)
213 Nil -> (Nothing, Nil)
214 -- | /O(n)/. Fold the keys and values in the map, such that
215 -- @'foldWithKey' f z == 'Prelude.foldr' ('uncurry' f) z . 'toAscList'@.
216 -- For example,
217 --
218 -- > keys map = foldWithKey (\k x ks -> k:ks) [] map
219 --
220 -- > let f k a result = result ++ "(" ++ (show k) ++ ":" ++ a ++ ")"
221 -- > foldWithKey f "Map: " (fromList [(5,"a"), (3,"b")]) == "Map: (5:a)(3:b)"
222
223 foldWithKey :: (Key -> a -> b -> b) -> b -> IntMap a -> b
224 foldWithKey f z t
225 = foldr f z t
226
227 -- | /O(n)/. Convert the map to a list of key\/value pairs.
228 --
229 -- > toList (fromList [(5,"a"), (3,"b")]) == [(3,"b"), (5,"a")]
230 -- > toList empty == []
231
232 toList :: IntMap a -> [(Key,a)]
233 toList t
234 = foldWithKey (\k x xs -> (k,x):xs) [] t
235
236 foldr :: (Key -> a -> b -> b) -> b -> IntMap a -> b
237 foldr f z t
238 = case t of
239 Bin 0 m l r | m < 0 -> foldr' f (foldr' f z l) r -- put negative numbers before.
240 Bin _ _ _ _ -> foldr' f z t
241 Tip k x -> f k x z
242 Nil -> z
243
244 foldr' :: (Key -> a -> b -> b) -> b -> IntMap a -> b
245 foldr' f z t
246 = case t of
247 Bin _ _ l r -> foldr' f (foldr' f z r) l
248 Tip k x -> f k x z
249 Nil -> z
250
251 -- | /O(n)/. Return all keys of the map in ascending order.
252 --
253 -- > keys (fromList [(5,"a"), (3,"b")]) == [3,5]
254 -- > keys empty == []
255
256 keys :: IntMap a -> [Key]
257 keys m
258 = foldWithKey (\k _ ks -> k:ks) [] m
259
260 ------------------------------------------------------------------------
261 -- Eq
262
263 instance Eq a => Eq (IntMap a) where
264 t1 == t2 = equal t1 t2
265 t1 /= t2 = nequal t1 t2
266
267 equal :: Eq a => IntMap a -> IntMap a -> Bool
268 equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
269 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
270 equal (Tip kx x) (Tip ky y)
271 = (kx == ky) && (x==y)
272 equal Nil Nil = True
273 equal _ _ = False
274
275 nequal :: Eq a => IntMap a -> IntMap a -> Bool
276 nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
277 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
278 nequal (Tip kx x) (Tip ky y)
279 = (kx /= ky) || (x/=y)
280 nequal Nil Nil = False
281 nequal _ _ = True
282
283 instance Show a => Show (IntMap a) where
284 showsPrec d m = showParen (d > 10) $
285 showString "fromList " . shows (toList m)
286
287 ------------------------------------------------------------------------
288 -- Utility functions
289
290 join :: Prefix -> IntMap a -> Prefix -> IntMap a -> IntMap a
291 join p1 t1 p2 t2
292 | zero p1 m = Bin p m t1 t2
293 | otherwise = Bin p m t2 t1
294 where
295 m = branchMask p1 p2
296 p = mask p1 m
297
298 -- | @bin@ assures that we never have empty trees within a tree.
299 bin :: Prefix -> Mask -> IntMap a -> IntMap a -> IntMap a
300 bin _ _ l Nil = l
301 bin _ _ Nil r = r
302 bin p m l r = Bin p m l r
303
304 ------------------------------------------------------------------------
305 -- Endian independent bit twiddling
306
307 zero :: Key -> Mask -> Bool
308 zero i m = (natFromInt i) .&. (natFromInt m) == 0
309
310 nomatch :: Key -> Prefix -> Mask -> Bool
311 nomatch i p m = (mask i m) /= p
312
313 mask :: Key -> Mask -> Prefix
314 mask i m = maskW (natFromInt i) (natFromInt m)
315
316 zeroN :: Nat -> Nat -> Bool
317 zeroN i m = (i .&. m) == 0
318
319 ------------------------------------------------------------------------
320 -- Big endian operations
321
322 maskW :: Nat -> Nat -> Prefix
323 maskW i m = intFromNat (i .&. (complement (m-1) `xor` m))
324
325 branchMask :: Prefix -> Prefix -> Mask
326 branchMask p1 p2
327 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
328
329 {-
330 Finding the highest bit mask in a word [x] can be done efficiently in
331 three ways:
332
333 * convert to a floating point value and the mantissa tells us the
334 [log2(x)] that corresponds with the highest bit position. The mantissa
335 is retrieved either via the standard C function [frexp] or by some bit
336 twiddling on IEEE compatible numbers (float). Note that one needs to
337 use at least [double] precision for an accurate mantissa of 32 bit
338 numbers.
339
340 * use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
341
342 * use processor specific assembler instruction (asm).
343
344 The most portable way would be [bit], but is it efficient enough?
345 I have measured the cycle counts of the different methods on an AMD
346 Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
347
348 highestBitMask: method cycles
349 --------------
350 frexp 200
351 float 33
352 bit 11
353 asm 12
354
355 Wow, the bit twiddling is on today's RISC like machines even faster
356 than a single CISC instruction (BSR)!
357 -}
358
359 -- | @highestBitMask@ returns a word where only the highest bit is
360 -- set. It is found by first setting all bits in lower positions than
361 -- the highest bit and than taking an exclusive or with the original
362 -- value. Allthough the function may look expensive, GHC compiles
363 -- this into excellent C code that subsequently compiled into highly
364 -- efficient machine code. The algorithm is derived from Jorg Arndt's
365 -- FXT library.
366 highestBitMask :: Nat -> Nat
367 highestBitMask x0
368 = case (x0 .|. shiftRL x0 1) of
369 x1 -> case (x1 .|. shiftRL x1 2) of
370 x2 -> case (x2 .|. shiftRL x2 4) of
371 x3 -> case (x3 .|. shiftRL x3 8) of
372 x4 -> case (x4 .|. shiftRL x4 16) of
373 x5 -> case (x5 .|. shiftRL x5 32) of -- for 64 bit platforms
374 x6 -> (x6 `xor` (shiftRL x6 1))