testsuite: Assert that testsuite ways are known
[ghc.git] / compiler / utils / UniqDFM.hs
1 {-
2 (c) Bartosz Nitka, Facebook, 2015
3
4 UniqDFM: Specialised deterministic finite maps, for things with @Uniques@.
5
6 Basically, the things need to be in class @Uniquable@, and we use the
7 @getUnique@ method to grab their @Uniques@.
8
9 This is very similar to @UniqFM@, the major difference being that the order of
10 folding is not dependent on @Unique@ ordering, giving determinism.
11 Currently the ordering is determined by insertion order.
12
13 See Note [Unique Determinism] in Unique for explanation why @Unique@ ordering
14 is not deterministic.
15 -}
16
17 {-# LANGUAGE DeriveDataTypeable #-}
18 {-# LANGUAGE DeriveFunctor #-}
19 {-# LANGUAGE FlexibleContexts #-}
20 {-# LANGUAGE TupleSections #-}
21 {-# OPTIONS_GHC -Wall #-}
22
23 module UniqDFM (
24 -- * Unique-keyed deterministic mappings
25 UniqDFM, -- abstract type
26
27 -- ** Manipulating those mappings
28 emptyUDFM,
29 unitUDFM,
30 addToUDFM,
31 addToUDFM_C,
32 addListToUDFM,
33 delFromUDFM,
34 delListFromUDFM,
35 adjustUDFM,
36 alterUDFM,
37 mapUDFM,
38 plusUDFM,
39 plusUDFM_C,
40 lookupUDFM, lookupUDFM_Directly,
41 elemUDFM,
42 foldUDFM,
43 eltsUDFM,
44 filterUDFM, filterUDFM_Directly,
45 isNullUDFM,
46 sizeUDFM,
47 intersectUDFM, udfmIntersectUFM,
48 intersectsUDFM,
49 disjointUDFM, disjointUdfmUfm,
50 equalKeysUDFM,
51 minusUDFM,
52 listToUDFM,
53 udfmMinusUFM,
54 partitionUDFM,
55 anyUDFM, allUDFM,
56 pprUniqDFM, pprUDFM,
57
58 udfmToList,
59 udfmToUfm,
60 nonDetFoldUDFM,
61 alwaysUnsafeUfmToUdfm,
62 ) where
63
64 import GhcPrelude
65
66 import Unique ( Uniquable(..), Unique, getKey )
67 import Outputable
68
69 import qualified Data.IntMap as M
70 import Data.Data
71 import Data.Functor.Classes (Eq1 (..))
72 import Data.List (sortBy)
73 import Data.Function (on)
74 import qualified Data.Semigroup as Semi
75 import UniqFM (UniqFM, listToUFM_Directly, nonDetUFMToList, ufmToIntMap)
76
77 -- Note [Deterministic UniqFM]
78 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
79 -- A @UniqDFM@ is just like @UniqFM@ with the following additional
80 -- property: the function `udfmToList` returns the elements in some
81 -- deterministic order not depending on the Unique key for those elements.
82 --
83 -- If the client of the map performs operations on the map in deterministic
84 -- order then `udfmToList` returns them in deterministic order.
85 --
86 -- There is an implementation cost: each element is given a serial number
87 -- as it is added, and `udfmToList` sorts it's result by this serial
88 -- number. So you should only use `UniqDFM` if you need the deterministic
89 -- property.
90 --
91 -- `foldUDFM` also preserves determinism.
92 --
93 -- Normal @UniqFM@ when you turn it into a list will use
94 -- Data.IntMap.toList function that returns the elements in the order of
95 -- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with
96 -- with a list ordered by @Uniques@.
97 -- The order of @Uniques@ is known to be not stable across rebuilds.
98 -- See Note [Unique Determinism] in Unique.
99 --
100 --
101 -- There's more than one way to implement this. The implementation here tags
102 -- every value with the insertion time that can later be used to sort the
103 -- values when asked to convert to a list.
104 --
105 -- An alternative would be to have
106 --
107 -- data UniqDFM ele = UDFM (M.IntMap ele) [ele]
108 --
109 -- where the list determines the order. This makes deletion tricky as we'd
110 -- only accumulate elements in that list, but makes merging easier as you
111 -- can just merge both structures independently.
112 -- Deletion can probably be done in amortized fashion when the size of the
113 -- list is twice the size of the set.
114
115 -- | A type of values tagged with insertion time
116 data TaggedVal val =
117 TaggedVal
118 val
119 {-# UNPACK #-} !Int -- ^ insertion time
120 deriving (Data, Functor)
121
122 taggedFst :: TaggedVal val -> val
123 taggedFst (TaggedVal v _) = v
124
125 taggedSnd :: TaggedVal val -> Int
126 taggedSnd (TaggedVal _ i) = i
127
128 instance Eq val => Eq (TaggedVal val) where
129 (TaggedVal v1 _) == (TaggedVal v2 _) = v1 == v2
130
131 -- | Type of unique deterministic finite maps
132 data UniqDFM ele =
133 UDFM
134 !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
135 -- values are tagged with insertion time.
136 -- The invariant is that all the tags will
137 -- be distinct within a single map
138 {-# UNPACK #-} !Int -- Upper bound on the values' insertion
139 -- time. See Note [Overflow on plusUDFM]
140 deriving (Data, Functor)
141
142 -- | Deterministic, in O(n log n).
143 instance Foldable UniqDFM where
144 foldr = foldUDFM
145
146 -- | Deterministic, in O(n log n).
147 instance Traversable UniqDFM where
148 traverse f = fmap listToUDFM_Directly
149 . traverse (\(u,a) -> (u,) <$> f a)
150 . udfmToList
151
152 emptyUDFM :: UniqDFM elt
153 emptyUDFM = UDFM M.empty 0
154
155 unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt
156 unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1
157
158 -- The new binding always goes to the right of existing ones
159 addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt -> UniqDFM elt
160 addToUDFM m k v = addToUDFM_Directly m (getUnique k) v
161
162 -- The new binding always goes to the right of existing ones
163 addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt
164 addToUDFM_Directly (UDFM m i) u v
165 = UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
166 where
167 tf (TaggedVal new_v _) (TaggedVal _ old_i) = TaggedVal new_v old_i
168 -- Keep the old tag, but insert the new value
169 -- This means that udfmToList typically returns elements
170 -- in the order of insertion, rather than the reverse
171
172 addToUDFM_Directly_C
173 :: (elt -> elt -> elt) -- old -> new -> result
174 -> UniqDFM elt
175 -> Unique -> elt
176 -> UniqDFM elt
177 addToUDFM_Directly_C f (UDFM m i) u v
178 = UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
179 where
180 tf (TaggedVal new_v _) (TaggedVal old_v old_i)
181 = TaggedVal (f old_v new_v) old_i
182 -- Flip the arguments, because M.insertWith uses (new->old->result)
183 -- but f needs (old->new->result)
184 -- Like addToUDFM_Directly, keep the old tag
185
186 addToUDFM_C
187 :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result
188 -> UniqDFM elt -- old
189 -> key -> elt -- new
190 -> UniqDFM elt -- result
191 addToUDFM_C f m k v = addToUDFM_Directly_C f m (getUnique k) v
192
193 addListToUDFM :: Uniquable key => UniqDFM elt -> [(key,elt)] -> UniqDFM elt
194 addListToUDFM = foldl' (\m (k, v) -> addToUDFM m k v)
195
196 addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
197 addListToUDFM_Directly = foldl' (\m (k, v) -> addToUDFM_Directly m k v)
198
199 addListToUDFM_Directly_C
200 :: (elt -> elt -> elt) -> UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
201 addListToUDFM_Directly_C f = foldl' (\m (k, v) -> addToUDFM_Directly_C f m k v)
202
203 delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt
204 delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i
205
206 plusUDFM_C :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
207 plusUDFM_C f udfml@(UDFM _ i) udfmr@(UDFM _ j)
208 -- we will use the upper bound on the tag as a proxy for the set size,
209 -- to insert the smaller one into the bigger one
210 | i > j = insertUDFMIntoLeft_C f udfml udfmr
211 | otherwise = insertUDFMIntoLeft_C f udfmr udfml
212
213 -- Note [Overflow on plusUDFM]
214 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
215 -- There are multiple ways of implementing plusUDFM.
216 -- The main problem that needs to be solved is overlap on times of
217 -- insertion between different keys in two maps.
218 -- Consider:
219 --
220 -- A = fromList [(a, (x, 1))]
221 -- B = fromList [(b, (y, 1))]
222 --
223 -- If you merge them naively you end up with:
224 --
225 -- C = fromList [(a, (x, 1)), (b, (y, 1))]
226 --
227 -- Which loses information about ordering and brings us back into
228 -- non-deterministic world.
229 --
230 -- The solution I considered before would increment the tags on one of the
231 -- sets by the upper bound of the other set. The problem with this approach
232 -- is that you'll run out of tags for some merge patterns.
233 -- Say you start with A with upper bound 1, you merge A with A to get A' and
234 -- the upper bound becomes 2. You merge A' with A' and the upper bound
235 -- doubles again. After 64 merges you overflow.
236 -- This solution would have the same time complexity as plusUFM, namely O(n+m).
237 --
238 -- The solution I ended up with has time complexity of
239 -- O(m log m + m * min (n+m, W)) where m is the smaller set.
240 -- It simply inserts the elements of the smaller set into the larger
241 -- set in the order that they were inserted into the smaller set. That's
242 -- O(m log m) for extracting the elements from the smaller set in the
243 -- insertion order and O(m * min(n+m, W)) to insert them into the bigger
244 -- set.
245
246 plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
247 plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
248 -- we will use the upper bound on the tag as a proxy for the set size,
249 -- to insert the smaller one into the bigger one
250 | i > j = insertUDFMIntoLeft udfml udfmr
251 | otherwise = insertUDFMIntoLeft udfmr udfml
252
253 insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
254 insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr
255
256 insertUDFMIntoLeft_C
257 :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
258 insertUDFMIntoLeft_C f udfml udfmr =
259 addListToUDFM_Directly_C f udfml $ udfmToList udfmr
260
261 lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt
262 lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m
263
264 lookupUDFM_Directly :: UniqDFM elt -> Unique -> Maybe elt
265 lookupUDFM_Directly (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey k) m
266
267 elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool
268 elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m
269
270 -- | Performs a deterministic fold over the UniqDFM.
271 -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
272 foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
273 foldUDFM k z m = foldr k z (eltsUDFM m)
274
275 -- | Performs a nondeterministic fold over the UniqDFM.
276 -- It's O(n), same as the corresponding function on `UniqFM`.
277 -- If you use this please provide a justification why it doesn't introduce
278 -- nondeterminism.
279 nonDetFoldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
280 nonDetFoldUDFM k z (UDFM m _i) = foldr k z $ map taggedFst $ M.elems m
281
282 eltsUDFM :: UniqDFM elt -> [elt]
283 eltsUDFM (UDFM m _i) =
284 map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m
285
286 filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt
287 filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i
288
289 filterUDFM_Directly :: (Unique -> elt -> Bool) -> UniqDFM elt -> UniqDFM elt
290 filterUDFM_Directly p (UDFM m i) = UDFM (M.filterWithKey p' m) i
291 where
292 p' k (TaggedVal v _) = p (getUnique k) v
293
294 -- | Converts `UniqDFM` to a list, with elements in deterministic order.
295 -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
296 udfmToList :: UniqDFM elt -> [(Unique, elt)]
297 udfmToList (UDFM m _i) =
298 [ (getUnique k, taggedFst v)
299 | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]
300
301 -- Determines whether two 'UniqDFM's contain the same keys.
302 equalKeysUDFM :: UniqDFM a -> UniqDFM b -> Bool
303 equalKeysUDFM (UDFM m1 _) (UDFM m2 _) = liftEq (\_ _ -> True) m1 m2
304
305 isNullUDFM :: UniqDFM elt -> Bool
306 isNullUDFM (UDFM m _) = M.null m
307
308 sizeUDFM :: UniqDFM elt -> Int
309 sizeUDFM (UDFM m _i) = M.size m
310
311 intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
312 intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
313 -- M.intersection is left biased, that means the result will only have
314 -- a subset of elements from the left set, so `i` is a good upper bound.
315
316 udfmIntersectUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
317 udfmIntersectUFM (UDFM x i) y = UDFM (M.intersection x (ufmToIntMap y)) i
318 -- M.intersection is left biased, that means the result will only have
319 -- a subset of elements from the left set, so `i` is a good upper bound.
320
321 intersectsUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
322 intersectsUDFM x y = isNullUDFM (x `intersectUDFM` y)
323
324 disjointUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
325 disjointUDFM (UDFM x _i) (UDFM y _j) = M.null (M.intersection x y)
326
327 disjointUdfmUfm :: UniqDFM elt -> UniqFM elt2 -> Bool
328 disjointUdfmUfm (UDFM x _i) y = M.null (M.intersection x (ufmToIntMap y))
329
330 minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1
331 minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
332 -- M.difference returns a subset of a left set, so `i` is a good upper
333 -- bound.
334
335 udfmMinusUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
336 udfmMinusUFM (UDFM x i) y = UDFM (M.difference x (ufmToIntMap y)) i
337 -- M.difference returns a subset of a left set, so `i` is a good upper
338 -- bound.
339
340 -- | Partition UniqDFM into two UniqDFMs according to the predicate
341 partitionUDFM :: (elt -> Bool) -> UniqDFM elt -> (UniqDFM elt, UniqDFM elt)
342 partitionUDFM p (UDFM m i) =
343 case M.partition (p . taggedFst) m of
344 (left, right) -> (UDFM left i, UDFM right i)
345
346 -- | Delete a list of elements from a UniqDFM
347 delListFromUDFM :: Uniquable key => UniqDFM elt -> [key] -> UniqDFM elt
348 delListFromUDFM = foldl' delFromUDFM
349
350 -- | This allows for lossy conversion from UniqDFM to UniqFM
351 udfmToUfm :: UniqDFM elt -> UniqFM elt
352 udfmToUfm (UDFM m _i) =
353 listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m]
354
355 listToUDFM :: Uniquable key => [(key,elt)] -> UniqDFM elt
356 listToUDFM = foldl' (\m (k, v) -> addToUDFM m k v) emptyUDFM
357
358 listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt
359 listToUDFM_Directly = foldl' (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM
360
361 -- | Apply a function to a particular element
362 adjustUDFM :: Uniquable key => (elt -> elt) -> UniqDFM elt -> key -> UniqDFM elt
363 adjustUDFM f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey $ getUnique k) m) i
364
365 -- | The expression (alterUDFM f k map) alters value x at k, or absence
366 -- thereof. alterUDFM can be used to insert, delete, or update a value in
367 -- UniqDFM. Use addToUDFM, delFromUDFM or adjustUDFM when possible, they are
368 -- more efficient.
369 alterUDFM
370 :: Uniquable key
371 => (Maybe elt -> Maybe elt) -- How to adjust
372 -> UniqDFM elt -- old
373 -> key -- new
374 -> UniqDFM elt -- result
375 alterUDFM f (UDFM m i) k =
376 UDFM (M.alter alterf (getKey $ getUnique k) m) (i + 1)
377 where
378 alterf Nothing = inject $ f Nothing
379 alterf (Just (TaggedVal v _)) = inject $ f (Just v)
380 inject Nothing = Nothing
381 inject (Just v) = Just $ TaggedVal v i
382
383 -- | Map a function over every value in a UniqDFM
384 mapUDFM :: (elt1 -> elt2) -> UniqDFM elt1 -> UniqDFM elt2
385 mapUDFM f (UDFM m i) = UDFM (M.map (fmap f) m) i
386
387 anyUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
388 anyUDFM p (UDFM m _i) = M.foldr ((||) . p . taggedFst) False m
389
390 allUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
391 allUDFM p (UDFM m _i) = M.foldr ((&&) . p . taggedFst) True m
392
393 instance Semi.Semigroup (UniqDFM a) where
394 (<>) = plusUDFM
395
396 instance Monoid (UniqDFM a) where
397 mempty = emptyUDFM
398 mappend = (Semi.<>)
399
400 -- This should not be used in commited code, provided for convenience to
401 -- make ad-hoc conversions when developing
402 alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt
403 alwaysUnsafeUfmToUdfm = listToUDFM_Directly . nonDetUFMToList
404
405 -- Output-ery
406
407 instance Outputable a => Outputable (UniqDFM a) where
408 ppr ufm = pprUniqDFM ppr ufm
409
410 pprUniqDFM :: (a -> SDoc) -> UniqDFM a -> SDoc
411 pprUniqDFM ppr_elt ufm
412 = brackets $ fsep $ punctuate comma $
413 [ ppr uq <+> text ":->" <+> ppr_elt elt
414 | (uq, elt) <- udfmToList ufm ]
415
416 pprUDFM :: UniqDFM a -- ^ The things to be pretty printed
417 -> ([a] -> SDoc) -- ^ The pretty printing function to use on the elements
418 -> SDoc -- ^ 'SDoc' where the things have been pretty
419 -- printed
420 pprUDFM ufm pp = pp (eltsUDFM ufm)