Make inert_model and inert_eqs deterministic sets
[ghc.git] / compiler / utils / UniqDFM.hs
1 {-
2 (c) Bartosz Nitka, Facebook, 2015
3
4 UniqDFM: Specialised deterministic finite maps, for things with @Uniques@.
5
6 Basically, the things need to be in class @Uniquable@, and we use the
7 @getUnique@ method to grab their @Uniques@.
8
9 This is very similar to @UniqFM@, the major difference being that the order of
10 folding is not dependent on @Unique@ ordering, giving determinism.
11 Currently the ordering is determined by insertion order.
12
13 See Note [Unique Determinism] in Unique for explanation why @Unique@ ordering
14 is not deterministic.
15 -}
16
17 {-# LANGUAGE DeriveDataTypeable #-}
18 {-# LANGUAGE DeriveFunctor #-}
19 {-# LANGUAGE FlexibleContexts #-}
20 {-# OPTIONS_GHC -Wall #-}
21
22 module UniqDFM (
23 -- * Unique-keyed deterministic mappings
24 UniqDFM, -- abstract type
25
26 -- ** Manipulating those mappings
27 emptyUDFM,
28 unitUDFM,
29 addToUDFM,
30 addToUDFM_C,
31 delFromUDFM,
32 delListFromUDFM,
33 adjustUDFM,
34 alterUDFM,
35 mapUDFM,
36 plusUDFM,
37 plusUDFM_C,
38 lookupUDFM,
39 elemUDFM,
40 foldUDFM,
41 eltsUDFM,
42 filterUDFM,
43 isNullUDFM,
44 sizeUDFM,
45 intersectUDFM,
46 intersectsUDFM,
47 disjointUDFM, disjointUdfmUfm,
48 minusUDFM,
49 udfmMinusUFM,
50 partitionUDFM,
51 anyUDFM,
52
53 udfmToList,
54 udfmToUfm,
55 nonDetFoldUDFM,
56 alwaysUnsafeUfmToUdfm,
57 ) where
58
59 import Unique ( Uniquable(..), Unique, getKey )
60 import Outputable
61
62 import qualified Data.IntMap as M
63 import Data.Typeable
64 import Data.Data
65 import Data.List (sortBy)
66 import Data.Function (on)
67 import UniqFM (UniqFM, listToUFM_Directly, ufmToList, ufmToIntMap)
68
69 -- Note [Deterministic UniqFM]
70 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
71 -- A @UniqDFM@ is just like @UniqFM@ with the following additional
72 -- property: the function `udfmToList` returns the elements in some
73 -- deterministic order not depending on the Unique key for those elements.
74 --
75 -- If the client of the map performs operations on the map in deterministic
76 -- order then `udfmToList` returns them in deterministic order.
77 --
78 -- There is an implementation cost: each element is given a serial number
79 -- as it is added, and `udfmToList` sorts it's result by this serial
80 -- number. So you should only use `UniqDFM` if you need the deterministic
81 -- property.
82 --
83 -- `foldUDFM` also preserves determinism.
84 --
85 -- Normal @UniqFM@ when you turn it into a list will use
86 -- Data.IntMap.toList function that returns the elements in the order of
87 -- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with
88 -- with a list ordered by @Uniques@.
89 -- The order of @Uniques@ is known to be not stable across rebuilds.
90 -- See Note [Unique Determinism] in Unique.
91 --
92 --
93 -- There's more than one way to implement this. The implementation here tags
94 -- every value with the insertion time that can later be used to sort the
95 -- values when asked to convert to a list.
96 --
97 -- An alternative would be to have
98 --
99 -- data UniqDFM ele = UDFM (M.IntMap ele) [ele]
100 --
101 -- where the list determines the order. This makes deletion tricky as we'd
102 -- only accumulate elements in that list, but makes merging easier as you
103 -- can just merge both structures independently.
104 -- Deletion can probably be done in amortized fashion when the size of the
105 -- list is twice the size of the set.
106
107 -- | A type of values tagged with insertion time
108 data TaggedVal val =
109 TaggedVal
110 val
111 {-# UNPACK #-} !Int -- ^ insertion time
112 deriving (Data, Typeable)
113
114 taggedFst :: TaggedVal val -> val
115 taggedFst (TaggedVal v _) = v
116
117 taggedSnd :: TaggedVal val -> Int
118 taggedSnd (TaggedVal _ i) = i
119
120 instance Eq val => Eq (TaggedVal val) where
121 (TaggedVal v1 _) == (TaggedVal v2 _) = v1 == v2
122
123 instance Functor TaggedVal where
124 fmap f (TaggedVal val i) = TaggedVal (f val) i
125
126 -- | Type of unique deterministic finite maps
127 data UniqDFM ele =
128 UDFM
129 !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
130 -- values are tagged with insertion time.
131 -- The invariant is that all the tags will
132 -- be distinct within a single map
133 {-# UNPACK #-} !Int -- Upper bound on the values' insertion
134 -- time. See Note [Overflow on plusUDFM]
135 deriving (Data, Typeable, Functor)
136
137 emptyUDFM :: UniqDFM elt
138 emptyUDFM = UDFM M.empty 0
139
140 unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt
141 unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1
142
143 addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt -> UniqDFM elt
144 addToUDFM (UDFM m i) k v =
145 UDFM (M.insert (getKey $ getUnique k) (TaggedVal v i) m) (i + 1)
146
147 addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt
148 addToUDFM_Directly (UDFM m i) u v =
149 UDFM (M.insert (getKey u) (TaggedVal v i) m) (i + 1)
150
151 addToUDFM_Directly_C
152 :: (elt -> elt -> elt) -> UniqDFM elt -> Unique -> elt -> UniqDFM elt
153 addToUDFM_Directly_C f (UDFM m i) u v =
154 UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
155 where
156 tf (TaggedVal a j) (TaggedVal b _) = TaggedVal (f a b) j
157
158 addToUDFM_C
159 :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result
160 -> UniqDFM elt -- old
161 -> key -> elt -- new
162 -> UniqDFM elt -- result
163 addToUDFM_C f (UDFM m i) k v =
164 UDFM (M.insertWith tf (getKey $ getUnique k) (TaggedVal v i) m) (i + 1)
165 where
166 tf (TaggedVal a j) (TaggedVal b _) = TaggedVal (f b a) j
167 -- Flip the arguments, just like
168 -- addToUFM_C does.
169
170 addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
171 addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v)
172
173 addListToUDFM_Directly_C
174 :: (elt -> elt -> elt) -> UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
175 addListToUDFM_Directly_C f = foldl (\m (k, v) -> addToUDFM_Directly_C f m k v)
176
177 delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt
178 delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i
179
180 plusUDFM_C :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
181 plusUDFM_C f udfml@(UDFM _ i) udfmr@(UDFM _ j)
182 -- we will use the upper bound on the tag as a proxy for the set size,
183 -- to insert the smaller one into the bigger one
184 | i > j = insertUDFMIntoLeft_C f udfml udfmr
185 | otherwise = insertUDFMIntoLeft_C f udfmr udfml
186
187 -- Note [Overflow on plusUDFM]
188 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
189 -- There are multiple ways of implementing plusUDFM.
190 -- The main problem that needs to be solved is overlap on times of
191 -- insertion between different keys in two maps.
192 -- Consider:
193 --
194 -- A = fromList [(a, (x, 1))]
195 -- B = fromList [(b, (y, 1))]
196 --
197 -- If you merge them naively you end up with:
198 --
199 -- C = fromList [(a, (x, 1)), (b, (y, 1))]
200 --
201 -- Which loses information about ordering and brings us back into
202 -- non-deterministic world.
203 --
204 -- The solution I considered before would increment the tags on one of the
205 -- sets by the upper bound of the other set. The problem with this approach
206 -- is that you'll run out of tags for some merge patterns.
207 -- Say you start with A with upper bound 1, you merge A with A to get A' and
208 -- the upper bound becomes 2. You merge A' with A' and the upper bound
209 -- doubles again. After 64 merges you overflow.
210 -- This solution would have the same time complexity as plusUFM, namely O(n+m).
211 --
212 -- The solution I ended up with has time complexity of
213 -- O(m log m + m * min (n+m, W)) where m is the smaller set.
214 -- It simply inserts the elements of the smaller set into the larger
215 -- set in the order that they were inserted into the smaller set. That's
216 -- O(m log m) for extracting the elements from the smaller set in the
217 -- insertion order and O(m * min(n+m, W)) to insert them into the bigger
218 -- set.
219
220 plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
221 plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
222 -- we will use the upper bound on the tag as a proxy for the set size,
223 -- to insert the smaller one into the bigger one
224 | i > j = insertUDFMIntoLeft udfml udfmr
225 | otherwise = insertUDFMIntoLeft udfmr udfml
226
227 insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
228 insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr
229
230 insertUDFMIntoLeft_C
231 :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
232 insertUDFMIntoLeft_C f udfml udfmr =
233 addListToUDFM_Directly_C f udfml $ udfmToList udfmr
234
235 lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt
236 lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m
237
238 elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool
239 elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m
240
241 -- | Performs a deterministic fold over the UniqDFM.
242 -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
243 foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
244 foldUDFM k z m = foldr k z (eltsUDFM m)
245
246 -- | Performs a nondeterministic fold over the UniqDFM.
247 -- It's O(n), same as the corresponding function on `UniqFM`.
248 -- If you use this please provide a justification why it doesn't introduce
249 -- nondeterminism.
250 nonDetFoldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
251 nonDetFoldUDFM k z (UDFM m _i) = foldr k z $ map taggedFst $ M.elems m
252
253 eltsUDFM :: UniqDFM elt -> [elt]
254 eltsUDFM (UDFM m _i) =
255 map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m
256
257 filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt
258 filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i
259
260 -- | Converts `UniqDFM` to a list, with elements in deterministic order.
261 -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
262 udfmToList :: UniqDFM elt -> [(Unique, elt)]
263 udfmToList (UDFM m _i) =
264 [ (getUnique k, taggedFst v)
265 | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]
266
267 isNullUDFM :: UniqDFM elt -> Bool
268 isNullUDFM (UDFM m _) = M.null m
269
270 sizeUDFM :: UniqDFM elt -> Int
271 sizeUDFM (UDFM m _i) = M.size m
272
273 intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
274 intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
275 -- M.intersection is left biased, that means the result will only have
276 -- a subset of elements from the left set, so `i` is a good upper bound.
277
278 intersectsUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
279 intersectsUDFM x y = isNullUDFM (x `intersectUDFM` y)
280
281 disjointUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
282 disjointUDFM (UDFM x _i) (UDFM y _j) = M.null (M.intersection x y)
283
284 disjointUdfmUfm :: UniqDFM elt -> UniqFM elt2 -> Bool
285 disjointUdfmUfm (UDFM x _i) y = M.null (M.intersection x (ufmToIntMap y))
286
287 minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1
288 minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
289 -- M.difference returns a subset of a left set, so `i` is a good upper
290 -- bound.
291
292 udfmMinusUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
293 udfmMinusUFM (UDFM x i) y = UDFM (M.difference x (ufmToIntMap y)) i
294 -- M.difference returns a subset of a left set, so `i` is a good upper
295 -- bound.
296
297 -- | Partition UniqDFM into two UniqDFMs according to the predicate
298 partitionUDFM :: (elt -> Bool) -> UniqDFM elt -> (UniqDFM elt, UniqDFM elt)
299 partitionUDFM p (UDFM m i) =
300 case M.partition (p . taggedFst) m of
301 (left, right) -> (UDFM left i, UDFM right i)
302
303 -- | Delete a list of elements from a UniqDFM
304 delListFromUDFM :: Uniquable key => UniqDFM elt -> [key] -> UniqDFM elt
305 delListFromUDFM = foldl delFromUDFM
306
307 -- | This allows for lossy conversion from UniqDFM to UniqFM
308 udfmToUfm :: UniqDFM elt -> UniqFM elt
309 udfmToUfm (UDFM m _i) =
310 listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m]
311
312 listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt
313 listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM
314
315 -- | Apply a function to a particular element
316 adjustUDFM :: Uniquable key => (elt -> elt) -> UniqDFM elt -> key -> UniqDFM elt
317 adjustUDFM f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey $ getUnique k) m) i
318
319 -- | The expression (alterUDFM f k map) alters value x at k, or absence
320 -- thereof. alterUDFM can be used to insert, delete, or update a value in
321 -- UniqDFM. Use addToUDFM, delFromUDFM or adjustUDFM when possible, they are
322 -- more efficient.
323 alterUDFM
324 :: Uniquable key
325 => (Maybe elt -> Maybe elt) -- How to adjust
326 -> UniqDFM elt -- old
327 -> key -- new
328 -> UniqDFM elt -- result
329 alterUDFM f (UDFM m i) k =
330 UDFM (M.alter alterf (getKey $ getUnique k) m) (i + 1)
331 where
332 alterf Nothing = inject $ f Nothing
333 alterf (Just (TaggedVal v _)) = inject $ f (Just v)
334 inject Nothing = Nothing
335 inject (Just v) = Just $ TaggedVal v i
336
337 -- | Map a function over every value in a UniqDFM
338 mapUDFM :: (elt1 -> elt2) -> UniqDFM elt1 -> UniqDFM elt2
339 mapUDFM f (UDFM m i) = UDFM (M.map (fmap f) m) i
340
341 anyUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
342 anyUDFM p (UDFM m _i) = M.fold ((||) . p . taggedFst) False m
343
344 instance Monoid (UniqDFM a) where
345 mempty = emptyUDFM
346 mappend = plusUDFM
347
348 -- This should not be used in commited code, provided for convenience to
349 -- make ad-hoc conversions when developing
350 alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt
351 alwaysUnsafeUfmToUdfm = listToUDFM_Directly . ufmToList
352
353 -- Output-ery
354
355 instance Outputable a => Outputable (UniqDFM a) where
356 ppr ufm = pprUniqDFM ppr ufm
357
358 pprUniqDFM :: (a -> SDoc) -> UniqDFM a -> SDoc
359 pprUniqDFM ppr_elt ufm
360 = brackets $ fsep $ punctuate comma $
361 [ ppr uq <+> text ":->" <+> ppr_elt elt
362 | (uq, elt) <- udfmToList ufm ]