Add DVarSet - a deterministic set of Vars
[ghc.git] / compiler / utils / UniqDFM.hs
1 {-
2 (c) Bartosz Nitka, Facebook, 2015
3
4 UniqDFM: Specialised deterministic finite maps, for things with @Uniques@.
5
6 Basically, the things need to be in class @Uniquable@, and we use the
7 @getUnique@ method to grab their @Uniques@.
8
9 This is very similar to @UniqFM@, the major difference being that the order of
10 folding is not dependent on @Unique@ ordering, giving determinism.
11 Currently the ordering is determined by insertion order.
12
13 See Note [Unique Determinism] in Unique for explanation why @Unique@ ordering
14 is not deterministic.
15 -}
16
17 {-# LANGUAGE DeriveDataTypeable #-}
18 {-# LANGUAGE DeriveFunctor #-}
19 {-# LANGUAGE FlexibleContexts #-}
20 {-# OPTIONS_GHC -Wall #-}
21
22 module UniqDFM (
23 -- * Unique-keyed deterministic mappings
24 UniqDFM, -- abstract type
25
26 -- ** Manipulating those mappings
27 emptyUDFM,
28 unitUDFM,
29 addToUDFM,
30 delFromUDFM,
31 plusUDFM,
32 lookupUDFM,
33 elemUDFM,
34 foldUDFM,
35 eltsUDFM,
36 filterUDFM,
37 isNullUDFM,
38 sizeUDFM,
39 intersectUDFM,
40 minusUDFM,
41
42 udfmToList,
43 udfmToUfm,
44 alwaysUnsafeUfmToUdfm,
45 ) where
46
47 import FastString
48 import Unique ( Uniquable(..), Unique, getKey )
49 import Outputable
50
51 import qualified Data.IntMap as M
52 import Data.Typeable
53 import Data.Data
54 import Data.List (sortBy)
55 import Data.Function (on)
56 import UniqFM (UniqFM, listToUFM_Directly, ufmToList)
57
58 -- Note [Deterministic UniqFM]
59 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
60 -- A @UniqDFM@ is just like @UniqFM@ with the following additional
61 -- property: the function `udfmToList` returns the elements in some
62 -- deterministic order not depending on the Unique key for those elements.
63 --
64 -- If the client of the map performs operations on the map in deterministic
65 -- order then `udfmToList` returns them in deterministic order.
66 --
67 -- There is an implementation cost: each element is given a serial number
68 -- as it is added, and `udfmToList` sorts it's result by this serial
69 -- number. So you should only use `UniqDFM` if you need the deterministic
70 -- property.
71 --
72 -- `foldUDFM` also preserves determinism.
73 --
74 -- Normal @UniqFM@ when you turn it into a list will use
75 -- Data.IntMap.toList function that returns the elements in the order of
76 -- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with
77 -- with a list ordered by @Uniques@.
78 -- The order of @Uniques@ is known to be not stable across rebuilds.
79 -- See Note [Unique Determinism] in Unique.
80 --
81 --
82 -- There's more than one way to implement this. The implementation here tags
83 -- every value with the insertion time that can later be used to sort the
84 -- values when asked to convert to a list.
85 --
86 -- An alternative would be to have
87 --
88 -- data UniqDFM ele = UDFM (M.IntMap ele) [ele]
89 --
90 -- where the list determines the order. This makes deletion tricky as we'd
91 -- only accumulate elements in that list, but makes merging easier as you
92 -- can just merge both structures independently.
93 -- Deletion can probably be done in amortized fashion when the size of the
94 -- list is twice the size of the set.
95
96 -- | A type of values tagged with insertion time
97 data TaggedVal val =
98 TaggedVal
99 val
100 {-# UNPACK #-} !Int -- ^ insertion time
101 deriving (Data, Typeable)
102
103 taggedFst :: TaggedVal val -> val
104 taggedFst (TaggedVal v _) = v
105
106 taggedSnd :: TaggedVal val -> Int
107 taggedSnd (TaggedVal _ i) = i
108
109 instance Eq val => Eq (TaggedVal val) where
110 (TaggedVal v1 _) == (TaggedVal v2 _) = v1 == v2
111
112 instance Functor TaggedVal where
113 fmap f (TaggedVal val i) = TaggedVal (f val) i
114
115 -- | Type of unique deterministic finite maps
116 data UniqDFM ele =
117 UDFM
118 !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
119 -- values are tagged with insertion time.
120 -- The invariant is that all the tags will
121 -- be distinct within a single map
122 {-# UNPACK #-} !Int -- Upper bound on the values' insertion
123 -- time. See Note [Overflow on plusUDFM]
124 deriving (Data, Typeable, Functor)
125
126 emptyUDFM :: UniqDFM elt
127 emptyUDFM = UDFM M.empty 0
128
129 unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt
130 unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1
131
132 addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt -> UniqDFM elt
133 addToUDFM (UDFM m i) k v =
134 UDFM (M.insert (getKey $ getUnique k) (TaggedVal v i) m) (i + 1)
135
136 addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt
137 addToUDFM_Directly (UDFM m i) u v =
138 UDFM (M.insert (getKey u) (TaggedVal v i) m) (i + 1)
139
140 addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
141 addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v)
142
143 delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt
144 delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i
145
146 -- Note [Overflow on plusUDFM]
147 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
148 -- There are multiple ways of implementing plusUDFM.
149 -- The main problem that needs to be solved is overlap on times of
150 -- insertion between different keys in two maps.
151 -- Consider:
152 --
153 -- A = fromList [(a, (x, 1))]
154 -- B = fromList [(b, (y, 1))]
155 --
156 -- If you merge them naively you end up with:
157 --
158 -- C = fromList [(a, (x, 1)), (b, (y, 1))]
159 --
160 -- Which loses information about ordering and brings us back into
161 -- non-deterministic world.
162 --
163 -- The solution I considered before would increment the tags on one of the
164 -- sets by the upper bound of the other set. The problem with this approach
165 -- is that you'll run out of tags for some merge patterns.
166 -- Say you start with A with upper bound 1, you merge A with A to get A' and
167 -- the upper bound becomes 2. You merge A' with A' and the upper bound
168 -- doubles again. After 64 merges you overflow.
169 -- This solution would have the same time complexity as plusUFM, namely O(n+m).
170 --
171 -- The solution I ended up with has time complexity of
172 -- O(m log m + m * min (n+m, W)) where m is the smaller set.
173 -- It simply inserts the elements of the smaller set into the larger
174 -- set in the order that they were inserted into the smaller set. That's
175 -- O(m log m) for extracting the elements from the smaller set in the
176 -- insertion order and O(m * min(n+m, W)) to insert them into the bigger
177 -- set.
178
179 plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
180 plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
181 -- we will use the upper bound on the tag as a proxy for the set size,
182 -- to insert the smaller one into the bigger one
183 | i > j = insertUDFMIntoLeft udfml udfmr
184 | otherwise = insertUDFMIntoLeft udfmr udfml
185
186 insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
187 insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr
188
189 lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt
190 lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m
191
192 elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool
193 elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m
194
195 -- | Performs a deterministic fold over the UniqDFM.
196 -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
197 foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
198 foldUDFM k z m = foldr k z (eltsUDFM m)
199
200 eltsUDFM :: UniqDFM elt -> [elt]
201 eltsUDFM (UDFM m _i) =
202 map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m
203
204 filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt
205 filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i
206
207 -- | Converts `UniqDFM` to a list, with elements in deterministic order.
208 -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
209 udfmToList :: UniqDFM elt -> [(Unique, elt)]
210 udfmToList (UDFM m _i) =
211 [ (getUnique k, taggedFst v)
212 | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]
213
214 isNullUDFM :: UniqDFM elt -> Bool
215 isNullUDFM (UDFM m _) = M.null m
216
217 sizeUDFM :: UniqDFM elt -> Int
218 sizeUDFM (UDFM m _i) = M.size m
219
220 intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
221 intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
222 -- M.intersection is left biased, that means the result will only have
223 -- a subset of elements from the left set, so `i` is a good upper bound.
224
225 minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1
226 minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
227 -- M.difference returns a subset of a left set, so `i` is a good upper
228 -- bound.
229
230 -- | This allows for lossy conversion from UniqDFM to UniqFM
231 udfmToUfm :: UniqDFM elt -> UniqFM elt
232 udfmToUfm (UDFM m _i) =
233 listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m]
234
235 listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt
236 listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM
237
238 -- This should not be used in commited code, provided for convenience to
239 -- make ad-hoc conversions when developing
240 alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt
241 alwaysUnsafeUfmToUdfm = listToUDFM_Directly . ufmToList
242
243 -- Output-ery
244
245 instance Outputable a => Outputable (UniqDFM a) where
246 ppr ufm = pprUniqDFM ppr ufm
247
248 pprUniqDFM :: (a -> SDoc) -> UniqDFM a -> SDoc
249 pprUniqDFM ppr_elt ufm
250 = brackets $ fsep $ punctuate comma $
251 [ ppr uq <+> ptext (sLit ":->") <+> ppr_elt elt
252 | (uq, elt) <- udfmToList ufm ]