cmm/CBE: Use foldLocalRegsDefd
[ghc.git] / compiler / cmm / CmmCommonBlockElim.hs
1 {-# LANGUAGE GADTs, BangPatterns #-}
2 module CmmCommonBlockElim
3 ( elimCommonBlocks
4 )
5 where
6
7
8 import GhcPrelude hiding (iterate, succ, unzip, zip)
9
10 import BlockId
11 import Cmm
12 import CmmUtils
13 import CmmSwitch (eqSwitchTargetWith)
14 import CmmContFlowOpt
15 -- import PprCmm ()
16
17 import Hoopl.Block
18 import Hoopl.Graph
19 import Hoopl.Label
20 import Hoopl.Collections
21 import Data.Bits
22 import Data.Maybe (mapMaybe)
23 import qualified Data.List as List
24 import Data.Word
25 import qualified Data.Map as M
26 import Outputable
27 import DynFlags (DynFlags)
28 import UniqFM
29 import UniqDFM
30 import qualified TrieMap as TM
31 import Unique
32 import Control.Arrow (first, second)
33
34 -- -----------------------------------------------------------------------------
35 -- Eliminate common blocks
36
37 -- If two blocks are identical except for the label on the first node,
38 -- then we can eliminate one of the blocks. To ensure that the semantics
39 -- of the program are preserved, we have to rewrite each predecessor of the
40 -- eliminated block to proceed with the block we keep.
41
42 -- The algorithm iterates over the blocks in the graph,
43 -- checking whether it has seen another block that is equal modulo labels.
44 -- If so, then it adds an entry in a map indicating that the new block
45 -- is made redundant by the old block.
46 -- Otherwise, it is added to the useful blocks.
47
48 -- To avoid comparing every block with every other block repeatedly, we group
49 -- them by
50 -- * a hash of the block, ignoring labels (explained below)
51 -- * the list of outgoing labels
52 -- The hash is invariant under relabeling, so we only ever compare within
53 -- the same group of blocks.
54 --
55 -- The list of outgoing labels is updated as we merge blocks (that is why they
56 -- are not included in the hash, which we want to calculate only once).
57 --
58 -- All in all, two blocks should never be compared if they have different
59 -- hashes, and at most once otherwise. Previously, we were slower, and people
60 -- rightfully complained: #10397
61
62 -- TODO: Use optimization fuel
63 elimCommonBlocks :: DynFlags -> CmmGraph -> CmmGraph
64 elimCommonBlocks dflags g = replaceLabels env $ copyTicks env g
65 where
66 env = iterate dflags mapEmpty blocks_with_key
67 groups = groupByInt (hash_block dflags) (postorderDfs g)
68 blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups]
69
70 -- Invariant: The blocks in the list are pairwise distinct
71 -- (so avoid comparing them again)
72 type DistinctBlocks = [CmmBlock]
73 type Key = [Label]
74 type Subst = LabelMap BlockId
75
76 -- The outer list groups by hash. We retain this grouping throughout.
77 iterate :: DynFlags -> Subst -> [[(Key, DistinctBlocks)]] -> Subst
78 iterate dflags subst blocks
79 | mapNull new_substs = subst
80 | otherwise = iterate dflags subst' updated_blocks
81 where
82 grouped_blocks :: [[(Key, [DistinctBlocks])]]
83 grouped_blocks = map groupByLabel blocks
84
85 merged_blocks :: [[(Key, DistinctBlocks)]]
86 (new_substs, merged_blocks) =
87 List.mapAccumL (List.mapAccumL go) mapEmpty grouped_blocks
88 where
89 go !new_subst1 (k,dbs) = (new_subst1 `mapUnion` new_subst2, (k,db))
90 where
91 (new_subst2, db) = mergeBlockList dflags subst dbs
92
93 subst' = subst `mapUnion` new_substs
94 updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks
95
96 mergeBlocks :: DynFlags -> Subst
97 -> DistinctBlocks -> DistinctBlocks
98 -> (Subst, DistinctBlocks)
99 mergeBlocks dflags subst existing new = go new
100 where
101 go [] = (mapEmpty, existing)
102 go (b:bs) =
103 case List.find (eqBlockBodyWith dflags (eqBid subst) b) existing of
104 -- This block is a duplicate. Drop it, and add it to the substitution
105 Just b' -> first (mapInsert (entryLabel b) (entryLabel b')) $ go bs
106 -- This block is not a duplicate, keep it.
107 Nothing -> second (b:) $ go bs
108
109 mergeBlockList :: DynFlags -> Subst -> [DistinctBlocks]
110 -> (Subst, DistinctBlocks)
111 mergeBlockList _ _ [] = pprPanic "mergeBlockList" empty
112 mergeBlockList dflags subst (b:bs) = go mapEmpty b bs
113 where
114 go !new_subst1 b [] = (new_subst1, b)
115 go !new_subst1 b1 (b2:bs) = go new_subst b bs
116 where
117 (new_subst2, b) = mergeBlocks dflags subst b1 b2
118 new_subst = new_subst1 `mapUnion` new_subst2
119
120
121 -- -----------------------------------------------------------------------------
122 -- Hashing and equality on blocks
123
124 -- Below here is mostly boilerplate: hashing blocks ignoring labels,
125 -- and comparing blocks modulo a label mapping.
126
127 -- To speed up comparisons, we hash each basic block modulo jump labels.
128 -- The hashing is a bit arbitrary (the numbers are completely arbitrary),
129 -- but it should be fast and good enough.
130
131 -- We want to get as many small buckets as possible, as comparing blocks is
132 -- expensive. So include as much as possible in the hash. Ideally everything
133 -- that is compared with (==) in eqBlockBodyWith.
134
135 {-
136 Note [Equivalence up to local registers in CBE]
137 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
138
139 CBE treats two blocks which are equivalent up to alpha-renaming of locally-bound
140 local registers as equivalent. This was not always the case (see #14226) but is
141 quite important for effective CBE. For instance, consider the blocks,
142
143 c2VZ: // global
144 _c2Yd::I64 = _s2Se::I64 + 1;
145 _s2Sx::I64 = _c2Yd::I64;
146 _s2Se::I64 = _s2Sx::I64;
147 goto c2TE;
148
149 c2VY: // global
150 _c2Yb::I64 = _s2Se::I64 + 1;
151 _s2Sw::I64 = _c2Yb::I64;
152 _s2Se::I64 = _s2Sw::I64;
153 goto c2TE;
154
155 These clearly implement precisely the same logic, differing only register
156 naming. This happens quite often in the code produced by GHC.
157
158 This alpha-equivalence relation must be accounted for in two places:
159
160 1. the block hash function (hash_block), which we use for approximate "binning"
161 2. the exact block comparison function, which computes pair-wise equivalence
162
163 In (1) we maintain a de Bruijn numbering of each block's locally-bound local
164 registers and compute the hash relative to this numbering.
165
166 For (2) we maintain a substitution which maps the local registers of one block
167 onto those of the other. We then compare local registers modulo this
168 substitution.
169
170 -}
171
172 type HashCode = Int
173
174 type LocalRegEnv a = UniqFM a
175 type DeBruijn = Int
176
177 -- | Maintains a de Bruijn numbering of local registers bound within a block.
178 --
179 -- See Note [Equivalence up to local registers in CBE]
180 data HashEnv = HashEnv { localRegHashEnv :: !(LocalRegEnv DeBruijn)
181 , nextIndex :: !DeBruijn
182 }
183
184 hash_block :: DynFlags -> CmmBlock -> HashCode
185 hash_block dflags block =
186 --pprTrace "hash_block" (ppr (entryLabel block) $$ ppr hash)
187 hash
188 where hash_fst _ (env, h) = (env, h)
189 hash_mid m (env, h) = let (env', h') = hash_node env m
190 in (env', h' + h `shiftL` 1)
191 hash_lst m (env, h) = let (env', h') = hash_node env m
192 in (env', h' + h `shiftL` 1)
193
194 hash =
195 let (_, raw_hash) =
196 foldBlockNodesF3 (hash_fst, hash_mid, hash_lst)
197 block
198 (emptyEnv, 0 :: Word32)
199 emptyEnv = HashEnv mempty 0
200 in fromIntegral (raw_hash .&. (0x7fffffff :: Word32))
201 -- UniqFM doesn't like negative Ints
202
203 hash_node :: HashEnv -> CmmNode O x -> (HashEnv, Word32)
204 hash_node env n =
205 (env', hash)
206 where
207 hash =
208 case n of
209 n | dont_care n -> 0 -- don't care
210 -- don't include register as it is a binding occurrence
211 CmmAssign (CmmLocal _) e -> hash_e env e
212 CmmAssign r e -> hash_reg env r + hash_e env e
213 CmmStore e e' -> hash_e env e + hash_e env e'
214 CmmUnsafeForeignCall t _ as
215 -> hash_tgt env t + hash_list (hash_e env) as
216 CmmBranch _ -> 23 -- NB. ignore the label
217 CmmCondBranch p _ _ _ -> hash_e env p
218 CmmCall e _ _ _ _ _ -> hash_e env e
219 CmmForeignCall t _ _ _ _ _ _ -> hash_tgt env t
220 CmmSwitch e _ -> hash_e env e
221 _ -> error "hash_node: unknown Cmm node!"
222 env' = foldLocalRegsDefd dflags (flip bind_local_reg) env n
223
224 hash_reg :: HashEnv -> CmmReg -> Word32
225 hash_reg env (CmmLocal localReg)
226 | Just idx <- lookupUFM (localRegHashEnv env) localReg
227 = fromIntegral idx
228 | otherwise
229 = hash_unique localReg -- important for performance, see #10397
230 hash_reg _ (CmmGlobal _) = 19
231
232 hash_e :: HashEnv -> CmmExpr -> Word32
233 hash_e _ (CmmLit l) = hash_lit l
234 hash_e env (CmmLoad e _) = 67 + hash_e env e
235 hash_e env (CmmReg r) = hash_reg env r
236 hash_e env (CmmMachOp _ es) = hash_list (hash_e env) es -- pessimal - no operator check
237 hash_e env (CmmRegOff r i) = hash_reg env r + cvt i
238 hash_e _ (CmmStackSlot _ _) = 13
239
240 hash_lit :: CmmLit -> Word32
241 hash_lit (CmmInt i _) = fromInteger i
242 hash_lit (CmmFloat r _) = truncate r
243 hash_lit (CmmVec ls) = hash_list hash_lit ls
244 hash_lit (CmmLabel _) = 119 -- ugh
245 hash_lit (CmmLabelOff _ i) = cvt $ 199 + i
246 hash_lit (CmmLabelDiffOff _ _ i) = cvt $ 299 + i
247 hash_lit (CmmBlock _) = 191 -- ugh
248 hash_lit (CmmHighStackMark) = cvt 313
249
250 hash_tgt :: HashEnv -> ForeignTarget -> Word32
251 hash_tgt env (ForeignTarget e _) = hash_e env e
252 hash_tgt _ (PrimTarget _) = 31 -- lots of these
253
254 hash_list f = List.foldl' (\z x -> f x + z) (0::Word32)
255
256 cvt = fromInteger . toInteger
257
258 bind_local_reg :: LocalReg -> HashEnv -> HashEnv
259 bind_local_reg reg env =
260 env { localRegHashEnv =
261 addToUFM (localRegHashEnv env) reg (nextIndex env)
262 , nextIndex = nextIndex env + 1
263 }
264
265 hash_unique :: Uniquable a => a -> Word32
266 hash_unique = cvt . getKey . getUnique
267
268 -- | Ignore these node types for equality
269 dont_care :: CmmNode O x -> Bool
270 dont_care CmmComment {} = True
271 dont_care CmmTick {} = True
272 dont_care CmmUnwind {} = True
273 dont_care _other = False
274
275 -- Utilities: equality and substitution on the graph.
276
277 -- Given a map ``subst'' from BlockID -> BlockID, we define equality.
278 eqBid :: LabelMap BlockId -> BlockId -> BlockId -> Bool
279 eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid'
280 lookupBid :: LabelMap BlockId -> BlockId -> BlockId
281 lookupBid subst bid = case mapLookup bid subst of
282 Just bid -> lookupBid subst bid
283 Nothing -> bid
284
285 -- | Maps the local registers of one block to those of another
286 --
287 -- See Note [Equivalence up to local registers in CBE]
288 type LocalRegMapping = LocalRegEnv LocalReg
289
290 -- Middle nodes and expressions can contain BlockIds, in particular in
291 -- CmmStackSlot and CmmBlock, so we have to use a special equality for
292 -- these.
293 --
294 eqMiddleWith :: DynFlags
295 -> (BlockId -> BlockId -> Bool)
296 -> LocalRegMapping
297 -> CmmNode O O -> CmmNode O O
298 -> (LocalRegMapping, Bool)
299 eqMiddleWith dflags eqBid env a b =
300 case (a, b) of
301 -- registers aren't compared since they are binding occurrences
302 (CmmAssign (CmmLocal _) e1, CmmAssign (CmmLocal _) e2) ->
303 let eq = eqExprWith eqBid env e1 e2
304 in (env', eq)
305
306 (CmmAssign r1 e1, CmmAssign r2 e2) ->
307 let eq = r1 == r2
308 && eqExprWith eqBid env e1 e2
309 in (env', eq)
310
311 (CmmStore l1 r1, CmmStore l2 r2) ->
312 let eq = eqExprWith eqBid env l1 l2
313 && eqExprWith eqBid env r1 r2
314 in (env', eq)
315
316 -- result registers aren't compared since they are binding occurrences
317 (CmmUnsafeForeignCall t1 _ a1, CmmUnsafeForeignCall t2 _ a2) ->
318 let eq = t1 == t2
319 && and (zipWith (eqExprWith eqBid env) a1 a2)
320 in (env', eq)
321
322 _ -> (env, False)
323 where
324 env' = List.foldl' (\acc (ra,rb) -> addToUFM acc ra rb) emptyUFM
325 $ List.zip defd_a defd_b
326 defd_a = foldLocalRegsDefd dflags (flip (:)) [] a
327 defd_b = foldLocalRegsDefd dflags (flip (:)) [] b
328
329 eqExprWith :: (BlockId -> BlockId -> Bool)
330 -> LocalRegMapping
331 -> CmmExpr -> CmmExpr
332 -> Bool
333 eqExprWith eqBid env = eq
334 where
335 CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2
336 CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2
337 CmmReg r1 `eq` CmmReg r2 = r1 `eqReg` r2
338 CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1 `eqReg` r2 && i1==i2
339 CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2
340 CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2
341 _e1 `eq` _e2 = False
342
343 xs `eqs` ys = and (zipWith eq xs ys)
344
345 -- See Note [Equivalence up to local registers in CBE]
346 CmmLocal a `eqReg` CmmLocal b
347 | Just a' <- lookupUFM env a
348 = a' == b
349 a `eqReg` b = a == b
350
351 eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2
352 eqLit l1 l2 = l1 == l2
353
354 eqArea Old Old = True
355 eqArea (Young id1) (Young id2) = eqBid id1 id2
356 eqArea _ _ = False
357
358 -- Equality on the body of a block, modulo a function mapping block
359 -- IDs to block IDs.
360 eqBlockBodyWith :: DynFlags
361 -> (BlockId -> BlockId -> Bool)
362 -> CmmBlock -> CmmBlock -> Bool
363 eqBlockBodyWith dflags eqBid block block'
364 {-
365 | equal = pprTrace "equal" (vcat [ppr block, ppr block']) True
366 | otherwise = pprTrace "not equal" (vcat [ppr block, ppr block']) False
367 -}
368 = equal
369 where (_,m,l) = blockSplit block
370 nodes = filter (not . dont_care) (blockToList m)
371 (_,m',l') = blockSplit block'
372 nodes' = filter (not . dont_care) (blockToList m')
373
374 (env_mid, eqs_mid) =
375 List.mapAccumL (\acc (a,b) -> eqMiddleWith dflags eqBid acc a b)
376 emptyUFM
377 (List.zip nodes nodes')
378 equal = and eqs_mid && eqLastWith eqBid env_mid l l'
379
380
381 eqLastWith :: (BlockId -> BlockId -> Bool) -> LocalRegMapping
382 -> CmmNode O C -> CmmNode O C -> Bool
383 eqLastWith eqBid env a b =
384 case (a, b) of
385 (CmmBranch bid1, CmmBranch bid2) -> eqBid bid1 bid2
386 (CmmCondBranch c1 t1 f1 l1, CmmCondBranch c2 t2 f2 l2) ->
387 eqExprWith eqBid env c1 c2 && l1 == l2 && eqBid t1 t2 && eqBid f1 f2
388 (CmmCall t1 c1 g1 a1 r1 u1, CmmCall t2 c2 g2 a2 r2 u2) ->
389 t1 == t2
390 && eqMaybeWith eqBid c1 c2
391 && a1 == a2 && r1 == r2 && u1 == u2 && g1 == g2
392 (CmmSwitch e1 ids1, CmmSwitch e2 ids2) ->
393 eqExprWith eqBid env e1 e2 && eqSwitchTargetWith eqBid ids1 ids2
394 -- result registers aren't compared since they are binding occurrences
395 (CmmForeignCall t1 _ a1 s1 ret_args1 ret_off1 intrbl1,
396 CmmForeignCall t2 _ a2 s2 ret_args2 ret_off2 intrbl2) ->
397 t1 == t2
398 && and (zipWith (eqExprWith eqBid env) a1 a2)
399 && s1 == s2
400 && ret_args1 == ret_args2
401 && ret_off1 == ret_off2
402 && intrbl1 == intrbl2
403 _ -> False
404
405 eqMaybeWith :: (a -> b -> Bool) -> Maybe a -> Maybe b -> Bool
406 eqMaybeWith eltEq (Just e) (Just e') = eltEq e e'
407 eqMaybeWith _ Nothing Nothing = True
408 eqMaybeWith _ _ _ = False
409
410 -- | Given a block map, ensure that all "target" blocks are covered by
411 -- the same ticks as the respective "source" blocks. This not only
412 -- means copying ticks, but also adjusting tick scopes where
413 -- necessary.
414 copyTicks :: LabelMap BlockId -> CmmGraph -> CmmGraph
415 copyTicks env g
416 | mapNull env = g
417 | otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap
418 where -- Reverse block merge map
419 blockMap = toBlockMap g
420 revEnv = mapFoldWithKey insertRev M.empty env
421 insertRev k x = M.insertWith (const (k:)) x [k]
422 -- Copy ticks and scopes into the given block
423 copyTo block = case M.lookup (entryLabel block) revEnv of
424 Nothing -> block
425 Just ls -> foldr copy block $ mapMaybe (flip mapLookup blockMap) ls
426 copy from to =
427 let ticks = blockTicks from
428 CmmEntry _ scp0 = firstNode from
429 (CmmEntry lbl scp1, code) = blockSplitHead to
430 in CmmEntry lbl (combineTickScopes scp0 scp1) `blockJoinHead`
431 foldr blockCons code (map CmmTick ticks)
432
433 -- Group by [Label]
434 groupByLabel :: [(Key, a)] -> [(Key, [a])]
435 groupByLabel = go (TM.emptyTM :: TM.ListMap UniqDFM a)
436 where
437 go !m [] = TM.foldTM (:) m []
438 go !m ((k,v) : entries) = go (TM.alterTM k' adjust m) entries
439 where k' = map getUnique k
440 adjust Nothing = Just (k,[v])
441 adjust (Just (_,vs)) = Just (k,v:vs)
442
443
444 groupByInt :: (a -> Int) -> [a] -> [[a]]
445 groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs
446 -- See Note [Unique Determinism and code generation]
447 where go m x = alterUFM (Just . maybe [x] (x:)) m (f x)