Fix #11407.
[ghc.git] / compiler / cmm / CmmOpt.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Cmm optimisation
6 --
7 -- (c) The University of Glasgow 2006
8 --
9 -----------------------------------------------------------------------------
10
11 module CmmOpt (
12 constantFoldNode,
13 constantFoldExpr,
14 cmmMachOpFold,
15 cmmMachOpFoldM
16 ) where
17
18 #include "HsVersions.h"
19
20 import CmmUtils
21 import Cmm
22 import DynFlags
23
24 import Outputable
25 import Platform
26
27 import Data.Bits
28 import Data.Maybe
29
30
31 constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x
32 constantFoldNode dflags = mapExp (constantFoldExpr dflags)
33
34 constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr
35 constantFoldExpr dflags = wrapRecExp f
36 where f (CmmMachOp op args) = cmmMachOpFold dflags op args
37 f (CmmRegOff r 0) = CmmReg r
38 f e = e
39
40 -- -----------------------------------------------------------------------------
41 -- MachOp constant folder
42
43 -- Now, try to constant-fold the MachOps. The arguments have already
44 -- been optimized and folded.
45
46 cmmMachOpFold
47 :: DynFlags
48 -> MachOp -- The operation from an CmmMachOp
49 -> [CmmExpr] -- The optimized arguments
50 -> CmmExpr
51
52 cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args)
53
54 -- Returns Nothing if no changes, useful for Hoopl, also reduces
55 -- allocation!
56 cmmMachOpFoldM
57 :: DynFlags
58 -> MachOp
59 -> [CmmExpr]
60 -> Maybe CmmExpr
61
62 cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
63 = Just $ case op of
64 MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
65 MO_Not _ -> CmmLit (CmmInt (complement x) rep)
66
67 -- these are interesting: we must first narrow to the
68 -- "from" type, in order to truncate to the correct size.
69 -- The final narrow/widen to the destination type
70 -- is implicit in the CmmLit.
71 MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
72 MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
73 MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
74
75 _ -> panic "cmmMachOpFoldM: unknown unary op"
76
77
78 -- Eliminate conversion NOPs
79 cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
80 cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
81
82 -- Eliminate nested conversions where possible
83 cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]]
84 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
85 Just (_, rep3,signed2) <- isIntConversion conv_outer
86 = case () of
87 -- widen then narrow to the same size is a nop
88 _ | rep1 < rep2 && rep1 == rep3 -> Just x
89 -- Widen then narrow to different size: collapse to single conversion
90 -- but remember to use the signedness from the widening, just in case
91 -- the final conversion is a widen.
92 | rep1 < rep2 && rep2 > rep3 ->
93 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
94 -- Nested widenings: collapse if the signedness is the same
95 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
96 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
97 -- Nested narrowings: collapse
98 | rep1 > rep2 && rep2 > rep3 ->
99 Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x]
100 | otherwise ->
101 Nothing
102 where
103 isIntConversion (MO_UU_Conv rep1 rep2)
104 = Just (rep1,rep2,False)
105 isIntConversion (MO_SS_Conv rep1 rep2)
106 = Just (rep1,rep2,True)
107 isIntConversion _ = Nothing
108
109 intconv True = MO_SS_Conv
110 intconv False = MO_UU_Conv
111
112 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
113 -- but what if the architecture only supports word-sized loads, should
114 -- we do the transformation anyway?
115
116 cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
117 = case mop of
118 -- for comparisons: don't forget to narrow the arguments before
119 -- comparing, since they might be out of range.
120 MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags))
121 MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags))
122
123 MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags))
124 MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags))
125 MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags))
126 MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags))
127
128 MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags))
129 MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags))
130 MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags))
131 MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags))
132
133 MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
134 MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
135 MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
136 MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
137 MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r)
138 MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
139 MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
140
141 MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r)
142 MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r)
143 MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r)
144
145 MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
146 MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
147 MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
148
149 _ -> Nothing
150
151 where
152 x_u = narrowU xrep x
153 y_u = narrowU xrep y
154 x_s = narrowS xrep x
155 y_s = narrowS xrep y
156
157
158 -- When possible, shift the constants to the right-hand side, so that we
159 -- can match for strength reductions. Note that the code generator will
160 -- also assume that constants have been shifted to the right when
161 -- possible.
162
163 cmmMachOpFoldM dflags op [x@(CmmLit _), y]
164 | not (isLit y) && isCommutableMachOp op
165 = Just (cmmMachOpFold dflags op [y, x])
166
167 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
168 -- moved to the right, it is more likely that we will find
169 -- opportunities for constant folding when the expression is
170 -- right-associated.
171 --
172 -- ToDo: this appears to introduce a quadratic behaviour due to the
173 -- nested cmmMachOpFold. Can we fix this?
174 --
175 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
176 -- is also a lit (otherwise arg1 would be on the right). If we
177 -- put arg1 on the left of the rearranged expression, we'll get into a
178 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
179 --
180 -- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
181 -- PicBaseReg from the corresponding label (or label difference).
182 --
183 cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
184 | mop2 `associates_with` mop1
185 && not (isLit arg1) && not (isPicReg arg1)
186 = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]])
187 where
188 MO_Add{} `associates_with` MO_Sub{} = True
189 mop1 `associates_with` mop2 =
190 mop1 == mop2 && isAssociativeMachOp mop1
191
192 -- special case: (a - b) + c ==> a + (c - b)
193 cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
194 | not (isLit arg1) && not (isPicReg arg1)
195 = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]])
196
197 -- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N)
198 --
199 -- this is better because lit+N is a single link-time constant (e.g. a
200 -- CmmLabelOff), so the right-hand expression needs only one
201 -- instruction, whereas the left needs two. This happens when pointer
202 -- tagging gives us label+offset, and PIC turns the label into
203 -- PicBaseReg + label.
204 --
205 cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit]
206 , CmmLit (CmmInt n rep) ]
207 | isPicReg pic
208 = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
209 where off = fromIntegral (narrowS rep n)
210
211 -- Make a RegOff if we can
212 cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
213 = Just $ cmmRegOff reg (fromIntegral (narrowS rep n))
214 cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
215 = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n))
216 cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
217 = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n))
218 cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
219 = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n))
220
221 -- Fold label(+/-)offset into a CmmLit where possible
222
223 cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)]
224 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
225 cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit]
226 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
227 cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
228 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
229
230
231 -- Comparison of literal with widened operand: perform the comparison
232 -- at the smaller width, as long as the literal is within range.
233
234 -- We can't do the reverse trick, when the operand is narrowed:
235 -- narrowing throws away bits from the operand, there's no way to do
236 -- the same comparison at the larger size.
237
238 cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
239 | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
240 platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64],
241 -- if the operand is widened:
242 Just (rep, signed, narrow_fn) <- maybe_conversion conv,
243 -- and this is a comparison operation:
244 Just narrow_cmp <- maybe_comparison cmp rep signed,
245 -- and the literal fits in the smaller size:
246 i == narrow_fn rep i
247 -- then we can do the comparison at the smaller size
248 = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)])
249 where
250 maybe_conversion (MO_UU_Conv from to)
251 | to > from
252 = Just (from, False, narrowU)
253 maybe_conversion (MO_SS_Conv from to)
254 | to > from
255 = Just (from, True, narrowS)
256
257 -- don't attempt to apply this optimisation when the source
258 -- is a float; see #1916
259 maybe_conversion _ = Nothing
260
261 -- careful (#2080): if the original comparison was signed, but
262 -- we were doing an unsigned widen, then we must do an
263 -- unsigned comparison at the smaller size.
264 maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep)
265 maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep)
266 maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep)
267 maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep)
268 maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep)
269 maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep)
270 maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep)
271 maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep)
272 maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep)
273 maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
274 maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
275 maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
276 maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
277 maybe_comparison _ _ _ = Nothing
278
279 -- We can often do something with constants of 0 and 1 ...
280
281 cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))]
282 = case mop of
283 MO_Add _ -> Just x
284 MO_Sub _ -> Just x
285 MO_Mul _ -> Just y
286 MO_And _ -> Just y
287 MO_Or _ -> Just x
288 MO_Xor _ -> Just x
289 MO_Shl _ -> Just x
290 MO_S_Shr _ -> Just x
291 MO_U_Shr _ -> Just x
292 MO_Ne _ | isComparisonExpr x -> Just x
293 MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x'
294 MO_U_Gt _ | isComparisonExpr x -> Just x
295 MO_S_Gt _ | isComparisonExpr x -> Just x
296 MO_U_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
297 MO_S_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
298 MO_U_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
299 MO_S_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
300 MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
301 MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
302 _ -> Nothing
303
304 cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))]
305 = case mop of
306 MO_Mul _ -> Just x
307 MO_S_Quot _ -> Just x
308 MO_U_Quot _ -> Just x
309 MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
310 MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
311 MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x'
312 MO_Eq _ | isComparisonExpr x -> Just x
313 MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
314 MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
315 MO_U_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
316 MO_S_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
317 MO_U_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
318 MO_S_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
319 MO_U_Ge _ | isComparisonExpr x -> Just x
320 MO_S_Ge _ | isComparisonExpr x -> Just x
321 _ -> Nothing
322
323 -- Now look for multiplication/division by powers of 2 (integers).
324
325 cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
326 = case mop of
327 MO_Mul rep
328 | Just p <- exactLog2 n ->
329 Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
330 MO_U_Quot rep
331 | Just p <- exactLog2 n ->
332 Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
333 MO_S_Quot rep
334 | Just p <- exactLog2 n,
335 CmmReg _ <- x -> -- We duplicate x below, hence require
336 -- it is a reg. FIXME: remove this restriction.
337 -- shift right is not the same as quot, because it rounds
338 -- to minus infinity, whereasq quot rounds toward zero.
339 -- To fix this up, we add one less than the divisor to the
340 -- dividend if it is a negative number.
341 --
342 -- to avoid a test/jump, we use the following sequence:
343 -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
344 -- x2 = y & (divisor-1)
345 -- result = (x+x2) >>= log2(divisor)
346 -- this could be done a bit more simply using conditional moves,
347 -- but we're processor independent here.
348 --
349 -- we optimise the divide by 2 case slightly, generating
350 -- x1 = x >> word_size-1 (unsigned)
351 -- return = (x + x1) >>= log2(divisor)
352 let
353 bits = fromIntegral (widthInBits rep) - 1
354 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
355 x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
356 x2 = if p == 1 then x1 else
357 CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
358 x3 = CmmMachOp (MO_Add rep) [x, x2]
359 in
360 Just (cmmMachOpFold dflags (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
361 _ -> Nothing
362
363 -- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
364 -- Unfortunately this needs a unique supply because x might not be a
365 -- register. See #2253 (program 6) for an example.
366
367
368 -- Anything else is just too hard.
369
370 cmmMachOpFoldM _ _ _ = Nothing
371
372 -- -----------------------------------------------------------------------------
373 -- exactLog2
374
375 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
376 -- from GCC. It requires bit manipulation primitives, and we use GHC
377 -- extensions. Tough.
378
379 exactLog2 :: Integer -> Maybe Integer
380 exactLog2 x
381 = if (x <= 0 || x >= 2147483648) then
382 Nothing
383 else
384 if (x .&. (-x)) /= x then
385 Nothing
386 else
387 Just (pow2 x)
388 where
389 pow2 x | x == 1 = 0
390 | otherwise = 1 + pow2 (x `shiftR` 1)
391
392 -- -----------------------------------------------------------------------------
393 -- Utils
394
395 isLit :: CmmExpr -> Bool
396 isLit (CmmLit _) = True
397 isLit _ = False
398
399 isComparisonExpr :: CmmExpr -> Bool
400 isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
401 isComparisonExpr _ = False
402
403 isPicReg :: CmmExpr -> Bool
404 isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
405 isPicReg _ = False