add a comment
[ghc.git] / compiler / cmm / CmmOpt.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Cmm optimisation
6 --
7 -- (c) The University of Glasgow 2006
8 --
9 -----------------------------------------------------------------------------
10
11 module CmmOpt (
12 constantFoldNode,
13 constantFoldExpr,
14 cmmMachOpFold,
15 cmmMachOpFoldM
16 ) where
17
18 #include "HsVersions.h"
19
20 import CmmUtils
21 import Cmm
22 import DynFlags
23
24 import FastTypes
25 import Outputable
26 import Platform
27
28 import Data.Bits
29 import Data.Maybe
30
31
32 constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x
33 constantFoldNode dflags = mapExp (constantFoldExpr dflags)
34
35 constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr
36 constantFoldExpr dflags = wrapRecExp f
37 where f (CmmMachOp op args) = cmmMachOpFold dflags op args
38 f (CmmRegOff r 0) = CmmReg r
39 f e = e
40
41 -- -----------------------------------------------------------------------------
42 -- MachOp constant folder
43
44 -- Now, try to constant-fold the MachOps. The arguments have already
45 -- been optimized and folded.
46
47 cmmMachOpFold
48 :: DynFlags
49 -> MachOp -- The operation from an CmmMachOp
50 -> [CmmExpr] -- The optimized arguments
51 -> CmmExpr
52
53 cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args)
54
55 -- Returns Nothing if no changes, useful for Hoopl, also reduces
56 -- allocation!
57 cmmMachOpFoldM
58 :: DynFlags
59 -> MachOp
60 -> [CmmExpr]
61 -> Maybe CmmExpr
62
63 cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
64 = Just $ case op of
65 MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
66 MO_Not _ -> CmmLit (CmmInt (complement x) rep)
67
68 -- these are interesting: we must first narrow to the
69 -- "from" type, in order to truncate to the correct size.
70 -- The final narrow/widen to the destination type
71 -- is implicit in the CmmLit.
72 MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
73 MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
74 MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
75
76 _ -> panic "cmmMachOpFoldM: unknown unary op"
77
78
79 -- Eliminate conversion NOPs
80 cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
81 cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
82
83 -- Eliminate nested conversions where possible
84 cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]]
85 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
86 Just (_, rep3,signed2) <- isIntConversion conv_outer
87 = case () of
88 -- widen then narrow to the same size is a nop
89 _ | rep1 < rep2 && rep1 == rep3 -> Just x
90 -- Widen then narrow to different size: collapse to single conversion
91 -- but remember to use the signedness from the widening, just in case
92 -- the final conversion is a widen.
93 | rep1 < rep2 && rep2 > rep3 ->
94 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
95 -- Nested widenings: collapse if the signedness is the same
96 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
97 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
98 -- Nested narrowings: collapse
99 | rep1 > rep2 && rep2 > rep3 ->
100 Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x]
101 | otherwise ->
102 Nothing
103 where
104 isIntConversion (MO_UU_Conv rep1 rep2)
105 = Just (rep1,rep2,False)
106 isIntConversion (MO_SS_Conv rep1 rep2)
107 = Just (rep1,rep2,True)
108 isIntConversion _ = Nothing
109
110 intconv True = MO_SS_Conv
111 intconv False = MO_UU_Conv
112
113 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
114 -- but what if the architecture only supports word-sized loads, should
115 -- we do the transformation anyway?
116
117 cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
118 = case mop of
119 -- for comparisons: don't forget to narrow the arguments before
120 -- comparing, since they might be out of range.
121 MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags))
122 MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags))
123
124 MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags))
125 MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags))
126 MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags))
127 MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags))
128
129 MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags))
130 MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags))
131 MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags))
132 MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags))
133
134 MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
135 MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
136 MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
137 MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
138 MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r)
139 MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
140 MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
141
142 MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r)
143 MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r)
144 MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r)
145
146 MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
147 MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
148 MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
149
150 _ -> Nothing
151
152 where
153 x_u = narrowU xrep x
154 y_u = narrowU xrep y
155 x_s = narrowS xrep x
156 y_s = narrowS xrep y
157
158
159 -- When possible, shift the constants to the right-hand side, so that we
160 -- can match for strength reductions. Note that the code generator will
161 -- also assume that constants have been shifted to the right when
162 -- possible.
163
164 cmmMachOpFoldM dflags op [x@(CmmLit _), y]
165 | not (isLit y) && isCommutableMachOp op
166 = Just (cmmMachOpFold dflags op [y, x])
167
168 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
169 -- moved to the right, it is more likely that we will find
170 -- opportunities for constant folding when the expression is
171 -- right-associated.
172 --
173 -- ToDo: this appears to introduce a quadratic behaviour due to the
174 -- nested cmmMachOpFold. Can we fix this?
175 --
176 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
177 -- is also a lit (otherwise arg1 would be on the right). If we
178 -- put arg1 on the left of the rearranged expression, we'll get into a
179 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
180 --
181 -- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
182 -- PicBaseReg from the corresponding label (or label difference).
183 --
184 cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
185 | mop2 `associates_with` mop1
186 && not (isLit arg1) && not (isPicReg arg1)
187 = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]])
188 where
189 MO_Add{} `associates_with` MO_Sub{} = True
190 mop1 `associates_with` mop2 =
191 mop1 == mop2 && isAssociativeMachOp mop1
192
193 -- special case: (a - b) + c ==> a + (c - b)
194 cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
195 | not (isLit arg1) && not (isPicReg arg1)
196 = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]])
197
198 -- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N)
199 --
200 -- this is better because lit+N is a single link-time constant (e.g. a
201 -- CmmLabelOff), so the right-hand expression needs only one
202 -- instruction, whereas the left needs two. This happens when pointer
203 -- tagging gives us label+offset, and PIC turns the label into
204 -- PicBaseReg + label.
205 --
206 cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit]
207 , CmmLit (CmmInt n rep) ]
208 | isPicReg pic
209 = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
210 where off = fromIntegral (narrowS rep n)
211
212 -- Make a RegOff if we can
213 cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
214 = Just $ cmmRegOff reg (fromIntegral (narrowS rep n))
215 cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
216 = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n))
217 cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
218 = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n))
219 cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
220 = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n))
221
222 -- Fold label(+/-)offset into a CmmLit where possible
223
224 cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)]
225 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
226 cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit]
227 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
228 cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
229 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
230
231
232 -- Comparison of literal with widened operand: perform the comparison
233 -- at the smaller width, as long as the literal is within range.
234
235 -- We can't do the reverse trick, when the operand is narrowed:
236 -- narrowing throws away bits from the operand, there's no way to do
237 -- the same comparison at the larger size.
238
239 cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
240 | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
241 platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64],
242 -- if the operand is widened:
243 Just (rep, signed, narrow_fn) <- maybe_conversion conv,
244 -- and this is a comparison operation:
245 Just narrow_cmp <- maybe_comparison cmp rep signed,
246 -- and the literal fits in the smaller size:
247 i == narrow_fn rep i
248 -- then we can do the comparison at the smaller size
249 = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)])
250 where
251 maybe_conversion (MO_UU_Conv from to)
252 | to > from
253 = Just (from, False, narrowU)
254 maybe_conversion (MO_SS_Conv from to)
255 | to > from
256 = Just (from, True, narrowS)
257
258 -- don't attempt to apply this optimisation when the source
259 -- is a float; see #1916
260 maybe_conversion _ = Nothing
261
262 -- careful (#2080): if the original comparison was signed, but
263 -- we were doing an unsigned widen, then we must do an
264 -- unsigned comparison at the smaller size.
265 maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep)
266 maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep)
267 maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep)
268 maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep)
269 maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep)
270 maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep)
271 maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep)
272 maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep)
273 maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep)
274 maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
275 maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
276 maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
277 maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
278 maybe_comparison _ _ _ = Nothing
279
280 -- We can often do something with constants of 0 and 1 ...
281
282 cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))]
283 = case mop of
284 MO_Add _ -> Just x
285 MO_Sub _ -> Just x
286 MO_Mul _ -> Just y
287 MO_And _ -> Just y
288 MO_Or _ -> Just x
289 MO_Xor _ -> Just x
290 MO_Shl _ -> Just x
291 MO_S_Shr _ -> Just x
292 MO_U_Shr _ -> Just x
293 MO_Ne _ | isComparisonExpr x -> Just x
294 MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x'
295 MO_U_Gt _ | isComparisonExpr x -> Just x
296 MO_S_Gt _ | isComparisonExpr x -> Just x
297 MO_U_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
298 MO_S_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
299 MO_U_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
300 MO_S_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
301 MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
302 MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
303 _ -> Nothing
304
305 cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))]
306 = case mop of
307 MO_Mul _ -> Just x
308 MO_S_Quot _ -> Just x
309 MO_U_Quot _ -> Just x
310 MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
311 MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
312 MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x'
313 MO_Eq _ | isComparisonExpr x -> Just x
314 MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
315 MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
316 MO_U_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
317 MO_S_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
318 MO_U_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
319 MO_S_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
320 MO_U_Ge _ | isComparisonExpr x -> Just x
321 MO_S_Ge _ | isComparisonExpr x -> Just x
322 _ -> Nothing
323
324 -- Now look for multiplication/division by powers of 2 (integers).
325
326 cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
327 = case mop of
328 MO_Mul rep
329 | Just p <- exactLog2 n ->
330 Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
331 MO_U_Quot rep
332 | Just p <- exactLog2 n ->
333 Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
334 MO_S_Quot rep
335 | Just p <- exactLog2 n,
336 CmmReg _ <- x -> -- We duplicate x below, hence require
337 -- it is a reg. FIXME: remove this restriction.
338 -- shift right is not the same as quot, because it rounds
339 -- to minus infinity, whereasq quot rounds toward zero.
340 -- To fix this up, we add one less than the divisor to the
341 -- dividend if it is a negative number.
342 --
343 -- to avoid a test/jump, we use the following sequence:
344 -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
345 -- x2 = y & (divisor-1)
346 -- result = (x+x2) >>= log2(divisor)
347 -- this could be done a bit more simply using conditional moves,
348 -- but we're processor independent here.
349 --
350 -- we optimise the divide by 2 case slightly, generating
351 -- x1 = x >> word_size-1 (unsigned)
352 -- return = (x + x1) >>= log2(divisor)
353 let
354 bits = fromIntegral (widthInBits rep) - 1
355 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
356 x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
357 x2 = if p == 1 then x1 else
358 CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
359 x3 = CmmMachOp (MO_Add rep) [x, x2]
360 in
361 Just (cmmMachOpFold dflags (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
362 _ -> Nothing
363
364 -- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
365 -- Unfortunately this needs a unique supply because x might not be a
366 -- register. See #2253 (program 6) for an example.
367
368
369 -- Anything else is just too hard.
370
371 cmmMachOpFoldM _ _ _ = Nothing
372
373 -- -----------------------------------------------------------------------------
374 -- exactLog2
375
376 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
377 -- from GCC. It requires bit manipulation primitives, and we use GHC
378 -- extensions. Tough.
379 --
380 -- Used to be in MachInstrs --SDM.
381 -- ToDo: remove use of unboxery --SDM.
382
383 -- Unboxery removed in favor of FastInt; but is the function supposed to fail
384 -- on inputs >= 2147483648, or was that just an implementation artifact?
385 -- And is this speed-critical, or can we just use Integer operations
386 -- (including Data.Bits)?
387 -- --Isaac Dupree
388
389 exactLog2 :: Integer -> Maybe Integer
390 exactLog2 x_
391 = if (x_ <= 0 || x_ >= 2147483648) then
392 Nothing
393 else
394 case iUnbox (fromInteger x_) of { x ->
395 if (x `bitAndFastInt` negateFastInt x) /=# x then
396 Nothing
397 else
398 Just (toInteger (iBox (pow2 x)))
399 }
400 where
401 pow2 x | x ==# _ILIT(1) = _ILIT(0)
402 | otherwise = _ILIT(1) +# pow2 (x `shiftR_FastInt` _ILIT(1))
403
404 -- -----------------------------------------------------------------------------
405 -- Utils
406
407 isLit :: CmmExpr -> Bool
408 isLit (CmmLit _) = True
409 isLit _ = False
410
411 isComparisonExpr :: CmmExpr -> Bool
412 isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
413 isComparisonExpr _ = False
414
415 isPicReg :: CmmExpr -> Bool
416 isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
417 isPicReg _ = False