CLabel: Refactor pprDynamicLinkerAsmLabel
[ghc.git] / compiler / cmm / CmmOpt.hs
1 {-# LANGUAGE CPP #-}
2
3 -- The default iteration limit is a bit too low for the definitions
4 -- in this module.
5 {-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
6
7 -----------------------------------------------------------------------------
8 --
9 -- Cmm optimisation
10 --
11 -- (c) The University of Glasgow 2006
12 --
13 -----------------------------------------------------------------------------
14
15 module CmmOpt (
16 constantFoldNode,
17 constantFoldExpr,
18 cmmMachOpFold,
19 cmmMachOpFoldM
20 ) where
21
22 #include "HsVersions.h"
23
24 import GhcPrelude
25
26 import CmmUtils
27 import Cmm
28 import DynFlags
29 import Util
30
31 import Outputable
32 import Platform
33
34 import Data.Bits
35 import Data.Maybe
36
37
38 constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x
39 constantFoldNode dflags = mapExp (constantFoldExpr dflags)
40
41 constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr
42 constantFoldExpr dflags = wrapRecExp f
43 where f (CmmMachOp op args) = cmmMachOpFold dflags op args
44 f (CmmRegOff r 0) = CmmReg r
45 f e = e
46
47 -- -----------------------------------------------------------------------------
48 -- MachOp constant folder
49
50 -- Now, try to constant-fold the MachOps. The arguments have already
51 -- been optimized and folded.
52
53 cmmMachOpFold
54 :: DynFlags
55 -> MachOp -- The operation from an CmmMachOp
56 -> [CmmExpr] -- The optimized arguments
57 -> CmmExpr
58
59 cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args)
60
61 -- Returns Nothing if no changes, useful for Hoopl, also reduces
62 -- allocation!
63 cmmMachOpFoldM
64 :: DynFlags
65 -> MachOp
66 -> [CmmExpr]
67 -> Maybe CmmExpr
68
69 cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
70 = Just $ case op of
71 MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
72 MO_Not _ -> CmmLit (CmmInt (complement x) rep)
73
74 -- these are interesting: we must first narrow to the
75 -- "from" type, in order to truncate to the correct size.
76 -- The final narrow/widen to the destination type
77 -- is implicit in the CmmLit.
78 MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
79 MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
80 MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
81
82 _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op
83
84
85 -- Eliminate conversion NOPs
86 cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
87 cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
88
89 -- Eliminate nested conversions where possible
90 cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]]
91 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
92 Just (_, rep3,signed2) <- isIntConversion conv_outer
93 = case () of
94 -- widen then narrow to the same size is a nop
95 _ | rep1 < rep2 && rep1 == rep3 -> Just x
96 -- Widen then narrow to different size: collapse to single conversion
97 -- but remember to use the signedness from the widening, just in case
98 -- the final conversion is a widen.
99 | rep1 < rep2 && rep2 > rep3 ->
100 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
101 -- Nested widenings: collapse if the signedness is the same
102 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
103 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
104 -- Nested narrowings: collapse
105 | rep1 > rep2 && rep2 > rep3 ->
106 Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x]
107 | otherwise ->
108 Nothing
109 where
110 isIntConversion (MO_UU_Conv rep1 rep2)
111 = Just (rep1,rep2,False)
112 isIntConversion (MO_SS_Conv rep1 rep2)
113 = Just (rep1,rep2,True)
114 isIntConversion _ = Nothing
115
116 intconv True = MO_SS_Conv
117 intconv False = MO_UU_Conv
118
119 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
120 -- but what if the architecture only supports word-sized loads, should
121 -- we do the transformation anyway?
122
123 cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
124 = case mop of
125 -- for comparisons: don't forget to narrow the arguments before
126 -- comparing, since they might be out of range.
127 MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags))
128 MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags))
129
130 MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags))
131 MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags))
132 MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags))
133 MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags))
134
135 MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags))
136 MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags))
137 MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags))
138 MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags))
139
140 MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
141 MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
142 MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
143 MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
144 MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r)
145 MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
146 MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
147
148 MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r)
149 MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r)
150 MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r)
151
152 MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
153 MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
154 MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
155
156 _ -> Nothing
157
158 where
159 x_u = narrowU xrep x
160 y_u = narrowU xrep y
161 x_s = narrowS xrep x
162 y_s = narrowS xrep y
163
164
165 -- When possible, shift the constants to the right-hand side, so that we
166 -- can match for strength reductions. Note that the code generator will
167 -- also assume that constants have been shifted to the right when
168 -- possible.
169
170 cmmMachOpFoldM dflags op [x@(CmmLit _), y]
171 | not (isLit y) && isCommutableMachOp op
172 = Just (cmmMachOpFold dflags op [y, x])
173
174 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
175 -- moved to the right, it is more likely that we will find
176 -- opportunities for constant folding when the expression is
177 -- right-associated.
178 --
179 -- ToDo: this appears to introduce a quadratic behaviour due to the
180 -- nested cmmMachOpFold. Can we fix this?
181 --
182 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
183 -- is also a lit (otherwise arg1 would be on the right). If we
184 -- put arg1 on the left of the rearranged expression, we'll get into a
185 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
186 --
187 -- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
188 -- PicBaseReg from the corresponding label (or label difference).
189 --
190 cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
191 | mop2 `associates_with` mop1
192 && not (isLit arg1) && not (isPicReg arg1)
193 = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]])
194 where
195 MO_Add{} `associates_with` MO_Sub{} = True
196 mop1 `associates_with` mop2 =
197 mop1 == mop2 && isAssociativeMachOp mop1
198
199 -- special case: (a - b) + c ==> a + (c - b)
200 cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
201 | not (isLit arg1) && not (isPicReg arg1)
202 = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]])
203
204 -- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N)
205 --
206 -- this is better because lit+N is a single link-time constant (e.g. a
207 -- CmmLabelOff), so the right-hand expression needs only one
208 -- instruction, whereas the left needs two. This happens when pointer
209 -- tagging gives us label+offset, and PIC turns the label into
210 -- PicBaseReg + label.
211 --
212 cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit]
213 , CmmLit (CmmInt n rep) ]
214 | isPicReg pic
215 = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
216 where off = fromIntegral (narrowS rep n)
217
218 -- Make a RegOff if we can
219 cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
220 = Just $ cmmRegOff reg (fromIntegral (narrowS rep n))
221 cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
222 = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n))
223 cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
224 = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n))
225 cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
226 = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n))
227
228 -- Fold label(+/-)offset into a CmmLit where possible
229
230 cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)]
231 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
232 cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit]
233 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
234 cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
235 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
236
237
238 -- Comparison of literal with widened operand: perform the comparison
239 -- at the smaller width, as long as the literal is within range.
240
241 -- We can't do the reverse trick, when the operand is narrowed:
242 -- narrowing throws away bits from the operand, there's no way to do
243 -- the same comparison at the larger size.
244
245 cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
246 | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
247 platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64],
248 -- if the operand is widened:
249 Just (rep, signed, narrow_fn) <- maybe_conversion conv,
250 -- and this is a comparison operation:
251 Just narrow_cmp <- maybe_comparison cmp rep signed,
252 -- and the literal fits in the smaller size:
253 i == narrow_fn rep i
254 -- then we can do the comparison at the smaller size
255 = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)])
256 where
257 maybe_conversion (MO_UU_Conv from to)
258 | to > from
259 = Just (from, False, narrowU)
260 maybe_conversion (MO_SS_Conv from to)
261 | to > from
262 = Just (from, True, narrowS)
263
264 -- don't attempt to apply this optimisation when the source
265 -- is a float; see #1916
266 maybe_conversion _ = Nothing
267
268 -- careful (#2080): if the original comparison was signed, but
269 -- we were doing an unsigned widen, then we must do an
270 -- unsigned comparison at the smaller size.
271 maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep)
272 maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep)
273 maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep)
274 maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep)
275 maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep)
276 maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep)
277 maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep)
278 maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep)
279 maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep)
280 maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
281 maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
282 maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
283 maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
284 maybe_comparison _ _ _ = Nothing
285
286 -- We can often do something with constants of 0 and 1 ...
287 -- See Note [Comparison operators]
288
289 cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))]
290 = case mop of
291 -- Arithmetic
292 MO_Add _ -> Just x -- x + 0 = x
293 MO_Sub _ -> Just x -- x - 0 = x
294 MO_Mul _ -> Just y -- x * 0 = 0
295
296 -- Logical operations
297 MO_And _ -> Just y -- x & 0 = 0
298 MO_Or _ -> Just x -- x | 0 = x
299 MO_Xor _ -> Just x -- x `xor` 0 = x
300
301 -- Shifts
302 MO_Shl _ -> Just x -- x << 0 = x
303 MO_S_Shr _ -> Just x -- ditto shift-right
304 MO_U_Shr _ -> Just x
305
306 -- Comparisons; these ones are trickier
307 -- See Note [Comparison operators]
308 MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y
309 MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y
310 MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y
311 MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto
312 MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0
313 MO_S_Lt _ | isComparisonExpr x -> Just zero
314 MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1
315 MO_S_Ge _ | isComparisonExpr x -> Just one
316
317 MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y
318 MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
319 _ -> Nothing
320 where
321 zero = CmmLit (CmmInt 0 (wordWidth dflags))
322 one = CmmLit (CmmInt 1 (wordWidth dflags))
323
324 cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))]
325 = case mop of
326 -- Arithmetic: x*1 = x, etc
327 MO_Mul _ -> Just x
328 MO_S_Quot _ -> Just x
329 MO_U_Quot _ -> Just x
330 MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
331 MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
332
333 -- Comparisons; trickier
334 -- See Note [Comparison operators]
335 MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y
336 MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y
337 MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y
338 MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto
339 MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0
340 MO_S_Gt _ | isComparisonExpr x -> Just zero
341 MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1
342 MO_S_Le _ | isComparisonExpr x -> Just one
343 MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y
344 MO_S_Ge _ | isComparisonExpr x -> Just x
345 _ -> Nothing
346 where
347 zero = CmmLit (CmmInt 0 (wordWidth dflags))
348 one = CmmLit (CmmInt 1 (wordWidth dflags))
349
350 -- Now look for multiplication/division by powers of 2 (integers).
351
352 cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
353 = case mop of
354 MO_Mul rep
355 | Just p <- exactLog2 n ->
356 Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
357 MO_U_Quot rep
358 | Just p <- exactLog2 n ->
359 Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
360 MO_U_Rem rep
361 | Just _ <- exactLog2 n ->
362 Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)])
363 MO_S_Quot rep
364 | Just p <- exactLog2 n,
365 CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
366 -- it is a reg. FIXME: remove this restriction.
367 Just (cmmMachOpFold dflags (MO_S_Shr rep)
368 [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)])
369 MO_S_Rem rep
370 | Just p <- exactLog2 n,
371 CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
372 -- it is a reg. FIXME: remove this restriction.
373 -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p).
374 -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot)
375 -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation.
376 Just (cmmMachOpFold dflags (MO_Sub rep)
377 [x, cmmMachOpFold dflags (MO_And rep)
378 [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
379 _ -> Nothing
380 where
381 -- In contrast with unsigned integers, for signed ones
382 -- shift right is not the same as quot, because it rounds
383 -- to minus infinity, whereas quot rounds toward zero.
384 -- To fix this up, we add one less than the divisor to the
385 -- dividend if it is a negative number.
386 --
387 -- to avoid a test/jump, we use the following sequence:
388 -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
389 -- x2 = y & (divisor-1)
390 -- result = x + x2
391 -- this could be done a bit more simply using conditional moves,
392 -- but we're processor independent here.
393 --
394 -- we optimise the divide by 2 case slightly, generating
395 -- x1 = x >> word_size-1 (unsigned)
396 -- return = x + x1
397 signedQuotRemHelper :: Width -> Integer -> CmmExpr
398 signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2]
399 where
400 bits = fromIntegral (widthInBits rep) - 1
401 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
402 x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
403 x2 = if p == 1 then x1 else
404 CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
405
406 -- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
407 -- Unfortunately this needs a unique supply because x might not be a
408 -- register. See #2253 (program 6) for an example.
409
410
411 -- Anything else is just too hard.
412
413 cmmMachOpFoldM _ _ _ = Nothing
414
415 {- Note [Comparison operators]
416 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
417 If we have
418 CmmCondBranch ((x>#y) == 1) t f
419 we really want to convert to
420 CmmCondBranch (x>#y) t f
421
422 That's what the constant-folding operations on comparison operators do above.
423 -}
424
425
426 -- -----------------------------------------------------------------------------
427 -- Utils
428
429 isLit :: CmmExpr -> Bool
430 isLit (CmmLit _) = True
431 isLit _ = False
432
433 isComparisonExpr :: CmmExpr -> Bool
434 isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
435 isComparisonExpr _ = False
436
437 isPicReg :: CmmExpr -> Bool
438 isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
439 isPicReg _ = False