359d44b752745604d16ea7a3131dd19c33af4a0a
[ghc.git] / compiler / cmm / CmmOpt.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Cmm optimisation
4 --
5 -- (c) The University of Glasgow 2006
6 --
7 -----------------------------------------------------------------------------
8
9 module CmmOpt (
10 cmmMachOpFold,
11 cmmMachOpFoldM
12 ) where
13
14 #include "HsVersions.h"
15
16 import CmmUtils
17 import Cmm
18 import DynFlags
19
20 import FastTypes
21 import Outputable
22 import Platform
23
24 import Data.Bits
25 import Data.Maybe
26
27 -- -----------------------------------------------------------------------------
28 -- MachOp constant folder
29
30 -- Now, try to constant-fold the MachOps. The arguments have already
31 -- been optimized and folded.
32
33 cmmMachOpFold
34 :: DynFlags
35 -> MachOp -- The operation from an CmmMachOp
36 -> [CmmExpr] -- The optimized arguments
37 -> CmmExpr
38
39 cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args)
40
41 -- Returns Nothing if no changes, useful for Hoopl, also reduces
42 -- allocation!
43 cmmMachOpFoldM
44 :: DynFlags
45 -> MachOp
46 -> [CmmExpr]
47 -> Maybe CmmExpr
48
49 cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
50 = Just $ case op of
51 MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
52 MO_Not _ -> CmmLit (CmmInt (complement x) rep)
53
54 -- these are interesting: we must first narrow to the
55 -- "from" type, in order to truncate to the correct size.
56 -- The final narrow/widen to the destination type
57 -- is implicit in the CmmLit.
58 MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
59 MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
60 MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
61
62 _ -> panic "cmmMachOpFoldM: unknown unary op"
63
64
65 -- Eliminate conversion NOPs
66 cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
67 cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
68
69 -- Eliminate nested conversions where possible
70 cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]]
71 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
72 Just (_, rep3,signed2) <- isIntConversion conv_outer
73 = case () of
74 -- widen then narrow to the same size is a nop
75 _ | rep1 < rep2 && rep1 == rep3 -> Just x
76 -- Widen then narrow to different size: collapse to single conversion
77 -- but remember to use the signedness from the widening, just in case
78 -- the final conversion is a widen.
79 | rep1 < rep2 && rep2 > rep3 ->
80 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
81 -- Nested widenings: collapse if the signedness is the same
82 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
83 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
84 -- Nested narrowings: collapse
85 | rep1 > rep2 && rep2 > rep3 ->
86 Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x]
87 | otherwise ->
88 Nothing
89 where
90 isIntConversion (MO_UU_Conv rep1 rep2)
91 = Just (rep1,rep2,False)
92 isIntConversion (MO_SS_Conv rep1 rep2)
93 = Just (rep1,rep2,True)
94 isIntConversion _ = Nothing
95
96 intconv True = MO_SS_Conv
97 intconv False = MO_UU_Conv
98
99 -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
100 -- but what if the architecture only supports word-sized loads, should
101 -- we do the transformation anyway?
102
103 cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
104 = case mop of
105 -- for comparisons: don't forget to narrow the arguments before
106 -- comparing, since they might be out of range.
107 MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags))
108 MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags))
109
110 MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags))
111 MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags))
112 MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags))
113 MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags))
114
115 MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags))
116 MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags))
117 MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags))
118 MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags))
119
120 MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
121 MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
122 MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
123 MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
124 MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r)
125 MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
126 MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
127
128 MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r)
129 MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r)
130 MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r)
131
132 MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
133 MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
134 MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
135
136 _ -> Nothing
137
138 where
139 x_u = narrowU xrep x
140 y_u = narrowU xrep y
141 x_s = narrowS xrep x
142 y_s = narrowS xrep y
143
144
145 -- When possible, shift the constants to the right-hand side, so that we
146 -- can match for strength reductions. Note that the code generator will
147 -- also assume that constants have been shifted to the right when
148 -- possible.
149
150 cmmMachOpFoldM dflags op [x@(CmmLit _), y]
151 | not (isLit y) && isCommutableMachOp op
152 = Just (cmmMachOpFold dflags op [y, x])
153
154 -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
155 -- moved to the right, it is more likely that we will find
156 -- opportunities for constant folding when the expression is
157 -- right-associated.
158 --
159 -- ToDo: this appears to introduce a quadratic behaviour due to the
160 -- nested cmmMachOpFold. Can we fix this?
161 --
162 -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
163 -- is also a lit (otherwise arg1 would be on the right). If we
164 -- put arg1 on the left of the rearranged expression, we'll get into a
165 -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
166 --
167 -- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
168 -- PicBaseReg from the corresponding label (or label difference).
169 --
170 cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
171 | mop2 `associates_with` mop1
172 && not (isLit arg1) && not (isPicReg arg1)
173 = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]])
174 where
175 MO_Add{} `associates_with` MO_Sub{} = True
176 mop1 `associates_with` mop2 =
177 mop1 == mop2 && isAssociativeMachOp mop1
178
179 -- special case: (a - b) + c ==> a + (c - b)
180 cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
181 | not (isLit arg1) && not (isPicReg arg1)
182 = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]])
183
184 -- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N)
185 --
186 -- this is better because lit+N is a single link-time constant (e.g. a
187 -- CmmLabelOff), so the right-hand expression needs only one
188 -- instruction, whereas the left needs two. This happens when pointer
189 -- tagging gives us label+offset, and PIC turns the label into
190 -- PicBaseReg + label.
191 --
192 cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit]
193 , CmmLit (CmmInt n rep) ]
194 | isPicReg pic
195 = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
196 where off = fromIntegral (narrowS rep n)
197
198 -- Make a RegOff if we can
199 cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
200 = Just $ cmmRegOff reg (fromIntegral (narrowS rep n))
201 cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
202 = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n))
203 cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
204 = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n))
205 cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
206 = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n))
207
208 -- Fold label(+/-)offset into a CmmLit where possible
209
210 cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)]
211 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
212 cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit]
213 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
214 cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
215 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
216
217
218 -- Comparison of literal with widened operand: perform the comparison
219 -- at the smaller width, as long as the literal is within range.
220
221 -- We can't do the reverse trick, when the operand is narrowed:
222 -- narrowing throws away bits from the operand, there's no way to do
223 -- the same comparison at the larger size.
224
225 cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
226 | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
227 platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64],
228 -- if the operand is widened:
229 Just (rep, signed, narrow_fn) <- maybe_conversion conv,
230 -- and this is a comparison operation:
231 Just narrow_cmp <- maybe_comparison cmp rep signed,
232 -- and the literal fits in the smaller size:
233 i == narrow_fn rep i
234 -- then we can do the comparison at the smaller size
235 = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)])
236 where
237 maybe_conversion (MO_UU_Conv from to)
238 | to > from
239 = Just (from, False, narrowU)
240 maybe_conversion (MO_SS_Conv from to)
241 | to > from
242 = Just (from, True, narrowS)
243
244 -- don't attempt to apply this optimisation when the source
245 -- is a float; see #1916
246 maybe_conversion _ = Nothing
247
248 -- careful (#2080): if the original comparison was signed, but
249 -- we were doing an unsigned widen, then we must do an
250 -- unsigned comparison at the smaller size.
251 maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep)
252 maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep)
253 maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep)
254 maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep)
255 maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep)
256 maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep)
257 maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep)
258 maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep)
259 maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep)
260 maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
261 maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
262 maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
263 maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
264 maybe_comparison _ _ _ = Nothing
265
266 -- We can often do something with constants of 0 and 1 ...
267
268 cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))]
269 = case mop of
270 MO_Add _ -> Just x
271 MO_Sub _ -> Just x
272 MO_Mul _ -> Just y
273 MO_And _ -> Just y
274 MO_Or _ -> Just x
275 MO_Xor _ -> Just x
276 MO_Shl _ -> Just x
277 MO_S_Shr _ -> Just x
278 MO_U_Shr _ -> Just x
279 MO_Ne _ | isComparisonExpr x -> Just x
280 MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x'
281 MO_U_Gt _ | isComparisonExpr x -> Just x
282 MO_S_Gt _ | isComparisonExpr x -> Just x
283 MO_U_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
284 MO_S_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
285 MO_U_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
286 MO_S_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
287 MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
288 MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
289 _ -> Nothing
290
291 cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))]
292 = case mop of
293 MO_Mul _ -> Just x
294 MO_S_Quot _ -> Just x
295 MO_U_Quot _ -> Just x
296 MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
297 MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
298 MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x'
299 MO_Eq _ | isComparisonExpr x -> Just x
300 MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
301 MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
302 MO_U_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
303 MO_S_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 (wordWidth dflags))
304 MO_U_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
305 MO_S_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 (wordWidth dflags))
306 MO_U_Ge _ | isComparisonExpr x -> Just x
307 MO_S_Ge _ | isComparisonExpr x -> Just x
308 _ -> Nothing
309
310 -- Now look for multiplication/division by powers of 2 (integers).
311
312 cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
313 = case mop of
314 MO_Mul rep
315 | Just p <- exactLog2 n ->
316 Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
317 MO_U_Quot rep
318 | Just p <- exactLog2 n ->
319 Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
320 MO_S_Quot rep
321 | Just p <- exactLog2 n,
322 CmmReg _ <- x -> -- We duplicate x below, hence require
323 -- it is a reg. FIXME: remove this restriction.
324 -- shift right is not the same as quot, because it rounds
325 -- to minus infinity, whereasq quot rounds toward zero.
326 -- To fix this up, we add one less than the divisor to the
327 -- dividend if it is a negative number.
328 --
329 -- to avoid a test/jump, we use the following sequence:
330 -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
331 -- x2 = y & (divisor-1)
332 -- result = (x+x2) >>= log2(divisor)
333 -- this could be done a bit more simply using conditional moves,
334 -- but we're processor independent here.
335 --
336 -- we optimise the divide by 2 case slightly, generating
337 -- x1 = x >> word_size-1 (unsigned)
338 -- return = (x + x1) >>= log2(divisor)
339 let
340 bits = fromIntegral (widthInBits rep) - 1
341 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
342 x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
343 x2 = if p == 1 then x1 else
344 CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
345 x3 = CmmMachOp (MO_Add rep) [x, x2]
346 in
347 Just (cmmMachOpFold dflags (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
348 _ -> Nothing
349
350 -- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
351 -- Unfortunately this needs a unique supply because x might not be a
352 -- register. See #2253 (program 6) for an example.
353
354
355 -- Anything else is just too hard.
356
357 cmmMachOpFoldM _ _ _ = Nothing
358
359 -- -----------------------------------------------------------------------------
360 -- exactLog2
361
362 -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
363 -- from GCC. It requires bit manipulation primitives, and we use GHC
364 -- extensions. Tough.
365 --
366 -- Used to be in MachInstrs --SDM.
367 -- ToDo: remove use of unboxery --SDM.
368
369 -- Unboxery removed in favor of FastInt; but is the function supposed to fail
370 -- on inputs >= 2147483648, or was that just an implementation artifact?
371 -- And is this speed-critical, or can we just use Integer operations
372 -- (including Data.Bits)?
373 -- --Isaac Dupree
374
375 exactLog2 :: Integer -> Maybe Integer
376 exactLog2 x_
377 = if (x_ <= 0 || x_ >= 2147483648) then
378 Nothing
379 else
380 case iUnbox (fromInteger x_) of { x ->
381 if (x `bitAndFastInt` negateFastInt x) /=# x then
382 Nothing
383 else
384 Just (toInteger (iBox (pow2 x)))
385 }
386 where
387 pow2 x | x ==# _ILIT(1) = _ILIT(0)
388 | otherwise = _ILIT(1) +# pow2 (x `shiftR_FastInt` _ILIT(1))
389
390
391 -- -----------------------------------------------------------------------------
392 -- Loopify for C
393
394 {-
395 This is a simple pass that replaces tail-recursive functions like this:
396
397 fac() {
398 ...
399 jump fac();
400 }
401
402 with this:
403
404 fac() {
405 L:
406 ...
407 goto L;
408 }
409
410 the latter generates better C code, because the C compiler treats it
411 like a loop, and brings full loop optimisation to bear.
412
413 In my measurements this makes little or no difference to anything
414 except factorial, but what the hell.
415 -}
416
417 {-
418 cmmLoopifyForC :: DynFlags -> RawCmmDecl -> RawCmmDecl
419 -- XXX: revisit if we actually want to do this
420 -- cmmLoopifyForC p@(CmmProc Nothing _ _) = p -- only if there's an info table, ignore case alts
421 cmmLoopifyForC dflags (CmmProc infos entry_lbl live
422 (ListGraph blocks@(BasicBlock top_id _ : _))) =
423 -- pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
424 CmmProc infos entry_lbl live (ListGraph blocks')
425 where blocks' = [ BasicBlock id (map do_stmt stmts)
426 | BasicBlock id stmts <- blocks ]
427
428 do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
429 = CmmBranch top_id
430 do_stmt stmt = stmt
431
432 jump_lbl | tablesNextToCode dflags = toInfoLbl entry_lbl
433 | otherwise = entry_lbl
434
435 cmmLoopifyForC _ top = top
436 -}
437
438 -- -----------------------------------------------------------------------------
439 -- Utils
440
441 isLit :: CmmExpr -> Bool
442 isLit (CmmLit _) = True
443 isLit _ = False
444
445 isComparisonExpr :: CmmExpr -> Bool
446 isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
447 isComparisonExpr _ = False
448
449 isPicReg :: CmmExpr -> Bool
450 isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
451 isPicReg _ = False