Add support for passing SSE vectors in registers.
[ghc.git] / compiler / cmm / CmmMachOp.hs
1
2 module CmmMachOp
3 ( MachOp(..)
4 , pprMachOp, isCommutableMachOp, isAssociativeMachOp
5 , isComparisonMachOp, machOpResultType
6 , machOpArgReps, maybeInvertComparison
7
8 -- MachOp builders
9 , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
10 , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
11 , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
12 , mo_wordULe, mo_wordUGt, mo_wordULt
13 , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
14 , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
15 , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
16 , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64
17
18 -- CallishMachOp
19 , CallishMachOp(..), callishMachOpHints
20 , pprCallishMachOp
21 )
22 where
23
24 #include "HsVersions.h"
25
26 import CmmType
27 import Outputable
28 import DynFlags
29
30 -----------------------------------------------------------------------------
31 -- MachOp
32 -----------------------------------------------------------------------------
33
-- | Machine-level primitive operations: the ones we can reasonably hand
-- over to the native code generators.
--
-- Most constructors are parameterised by the 'Width' they operate on.
-- Some operations come in separate signed and unsigned flavours, and in
-- separate integer and floating-point flavours.
data MachOp
  -- Integer operations (the same for signed and unsigned operands)
  = MO_Add Width
  | MO_Sub Width
  | MO_Eq  Width
  | MO_Ne  Width
  | MO_Mul Width                -- low word of multiply

  -- Signed multiply/divide
  | MO_S_MulMayOflo Width       -- nonzero if signed multiply overflows
  | MO_S_Quot Width             -- signed / (same semantics as IntQuotOp)
  | MO_S_Rem  Width             -- signed % (same semantics as IntRemOp)
  | MO_S_Neg  Width             -- unary -

  -- Unsigned multiply/divide
  | MO_U_MulMayOflo Width       -- nonzero if unsigned multiply overflows
  | MO_U_Quot Width             -- unsigned / (same semantics as WordQuotOp)
  | MO_U_Rem  Width             -- unsigned % (same semantics as WordRemOp)

  -- Signed comparisons
  | MO_S_Ge Width
  | MO_S_Le Width
  | MO_S_Gt Width
  | MO_S_Lt Width

  -- Unsigned comparisons
  | MO_U_Ge Width
  | MO_U_Le Width
  | MO_U_Gt Width
  | MO_U_Lt Width

  -- Floating point arithmetic
  | MO_F_Add  Width
  | MO_F_Sub  Width
  | MO_F_Neg  Width             -- unary -
  | MO_F_Mul  Width
  | MO_F_Quot Width

  -- Floating point comparison
  | MO_F_Eq Width
  | MO_F_Ne Width
  | MO_F_Ge Width
  | MO_F_Le Width
  | MO_F_Gt Width
  | MO_F_Lt Width

  -- Bitwise operations.  Not all of these may be supported at all
  -- sizes, and only integral Widths are valid.
  | MO_And   Width
  | MO_Or    Width
  | MO_Xor   Width
  | MO_Not   Width
  | MO_Shl   Width
  | MO_U_Shr Width              -- unsigned shift right
  | MO_S_Shr Width              -- signed shift right

  -- Conversions.  Some of these will be NOPs.
  -- Floating-point conversions use the signed variant.
  | MO_SF_Conv Width Width      -- Signed int -> Float
  | MO_FS_Conv Width Width      -- Float -> Signed int
  | MO_SS_Conv Width Width      -- Signed int -> Signed int
  | MO_UU_Conv Width Width      -- unsigned int -> unsigned int
  | MO_FF_Conv Width Width      -- Float -> Float

  -- Vector element insertion and extraction operations
  | MO_V_Insert  Length Width   -- Insert scalar into vector
  | MO_V_Extract Length Width   -- Extract scalar from vector

  -- Integer vector operations
  | MO_V_Add Length Width
  | MO_V_Sub Length Width
  | MO_V_Mul Length Width

  -- Signed vector multiply/divide
  | MO_VS_Quot Length Width
  | MO_VS_Rem  Length Width
  | MO_VS_Neg  Length Width

  -- Floating point vector element insertion and extraction operations
  | MO_VF_Insert  Length Width  -- Insert scalar into vector
  | MO_VF_Extract Length Width  -- Extract scalar from vector

  -- Floating point vector operations
  | MO_VF_Add  Length Width
  | MO_VF_Sub  Length Width
  | MO_VF_Neg  Length Width     -- unary -
  | MO_VF_Mul  Length Width
  | MO_VF_Quot Length Width
  deriving (Eq, Show)
132
-- | Pretty-print a 'MachOp' as the text of its derived 'Show' output.
pprMachOp :: MachOp -> SDoc
pprMachOp = text . show
135
136
137
138 -- -----------------------------------------------------------------------------
139 -- Some common MachReps
140
141 -- A 'wordRep' is a machine word on the target architecture
142 -- Specifically, it is the size of an Int#, Word#, Addr#
143 -- and the unit of allocation on the stack and the heap
144 -- Any pointer is also guaranteed to be a wordRep.
145
-- Builders whose width is the target's word width, looked up in DynFlags.
mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe, mo_wordMul, mo_wordSQuot,
  mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem,
  mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe,
  mo_wordULe, mo_wordUGt, mo_wordULt,
  mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl,
  mo_wordSShr, mo_wordUShr,
  mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord,
  mo_u_32ToWord, mo_s_32ToWord,
  mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64
    :: DynFlags -> MachOp

-- Builders between fixed widths; these need no DynFlags.
mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32,
  mo_32To8, mo_32To16
    :: MachOp

-- Word-sized integer arithmetic
mo_wordAdd   = MO_Add    . wordWidth
mo_wordSub   = MO_Sub    . wordWidth
mo_wordEq    = MO_Eq     . wordWidth
mo_wordNe    = MO_Ne     . wordWidth
mo_wordMul   = MO_Mul    . wordWidth
mo_wordSQuot = MO_S_Quot . wordWidth
mo_wordSRem  = MO_S_Rem  . wordWidth
mo_wordSNeg  = MO_S_Neg  . wordWidth
mo_wordUQuot = MO_U_Quot . wordWidth
mo_wordURem  = MO_U_Rem  . wordWidth

-- Word-sized signed comparisons
mo_wordSGe = MO_S_Ge . wordWidth
mo_wordSLe = MO_S_Le . wordWidth
mo_wordSGt = MO_S_Gt . wordWidth
mo_wordSLt = MO_S_Lt . wordWidth

-- Word-sized unsigned comparisons
mo_wordUGe = MO_U_Ge . wordWidth
mo_wordULe = MO_U_Le . wordWidth
mo_wordUGt = MO_U_Gt . wordWidth
mo_wordULt = MO_U_Lt . wordWidth

-- Word-sized bitwise operations and shifts
mo_wordAnd  = MO_And   . wordWidth
mo_wordOr   = MO_Or    . wordWidth
mo_wordXor  = MO_Xor   . wordWidth
mo_wordNot  = MO_Not   . wordWidth
mo_wordShl  = MO_Shl   . wordWidth
mo_wordSShr = MO_S_Shr . wordWidth
mo_wordUShr = MO_U_Shr . wordWidth

-- Widening 8/16 bits to 32 bits (u = unsigned, s = signed conversion)
mo_u_8To32  = MO_UU_Conv W8  W32
mo_s_8To32  = MO_SS_Conv W8  W32
mo_u_16To32 = MO_UU_Conv W16 W32
mo_s_16To32 = MO_SS_Conv W16 W32

-- Converting 8/16/32 bits to the word width
mo_u_8ToWord  = MO_UU_Conv W8  . wordWidth
mo_s_8ToWord  = MO_SS_Conv W8  . wordWidth
mo_u_16ToWord = MO_UU_Conv W16 . wordWidth
mo_s_16ToWord = MO_SS_Conv W16 . wordWidth
mo_s_32ToWord = MO_SS_Conv W32 . wordWidth
mo_u_32ToWord = MO_UU_Conv W32 . wordWidth

-- Converting from the word width; all use the unsigned conversion
mo_WordTo8  = (`MO_UU_Conv` W8)  . wordWidth
mo_WordTo16 = (`MO_UU_Conv` W16) . wordWidth
mo_WordTo32 = (`MO_UU_Conv` W32) . wordWidth
mo_WordTo64 = (`MO_UU_Conv` W64) . wordWidth

-- Narrowing from 32 bits
mo_32To8  = MO_UU_Conv W32 W8
mo_32To16 = MO_UU_Conv W32 W16
207
208
209 -- ----------------------------------------------------------------------------
210 -- isCommutableMachOp
211
-- | 'True' when the arguments of the 'MachOp' may be swapped.  Used by the
-- platform-independent Cmm optimisations.
--
-- When unsure, answer 'False': that only costs some code quality on the
-- native routes and is otherwise harmless.
isCommutableMachOp :: MachOp -> Bool
isCommutableMachOp (MO_Add _)          = True
isCommutableMachOp (MO_Eq _)           = True
isCommutableMachOp (MO_Ne _)           = True
isCommutableMachOp (MO_Mul _)          = True
isCommutableMachOp (MO_S_MulMayOflo _) = True
isCommutableMachOp (MO_U_MulMayOflo _) = True
isCommutableMachOp (MO_And _)          = True
isCommutableMachOp (MO_Or _)           = True
isCommutableMachOp (MO_Xor _)          = True
isCommutableMachOp (MO_F_Add _)        = True
isCommutableMachOp (MO_F_Mul _)        = True
isCommutableMachOp _                   = False
234
235 -- ----------------------------------------------------------------------------
236 -- isAssociativeMachOp
237
-- | 'True' when the 'MachOp' is associative, i.e. @(x+y)+z == x+(y+z)@.
-- Used by the platform-independent Cmm optimisations.
--
-- When unsure, answer 'False': that only costs some code quality on the
-- native routes and is otherwise harmless.
--
-- NB: the floating-point operations are deliberately not included.
isAssociativeMachOp :: MachOp -> Bool
isAssociativeMachOp (MO_Add _) = True
isAssociativeMachOp (MO_Mul _) = True
isAssociativeMachOp (MO_And _) = True
isAssociativeMachOp (MO_Or _)  = True
isAssociativeMachOp (MO_Xor _) = True
isAssociativeMachOp _          = False
254
255 -- ----------------------------------------------------------------------------
256 -- isComparisonMachOp
257
-- | 'True' when the 'MachOp' is a comparison (integer equality, signed,
-- unsigned or floating point).
--
-- When unsure, answer 'False': that only costs some code quality on the
-- native routes and is otherwise harmless.
isComparisonMachOp :: MachOp -> Bool
-- Equality
isComparisonMachOp (MO_Eq _)   = True
isComparisonMachOp (MO_Ne _)   = True
-- Signed
isComparisonMachOp (MO_S_Ge _) = True
isComparisonMachOp (MO_S_Le _) = True
isComparisonMachOp (MO_S_Gt _) = True
isComparisonMachOp (MO_S_Lt _) = True
-- Unsigned
isComparisonMachOp (MO_U_Ge _) = True
isComparisonMachOp (MO_U_Le _) = True
isComparisonMachOp (MO_U_Gt _) = True
isComparisonMachOp (MO_U_Lt _) = True
-- Floating point
isComparisonMachOp (MO_F_Eq _) = True
isComparisonMachOp (MO_F_Ne _) = True
isComparisonMachOp (MO_F_Ge _) = True
isComparisonMachOp (MO_F_Le _) = True
isComparisonMachOp (MO_F_Gt _) = True
isComparisonMachOp (MO_F_Lt _) = True
-- Everything else
isComparisonMachOp _           = False
284
285 -- -----------------------------------------------------------------------------
286 -- Inverting conditions
287
-- | Invert the sense of a comparison, where that is possible.
--
-- Not every conditional test can be inverted.  In particular the
-- floating-point comparisons cannot, because some floating-point values
-- make both senses of a condition False (for example, with NaN operands
-- neither @NaN > NaN@ nor @NaN <= NaN@ holds).  Hence only the integer
-- comparisons yield 'Just'; every other 'MachOp' yields 'Nothing'.
maybeInvertComparison :: MachOp -> Maybe MachOp
maybeInvertComparison (MO_Eq   w) = Just (MO_Ne   w)
maybeInvertComparison (MO_Ne   w) = Just (MO_Eq   w)
maybeInvertComparison (MO_U_Lt w) = Just (MO_U_Ge w)
maybeInvertComparison (MO_U_Gt w) = Just (MO_U_Le w)
maybeInvertComparison (MO_U_Le w) = Just (MO_U_Gt w)
maybeInvertComparison (MO_U_Ge w) = Just (MO_U_Lt w)
maybeInvertComparison (MO_S_Lt w) = Just (MO_S_Ge w)
maybeInvertComparison (MO_S_Gt w) = Just (MO_S_Le w)
maybeInvertComparison (MO_S_Le w) = Just (MO_S_Gt w)
maybeInvertComparison (MO_S_Ge w) = Just (MO_S_Lt w)
maybeInvertComparison _           = Nothing
308
309 -- ----------------------------------------------------------------------------
310 -- machOpResultType
311
{- |
Returns the 'CmmType' of the result of applying a 'MachOp'.

The list holds the types of the operands.  It is consulted only for the
operations whose result takes the type of the first operand ('MO_Add',
'MO_Sub', 'MO_And', 'MO_Or', 'MO_Xor'); for those an empty list is a
compiler bug and is reported with 'pprPanic', naming the operation.
-}
machOpResultType :: DynFlags -> MachOp -> [CmmType] -> CmmType
machOpResultType dflags mop tys =
  case mop of
    MO_Add {}           -> ty1   -- Preserve GC-ptr-hood
    MO_Sub {}           -> ty1   -- of first arg
    MO_Mul r            -> cmmBits r
    MO_S_MulMayOflo r   -> cmmBits r
    MO_S_Quot r         -> cmmBits r
    MO_S_Rem r          -> cmmBits r
    MO_S_Neg r          -> cmmBits r
    MO_U_MulMayOflo r   -> cmmBits r
    MO_U_Quot r         -> cmmBits r
    MO_U_Rem r          -> cmmBits r

    MO_Eq {}            -> comparisonResultRep dflags
    MO_Ne {}            -> comparisonResultRep dflags
    MO_S_Ge {}          -> comparisonResultRep dflags
    MO_S_Le {}          -> comparisonResultRep dflags
    MO_S_Gt {}          -> comparisonResultRep dflags
    MO_S_Lt {}          -> comparisonResultRep dflags

    MO_U_Ge {}          -> comparisonResultRep dflags
    MO_U_Le {}          -> comparisonResultRep dflags
    MO_U_Gt {}          -> comparisonResultRep dflags
    MO_U_Lt {}          -> comparisonResultRep dflags

    MO_F_Add r          -> cmmFloat r
    MO_F_Sub r          -> cmmFloat r
    MO_F_Mul r          -> cmmFloat r
    MO_F_Quot r         -> cmmFloat r
    MO_F_Neg r          -> cmmFloat r
    MO_F_Eq {}          -> comparisonResultRep dflags
    MO_F_Ne {}          -> comparisonResultRep dflags
    MO_F_Ge {}          -> comparisonResultRep dflags
    MO_F_Le {}          -> comparisonResultRep dflags
    MO_F_Gt {}          -> comparisonResultRep dflags
    MO_F_Lt {}          -> comparisonResultRep dflags

    MO_And {}           -> ty1   -- Used for pointer masking
    MO_Or {}            -> ty1
    MO_Xor {}           -> ty1
    MO_Not r            -> cmmBits r
    MO_Shl r            -> cmmBits r
    MO_U_Shr r          -> cmmBits r
    MO_S_Shr r          -> cmmBits r

    MO_SS_Conv _ to     -> cmmBits to
    MO_UU_Conv _ to     -> cmmBits to
    MO_FS_Conv _ to     -> cmmBits to
    MO_SF_Conv _ to     -> cmmFloat to
    MO_FF_Conv _ to     -> cmmFloat to

    MO_V_Insert l w     -> cmmVec l (cmmBits w)
    MO_V_Extract _ w    -> cmmBits w

    MO_V_Add l w        -> cmmVec l (cmmBits w)
    MO_V_Sub l w        -> cmmVec l (cmmBits w)
    MO_V_Mul l w        -> cmmVec l (cmmBits w)

    MO_VS_Quot l w      -> cmmVec l (cmmBits w)
    MO_VS_Rem l w       -> cmmVec l (cmmBits w)
    MO_VS_Neg l w       -> cmmVec l (cmmBits w)

    MO_VF_Insert l w    -> cmmVec l (cmmFloat w)
    MO_VF_Extract _ w   -> cmmFloat w

    MO_VF_Add l w       -> cmmVec l (cmmFloat w)
    MO_VF_Sub l w       -> cmmVec l (cmmFloat w)
    MO_VF_Mul l w       -> cmmVec l (cmmFloat w)
    MO_VF_Quot l w      -> cmmVec l (cmmFloat w)
    MO_VF_Neg l w       -> cmmVec l (cmmFloat w)
  where
    -- Type of the first operand.  Evaluated lazily, so the panic can only
    -- fire for the alternatives above that actually mention ty1.
    ty1 = case tys of
            (t:_) -> t
            []    -> pprPanic "machOpResultType: no argument types for"
                              (text (show mop))
388
-- | The type produced by every comparison 'MachOp': currently 'bWord'.
-- (The original author was unsure whether that is the right choice.)
comparisonResultRep :: DynFlags -> CmmType
comparisonResultRep dflags = bWord dflags  -- is it?
391
392
393 -- -----------------------------------------------------------------------------
394 -- machOpArgReps
395
-- | The argument widths a 'MachOp' expects, one list element per argument.
--
-- This is for debugging only: when linting a CmmExpr, an application of a
-- MachOp is checked to be "type-correct" by comparing the widths of its
-- actual arguments against this list.
machOpArgReps :: DynFlags -> MachOp -> [Width]
machOpArgReps dflags op =
  case op of
    MO_Add r            -> binary r
    MO_Sub r            -> binary r
    MO_Eq r             -> binary r
    MO_Ne r             -> binary r
    MO_Mul r            -> binary r
    MO_S_MulMayOflo r   -> binary r
    MO_S_Quot r         -> binary r
    MO_S_Rem r          -> binary r
    MO_S_Neg r          -> unary r
    MO_U_MulMayOflo r   -> binary r
    MO_U_Quot r         -> binary r
    MO_U_Rem r          -> binary r

    MO_S_Ge r           -> binary r
    MO_S_Le r           -> binary r
    MO_S_Gt r           -> binary r
    MO_S_Lt r           -> binary r

    MO_U_Ge r           -> binary r
    MO_U_Le r           -> binary r
    MO_U_Gt r           -> binary r
    MO_U_Lt r           -> binary r

    MO_F_Add r          -> binary r
    MO_F_Sub r          -> binary r
    MO_F_Mul r          -> binary r
    MO_F_Quot r         -> binary r
    MO_F_Neg r          -> unary r
    MO_F_Eq r           -> binary r
    MO_F_Ne r           -> binary r
    MO_F_Ge r           -> binary r
    MO_F_Le r           -> binary r
    MO_F_Gt r           -> binary r
    MO_F_Lt r           -> binary r

    MO_And r            -> binary r
    MO_Or r             -> binary r
    MO_Xor r            -> binary r
    MO_Not r            -> unary r
    MO_Shl r            -> shift r
    MO_U_Shr r          -> shift r
    MO_S_Shr r          -> shift r

    MO_SS_Conv from _   -> unary from
    MO_UU_Conv from _   -> unary from
    MO_SF_Conv from _   -> unary from
    MO_FS_Conv from _   -> unary from
    MO_FF_Conv from _   -> unary from

    MO_V_Insert l r     -> [intVecWidth l r, r, word]
    MO_V_Extract l r    -> [intVecWidth l r, word]

    MO_V_Add _ r        -> binary r
    MO_V_Sub _ r        -> binary r
    MO_V_Mul _ r        -> binary r

    MO_VS_Quot _ r      -> binary r
    MO_VS_Rem _ r       -> binary r
    MO_VS_Neg _ r       -> unary r

    MO_VF_Insert l r    -> [floatVecWidth l r, r, word]
    MO_VF_Extract l r   -> [floatVecWidth l r, word]

    MO_VF_Add _ r       -> binary r
    MO_VF_Sub _ r       -> binary r
    MO_VF_Mul _ r       -> binary r
    MO_VF_Quot _ r      -> binary r
    MO_VF_Neg _ r       -> unary r
  where
    -- The target's word width
    word = wordWidth dflags

    -- One argument of width r
    unary r = [r]

    -- Two arguments, both of width r
    binary r = [r, r]

    -- A value of width r plus a word-width second argument
    shift r = [r, word]

    -- Total width of a vector of l elements of integer / float width r
    intVecWidth   l r = typeWidth (vec l (cmmBits r))
    floatVecWidth l r = typeWidth (vec l (cmmFloat r))
472
473 -----------------------------------------------------------------------------
474 -- CallishMachOp
475 -----------------------------------------------------------------------------
476
-- | Machine operations that some backends tend to implement as foreign
-- calls, which is why they are kept apart from 'MachOp'.  In Cmm these may
-- only appear in statement position, whereas an ordinary 'MachOp' may
-- appear anywhere in an expression.
data CallishMachOp
  -- 64-bit floating point functions
  = MO_F64_Pwr
  | MO_F64_Sin
  | MO_F64_Cos
  | MO_F64_Tan
  | MO_F64_Sinh
  | MO_F64_Cosh
  | MO_F64_Tanh
  | MO_F64_Asin
  | MO_F64_Acos
  | MO_F64_Atan
  | MO_F64_Log
  | MO_F64_Exp
  | MO_F64_Sqrt

  -- 32-bit floating point functions
  | MO_F32_Pwr
  | MO_F32_Sin
  | MO_F32_Cos
  | MO_F32_Tan
  | MO_F32_Sinh
  | MO_F32_Cosh
  | MO_F32_Tanh
  | MO_F32_Asin
  | MO_F32_Acos
  | MO_F32_Atan
  | MO_F32_Log
  | MO_F32_Exp
  | MO_F32_Sqrt

  | MO_UF_Conv Width

  | MO_S_QuotRem Width
  | MO_U_QuotRem Width
  | MO_U_QuotRem2 Width
  | MO_Add2 Width
  | MO_U_Mul2 Width

  | MO_WriteBarrier
  | MO_Touch          -- Keep variables live (when using interior pointers)

  -- Note that these three MachOps all take one more parameter than the
  -- standard C library versions.  The extra (last) parameter holds the
  -- alignment of the pointers; backends use it for optimisation.
  | MO_Memcpy
  | MO_Memset
  | MO_Memmove

  | MO_PopCnt Width
  deriving (Eq, Show)
529
-- | Pretty-print a 'CallishMachOp' as the text of its derived 'Show' output.
pprCallishMachOp :: CallishMachOp -> SDoc
pprCallishMachOp = text . show
532
-- | Foreign-call hints for a 'CallishMachOp', as a pair of hint lists.
-- An empty list indicates NoHint throughout.
-- NOTE(review): the pair is presumably (result hints, argument hints);
-- confirm against the callers.
callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint])
callishMachOpHints MO_Memcpy  = ([], [AddrHint, AddrHint, NoHint, NoHint])
callishMachOpHints MO_Memset  = ([], [AddrHint, NoHint, NoHint, NoHint])
callishMachOpHints MO_Memmove = ([], [AddrHint, AddrHint, NoHint, NoHint])
callishMachOpHints _          = ([], [])