154eed866eb91f248cb80a4249017ffc363195d8
[ghc.git] / compiler / nativeGen / MachCodeGen.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Generating machine code (instruction selection)
4 --
5 -- (c) The University of Glasgow 1996-2004
6 --
7 -----------------------------------------------------------------------------
8
9 -- This is a big module, but, if you pay attention to
10 -- (a) the sectioning, (b) the type signatures, and
11 -- (c) the #if blah_TARGET_ARCH things, the
12 -- structure should not be too overwhelming.
13
14 module MachCodeGen ( cmmTopCodeGen, InstrBlock ) where
15
16 #include "HsVersions.h"
17 #include "nativeGen/NCG.h"
18 #include "MachDeps.h"
19
20 -- NCG stuff:
21 import MachInstrs
22 import MachRegs
23 import NCGMonad
24 import PositionIndependentCode
25 import RegAllocInfo ( mkBranchInstr )
26
27 -- Our intermediate code:
28 import PprCmm ( pprExpr )
29 import Cmm
30 import MachOp
31 import CLabel
32 import ClosureInfo ( C_SRT(..) )
33
34 -- The rest:
35 import StaticFlags ( opt_PIC )
36 import ForeignCall ( CCallConv(..) )
37 import OrdList
38 import Pretty
39 import Outputable
40 import FastString
41 import FastTypes ( isFastTrue )
42 import Constants ( wORD_SIZE )
43
44 #ifdef DEBUG
45 import Outputable ( assertPanic )
46 import Debug.Trace ( trace )
47 #endif
48
49 import Control.Monad ( mapAndUnzipM )
50 import Data.Maybe ( fromJust )
51 import Data.Bits
52 import Data.Word
53 import Data.Int
54
55 -- -----------------------------------------------------------------------------
56 -- Top-level of the instruction selector
57
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal (pre-order?) yields the insns in the correct
-- order.

type InstrBlock = OrdList Instr
64
-- | Translate one top-level Cmm entity to native code.  For a proc we
-- code-generate each basic block and float any statics the blocks emit
-- out as additional top-level data entities; if a PIC base register is
-- in use, the proc is post-processed to initialize it.  Data sections
-- pass through untranslated (we just reuse CmmStatic).
cmmTopCodeGen :: RawCmmTop -> NatM [NatCmmTop]
cmmTopCodeGen (CmmProc info lab params blocks)
  = do  results <- mapM basicBlockCodeGen blocks
        mb_pic_base <- getPicBaseMaybeNat
        let (blockss, staticss) = unzip results
            new_proc = CmmProc info lab params (concat blockss)
            all_tops = new_proc : concat staticss
        maybe (return all_tops)
              (\pic_base -> initializePicBase pic_base all_tops)
              mb_pic_base

cmmTopCodeGen (CmmData sec dat)
  = return [CmmData sec dat]  -- no translation, we just use CmmStatic
77
-- | Generate native code for a single Cmm basic block.  Instruction
-- selection may introduce new basic-block boundaries (marked with the
-- NEWBLOCK pseudo-instruction) and may emit data (LDATA), so the flat
-- instruction stream is re-split into proper blocks here, and the data
-- floated out as separate top-level entities.
basicBlockCodeGen :: CmmBasicBlock -> NatM ([NatBasicBlock],[NatCmmTop])
basicBlockCodeGen (BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  let
        split instr (cur, done, datas) = case instr of
            NEWBLOCK bid  -> ([], BasicBlock bid cur : done, datas)
            LDATA sec dat -> (cur, done, CmmData sec dat : datas)
            _other        -> (instr : cur, done, datas)

        (entry_instrs, later_blocks, statics)
            = foldrOL split ([],[],[]) instrs
  -- in
  return (BasicBlock id entry_instrs : later_blocks, statics)
96
-- | Generate code for a statement sequence by concatenating the
-- per-statement instruction blocks in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts
  = mapM stmtToInstrs stmts >>= return . concatOL
101
-- | Generate code for a single Cmm statement.  Assignments and stores
-- dispatch on the MachRep of the value: floating-point values and (on
-- 32-bit targets only) 64-bit integers each take a dedicated path; all
-- remaining integer reps go through the generic int code.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatingRep kind -> assignReg_FltCode kind reg src
#if WORD_SIZE_IN_BITS==32
      | kind == I64        -> assignReg_I64Code      reg src
#endif
      | otherwise          -> assignReg_IntCode kind reg src
        where kind = cmmRegRep reg

    CmmStore addr src
      | isFloatingRep kind -> assignMem_FltCode kind addr src
#if WORD_SIZE_IN_BITS==32
      | kind == I64        -> assignMem_I64Code      addr src
#endif
      | otherwise          -> assignMem_IntCode kind addr src
        where kind = cmmExprRep src

    CmmCall target result_regs args _
       -> genCCall target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg id  -> genCondJump id arg
    CmmSwitch arg ids     -> genSwitch arg ids
    -- NOTE(review): the params argument is dropped here; presumably the
    -- live registers are handled elsewhere -- confirm.
    CmmJump arg params    -> genJump arg
130
131 -- -----------------------------------------------------------------------------
132 -- General things for putting together code sequences
133
-- Expand a CmmRegOff into the equivalent explicit addition.  ToDo:
-- should we do it this way around, or convert CmmExprs into CmmRegOff?
-- NB: only defined for the CmmRegOff shape; callers guarantee that.
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree (CmmRegOff reg off)
  = CmmMachOp (MO_Add width) [CmmReg reg, lit_off]
  where
    width   = cmmRegRep reg
    lit_off = CmmLit (CmmInt (fromIntegral off) width)
140
141 -- -----------------------------------------------------------------------------
142 -- Code gen for 64-bit arithmetic on 32-bit platforms
143
144 {-
145 Simple support for generating 64-bit code (ie, 64 bit values and 64
146 bit assignments) on 32-bit platforms. Unlike the main code generator
147 we merely shoot for generating working code as simply as possible, and
148 pay little attention to code quality. Specifically, there is no
149 attempt to deal cleverly with the fixed-vs-floating register
150 distinction; all values are generated into (pairs of) floating
151 registers, even if this would mean some redundant reg-reg moves as a
152 result. Only one of the VRegUniques is returned, since it will be
153 of the VRegUniqueLo form, and the upper-half VReg can be determined
154 by applying getHiVRegFromLo to it.
155 -}
156
-- | Result of selecting code for a 64-bit value on a 32-bit target:
-- the generated code plus the virtual register holding the LOW word.
-- The high word lives in the paired register, recoverable with
-- getHiVRegFromLo.
data ChildCode64        -- a.k.a "Register64"
      = ChildCode64
           InstrBlock   -- code
           Reg          -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified
165
#if WORD_SIZE_IN_BITS==32
-- Store / register-assign a 64-bit value; only needed on 32-bit targets.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
#endif

#ifndef x86_64_TARGET_ARCH
-- Select code for a 64-bit expression (per-arch definitions below).
iselExpr64 :: CmmExpr -> NatM ChildCode64
#endif
174
175 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
176
177 #if i386_TARGET_ARCH
178
-- Store a 64-bit value to memory (x86).
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store: low word at addr, high word at addr+4.
        -- NOTE(review): fromJust assumes addrOffset always absorbs a +4
        -- displacement on x86 -- TODO confirm.
        mov_lo = MOV I32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  -- in
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
190
191
-- | Assign a 64-bit value to a local register pair (x86): evaluate the
-- RHS into a lo/hi virtual-register pair, then move each half across.
assignReg_I64Code (CmmLocal (LocalReg u_dst _pk _)) valueTree = do
   ChildCode64 eval_code src_lo <- iselExpr64 valueTree
   let
         dst_lo = mkVReg u_dst I32
         dst_hi = getHiVRegFromLo dst_lo
         src_hi = getHiVRegFromLo src_lo
         moves  = toOL [ MOV I32 (OpReg src_lo) (OpReg dst_lo)
                       , MOV I32 (OpReg src_hi) (OpReg dst_hi) ]
   -- in
   return (eval_code `appOL` moves)

-- Only local registers can hold a 64-bit value on a 32-bit target.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(i386): invalid lvalue"
207
208 ------------
209
-- | 64-bit integer literal: split into low and high 32-bit words and
-- load each half into its register.
--
-- Bug fix: the high word must be taken by shifting the Integer right
-- by 32 *before* truncating to Word32.  The previous code truncated
-- first ((fromIntegral i `shiftR` 32) :: Word32), which always yields
-- zero, so any literal with a non-zero high word was miscompiled.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV I32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV I32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  -- in
  return (ChildCode64 code rlo)
221
-- Load a 64-bit value from memory (x86, little-endian): low word at
-- addr, high word at addr+4.
iselExpr64 (CmmLoad addrTree I64) = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat I32
   let
        mov_lo = MOV I32 (OpAddr addr) (OpReg rlo)
        -- fromJust: assumes a +4 displacement always fits the amode on
        -- x86 -- TODO confirm.
        mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   -- in
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )
233
-- A 64-bit local register already lives in a lo/hi vreg pair; no code.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
         = return (ChildCode64 nilOL (mkVReg vu I32))
236
-- we handle addition, but rather badly
--
-- Bug fix: as in the literal case above, the addend's high word must
-- be obtained by shifting the Integer right by 32 *before* truncating
-- to Word32; truncating first always produced zero.
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        -- ADD sets the carry consumed by ADC for the high words.
        code =  code1 `appOL`
                toOL [ MOV I32 (OpReg r1lo) (OpReg rlo),
                       ADD I32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV I32 (OpReg r1hi) (OpReg rhi),
                       ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   -- in
   return (ChildCode64 code rlo)
252
-- | General 64-bit addition: evaluate both operands into lo/hi pairs,
-- then add with carry -- ADD on the low words sets the carry flag that
-- ADC consumes when adding the high words.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        add_code = toOL [ MOV I32 (OpReg r1lo) (OpReg rlo)
                        , ADD I32 (OpReg r2lo) (OpReg rlo)
                        , MOV I32 (OpReg r1hi) (OpReg rhi)
                        , ADC I32 (OpReg r2hi) (OpReg rhi) ]
   -- in
   return (ChildCode64 (code1 `appOL` code2 `appOL` add_code) rlo)
268
-- Zero-extend a narrower value to 64 bits: compute the low word, then
-- clear the paired high register.
iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <-  getNewRegNat I32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )
279
-- Anything else is unsupported by this simple 64-bit selector.
iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)
282
283 #endif /* i386_TARGET_ARCH */
284
285 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
286
287 #if sparc_TARGET_ARCH
288
-- | Store a 64-bit value to memory (SPARC).  The address is computed
-- into a register and the two words are stored big-endian: high word
-- at offset 0, low word at offset 4.
--
-- Fix: the previous version also ran getAmode on the address tree and
-- then discarded both the resulting amode and its code (only the
-- getSomeReg result was ever used), wasting work and uniques; the dead
-- binding has been removed.
assignMem_I64Code addrTree valueTree = do
     ChildCode64 vcode rlo <- iselExpr64 valueTree
     (src, code) <- getSomeReg addrTree
     let
         rhi = getHiVRegFromLo rlo
         -- Big-endian store
         mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0))
         mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4))
     return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo)
299
-- Assign a 64-bit value to a local register pair (SPARC).  Moves are
-- synthesised as OR with %g0, the hardware zero register.
-- NOTE(review): LocalReg is matched with two fields here but with
-- three in the i386/ppc code above -- verify this arch still builds.
assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do
     ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
     let
         r_dst_lo = mkVReg u_dst pk
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (vcode `snocOL` mov_hi `snocOL` mov_lo)
-- Only local registers can hold a 64-bit value on a 32-bit target.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(sparc): invalid lvalue"
312
313
314 -- Don't delete this -- it's very handy for debugging.
315 --iselExpr64 expr
316 -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False
317 -- = panic "iselExpr64(???)"
318
iselExpr64 (CmmLoad addrTree I64) = do
     -- NOTE(review): this match is partial -- it fails unless getAmode
     -- returns an AddrRegReg form -- and r2 is then ignored: both words
     -- are loaded relative to r1 alone.  That looks wrong for genuine
     -- reg+reg amodes; confirm against a SPARC build before relying on
     -- this path.
     Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree
     rlo <- getNewRegNat I32
     let rhi = getHiVRegFromLo rlo
         -- Big-endian: high word at offset 0, low word at offset 4.
         mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi
         mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo
     return (
            ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo)
                         rlo
          )
329
-- Read a 64-bit local register: copy both halves of the source vreg
-- pair into a fresh destination pair (OR with %g0 acts as a move).
iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64))) = do
     r_dst_lo <-  getNewRegNat I32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_lo = mkVReg uq I32
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = mkMOV r_src_lo r_dst_lo
         mov_hi = mkMOV r_src_hi r_dst_hi
         mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
     return (
            ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
          )

-- Anything else is unsupported by this simple 64-bit selector.
iselExpr64 expr
   = pprPanic "iselExpr64(sparc)" (ppr expr)
344
345 #endif /* sparc_TARGET_ARCH */
346
347 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
348
349 #if powerpc_TARGET_ARCH
350
-- | Produce addressing modes for the high and low words of a 64-bit
-- memory operand (big-endian: high word first, low word at +4).  When
-- the amode cannot absorb a +4 displacement, force the address into a
-- register and use explicit 0/4 offsets instead.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hi_addr addr_code <- getAmode addrTree
    case addrOffset hi_addr 4 of
        Just lo_addr ->
            return (hi_addr, lo_addr, addr_code)
        Nothing -> do
            (hi_ptr, ptr_code) <- getSomeReg addrTree
            let hi = AddrRegImm hi_ptr (ImmInt 0)
                lo = AddrRegImm hi_ptr (ImmInt 4)
            return (hi, lo, ptr_code)
360
-- Store a 64-bit value to memory (PowerPC).
assignMem_I64Code addrTree valueTree = do
        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
        ChildCode64 vcode rlo <- iselExpr64 valueTree
        let
                rhi = getHiVRegFromLo rlo

                -- Big-endian store: high word at hi_addr, low at lo_addr.
                mov_hi = ST I32 rhi hi_addr
                mov_lo = ST I32 rlo lo_addr
        -- in
        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
372
-- | Assign a 64-bit value to a local register pair (PowerPC), using an
-- MR (move register) instruction for each half.
assignReg_I64Code (CmmLocal (LocalReg u_dst _pk)) valueTree = do
   ChildCode64 eval_code src_lo <- iselExpr64 valueTree
   let
         dst_lo = mkVReg u_dst I32
         dst_hi = getHiVRegFromLo dst_lo
         src_hi = getHiVRegFromLo src_lo
         moves  = toOL [ MR dst_lo src_lo
                       , MR dst_hi src_hi ]
   -- in
   return (eval_code `appOL` moves)

-- Only local registers can hold a 64-bit value on a 32-bit target.
assignReg_I64Code lvalue valueTree
   = panic "assignReg_I64Code(powerpc): invalid lvalue"
388
389
390 -- Don't delete this -- it's very handy for debugging.
391 --iselExpr64 expr
392 -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False
393 -- = panic "iselExpr64(???)"
394
-- Load a 64-bit value (PowerPC, big-endian): high word from hi_addr,
-- low word from lo_addr.
iselExpr64 (CmmLoad addrTree I64) = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat I32
    let mov_hi = LD I32 rhi hi_addr
        mov_lo = LD I32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                         rlo
402
-- A 64-bit local register already lives in a lo/hi vreg pair; no code.
-- NOTE(review): LocalReg is matched with three fields here but two in
-- assignReg_I64Code above; both cannot be right -- verify.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _)))
   = return (ChildCode64 nilOL (mkVReg vu I32))
405
-- | 64-bit integer literal (PowerPC): build each 32-bit half with LIS
-- (load immediate shifted) for its upper 16 bits followed by an
-- OR-immediate for its lower 16 bits.
--
-- Two bug fixes relative to the previous version:
--   * each half-word is extracted by shifting the Integer *before*
--     truncating to Word16 -- truncating first made half1..half3
--     always zero;
--   * half2 is OR'd into rhi, not rlo -- the old code assembled the
--     high word's low half into the wrong register.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat I32
  let
        half0 = fromIntegral (fromIntegral i :: Word16)
        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)

        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
  -- in
  return (ChildCode64 code rlo)
422
-- 64-bit addition: ADDC adds the low words and records the carry,
-- ADDE then adds the high words plus that carry.
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat I32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ ADDC rlo r1lo r2lo,
                       ADDE rhi r1hi r2hi ]
   -- in
   return (ChildCode64 code rlo)
436
-- Anything else is unsupported by this simple 64-bit selector.
iselExpr64 expr
   = pprPanic "iselExpr64(powerpc)" (ppr expr)
439
440 #endif /* powerpc_TARGET_ARCH */
441
442
443 -- -----------------------------------------------------------------------------
444 -- The 'Register' type
445
446 -- 'Register's passed up the tree. If the stix code forces the register
447 -- to live in a pre-decided machine register, it comes out as @Fixed@;
448 -- otherwise, it comes out as @Any@, and the parent can decide which
449 -- register to put it in.
450
data Register
  = Fixed   MachRep Reg InstrBlock      -- value is pinned in this register
  | Any     MachRep (Reg -> InstrBlock) -- caller picks the destination
454
-- | Reinterpret a Register at a different MachRep without generating
-- any code; only the rep tag changes.
swizzleRegisterRep :: Register -> MachRep -> Register
swizzleRegisterRep reg rep = case reg of
  Fixed _ loc code -> Fixed rep loc code
  Any   _ codefn   -> Any   rep codefn
458
459
460 -- -----------------------------------------------------------------------------
461 -- Utils based on getRegister, below
462
-- The dual to getAnyReg: compute an expression into some register --
-- we don't care which one.  Any-shaped results get a fresh temporary.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  result <- getRegister expr
  case result of
    Fixed _ reg code -> return (reg, code)
    Any rep mk_code  -> do
        tmp <- getNewRegNat rep
        return (tmp, mk_code tmp)
474
475 -- -----------------------------------------------------------------------------
476 -- Grab the Reg for a CmmReg
477
-- | Fetch the Reg backing a CmmReg: a virtual register for locals, the
-- real machine register for globals that live in one on this platform.
getRegisterReg :: CmmReg -> Reg

getRegisterReg (CmmLocal (LocalReg u pk _))
  = mkVReg u pk

getRegisterReg (CmmGlobal mid)
  = case get_GlobalReg_reg_or_addr mid of
       Left (RealReg rrno) -> RealReg rrno
       _other -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
          -- By this stage, the only MagicIds remaining should be the
          -- ones which map to a real machine register on this
          -- platform.  Hence ...
490
491
492 -- -----------------------------------------------------------------------------
493 -- Generate code to get a subtree into a Register
494
495 -- Don't delete this -- it's very handy for debugging.
496 --getRegister expr
497 -- | trace ("getRegister: " ++ showSDoc (pprCmmExpr expr)) False
498 -- = panic "getRegister(???)"
499
500 getRegister :: CmmExpr -> NatM Register
501
502 #if !x86_64_TARGET_ARCH
503 -- on x86_64, we have %rip for PicBaseReg, but it's not a full-featured
504 -- register, it can only be used for rip-relative addressing.
505 getRegister (CmmReg (CmmGlobal PicBaseReg))
506 = do
507 reg <- getPicBaseNat wordRep
508 return (Fixed wordRep reg nilOL)
509 #endif
510
511 getRegister (CmmReg reg)
512 = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL)
513
514 getRegister tree@(CmmRegOff _ _)
515 = getRegister (mangleIndexTree tree)
516
517
518 #if WORD_SIZE_IN_BITS==32
519 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
520 -- TO_W_(x), TO_W_(x >> 32)
521
522 getRegister (CmmMachOp (MO_U_Conv I64 I32)
523 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
524 ChildCode64 code rlo <- iselExpr64 x
525 return $ Fixed I32 (getHiVRegFromLo rlo) code
526
527 getRegister (CmmMachOp (MO_S_Conv I64 I32)
528 [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do
529 ChildCode64 code rlo <- iselExpr64 x
530 return $ Fixed I32 (getHiVRegFromLo rlo) code
531
532 getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do
533 ChildCode64 code rlo <- iselExpr64 x
534 return $ Fixed I32 rlo code
535
536 getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do
537 ChildCode64 code rlo <- iselExpr64 x
538 return $ Fixed I32 rlo code
539
540 #endif
541
542 -- end of machine-"independent" bit; here we go on the rest...
543
544 #if alpha_TARGET_ARCH
545
546 getRegister (StDouble d)
547 = getBlockIdNat `thenNat` \ lbl ->
548 getNewRegNat PtrRep `thenNat` \ tmp ->
549 let code dst = mkSeqInstrs [
550 LDATA RoDataSegment lbl [
551 DATA TF [ImmLab (rational d)]
552 ],
553 LDA tmp (AddrImm (ImmCLbl lbl)),
554 LD TF dst (AddrReg tmp)]
555 in
556 return (Any F64 code)
557
558 getRegister (StPrim primop [x]) -- unary PrimOps
559 = case primop of
560 IntNegOp -> trivialUCode (NEG Q False) x
561
562 NotOp -> trivialUCode NOT x
563
564 FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x
565 DoubleNegOp -> trivialUFCode F64 (FNEG TF) x
566
567 OrdOp -> coerceIntCode IntRep x
568 ChrOp -> chrCode x
569
570 Float2IntOp -> coerceFP2Int x
571 Int2FloatOp -> coerceInt2FP pr x
572 Double2IntOp -> coerceFP2Int x
573 Int2DoubleOp -> coerceInt2FP pr x
574
575 Double2FloatOp -> coerceFltCode x
576 Float2DoubleOp -> coerceFltCode x
577
578 other_op -> getRegister (StCall fn CCallConv F64 [x])
579 where
580 fn = case other_op of
581 FloatExpOp -> FSLIT("exp")
582 FloatLogOp -> FSLIT("log")
583 FloatSqrtOp -> FSLIT("sqrt")
584 FloatSinOp -> FSLIT("sin")
585 FloatCosOp -> FSLIT("cos")
586 FloatTanOp -> FSLIT("tan")
587 FloatAsinOp -> FSLIT("asin")
588 FloatAcosOp -> FSLIT("acos")
589 FloatAtanOp -> FSLIT("atan")
590 FloatSinhOp -> FSLIT("sinh")
591 FloatCoshOp -> FSLIT("cosh")
592 FloatTanhOp -> FSLIT("tanh")
593 DoubleExpOp -> FSLIT("exp")
594 DoubleLogOp -> FSLIT("log")
595 DoubleSqrtOp -> FSLIT("sqrt")
596 DoubleSinOp -> FSLIT("sin")
597 DoubleCosOp -> FSLIT("cos")
598 DoubleTanOp -> FSLIT("tan")
599 DoubleAsinOp -> FSLIT("asin")
600 DoubleAcosOp -> FSLIT("acos")
601 DoubleAtanOp -> FSLIT("atan")
602 DoubleSinhOp -> FSLIT("sinh")
603 DoubleCoshOp -> FSLIT("cosh")
604 DoubleTanhOp -> FSLIT("tanh")
605 where
606 pr = panic "MachCode.getRegister: no primrep needed for Alpha"
607
608 getRegister (StPrim primop [x, y]) -- dyadic PrimOps
609 = case primop of
610 CharGtOp -> trivialCode (CMP LTT) y x
611 CharGeOp -> trivialCode (CMP LE) y x
612 CharEqOp -> trivialCode (CMP EQQ) x y
613 CharNeOp -> int_NE_code x y
614 CharLtOp -> trivialCode (CMP LTT) x y
615 CharLeOp -> trivialCode (CMP LE) x y
616
617 IntGtOp -> trivialCode (CMP LTT) y x
618 IntGeOp -> trivialCode (CMP LE) y x
619 IntEqOp -> trivialCode (CMP EQQ) x y
620 IntNeOp -> int_NE_code x y
621 IntLtOp -> trivialCode (CMP LTT) x y
622 IntLeOp -> trivialCode (CMP LE) x y
623
624 WordGtOp -> trivialCode (CMP ULT) y x
625 WordGeOp -> trivialCode (CMP ULE) x y
626 WordEqOp -> trivialCode (CMP EQQ) x y
627 WordNeOp -> int_NE_code x y
628 WordLtOp -> trivialCode (CMP ULT) x y
629 WordLeOp -> trivialCode (CMP ULE) x y
630
631 AddrGtOp -> trivialCode (CMP ULT) y x
632 AddrGeOp -> trivialCode (CMP ULE) y x
633 AddrEqOp -> trivialCode (CMP EQQ) x y
634 AddrNeOp -> int_NE_code x y
635 AddrLtOp -> trivialCode (CMP ULT) x y
636 AddrLeOp -> trivialCode (CMP ULE) x y
637
638 FloatGtOp -> cmpF_code (FCMP TF LE) EQQ x y
639 FloatGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
640 FloatEqOp -> cmpF_code (FCMP TF EQQ) NE x y
641 FloatNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
642 FloatLtOp -> cmpF_code (FCMP TF LTT) NE x y
643 FloatLeOp -> cmpF_code (FCMP TF LE) NE x y
644
645 DoubleGtOp -> cmpF_code (FCMP TF LE) EQQ x y
646 DoubleGeOp -> cmpF_code (FCMP TF LTT) EQQ x y
647 DoubleEqOp -> cmpF_code (FCMP TF EQQ) NE x y
648 DoubleNeOp -> cmpF_code (FCMP TF EQQ) EQQ x y
649 DoubleLtOp -> cmpF_code (FCMP TF LTT) NE x y
650 DoubleLeOp -> cmpF_code (FCMP TF LE) NE x y
651
652 IntAddOp -> trivialCode (ADD Q False) x y
653 IntSubOp -> trivialCode (SUB Q False) x y
654 IntMulOp -> trivialCode (MUL Q False) x y
655 IntQuotOp -> trivialCode (DIV Q False) x y
656 IntRemOp -> trivialCode (REM Q False) x y
657
658 WordAddOp -> trivialCode (ADD Q False) x y
659 WordSubOp -> trivialCode (SUB Q False) x y
660 WordMulOp -> trivialCode (MUL Q False) x y
661 WordQuotOp -> trivialCode (DIV Q True) x y
662 WordRemOp -> trivialCode (REM Q True) x y
663
664 FloatAddOp -> trivialFCode FloatRep (FADD TF) x y
665 FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y
666 FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y
667 FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y
668
669 DoubleAddOp -> trivialFCode F64 (FADD TF) x y
670 DoubleSubOp -> trivialFCode F64 (FSUB TF) x y
671 DoubleMulOp -> trivialFCode F64 (FMUL TF) x y
672 DoubleDivOp -> trivialFCode F64 (FDIV TF) x y
673
674 AddrAddOp -> trivialCode (ADD Q False) x y
675 AddrSubOp -> trivialCode (SUB Q False) x y
676 AddrRemOp -> trivialCode (REM Q True) x y
677
678 AndOp -> trivialCode AND x y
679 OrOp -> trivialCode OR x y
680 XorOp -> trivialCode XOR x y
681 SllOp -> trivialCode SLL x y
682 SrlOp -> trivialCode SRL x y
683
684 ISllOp -> trivialCode SLL x y -- was: panic "AlphaGen:isll"
685 ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra"
686 ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl"
687
688 FloatPowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
689 DoublePowerOp -> getRegister (StCall FSLIT("pow") CCallConv F64 [x,y])
690 where
691 {- ------------------------------------------------------------
692 Some bizarre special code for getting condition codes into
693 registers. Integer non-equality is a test for equality
694 followed by an XOR with 1. (Integer comparisons always set
695 the result register to 0 or 1.) Floating point comparisons of
696 any kind leave the result in a floating point register, so we
697 need to wrangle an integer register out of things.
698 -}
699 int_NE_code :: StixTree -> StixTree -> NatM Register
700
701 int_NE_code x y
702 = trivialCode (CMP EQQ) x y `thenNat` \ register ->
703 getNewRegNat IntRep `thenNat` \ tmp ->
704 let
705 code = registerCode register tmp
706 src = registerName register tmp
707 code__2 dst = code . mkSeqInstr (XOR src (RIImm (ImmInt 1)) dst)
708 in
709 return (Any IntRep code__2)
710
711 {- ------------------------------------------------------------
712 Comments for int_NE_code also apply to cmpF_code
713 -}
714 cmpF_code
715 :: (Reg -> Reg -> Reg -> Instr)
716 -> Cond
717 -> StixTree -> StixTree
718 -> NatM Register
719
720 cmpF_code instr cond x y
721 = trivialFCode pr instr x y `thenNat` \ register ->
722 getNewRegNat F64 `thenNat` \ tmp ->
723 getBlockIdNat `thenNat` \ lbl ->
724 let
725 code = registerCode register tmp
726 result = registerName register tmp
727
728 code__2 dst = code . mkSeqInstrs [
729 OR zeroh (RIImm (ImmInt 1)) dst,
730 BF cond result (ImmCLbl lbl),
731 OR zeroh (RIReg zeroh) dst,
732 NEWBLOCK lbl]
733 in
734 return (Any IntRep code__2)
735 where
736 pr = panic "trivialU?FCode: does not use PrimRep on Alpha"
737 ------------------------------------------------------------
738
739 getRegister (CmmLoad pk mem)
740 = getAmode mem `thenNat` \ amode ->
741 let
742 code = amodeCode amode
743 src = amodeAddr amode
744 size = primRepToSize pk
745 code__2 dst = code . mkSeqInstr (LD size dst src)
746 in
747 return (Any pk code__2)
748
749 getRegister (StInt i)
750 | fits8Bits i
751 = let
752 code dst = mkSeqInstr (OR zeroh (RIImm src) dst)
753 in
754 return (Any IntRep code)
755 | otherwise
756 = let
757 code dst = mkSeqInstr (LDI Q dst src)
758 in
759 return (Any IntRep code)
760 where
761 src = ImmInt (fromInteger i)
762
763 getRegister leaf
764 | isJust imm
765 = let
766 code dst = mkSeqInstr (LDA dst (AddrImm imm__2))
767 in
768 return (Any PtrRep code)
769 where
770 imm = maybeImm leaf
771 imm__2 = case imm of Just x -> x
772
773 #endif /* alpha_TARGET_ARCH */
774
775 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
776
777 #if i386_TARGET_ARCH
778
779 getRegister (CmmLit (CmmFloat f F32)) = do
780 lbl <- getNewLabelNat
781 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
782 Amode addr addr_code <- getAmode dynRef
783 let code dst =
784 LDATA ReadOnlyData
785 [CmmDataLabel lbl,
786 CmmStaticLit (CmmFloat f F32)]
787 `consOL` (addr_code `snocOL`
788 GLD F32 addr dst)
789 -- in
790 return (Any F32 code)
791
792
793 getRegister (CmmLit (CmmFloat d F64))
794 | d == 0.0
795 = let code dst = unitOL (GLDZ dst)
796 in return (Any F64 code)
797
798 | d == 1.0
799 = let code dst = unitOL (GLD1 dst)
800 in return (Any F64 code)
801
802 | otherwise = do
803 lbl <- getNewLabelNat
804 dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
805 Amode addr addr_code <- getAmode dynRef
806 let code dst =
807 LDATA ReadOnlyData
808 [CmmDataLabel lbl,
809 CmmStaticLit (CmmFloat d F64)]
810 `consOL` (addr_code `snocOL`
811 GLD F64 addr dst)
812 -- in
813 return (Any F64 code)
814
815 #endif /* i386_TARGET_ARCH */
816
817 #if x86_64_TARGET_ARCH
818
819 getRegister (CmmLit (CmmFloat 0.0 rep)) = do
820 let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst))
821 -- I don't know why there are xorpd, xorps, and pxor instructions.
822 -- They all appear to do the same thing --SDM
823 return (Any rep code)
824
825 getRegister (CmmLit (CmmFloat f rep)) = do
826 lbl <- getNewLabelNat
827 let code dst = toOL [
828 LDATA ReadOnlyData
829 [CmmDataLabel lbl,
830 CmmStaticLit (CmmFloat f rep)],
831 MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
832 ]
833 -- in
834 return (Any rep code)
835
836 #endif /* x86_64_TARGET_ARCH */
837
838 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
839
840 -- catch simple cases of zero- or sign-extended load
841 getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do
842 code <- intLoadCode (MOVZxL I8) addr
843 return (Any I32 code)
844
845 getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do
846 code <- intLoadCode (MOVSxL I8) addr
847 return (Any I32 code)
848
849 getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do
850 code <- intLoadCode (MOVZxL I16) addr
851 return (Any I32 code)
852
853 getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do
854 code <- intLoadCode (MOVSxL I16) addr
855 return (Any I32 code)
856
857 #endif
858
859 #if x86_64_TARGET_ARCH
860
861 -- catch simple cases of zero- or sign-extended load
862 getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do
863 code <- intLoadCode (MOVZxL I8) addr
864 return (Any I64 code)
865
866 getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do
867 code <- intLoadCode (MOVSxL I8) addr
868 return (Any I64 code)
869
870 getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do
871 code <- intLoadCode (MOVZxL I16) addr
872 return (Any I64 code)
873
874 getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do
875 code <- intLoadCode (MOVSxL I16) addr
876 return (Any I64 code)
877
878 getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do
879 code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend
880 return (Any I64 code)
881
882 getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do
883 code <- intLoadCode (MOVSxL I32) addr
884 return (Any I64 code)
885
886 #endif
887
888 #if x86_64_TARGET_ARCH
889 getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
890 CmmLit displacement])
891 = return $ Any I64 (\dst -> unitOL $
892 LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
893 #endif
894
895 #if x86_64_TARGET_ARCH
896 getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do
897 x_code <- getAnyReg x
898 lbl <- getNewLabelNat
899 let
900 code dst = x_code dst `appOL` toOL [
901 -- This is how gcc does it, so it can't be that bad:
902 LDATA ReadOnlyData16 [
903 CmmAlign 16,
904 CmmDataLabel lbl,
905 CmmStaticLit (CmmInt 0x80000000 I32),
906 CmmStaticLit (CmmInt 0 I32),
907 CmmStaticLit (CmmInt 0 I32),
908 CmmStaticLit (CmmInt 0 I32)
909 ],
910 XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
911 -- xorps, so we need the 128-bit constant
912 -- ToDo: rip-relative
913 ]
914 --
915 return (Any F32 code)
916
917 getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do
918 x_code <- getAnyReg x
919 lbl <- getNewLabelNat
920 let
921 -- This is how gcc does it, so it can't be that bad:
922 code dst = x_code dst `appOL` toOL [
923 LDATA ReadOnlyData16 [
924 CmmAlign 16,
925 CmmDataLabel lbl,
926 CmmStaticLit (CmmInt 0x8000000000000000 I64),
927 CmmStaticLit (CmmInt 0 I64)
928 ],
929 -- gcc puts an unpck here. Wonder if we need it.
930 XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst)
931 -- xorpd, so we need the 128-bit constant
932 ]
933 --
934 return (Any F64 code)
935 #endif
936
937 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
938
-- Unary MachOps for i386/x86_64: negation, bitwise complement, and the
-- integer/float width conversions.  Conversions that need no code are
-- handled with conversionNop (the Register is just re-tagged).
getRegister (CmmMachOp mop [x]) -- unary MachOps
  = case mop of
#if i386_TARGET_ARCH
      -- i386 float negation goes through the x87 GNEG pseudo-instr
      MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x
      MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x
#endif

      MO_S_Neg rep -> trivialUCode rep (NEGI rep) x
      MO_Not rep   -> trivialUCode rep (NOT  rep) x

      -- Nop conversions
      MO_U_Conv I32 I8  -> toI8Reg  I32 x
      MO_S_Conv I32 I8  -> toI8Reg  I32 x
      MO_U_Conv I16 I8  -> toI8Reg  I16 x
      MO_S_Conv I16 I8  -> toI8Reg  I16 x
      MO_U_Conv I32 I16 -> toI16Reg I32 x
      MO_S_Conv I32 I16 -> toI16Reg I32 x
#if x86_64_TARGET_ARCH
      MO_U_Conv I64 I32 -> conversionNop I64 x
      MO_S_Conv I64 I32 -> conversionNop I64 x
      MO_U_Conv I64 I16 -> toI16Reg I64 x
      MO_S_Conv I64 I16 -> toI16Reg I64 x
      MO_U_Conv I64 I8  -> toI8Reg  I64 x
      MO_S_Conv I64 I8  -> toI8Reg  I64 x
#endif

      MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x
      MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x

      -- widenings
      MO_U_Conv I8  I32 -> integerExtend I8  I32 MOVZxL x
      MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x
      MO_U_Conv I8  I16 -> integerExtend I8  I16 MOVZxL x

      MO_S_Conv I8  I32 -> integerExtend I8  I32 MOVSxL x
      MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x
      MO_S_Conv I8  I16 -> integerExtend I8  I16 MOVSxL x

#if x86_64_TARGET_ARCH
      MO_U_Conv I8  I64 -> integerExtend I8  I64 MOVZxL x
      MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x
      MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x
      MO_S_Conv I8  I64 -> integerExtend I8  I64 MOVSxL x
      MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x
      MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.
#endif

#if i386_TARGET_ARCH
      -- no code needed on i386 (see conversionNop); the x87 handles
      -- both widths with the same register contents
      MO_S_Conv F32 F64 -> conversionNop F64 x
      MO_S_Conv F64 F32 -> conversionNop F32 x
#else
      MO_S_Conv F32 F64 -> coerceFP2FP F64 x
      MO_S_Conv F64 F32 -> coerceFP2FP F32 x
#endif

      MO_S_Conv from to
        | isFloatingRep from -> coerceFP2Int from to x
        | isFloatingRep to   -> coerceInt2FP from to x

      other -> pprPanic "getRegister" (pprMachOp mop)
  where
    -- signed or unsigned extension; I8 sources must come from a
    -- byte-addressable register (getByteReg), anything wider can come
    -- from any register.
    integerExtend from to instr expr = do
        (reg,e_code) <- if from == I8 then getByteReg expr
                                      else getSomeReg expr
        let
            code dst =
              e_code `snocOL`
              instr from (OpReg reg) (OpReg dst)
        return (Any to code)

    toI8Reg new_rep expr
      = do codefn <- getAnyReg expr
           return (Any new_rep codefn)
        -- HACK: use getAnyReg to get a byte-addressable register.
        -- If the source was a Fixed register, this will add the
        -- mov instruction to put it into the desired destination.
        -- We're assuming that the destination won't be a fixed
        -- non-byte-addressable register; it won't be, because all
        -- fixed registers are word-sized.

    toI16Reg = toI8Reg -- for now

    -- no instructions emitted: just re-tag the Register with its new rep
    conversionNop new_rep expr
      = do e_code <- getRegister expr
           return (swizzleRegisterRep e_code new_rep)
1030
-- Dyadic MachOps for i386/x86_64: comparisons, arithmetic, logic and
-- shifts.  8-bit operands should never reach here (hence the ASSERT);
-- 8-bit arithmetic is done in full-width registers.
getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps
  = ASSERT2(cmmExprRep x /= I8, pprExpr e)
    case mop of
      MO_Eq F32   -> condFltReg EQQ x y
      MO_Ne F32   -> condFltReg NE x y
      MO_S_Gt F32 -> condFltReg GTT x y
      MO_S_Ge F32 -> condFltReg GE x y
      MO_S_Lt F32 -> condFltReg LTT x y
      MO_S_Le F32 -> condFltReg LE x y

      MO_Eq F64   -> condFltReg EQQ x y
      MO_Ne F64   -> condFltReg NE x y
      MO_S_Gt F64 -> condFltReg GTT x y
      MO_S_Ge F64 -> condFltReg GE x y
      MO_S_Lt F64 -> condFltReg LTT x y
      MO_S_Le F64 -> condFltReg LE x y

      MO_Eq rep   -> condIntReg EQQ x y
      MO_Ne rep   -> condIntReg NE x y

      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      MO_U_Gt rep -> condIntReg GU x y
      MO_U_Ge rep -> condIntReg GEU x y
      MO_U_Lt rep -> condIntReg LU x y
      MO_U_Le rep -> condIntReg LEU x y

#if i386_TARGET_ARCH
      -- x87-style float arithmetic on i386
      MO_Add F32 -> trivialFCode F32 GADD x y
      MO_Sub F32 -> trivialFCode F32 GSUB x y

      MO_Add F64 -> trivialFCode F64 GADD x y
      MO_Sub F64 -> trivialFCode F64 GSUB x y

      MO_S_Quot F32 -> trivialFCode F32 GDIV x y
      MO_S_Quot F64 -> trivialFCode F64 GDIV x y
#endif

#if x86_64_TARGET_ARCH
      -- plain ADD/SUB/FDIV forms on x86_64
      MO_Add F32 -> trivialFCode F32 ADD x y
      MO_Sub F32 -> trivialFCode F32 SUB x y

      MO_Add F64 -> trivialFCode F64 ADD x y
      MO_Sub F64 -> trivialFCode F64 SUB x y

      MO_S_Quot F32 -> trivialFCode F32 FDIV x y
      MO_S_Quot F64 -> trivialFCode F64 FDIV x y
#endif

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

#if i386_TARGET_ARCH
      MO_Mul F32 -> trivialFCode F32 GMUL x y
      MO_Mul F64 -> trivialFCode F64 GMUL x y
#endif

#if x86_64_TARGET_ARCH
      MO_Mul F32 -> trivialFCode F32 MUL x y
      MO_Mul F64 -> trivialFCode F64 MUL x y
#endif

      MO_Mul rep -> let op = IMUL rep in
                    trivialCode rep op (Just op) x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_And rep -> let op = AND rep in
                    trivialCode rep op (Just op) x y
      MO_Or  rep -> let op = OR  rep in
                    trivialCode rep op (Just op) x y
      MO_Xor rep -> let op = XOR rep in
                    trivialCode rep op (Just op) x y

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep   -> shift_code rep (SHL rep) x y {-False-}
      MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-}
      MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-}

      other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    -- Overflow test for signed multiply: IMUL2 leaves the wide product
    -- in %edx:%eax; we sign-extend the low word in place (SAR) and
    -- subtract the high word, so eax ends up 0 iff no overflow.
    imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt  = case rep of
                            I32 -> 31
                            I64 -> 63
                            _ -> panic "shift_amt"

             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 rep (OpReg a_reg),   -- result in %edx:%eax
                           SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB rep (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
                        ]
         -- in
         return (Fixed rep eax code)

    --------------------
    shift_code :: MachRep
               -> (Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code rep instr x y@(CmmLit lit) = do
          x_code <- getAnyReg x
          let
               code dst
                  = x_code dst `snocOL`
                    instr (OpImm (litToImm lit)) (OpReg dst)
          -- in
          return (Any rep code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, becuase the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code rep instr x y{-amount-} = do
        x_code <- getAnyReg x
        tmp <- getNewRegNat rep
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr (OpReg ecx) (OpReg tmp)
        -- in
        return (Fixed rep tmp code)

    --------------------
    -- Additions with a small literal use the three-operand LEA form.
    add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | not (is64BitInteger y) = add_int rep x y
    add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y

    --------------------
    -- Subtraction of a small literal becomes addition of its negation.
    sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | not (is64BitInteger (-y)) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB rep) Nothing x y

    -- our three-operand add instruction:
    add_int rep x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA rep
                     (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                     (OpReg dst)
        --
        return (Any rep code)

    ----------------------
    -- Divide/remainder: dividend is widened into %edx:%eax (CLTD when
    -- signed, zeroing %edx when unsigned), then IDIV/DIV; quotient
    -- lands in %eax and remainder in %edx, so the result is Fixed.
    div_code rep signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             widen | signed    = CLTD rep
                   | otherwise = XOR rep (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr rep y_op]

             result | quotient  = eax
                    | otherwise = edx

           -- in
           return (Fixed rep result code)
1234
-- Floating-point loads: x87-style GLD on i386, a plain MOV on x86_64.
getRegister (CmmLoad mem pk)
  | isFloatingRep pk
  = do
    Amode src mem_code <- getAmode mem
    let
        code dst = mem_code `snocOL`
                   IF_ARCH_i386(GLD pk src dst,
                                MOV pk (OpAddr src) (OpReg dst))
    --
    return (Any pk code)
1245
#if i386_TARGET_ARCH
-- Integer loads on i386 (I64 loads are handled by other code).
getRegister (CmmLoad mem pk)
  | pk /= I64
  = do
    code <- intLoadCode (instr pk) mem
    return (Any pk code)
  where
        instr I8  = MOVZxL pk
        instr I16 = MOV I16
        instr I32 = MOV I32
        -- we always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.
#endif
1262
#if x86_64_TARGET_ARCH
-- Simpler memory load code on x86_64: every rep uses a plain MOV.
getRegister (CmmLoad mem pk)
  = do
    code <- intLoadCode (MOV pk) mem
    return (Any pk code)
#endif
1270
-- Zero literal: xor the destination with itself instead of loading 0.
getRegister (CmmLit (CmmInt 0 rep))
  = let
        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        adj_rep = case rep of I64 -> I32; _ -> rep
        rep1 = IF_ARCH_i386( rep, adj_rep )
        code dst
           = unitOL (XOR rep1 (OpReg dst) (OpReg dst))
    in
        return (Any rep code)
1280
#if x86_64_TARGET_ARCH
  -- optimisation for loading small literals on x86_64: take advantage
  -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
  -- instruction forms are shorter.
getRegister (CmmLit lit)
  | I64 <- cmmLitRep lit, not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst))
    in
        return (Any I64 code)
  where
   -- "big" means it does not fit in an unsigned 32-bit field
   isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as is64BitLit, because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).
#endif
1301
-- Catch-all literal case: move the immediate straight into the
-- destination register at the literal's own rep.
getRegister (CmmLit lit)
  = return (Any rep code)
  where
    rep      = cmmLitRep lit
    imm      = litToImm lit
    code dst = unitOL (MOV rep (OpImm imm) (OpReg dst))

-- Anything not matched above is a bug in the caller or this selector.
getRegister other = pprPanic "getRegister(x86)" (ppr other)
1312
-- | Build a destination-parameterised load: compute the addressing
-- mode, then apply the given load instruction to it.
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
    Amode src mem_code <- getAmode mem
    let load dst = mem_code `snocOL` instr (OpAddr src) (OpReg dst)
    return load
1318
-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = getRegister expr >>= anyReg

-- Turn a Register into a destination-parameterised instruction block:
-- an Any already is one; a Fixed needs a trailing reg-to-reg move.
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code) = return code
anyReg (Fixed rep reg fcode) =
    return (\dst -> fcode `snocOL` reg2reg rep reg dst)
1329
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
#if x86_64_TARGET_ARCH
getByteReg = getSomeReg -- all regs are byte-addressable on x86_64
#else
getByteReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        -- fresh virtual regs are fine: they get byte-capable real regs later
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        | isVirtualReg reg -> return (reg,code)
        | otherwise -> do
            -- a fixed real reg (e.g. esi/edi) may lack an 8-bit variant;
            -- copy it into a fresh temporary to be safe
            tmp <- getNewRegNat rep
            return (tmp, code `snocOL` reg2reg rep reg tmp)
        -- ToDo: could optimise slightly by checking for byte-addressable
        -- real registers, but that will happen very rarely if at all.
#endif
1351
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only free regs can be clobbered
        | RealReg rr <- reg, isFastTrue (freeReg rr) -> do
                -- copy the clobberable fixed reg into a fresh temp
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)
1368
-- Register-to-register move; on i386 floating values need the GMOV
-- pseudo-instruction rather than an integer MOV.
reg2reg :: MachRep -> Reg -> Reg -> Instr
reg2reg rep src dst
#if i386_TARGET_ARCH
  | isFloatingRep rep = GMOV src dst
#endif
  | otherwise = MOV rep (OpReg src) (OpReg dst)
1375
1376 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1377
1378 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1379
1380 #if sparc_TARGET_ARCH
1381
-- Float literals on sparc: place the constant in a read-only data
-- section and load it via a SETHI/LD pair addressing the new label.
getRegister (CmmLit (CmmFloat f F32)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            LDATA ReadOnlyData
                        [CmmDataLabel lbl,
                         CmmStaticLit (CmmFloat f F32)],
            SETHI (HI (ImmCLbl lbl)) dst,
            LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
    return (Any F32 code)

-- Same scheme for double literals.
getRegister (CmmLit (CmmFloat d F64)) = do
    lbl <- getNewLabelNat
    let code dst = toOL [
            LDATA ReadOnlyData
                        [CmmDataLabel lbl,
                         CmmStaticLit (CmmFloat d F64)],
            SETHI (HI (ImmCLbl lbl)) dst,
            LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst]
    return (Any F64 code)
1401
-- Unary MachOps on sparc.  Integer negate/complement are synthesised
-- from SUB/XNOR against %g0 (always zero).
getRegister (CmmMachOp mop [x]) -- unary MachOps
  = case mop of
      MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x
      MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x

      MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x
      MO_Not rep   -> trivialUCode rep (XNOR False g0) x

      MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8))

      MO_U_Conv F64 F32 -> coerceDbl2Flt x
      MO_U_Conv F32 F64 -> coerceFlt2Dbl x

      MO_S_Conv F32 I32 -> coerceFP2Int F32 I32 x
      MO_S_Conv I32 F32 -> coerceInt2FP I32 F32 x
      MO_S_Conv F64 I32 -> coerceFP2Int F64 I32 x
      MO_S_Conv I32 F64 -> coerceInt2FP I32 F64 x

      -- Conversions which are a nop on sparc
      MO_U_Conv from to
        | from == to -> conversionNop to x
      MO_U_Conv I32 to -> conversionNop to x
      MO_S_Conv I32 to -> conversionNop to x

      -- widenings
      MO_U_Conv I8 I32  -> integerExtend False I8 I32 x
      MO_U_Conv I16 I32 -> integerExtend False I16 I32 x
      MO_U_Conv I8 I16  -> integerExtend False I8 I16 x
      MO_S_Conv I16 I32 -> integerExtend True I16 I32 x

      other_op -> panic "Unknown unary mach op"
    where
      -- XXX SLL/SRL?
      -- widen via a shift-by-zero (SRA for signed, SRL for unsigned)
      integerExtend signed from to expr = do
         (reg, e_code) <- getSomeReg expr
         let
            code dst =
               e_code `snocOL`
               ((if signed then SRA else SRL)
                  reg (RIImm (ImmInt 0)) dst)
         return (Any to code)
      -- no code emitted: just re-tag the Register with its new rep
      conversionNop new_rep expr
        = do e_code <- getRegister expr
             return (swizzleRegisterRep e_code new_rep)
1446
-- Dyadic MachOps on sparc.
getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
  = case mop of
      MO_Eq F32   -> condFltReg EQQ x y
      MO_Ne F32   -> condFltReg NE x y

      MO_S_Gt F32 -> condFltReg GTT x y
      MO_S_Ge F32 -> condFltReg GE x y
      MO_S_Lt F32 -> condFltReg LTT x y
      MO_S_Le F32 -> condFltReg LE x y

      MO_Eq F64   -> condFltReg EQQ x y
      MO_Ne F64   -> condFltReg NE x y

      MO_S_Gt F64 -> condFltReg GTT x y
      MO_S_Ge F64 -> condFltReg GE x y
      MO_S_Lt F64 -> condFltReg LTT x y
      MO_S_Le F64 -> condFltReg LE x y

      MO_Eq rep   -> condIntReg EQQ x y
      MO_Ne rep   -> condIntReg NE x y

      MO_S_Gt rep -> condIntReg GTT x y
      MO_S_Ge rep -> condIntReg GE x y
      MO_S_Lt rep -> condIntReg LTT x y
      MO_S_Le rep -> condIntReg LE x y

      -- NOTE(review): these I32 unsigned comparisons use the *signed*
      -- conditions (GTT/GE/LTT/LE), unlike the I16 cases just below
      -- which use the unsigned GU/GEU/LU/LEU -- looks suspicious;
      -- confirm whether this is intentional.
      MO_U_Gt I32 -> condIntReg GTT x y
      MO_U_Ge I32 -> condIntReg GE x y
      MO_U_Lt I32 -> condIntReg LTT x y
      MO_U_Le I32 -> condIntReg LE x y

      MO_U_Gt I16 -> condIntReg GU x y
      MO_U_Ge I16 -> condIntReg GEU x y
      MO_U_Lt I16 -> condIntReg LU x y
      MO_U_Le I16 -> condIntReg LEU x y

      MO_Add I32 -> trivialCode I32 (ADD False False) x y
      MO_Sub I32 -> trivialCode I32 (SUB False False) x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y
{-
      -- ToDo: teach about V8+ SPARC div instructions
      MO_S_Quot I32 -> idiv FSLIT(".div") x y
      MO_S_Rem I32  -> idiv FSLIT(".rem") x y
      MO_U_Quot I32 -> idiv FSLIT(".udiv") x y
      MO_U_Rem I32  -> idiv FSLIT(".urem") x y
-}
      MO_Add F32    -> trivialFCode F32 FADD x y
      MO_Sub F32    -> trivialFCode F32 FSUB x y
      MO_Mul F32    -> trivialFCode F32 FMUL x y
      MO_S_Quot F32 -> trivialFCode F32 FDIV x y

      MO_Add F64    -> trivialFCode F64 FADD x y
      MO_Sub F64    -> trivialFCode F64 FSUB x y
      MO_Mul F64    -> trivialFCode F64 FMUL x y
      MO_S_Quot F64 -> trivialFCode F64 FDIV x y

      MO_And rep -> trivialCode rep (AND False) x y
      MO_Or rep  -> trivialCode rep (OR  False) x y
      MO_Xor rep -> trivialCode rep (XOR False) x y

      MO_Mul rep -> trivialCode rep (SMUL False) x y

      MO_Shl rep   -> trivialCode rep SLL x y
      MO_U_Shr rep -> trivialCode rep SRL x y
      MO_S_Shr rep -> trivialCode rep SRA x y

{-
      MO_F32_Pwr  -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
                                         [promote x, promote y])
                       where promote x = CmmMachOp MO_F32_to_Dbl [x]
      MO_F64_Pwr -> getRegister (StCall (Left FSLIT("pow")) CCallConv F64
                                        [x, y])
-}
      other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y])

    --------------------
    -- Overflow test for signed multiply: SMUL leaves the high word in
    -- %y (read via RDY); sign-extend the low word with SRA and
    -- subtract the high word, so dst is 0 iff no overflow.
    imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getSomeReg a
         (b_reg, b_code) <- getSomeReg b
         res_lo <- getNewRegNat I32
         res_hi <- getNewRegNat I32
         let
            shift_amt  = case rep of
                          I32 -> 31
                          I64 -> 63
                          _ -> panic "shift_amt"
            code dst = a_code `appOL` b_code `appOL`
                       toOL [
                           SMUL False a_reg (RIReg b_reg) res_lo,
                           RDY res_hi,
                           SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
                           SUB False False res_lo (RIReg res_hi) dst
                        ]
         return (Any I32 code)
1545
-- sparc memory load via the computed amode.
getRegister (CmmLoad mem pk) = do
    Amode src code <- getAmode mem
    let
        code__2 dst = code `snocOL` LD pk src dst
    return (Any pk code__2)

-- Small integer literal: synthesise with "or %g0, imm, dst".
getRegister (CmmLit (CmmInt i _))
  | fits13Bits i
  = let
        src = ImmInt (fromInteger i)
        code dst = unitOL (OR False g0 (RIImm src) dst)
    in
        return (Any I32 code)

-- General literal: SETHI for the high bits, OR in the low bits.
getRegister (CmmLit lit)
  = let rep = cmmLitRep lit
        imm = litToImm lit
        code dst = toOL [
            SETHI (HI imm) dst,
            OR False dst (RIImm (LO imm)) dst]
    in return (Any I32 code)
1567
1568 #endif /* sparc_TARGET_ARCH */
1569
1570 #if powerpc_TARGET_ARCH
-- PowerPC memory loads; I64 loads are handled elsewhere.
getRegister (CmmLoad mem pk)
  | pk /= I64
  = do
        Amode addr addr_code <- getAmode mem
        let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk)
                       addr_code `snocOL` LD pk dst addr
        return (Any pk code)

-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode mem
    return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr))

-- Note: there is no Load Byte Arithmetic instruction, so no signed case here

getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode mem
    return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr))

getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode mem
    return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr))
1593
-- Unary MachOps on PowerPC.
getRegister (CmmMachOp mop [x]) -- unary MachOps
  = case mop of
      MO_Not rep -> trivialUCode rep NOT x

      MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x
      MO_S_Conv F32 F64 -> conversionNop F64 x

      MO_S_Conv from to
        | from == to         -> conversionNop to x
        | isFloatingRep from -> coerceFP2Int from to x
        | isFloatingRep to   -> coerceInt2FP from to x

        -- narrowing is a nop: we treat the high bits as undefined
      MO_S_Conv I32 to -> conversionNop to x
      MO_S_Conv I16 I8 -> conversionNop I8 x
      MO_S_Conv I8 to  -> trivialUCode to (EXTS I8) x
      MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x

      MO_U_Conv from to
        | from == to -> conversionNop to x
        -- narrowing is a nop: we treat the high bits as undefined
      MO_U_Conv I32 to -> conversionNop to x
      MO_U_Conv I16 I8 -> conversionNop I8 x
      -- unsigned widening is done by masking off the high bits
      MO_U_Conv I8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 I32))
      MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32))

      MO_S_Neg F32 -> trivialUCode F32 FNEG x
      MO_S_Neg F64 -> trivialUCode F64 FNEG x
      MO_S_Neg rep -> trivialUCode rep NEG x

    where
        -- no code emitted: just re-tag the Register with its new rep
        conversionNop new_rep expr
          = do e_code <- getRegister expr
               return (swizzleRegisterRep e_code new_rep)
1628
-- Dyadic MachOps on PowerPC.  Sub-word integer operands are first
-- widened to I32 with extendSExpr/extendUExpr where it matters.
getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
  = case mop of
      MO_Eq F32   -> condFltReg EQQ x y
      MO_Ne F32   -> condFltReg NE x y

      MO_S_Gt F32 -> condFltReg GTT x y
      MO_S_Ge F32 -> condFltReg GE x y
      MO_S_Lt F32 -> condFltReg LTT x y
      MO_S_Le F32 -> condFltReg LE x y

      MO_Eq F64   -> condFltReg EQQ x y
      MO_Ne F64   -> condFltReg NE x y

      MO_S_Gt F64 -> condFltReg GTT x y
      MO_S_Ge F64 -> condFltReg GE x y
      MO_S_Lt F64 -> condFltReg LTT x y
      MO_S_Le F64 -> condFltReg LE x y

      MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
      MO_Ne rep -> condIntReg NE  (extendUExpr rep x) (extendUExpr rep y)

      MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Ge rep -> condIntReg GE  (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Le rep -> condIntReg LE  (extendSExpr rep x) (extendSExpr rep y)

      MO_U_Gt rep -> condIntReg GU  (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Lt rep -> condIntReg LU  (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)

      MO_Add F32    -> trivialCodeNoImm F32 (FADD F32) x y
      MO_Sub F32    -> trivialCodeNoImm F32 (FSUB F32) x y
      MO_Mul F32    -> trivialCodeNoImm F32 (FMUL F32) x y
      MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y

      MO_Add F64    -> trivialCodeNoImm F64 (FADD F64) x y
      MO_Sub F64    -> trivialCodeNoImm F64 (FSUB F64) x y
      MO_Mul F64    -> trivialCodeNoImm F64 (FMUL F64) x y
      MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y

         -- optimize addition with 32-bit immediate
         -- (needed for PIC)
      MO_Add I32 ->
        case y of
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm)
            -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep)
          CmmLit lit
            -> do
                -- full 32-bit literal: add high half with ADDIS, then the
                -- low half with an immediate ADD
                (src, srcCode) <- getSomeReg x
                let imm = litToImm lit
                    code dst = srcCode `appOL` toOL [
                                    ADDIS dst src (HA imm),
                                    ADD dst dst (RIImm (LO imm))
                                 ]
                return (Any I32 code)
          _ -> trivialCode I32 True ADD x y

      MO_Add rep -> trivialCode rep True ADD x y
      MO_Sub rep ->
        case y of    -- subfi ('substract from' with immediate) doesn't exist
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
          _ -> trivialCodeNoImm rep SUBF y x

      MO_Mul rep -> trivialCode rep True MULLW x y

      MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y

      MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented"
      MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"

      MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y)
      MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y)

      MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
      MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)

      MO_And rep -> trivialCode rep False AND x y
      MO_Or rep  -> trivialCode rep False OR x y
      MO_Xor rep -> trivialCode rep False XOR x y

      MO_Shl rep   -> trivialCode rep False SLW x y
      MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
      MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
1714
-- Small integer literal on PowerPC: a single load-immediate.
getRegister (CmmLit (CmmInt i rep))
  | Just imm <- makeImmediate rep True i
  = let
        code dst = unitOL (LI dst imm)
    in
        return (Any rep code)

-- Float literal: place the constant in read-only data and load it
-- through a (possibly PIC-indirected) reference to the new label.
getRegister (CmmLit (CmmFloat f frep)) = do
    lbl <- getNewLabelNat
    dynRef <- cmmMakeDynamicReference addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    let code dst =
            LDATA ReadOnlyData [CmmDataLabel lbl,
                                CmmStaticLit (CmmFloat f frep)]
            `consOL` (addr_code `snocOL` LD frep dst addr)
    return (Any frep code)

-- General literal: LIS for the high half, OR in the low half.
getRegister (CmmLit lit)
  = let rep = cmmLitRep lit
        imm = litToImm lit
        code dst = toOL [
              LIS dst (HI imm),
              OR dst dst (RIImm (LO imm))
          ]
    in return (Any rep code)

getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
1742
-- extend?Rep: wrap an integer expression of width rep in a signed or
-- unsigned conversion up to I32; a no-op when it is already I32.
extendSExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_S_Conv rep I32) [x]

extendUExpr rep x
  | rep == I32 = x
  | otherwise  = CmmMachOp (MO_U_Conv rep I32) [x]
1749
1750 #endif /* powerpc_TARGET_ARCH */
1751
1752
1753 -- -----------------------------------------------------------------------------
1754 -- The 'Amode' type: Memory addressing modes passed up the tree.
1755
-- | An addressing mode paired with the instructions that compute the
-- registers it refers to (see the Rule of the Game below).
data Amode = Amode AddrMode InstrBlock
1757
1758 {-
1759 Now, given a tree (the argument to an CmmLoad) that references memory,
1760 produce a suitable addressing mode.
1761
1762 A Rule of the Game (tm) for Amodes: use of the addr bit must
1763 immediately follow use of the code part, since the code part puts
1764 values in registers which the addr then refers to. So you can't put
1765 anything in between, lest it overwrite some of those registers. If
1766 you need to do some other computation between the code part and use of
1767 the addr bit, first store the effective address from the amode in a
1768 temporary, then do the other computation, and then use the temporary:
1769
1770 code
1771 LEA amode, tmp
1772 ... other computation ...
1773 ... (tmp) ...
1774 -}
1775
-- | Turn an address-computing expression (the argument of a CmmLoad)
-- into an addressing mode plus setup code.
getAmode :: CmmExpr -> NatM Amode
-- normalise CmmRegOff into the MachOp form first
getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
1778
1779 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1780
1781 #if alpha_TARGET_ARCH
1782
-- Alpha amodes.  NOTE(review): this section still uses the old Stix
-- constructors (StPrim/StInt) and thenNat style -- presumably dead
-- code kept only under alpha_TARGET_ARCH; confirm before relying on it.
getAmode (StPrim IntSubOp [x, StInt i])
  = getNewRegNat PtrRep         `thenNat` \ tmp ->
    getRegister x               `thenNat` \ register ->
    let
        code = registerCode register tmp
        reg  = registerName register tmp
        off  = ImmInt (-(fromInteger i))
    in
    return (Amode (AddrRegImm reg off) code)

getAmode (StPrim IntAddOp [x, StInt i])
  = getNewRegNat PtrRep         `thenNat` \ tmp ->
    getRegister x               `thenNat` \ register ->
    let
        code = registerCode register tmp
        reg  = registerName register tmp
        off  = ImmInt (fromInteger i)
    in
    return (Amode (AddrRegImm reg off) code)

-- a leaf with a usable immediate needs no setup code
getAmode leaf
  | isJust imm
  = return (Amode (AddrImm imm__2) id)
  where
    imm    = maybeImm leaf
    imm__2 = case imm of Just x -> x

getAmode other
  = getNewRegNat PtrRep         `thenNat` \ tmp ->
    getRegister other           `thenNat` \ register ->
    let
        code = registerCode register tmp
        reg  = registerName register tmp
    in
    return (Amode (AddrReg reg) code)
1818
1819 #endif /* alpha_TARGET_ARCH */
1820
1821 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1822
1823 #if x86_64_TARGET_ARCH
1824
-- x86_64: PIC-base plus literal displacement becomes a RIP-relative
-- address with no setup code at all.
getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg),
                                  CmmLit displacement])
  = return $ Amode (ripRel (litToImm displacement)) nilOL
1828
1829 #endif
1830
1831 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1832
-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
-- base minus small literal -> base + negative displacement
getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)])
  | not (is64BitLit lit)
  -- ASSERT(rep == I32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- base plus small literal -> base + displacement
getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)])
  | not (is64BitLit lit)
  -- ASSERT(rep == I32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (fromInteger i)
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                  b@(CmmLit _)])
  = getAmode (CmmMachOp (MO_Add rep) [b,a])

-- base + (index << shift) with shift 0..3 maps onto the SIB
-- base + index*scale form
getAmode (CmmMachOp (MO_Add rep) [x, CmmMachOp (MO_Shl _)
                                      [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0
1859
-- base + ((index << shift) + offset) maps onto base + index*scale + disp,
-- provided the shift is a legal scale and the displacement fits 32 bits.
getAmode (CmmMachOp (MO_Add rep)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
     && not (is64BitInteger offset)
        -- NB: the parentheses matter.  (&&) binds tighter than (||), so
        -- without them the is64BitInteger check applied only to the
        -- shift == 3 alternative, letting a >32-bit displacement through
        -- for shifts 0-2 even though x86 disp fields are 32-bit.
  = x86_complex_amode x y shift offset
1867
-- plain sum of two expressions: base + index*1
getAmode (CmmMachOp (MO_Add rep) [x,y])
  = x86_complex_amode x y 0 0

-- a small literal can be used directly as an absolute address
getAmode (CmmLit lit) | not (is64BitLit lit)
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

-- fallback: evaluate the address into a register
getAmode expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1877
1878
-- | Build a base + index*scale + displacement amode.  shift must be
-- 0..3 (the callers guarantee this); it selects scale 1, 2, 4 or 8.
x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
 = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
      (y_reg, y_code) <- getSomeReg index
      let
          code  = x_code `appOL` y_code
          -- scale factor from the shift count (partial: callers only
          -- ever pass 0..3)
          scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8
          amode = AddrBaseIndex (EABaseReg x_reg)
                                (EAIndex y_reg scale)
                                (ImmInt (fromIntegral offset))
      return (Amode amode code)
1890
1891 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
1892
1893 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1894
1895 #if sparc_TARGET_ARCH
1896
-- sparc amodes: reg + 13-bit signed immediate, reg + reg, or a
-- literal synthesised with SETHI/LO.
getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit (CmmInt i _)])
  | fits13Bits (-i)
  = do
       (reg, code) <- getSomeReg x
       let
         off  = ImmInt (-(fromInteger i))
       return (Amode (AddrRegImm reg off) code)


getAmode (CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt i _)])
  | fits13Bits i
  = do
       (reg, code) <- getSomeReg x
       let
         off  = ImmInt (fromInteger i)
       return (Amode (AddrRegImm reg off) code)

getAmode (CmmMachOp (MO_Add rep) [x, y])
  = do
    (regX, codeX) <- getSomeReg x
    (regY, codeY) <- getSomeReg y
    let
        code = codeX `appOL` codeY
    return (Amode (AddrRegReg regX regY) code)

-- XXX Is this same as "leaf" in Stix?
getAmode (CmmLit lit)
  = do
      tmp <- getNewRegNat I32
      let
          -- high bits into tmp, low bits become the immediate offset
          code = unitOL (SETHI (HI imm__2) tmp)
      return (Amode (AddrRegImm tmp (LO imm__2)) code)
  where
      imm__2 = litToImm lit

getAmode other
  = do
      (reg, code) <- getSomeReg other
      let
          off  = ImmInt 0
      return (Amode (AddrRegImm reg off) code)
1938
1939 #endif /* sparc_TARGET_ARCH */
1940
1941 #ifdef powerpc_TARGET_ARCH
-- Subtraction of a constant that fits a signed 16-bit immediate
-- (makeImmediate I32 True): fold it, negated, into the displacement.
getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate I32 True (-i)
  = do
        (reg, code) <- getSomeReg x
        return (Amode (AddrRegImm reg off) code)


-- Addition of a constant that fits the immediate field.
getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate I32 True i
  = do
        (reg, code) <- getSomeReg x
        return (Amode (AddrRegImm reg off) code)

-- optimize addition with 32-bit immediate
-- (needed for PIC): ADDIS adds the high-adjusted half into a
-- temporary, the low half goes into the displacement.
getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit])
  = do
        tmp <- getNewRegNat I32
        (src, srcCode) <- getSomeReg x
        let imm = litToImm lit
            code = srcCode `snocOL` ADDIS tmp src (HA imm)
        return (Amode (AddrRegImm tmp (LO imm)) code)

-- A bare literal address: LIS loads the high-adjusted half, the low
-- half goes into the displacement.
getAmode (CmmLit lit)
  = do
        tmp <- getNewRegNat I32
        let imm = litToImm lit
            code = unitOL (LIS tmp (HA imm))
        return (Amode (AddrRegImm tmp (LO imm)) code)

-- Sum of two arbitrary expressions: reg+reg addressing.
getAmode (CmmMachOp (MO_Add I32) [x, y])
  = do
        (regX, codeX) <- getSomeReg x
        (regY, codeY) <- getSomeReg y
        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))

-- Fallback: evaluate into a register, zero displacement.
getAmode other
  = do
        (reg, code) <- getSomeReg other
        let
            off = ImmInt 0
        return (Amode (AddrRegImm reg off) code)
1984 #endif /* powerpc_TARGET_ARCH */
1985
1986 -- -----------------------------------------------------------------------------
1987 -- getOperand: sometimes any operand will do.
1988
1989 -- getNonClobberedOperand: the value of the operand will remain valid across
1990 -- the computation of an arbitrary expression, unless the expression
1991 -- is computed directly into a register which the operand refers to
1992 -- (see trivialCode where this function is used for an example).
1993
1994 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
1995
getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
#if x86_64_TARGET_ARCH
-- A non-zero float literal is emitted into a read-only data section
-- and addressed rip-relative (see isSuitableFloatingPointLit).
getNonClobberedOperand (CmmLit lit)
  | isSuitableFloatingPointLit lit = do
    lbl <- getNewLabelNat
    let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
                                           CmmStaticLit lit])
    return (OpAddr (ripRel (ImmCLbl lbl)), code)
#endif
-- A small, non-float literal becomes an immediate operand (immediates
-- cannot be clobbered).
getNonClobberedOperand (CmmLit lit)
  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) =
    return (OpImm (litToImm lit), nilOL)
-- A load can be a memory operand, but if the amode mentions a real
-- register (which arbitrary code could clobber) we first LEA the
-- address into a fresh virtual register and address through that.
getNonClobberedOperand (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
    Amode src mem_code <- getAmode mem
    (src',save_code) <-
        if (amodeCouldBeClobbered src)
                then do
                   tmp <- getNewRegNat wordRep
                   return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                           unitOL (LEA I32 (OpAddr src) (OpReg tmp)))
                else
                   return (src, nilOL)
    return (OpAddr src', save_code `appOL` mem_code)
-- Otherwise: evaluate into a non-clobberable register.
getNonClobberedOperand e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)
2023
-- | Could the location named by this addressing mode change while an
-- arbitrary expression is computed?  True iff the mode mentions a
-- real (allocatable) register.
amodeCouldBeClobbered :: AddrMode -> Bool
amodeCouldBeClobbered amode = or (map regClobbered (addrModeRegs amode))

-- A real register that the allocator hands out freely may be
-- overwritten by intervening code; virtual registers are safe.
regClobbered (RealReg rr) = isFastTrue (freeReg rr)
regClobbered _            = False
2029
2030 -- getOperand: the operand is not required to remain valid across the
2031 -- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
#if x86_64_TARGET_ARCH
-- A non-zero float literal lives in a read-only data section and is
-- addressed rip-relative (see isSuitableFloatingPointLit).
getOperand (CmmLit lit)
  | isSuitableFloatingPointLit lit = do
    lbl <- getNewLabelNat
    let code = unitOL (LDATA ReadOnlyData [CmmDataLabel lbl,
                                           CmmStaticLit lit])
    return (OpAddr (ripRel (ImmCLbl lbl)), code)
#endif
-- A small, non-float literal becomes an immediate.
getOperand (CmmLit lit)
  | not (is64BitLit lit) && not (isFloatingRep (cmmLitRep lit)) = do
    return (OpImm (litToImm lit), nilOL)
-- A load can be used as a memory operand directly (on i386 only for
-- non-float, non-64-bit loads; the x86_64 branch of the macro always
-- allows it).
getOperand (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
    Amode src mem_code <- getAmode mem
    return (OpAddr src, mem_code)
-- Otherwise: evaluate into any register.
getOperand e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)
2051
-- | Can this expression be used as an instruction operand without
-- first being evaluated into a register?
isOperand :: CmmExpr -> Bool
isOperand expr = case expr of
    CmmLoad _ _ -> True
    CmmLit lit  -> not (is64BitLit lit)
                   || isSuitableFloatingPointLit lit
    _           -> False
2057
-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit lit
  = case lit of
      CmmFloat f _ -> f /= 0.0
      _            -> False
2064
-- | Like getOperand, but the result must stay valid across arbitrary
-- computation, so the register case uses getNonClobberedReg.
getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem (CmmLoad mem pk)
  | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do
    Amode src mem_code <- getAmode mem
    return (OpAddr src, mem_code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)
2073
#if x86_64_TARGET_ARCH
-- Only an I64 integer literal that doesn't fit in a signed 32-bit
-- immediate counts as "64-bit" here.
is64BitLit (CmmInt i I64) = is64BitInteger i
-- assume that labels are in the range 0-2^31-1: this assumes the
-- small memory model (see gcc docs, -mcmodel=small).
#endif
-- On i386 (and for all other literal forms) nothing is 64-bit.
is64BitLit x = False
2080 #endif
2081
-- | True when the value, truncated to 64 bits, does not fit in a
-- signed 32-bit immediate.
is64BitInteger :: Integer -> Bool
is64BitInteger i = i64 < -0x80000000 || i64 > 0x7fffffff
  where
    -- a CmmInt is intended to be truncated to the appropriate
    -- number of bits, so here we truncate it to Int64.  This is
    -- important because e.g. -1 as a CmmInt might be either
    -- -1 or 18446744073709551615.
    i64 = fromIntegral i :: Int64
2089
2090 -- -----------------------------------------------------------------------------
2091 -- The 'CondCode' type: Condition codes passed up the tree.
2092
-- | A condition code plus the instructions that set it.  The Bool is
-- True when the condition came from a floating-point comparison
-- (condFltCode builds CondCode True, condIntCode CondCode False);
-- some backends branch differently on float conditions.
data CondCode = CondCode Bool Cond InstrBlock

-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode
2098
2099 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2100
2101 #if alpha_TARGET_ARCH
2102 getCondCode = panic "MachCode.getCondCode: not on Alphas"
2103 #endif /* alpha_TARGET_ARCH */
2104
2105 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2106
2107 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH
2108 -- yes, they really do seem to want exactly the same!
2109
-- Dispatch on the comparison operator: F32/F64 comparisons go to
-- condFltCode, everything else to condIntCode with the matching
-- condition constructor.
getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_Eq F32 -> condFltCode EQQ x y
      MO_Ne F32 -> condFltCode NE x y

      MO_S_Gt F32 -> condFltCode GTT x y
      MO_S_Ge F32 -> condFltCode GE x y
      MO_S_Lt F32 -> condFltCode LTT x y
      MO_S_Le F32 -> condFltCode LE x y

      MO_Eq F64 -> condFltCode EQQ x y
      MO_Ne F64 -> condFltCode NE x y

      MO_S_Gt F64 -> condFltCode GTT x y
      MO_S_Ge F64 -> condFltCode GE x y
      MO_S_Lt F64 -> condFltCode LTT x y
      MO_S_Le F64 -> condFltCode LE x y

      MO_Eq rep -> condIntCode EQQ x y
      MO_Ne rep -> condIntCode NE x y

      MO_S_Gt rep -> condIntCode GTT x y
      MO_S_Ge rep -> condIntCode GE x y
      MO_S_Lt rep -> condIntCode LTT x y
      MO_S_Le rep -> condIntCode LE x y

      MO_U_Gt rep -> condIntCode GU x y
      MO_U_Ge rep -> condIntCode GEU x y
      MO_U_Lt rep -> condIntCode LU x y
      MO_U_Le rep -> condIntCode LEU x y

      other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))

-- Conditions are only ever built from two-argument comparison ops.
getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
2145
2146 #elif powerpc_TARGET_ARCH
2147
2148 -- almost the same as everywhere else - but we need to
2149 -- extend small integers to 32 bit first
2150
-- As above, but integer operands are first widened to 32 bits
-- (sign-extended for signed compares, zero-extended for unsigned and
-- equality) because the PowerPC compare instructions work on full
-- registers.
getCondCode (CmmMachOp mop [x, y])
  = case mop of
      MO_Eq F32 -> condFltCode EQQ x y
      MO_Ne F32 -> condFltCode NE x y

      MO_S_Gt F32 -> condFltCode GTT x y
      MO_S_Ge F32 -> condFltCode GE x y
      MO_S_Lt F32 -> condFltCode LTT x y
      MO_S_Le F32 -> condFltCode LE x y

      MO_Eq F64 -> condFltCode EQQ x y
      MO_Ne F64 -> condFltCode NE x y

      MO_S_Gt F64 -> condFltCode GTT x y
      MO_S_Ge F64 -> condFltCode GE x y
      MO_S_Lt F64 -> condFltCode LTT x y
      MO_S_Le F64 -> condFltCode LE x y

      MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
      MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)

      MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)

      MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)

      other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)

getCondCode other = panic "getCondCode(2)(powerpc)"
2185
2186
2187 #endif
2188
2189
2190 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
2191 -- passed back up the tree.
2192
-- Turn a comparison of two expressions into a CondCode to be passed
-- back up the tree; condIntCode for integer, condFltCode for float.
condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

#if alpha_TARGET_ARCH
condIntCode = panic "MachCode.condIntCode: not on Alphas"
condFltCode = panic "MachCode.condFltCode: not on Alphas"
#endif /* alpha_TARGET_ARCH */
2199
2200 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2201 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2202
-- memory vs immediate: compare the loaded location against the
-- literal directly, no register needed.
condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
                  CMP pk (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero: TEST reg,reg sets the flags without an immediate.
condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
                  TEST pk (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand: x goes in a non-clobberable register because
-- it must survive the computation of y.
condIntCode cond x y | isOperand y = do
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
                  CMP (cmmExprRep x) y_op (OpReg x_reg)
    -- in
    return (CondCode False cond code)

-- anything vs anything: y in a non-clobberable register, x as
-- register-or-memory.
condIntCode cond x y = do
  (y_reg, y_code) <- getNonClobberedReg y
  (x_op, x_code) <- getRegOrMem x
  let
        code = y_code `appOL`
               x_code `snocOL`
                  CMP (cmmExprRep x) (OpReg y_reg) x_op
  -- in
  return (CondCode False cond code)
2242 #endif
2243
2244 #if i386_TARGET_ARCH
-- x87 float comparison: only the listed conditions are supported.
condFltCode cond x y
  = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
    (x_reg, x_code) <- getNonClobberedReg x
    (y_reg, y_code) <- getSomeReg y
    let
        code = x_code `appOL` y_code `snocOL`
                  GCMP cond x_reg y_reg
    -- The GCMP insn does the test and sets the zero flag if comparable
    -- and true. Hence we always supply EQQ as the condition to test.
    return (CondCode True EQQ code)
2255 #endif /* i386_TARGET_ARCH */
2256
2257 #if x86_64_TARGET_ARCH
2258 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
2259 -- an operand, but the right must be a reg. We can probably do better
2260 -- than this general case...
-- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
-- an operand, but the right must be a reg.  We can probably do better
-- than this general case...
condFltCode cond x y = do
  (x_reg, x_code) <- getNonClobberedReg x
  (y_op, y_code) <- getOperand y
  let
        code = x_code `appOL`
                  y_code `snocOL`
                  CMP (cmmExprRep x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
  -- in
  return (CondCode True (condToUnsigned cond) code)
2272 #endif
2273
2274 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2275
2276 #if sparc_TARGET_ARCH
2277
-- A compare on SPARC is a subtract into the zero register (g0) with
-- the condition-codes bit set (the second Bool argument of SUB).
condIntCode cond x (CmmLit (CmmInt y rep))
  | fits13Bits y
  = do
       (src1, code) <- getSomeReg x
       let
           src2 = ImmInt (fromInteger y)
           code' = code `snocOL` SUB False True src1 (RIImm src2) g0
       return (CondCode False cond code')

condIntCode cond x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let
        code__2 = code1 `appOL` code2 `snocOL`
                  SUB False True src1 (RIReg src2) g0
    return (CondCode False cond code__2)
2294
2295 -----------
-- Float comparison: if the two operands have different reps, the F32
-- one is first promoted to F64 (via the tmp register) so FCMP sees
-- matching precision.
condFltCode cond x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp <- getNewRegNat F64
    let
        promote x = FxTOy F32 F64 x tmp

        pk1   = cmmExprRep x
        pk2   = cmmExprRep y

        code__2 =
                if pk1 == pk2 then
                    code1 `appOL` code2 `snocOL`
                    FCMP True pk1 src1 src2
                else if pk1 == F32 then
                    code1 `snocOL` promote src1 `appOL` code2 `snocOL`
                    FCMP True F64 tmp src2
                else
                    code1 `appOL` code2 `snocOL` promote src2 `snocOL`
                    FCMP True F64 src1 tmp
    return (CondCode True cond code__2)
2317
2318 #endif /* sparc_TARGET_ARCH */
2319
2320 #if powerpc_TARGET_ARCH
-- ###FIXME: I16 and I8!
-- Immediate form: only if the literal fits the (signed or unsigned,
-- depending on the condition) immediate field.
condIntCode cond x (CmmLit (CmmInt y rep))
  | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
  = do
        (src1, code) <- getSomeReg x
        let
            code' = code `snocOL`
                (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2)
        return (CondCode False cond code')

condIntCode cond x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let
        code' = code1 `appOL` code2 `snocOL`
                  (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2)
    return (CondCode False cond code')

condFltCode cond x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let
        code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
        code'' = case cond of -- twiddle CR to handle unordered case
                    GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
                    LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
                    _ -> code'
                 where
                    ltbit = 0 ; eqbit = 2 ; gtbit = 1
    return (CondCode True cond code'')
2351
2352 #endif /* powerpc_TARGET_ARCH */
2353
2354 -- -----------------------------------------------------------------------------
2355 -- Generating assignments
2356
2357 -- Assignments are really at the heart of the whole code generation
2358 -- business. Almost all top-level nodes of any real importance are
2359 -- assignments, which correspond to loads, stores, or register
2360 -- transfers. If we're really lucky, some of the register transfers
2361 -- will go away, because we can use the destination register to
2362 -- complete the code generation for the right hand side. This only
2363 -- fails when the right hand side is forced into a fixed register
2364 -- (e.g. the result of a call).
2365
2366 assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2367 assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2368
2369 assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock
2370 assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock
2371
2372 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2373
2374 #if alpha_TARGET_ARCH
2375
-- Store to memory: evaluate the amode and the source, then ST.
-- (Old-style Alpha code using thenNat chains; presumably predates the
-- do-notation used elsewhere in this file.)
assignIntCode pk (CmmLoad dst _) src
  = getNewRegNat IntRep         `thenNat` \ tmp ->
    getAmode dst                `thenNat` \ amode ->
    getRegister src             `thenNat` \ register ->
    let
        code1   = amodeCode amode []
        dst__2  = amodeAddr amode
        code2   = registerCode register tmp []
        src__2  = registerName register tmp
        sz      = primRepToSize pk
        code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
    in
    return code__2

-- Register destination: if the source is in a fixed register, copy it
-- across with OR; otherwise the source code already targets dst.
assignIntCode pk dst src
  = getRegister dst                         `thenNat` \ register1 ->
    getRegister src                         `thenNat` \ register2 ->
    let
        dst__2  = registerName register1 zeroh
        code    = registerCode register2 dst__2
        src__2  = registerName register2 dst__2
        code__2 = if isFixed register2
                  then code . mkSeqInstr (OR src__2 (RIReg src__2) dst__2)
                  else code
    in
    return code__2
2402
2403 #endif /* alpha_TARGET_ARCH */
2404
2405 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2406
2407 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2408
2409 -- integer assignment to memory
2410
2411 -- specific case of adding/subtracting an integer to a particular address.
2412 -- ToDo: catch other cases where we can use an operation directly on a memory
2413 -- address.
-- Read-modify-write peephole: x := x `op` const becomes a single
-- ADD/SUB directly on the memory location (only when the immediate
-- fits on 64-bit targets).
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
   | addr == addr2, pk /= I64 || not (is64BitInteger i),
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode pk addr src = do
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV pk op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    -- a small literal becomes an immediate operand directly
    get_op_RI (CmmLit lit) | not (is64BitLit lit)
      = return (nilOL, OpImm (litToImm lit))
    -- otherwise evaluate into a register that survives code_addr
    get_op_RI op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)
2449
2450
-- Assign; dst is a reg, rhs is mem: use a load instruction directly.
assignReg_IntCode pk reg (CmmLoad src _) = do
  mk_insns <- intLoadCode (MOV pk) src
  return (mk_insns (getRegisterReg reg))

-- dst is a reg, but src could be anything: generate code that puts
-- the value into the destination register.
assignReg_IntCode pk reg src = do
  mk_insns <- getAnyReg src
  return (mk_insns (getRegisterReg reg))
2460
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2462
2463 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2464
2465 #if sparc_TARGET_ARCH
2466
-- Store: evaluate the source into a register, then ST it.
assignMem_IntCode pk addr src = do
    (srcReg, code) <- getSomeReg src
    Amode dstAddr addr_code <- getAmode addr
    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr

-- Register destination: if the source landed in a fixed register,
-- copy it into dst with an OR through g0.
-- BUG FIX: the Fixed case read  OR False g0 (RIReg dst) freg , which
-- (with the SPARC destination-last operand convention used elsewhere
-- in this file, e.g.  SUB False True src1 ... g0  in condIntCode)
-- copies dst into freg -- the wrong direction.  Copy freg into dst.
assignReg_IntCode pk reg src = do
    r <- getRegister src
    return $ case r of
        Any _ code -> code dst
        Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
    where
      dst = getRegisterReg reg
2479
2480
2481 #endif /* sparc_TARGET_ARCH */
2482
2483 #if powerpc_TARGET_ARCH
2484
-- Store: evaluate the source into a register, then ST it.
assignMem_IntCode pk addr src = do
    (srcReg, code) <- getSomeReg src
    Amode dstAddr addr_code <- getAmode addr
    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr

-- dst is a reg, but src could be anything; a Fixed source register is
-- copied across with MR (move register).
assignReg_IntCode pk reg src
    = do
        r <- getRegister src
        return $ case r of
            Any _ code -> code dst
            Fixed _ freg fcode -> fcode `snocOL` MR dst freg
    where
        dst = getRegisterReg reg
2499
2500 #endif /* powerpc_TARGET_ARCH */
2501
2502
2503 -- -----------------------------------------------------------------------------
2504 -- Floating-point assignments
2505
2506 #if alpha_TARGET_ARCH
2507
-- Float store to memory: same shape as assignIntCode's store case.
assignFltCode pk (CmmLoad dst _) src
  = getNewRegNat pk             `thenNat` \ tmp ->
    getAmode dst                `thenNat` \ amode ->
    getRegister src             `thenNat` \ register ->
    let
        code1   = amodeCode amode []
        dst__2  = amodeAddr amode
        code2   = registerCode register tmp []
        src__2  = registerName register tmp
        sz      = primRepToSize pk
        code__2 = asmSeqThen [code1, code2] . mkSeqInstr (ST sz src__2 dst__2)
    in
    return code__2

-- Float register destination: a fixed source register is copied
-- across with FMOV.
assignFltCode pk dst src
  = getRegister dst                         `thenNat` \ register1 ->
    getRegister src                         `thenNat` \ register2 ->
    let
        dst__2  = registerName register1 zeroh
        code    = registerCode register2 dst__2
        src__2  = registerName register2 dst__2
        code__2 = if isFixed register2
                  then code . mkSeqInstr (FMOV src__2 dst__2)
                  else code
    in
    return code__2
2534
2535 #endif /* alpha_TARGET_ARCH */
2536
2537 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2538
2539 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2540
-- Floating point assignment to memory: i386 uses the x87 store (GST),
-- x86_64 an SSE register-to-memory MOV.
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  let
        code = src_code `appOL`
               addr_code `snocOL`
                IF_ARCH_i386(GST pk src_reg addr,
                             MOV pk (OpReg src_reg) (OpAddr addr))
  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode pk reg src = do
  src_code <- getAnyReg src
  return (src_code (getRegisterReg reg))
2556
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2558
2559 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2560
2561 #if sparc_TARGET_ARCH
2562
-- Floating point assignment to memory: if the source's rep differs
-- from the target rep, convert (FxTOy) through a temporary first.
assignMem_FltCode pk addr src = do
    Amode dst__2 code1 <- getAmode addr
    (src__2, code2) <- getSomeReg src
    tmp1 <- getNewRegNat pk
    let
        pk__2   = cmmExprRep src
        code__2 = code1 `appOL` code2 `appOL`
            if pk == pk__2
                then unitOL (ST pk src__2 dst__2)
                else toOL [FxTOy pk__2 pk src__2 tmp1, ST pk tmp1 dst__2]
    return code__2
2575
-- Floating point assignment to a register/temporary
-- BUG FIX: the Fixed case used to emit  FMOV pk freg v1 , moving the
-- result into a fresh temporary (v1) that was never read afterwards,
-- so the value never reached the destination register.  Move straight
-- into dst instead, mirroring the register-copy pattern of the
-- integer and PowerPC (MR dst freg) cases.
assignReg_FltCode pk reg src = do
  r <- getRegister src
  return $ case r of
    Any _ code         -> code dst
    Fixed _ freg fcode -> fcode `snocOL` FMOV pk freg dst
  where
    dst = getRegisterReg reg
2586
2587 #endif /* sparc_TARGET_ARCH */
2588
2589 #if powerpc_TARGET_ARCH
2590
-- Easy, isn't it?  The integer versions take the MachRep as an
-- argument, so they handle float reps too on this target.
assignMem_FltCode = assignMem_IntCode
assignReg_FltCode = assignReg_IntCode
2594
2595 #endif /* powerpc_TARGET_ARCH */
2596
2597
2598 -- -----------------------------------------------------------------------------
-- Generating a non-local jump
2600
2601 -- (If applicable) Do not fill the delay slots here; you will confuse the
2602 -- register allocator.
2603
2604 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
2605
2606 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2607
2608 #if alpha_TARGET_ARCH
2609
-- Local (assembler-temporary) labels can be reached with a plain BR;
-- anything else loads the address into pv and jumps through it.
genJump (CmmLabel lbl)
  | isAsmTemp lbl = returnInstr (BR target)
  | otherwise     = returnInstrs [LDA pv (AddrImm target), JMP zeroh (AddrReg pv) 0]
  where
    target = ImmCLbl lbl

-- Computed jump: evaluate the target into pv and JMP through it.
genJump tree
  = getRegister tree                `thenNat` \ register ->
    getNewRegNat PtrRep             `thenNat` \ tmp ->
    let
        dst    = registerName register pv
        code   = registerCode register pv
        target = registerName register pv
    in
    if isFixed register then
        returnSeq code [OR dst (RIReg dst) pv, JMP zeroh (AddrReg pv) 0]
    else
    return (code . mkSeqInstr (JMP zeroh (AddrReg pv) 0))
2628
2629 #endif /* alpha_TARGET_ARCH */
2630
2631 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2632
2633 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
2634
-- | Unconditional non-local jump: through memory, to an immediate
-- target, or via a register.
genJump (CmmLoad mem pk) = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target))

-- A literal target can be jumped to directly.
-- (Dropped a redundant `do' that wrapped this single pure `return'.)
genJump (CmmLit lit) =
  return (unitOL (JMP (OpImm (litToImm lit))))

-- Otherwise evaluate the target into a register and jump through it.
genJump expr = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg))
2645
#endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
2647
2648 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2649
2650 #if sparc_TARGET_ARCH
2651
-- Jump to a known label: emitted as a CALL (the True marks it as a
-- jump-like call), with a NOP for the branch delay slot.
genJump (CmmLit (CmmLabel lbl))
  = return (toOL [CALL (Left target) 0 True, NOP])
  where
    target = ImmCLbl lbl

-- Computed jump: evaluate the target, JMP through reg+g0, NOP in the
-- delay slot.
genJump tree
  = do
        (target, code) <- getSomeReg tree
        return (code `snocOL` JMP (AddrRegReg target g0)  `snocOL` NOP)
2661
2662 #endif /* sparc_TARGET_ARCH */
2663
2664 #if powerpc_TARGET_ARCH
-- Direct jump to a known label.
genJump (CmmLit (CmmLabel lbl))
  = return (unitOL $ JMP lbl)

-- Computed jump: move the target into the count register and BCTR.
genJump tree
  = do
        (target,code) <- getSomeReg tree
        return (code `snocOL` MTCTR target `snocOL` BCTR [])
2672 #endif /* powerpc_TARGET_ARCH */
2673
2674
2675 -- -----------------------------------------------------------------------------
2676 -- Unconditional branches
2677
genBranch :: BlockId -> NatM InstrBlock

-- | Unconditional branch to a local basic block (delegates to the
-- target-specific mkBranchInstr).
genBranch blockid = return (toOL (mkBranchInstr blockid))
2681
2682 -- -----------------------------------------------------------------------------
2683 -- Conditional jumps
2684
2685 {-
2686 Conditional jumps are always to local labels, so we can use branch
2687 instructions. We peek at the arguments to decide what kind of
2688 comparison to do.
2689
2690 ALPHA: For comparisons with 0, we're laughing, because we can just do
2691 the desired conditional branch.
2692
2693 I386: First, we have to ensure that the condition
2694 codes are set according to the supplied comparison operation.
2695
2696 SPARC: First, we have to ensure that the condition codes are set
2697 according to the supplied comparison operation. We generate slightly
2698 different code for floating point comparisons, because a floating
2699 point operation cannot directly precede a @BF@. We assume the worst
2700 and fill that slot with a @NOP@.
2701
2702 SPARC: Do not fill the delay slots here; you will confuse the register
2703 allocator.
2704 -}
2705
2706
2707 genCondJump
2708 :: BlockId -- the branch target
2709 -> CmmExpr -- the condition on which to branch
2710 -> NatM InstrBlock
2711
2712 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2713
2714 #if alpha_TARGET_ARCH
2715
-- Compare-with-zero: branch directly on the value with BI.
-- BUG FIX: the first parameter was named `id', but the body refers to
-- `lbl' (target = ImmCLbl lbl), which is unbound in this equation;
-- renamed the parameter to `lbl' to match the sibling equations.
genCondJump lbl (StPrim op [x, StInt 0])
  = getRegister x                           `thenNat` \ register ->
    getNewRegNat (registerRep register)
                                            `thenNat` \ tmp ->
    let
        code   = registerCode register tmp
        value  = registerName register tmp
        pk     = registerRep register
        target = ImmCLbl lbl
    in
    returnSeq code [BI (cmpOp op) value target]
  where
    -- Unsigned compares against zero collapse: >0 is /=0, >=0 is
    -- always true, <0 is never true, <=0 is ==0.
    cmpOp CharGtOp = GTT
    cmpOp CharGeOp = GE
    cmpOp CharEqOp = EQQ
    cmpOp CharNeOp = NE
    cmpOp CharLtOp = LTT
    cmpOp CharLeOp = LE
    cmpOp IntGtOp = GTT
    cmpOp IntGeOp = GE
    cmpOp IntEqOp = EQQ
    cmpOp IntNeOp = NE
    cmpOp IntLtOp = LTT
    cmpOp IntLeOp = LE
    cmpOp WordGtOp = NE
    cmpOp WordGeOp = ALWAYS
    cmpOp WordEqOp = EQQ
    cmpOp WordNeOp = NE
    cmpOp WordLtOp = NEVER
    cmpOp WordLeOp = EQQ
    cmpOp AddrGtOp = NE
    cmpOp AddrGeOp = ALWAYS
    cmpOp AddrEqOp = EQQ
    cmpOp AddrNeOp = NE
    cmpOp AddrLtOp = NEVER
    cmpOp AddrLeOp = EQQ
2752
-- Float compare against 0.0: branch with BF on the value itself.
genCondJump lbl (StPrim op [x, StDouble 0.0])
  = getRegister x                           `thenNat` \ register ->
    getNewRegNat (registerRep register)
                                            `thenNat` \ tmp ->
    let
        code   = registerCode register tmp
        value  = registerName register tmp
        pk     = registerRep register
        target = ImmCLbl lbl
    in
    return (code . mkSeqInstr (BF (cmpOp op) value target))
  where
    cmpOp FloatGtOp = GTT
    cmpOp FloatGeOp = GE
    cmpOp FloatEqOp = EQQ
    cmpOp FloatNeOp = NE
    cmpOp FloatLtOp = LTT
    cmpOp FloatLeOp = LE
    cmpOp DoubleGtOp = GTT
    cmpOp DoubleGeOp = GE
    cmpOp DoubleEqOp = EQQ
    cmpOp DoubleNeOp = NE
    cmpOp DoubleLtOp = LTT
    cmpOp DoubleLeOp = LE

-- General float comparison: FCMP computes the *negated* condition and
-- the branch tests against it (hence the inverted (instr, cond)
-- pairs in the table below).
genCondJump lbl (StPrim op [x, y])
  | fltCmpOp op
  = trivialFCode pr instr x y       `thenNat` \ register ->
    getNewRegNat F64                `thenNat` \ tmp ->
    let
        code    = registerCode register tmp
        result  = registerName register tmp
        target  = ImmCLbl lbl
    in
    return (code . mkSeqInstr (BF cond result target))
  where
    pr = panic "trivialU?FCode: does not use PrimRep on Alpha"

    fltCmpOp op = case op of
        FloatGtOp -> True
        FloatGeOp -> True
        FloatEqOp -> True
        FloatNeOp -> True
        FloatLtOp -> True
        FloatLeOp -> True
        DoubleGtOp -> True
        DoubleGeOp -> True
        DoubleEqOp -> True
        DoubleNeOp -> True
        DoubleLtOp -> True
        DoubleLeOp -> True
        _ -> False
    (instr, cond) = case op of
        FloatGtOp -> (FCMP TF LE, EQQ)
        FloatGeOp -> (FCMP TF LTT, EQQ)
        FloatEqOp -> (FCMP TF EQQ, NE)
        FloatNeOp -> (FCMP TF EQQ, EQQ)
        FloatLtOp -> (FCMP TF LTT, NE)
        FloatLeOp -> (FCMP TF LE, NE)
        DoubleGtOp -> (FCMP TF LE, EQQ)
        DoubleGeOp -> (FCMP TF LTT, EQQ)
        DoubleEqOp -> (FCMP TF EQQ, NE)
        DoubleNeOp -> (FCMP TF EQQ, EQQ)
        DoubleLtOp -> (FCMP TF LTT, NE)
        DoubleLeOp -> (FCMP TF LE, NE)

-- General integer comparison: CMP computes the negated condition and
-- BI branches on it (same inversion scheme as the float case above).
genCondJump lbl (StPrim op [x, y])
  = trivialCode instr x y           `thenNat` \ register ->
    getNewRegNat IntRep             `thenNat` \ tmp ->
    let
        code    = registerCode register tmp
        result  = registerName register tmp
        target  = ImmCLbl lbl
    in
    return (code . mkSeqInstr (BI cond result target))
  where
    (instr, cond) = case op of
        CharGtOp -> (CMP LE, EQQ)
        CharGeOp -> (CMP LTT, EQQ)
        CharEqOp -> (CMP EQQ, NE)
        CharNeOp -> (CMP EQQ, EQQ)
        CharLtOp -> (CMP LTT, NE)
        CharLeOp -> (CMP LE, NE)
        IntGtOp -> (CMP LE, EQQ)
        IntGeOp -> (CMP LTT, EQQ)
        IntEqOp -> (CMP EQQ, NE)
        IntNeOp -> (CMP EQQ, EQQ)
        IntLtOp -> (CMP LTT, NE)
        IntLeOp -> (CMP LE, NE)
        WordGtOp -> (CMP ULE, EQQ)
        WordGeOp -> (CMP ULT, EQQ)
        WordEqOp -> (CMP EQQ, NE)
        WordNeOp -> (CMP EQQ, EQQ)
        WordLtOp -> (CMP ULT, NE)
        WordLeOp -> (CMP ULE, NE)
        AddrGtOp -> (CMP ULE, EQQ)
        AddrGeOp -> (CMP ULT, EQQ)
        AddrEqOp -> (CMP EQQ, NE)
        AddrNeOp -> (CMP EQQ, EQQ)
        AddrLtOp -> (CMP ULT, NE)
        AddrLeOp -> (CMP ULE, NE)
2854
2855 #endif /* alpha_TARGET_ARCH */
2856
2857 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2858
2859 #if i386_TARGET_ARCH
2860
-- | Conditional jump: materialise the condition code, then emit a
-- single JXX to the target block.
genCondJump id bool = do
  CondCode _ cond insns <- getCondCode bool
  return (insns `snocOL` JXX cond id)
2864
2865 #endif
2866
2867 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2868
2869 #if x86_64_TARGET_ARCH
2870
-- Integer conditions are a single JXX.  Float (SSE2) conditions must
-- also account for the unordered (NaN) case, signalled by the parity
-- flag: NE must ALSO branch when unordered (extra JXX PARITY to the
-- target), while ordered conditions must NOT branch when unordered
-- (JXX PARITY skips over the conditional jump to a fresh block).
genCondJump id bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  if not is_float
    then
        return (cond_code `snocOL` JXX cond id)
    else do
        lbl <- getBlockIdNat

        -- see comment with condFltReg
        let code = case cond of
                        NE  -> or_unordered
                        GU  -> plain_test
                        GEU -> plain_test
                        _   -> and_ordered

            plain_test = unitOL (
                  JXX cond id
                )
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ]
            and_ordered = toOL [
                  JXX PARITY lbl,
                  JXX cond id,
                  JXX ALWAYS lbl,
                  NEWBLOCK lbl
                ]
        return (cond_code `appOL` code)
2900
2901 #endif
2902
2903 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2904
2905 #if sparc_TARGET_ARCH
2906
-- Float conditions use BF (with a NOP before, since a floating-point
-- compare may not directly precede a BF -- see the comment block
-- above); integer conditions use BI.  The trailing NOP fills the
-- branch delay slot.
genCondJump (BlockId id) bool = do
  CondCode is_float cond code <- getCondCode bool
  return (
       code `appOL`
       toOL (
         if is_float
         then [NOP, BF cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
         else [BI cond False (ImmCLbl (mkAsmTempLabel id)), NOP]
       )
    )
2917
2918 #endif /* sparc_TARGET_ARCH */
2919
2920
2921 #if powerpc_TARGET_ARCH
2922
-- | Conditional jump: materialise the condition code, then a single
-- BCC to the target block (PowerPC branches handle float and integer
-- conditions alike, so the is_float flag is unused here).
genCondJump id bool = do
  CondCode _ cond code <- getCondCode bool
  return (code `snocOL` BCC cond id)
2926
2927 #endif /* powerpc_TARGET_ARCH */
2928
2929
2930 -- -----------------------------------------------------------------------------
2931 -- Generating C calls
2932
2933 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
2934 -- @get_arg@, which moves the arguments to the correct registers/stack
2935 -- locations. Apart from that, the code is easy.
2936 --
2937 -- (If applicable) Do not fill the delay slots here; you will confuse the
2938 -- register allocator.
2939
-- Generate native code for a C call.  One set of clauses per target
-- architecture follows, selected by CPP; all share this signature.
genCCall
    :: CmmCallTarget            -- function to call
    -> CmmHintFormals           -- where to put the result
    -> CmmActuals               -- arguments (of mixed type)
    -> NatM InstrBlock
2945
2946 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2947
2948 #if alpha_TARGET_ARCH
2949
-- NOTE(review): this Alpha section is bit-rotted dead code:
-- `ccallResultRegs` below has no right-hand side, so this #if-block
-- cannot compile as written.  Kept verbatim, documentation only.
ccallResultRegs = 

-- C call (alpha): marshal arguments into registers/stack via get_arg,
-- then load the procedure value, JSR through it, and restore gp.
genCCall fn cconv result_regs args
  = mapAccumLNat get_arg (allArgRegs, eXTRA_STK_ARGS_HERE) args
                           `thenNat` \ ((unused,_), argCode) ->
    let
        -- number of argument registers actually consumed; annotates JSR
        nRegs = length allArgRegs - length unused
        code = asmSeqThen (map ($ []) argCode)
    in
        returnSeq code [
            LDA pv (AddrImm (ImmLab (ptext fn))),
            JSR ra (AddrReg pv) nRegs,
            LDGP gp (AddrReg ra)]
  where
    ------------------------
    {- Try to get a value into a specific register (or registers) for
       a call.  The first 6 arguments go into the appropriate
       argument register (separate registers for integer and floating
       point arguments, but used in lock-step), and the remaining
       arguments are dumped to the stack, beginning at 0(sp).  Our
       first argument is a pair of the list of remaining argument
       registers to be assigned for this call and the next stack
       offset to use for overflowing arguments.  This way,
       @get_arg@ can be applied to all of a call's arguments using
       @mapAccumLNat@.
    -}
    get_arg
        :: ([(Reg,Reg)], Int)   -- Argument registers and stack offset (accumulator)
        -> StixTree             -- Current argument
        -> NatM (([(Reg,Reg)],Int), InstrBlock) -- Updated accumulator and code

    -- We have to use up all of our argument registers first...

    get_arg ((iDst,fDst):dsts, offset) arg
      = getRegister arg                         `thenNat` \ register ->
        let
            -- pick the float or int register of the lock-stepped pair
            reg  = if isFloatingRep pk then fDst else iDst
            code = registerCode register reg
            src  = registerName register reg
            pk   = registerRep register
        in
        return (
         if isFloatingRep pk then
            -- fixed registers need an explicit move into the arg reg
            ((dsts, offset), if isFixed register then
                code . mkSeqInstr (FMOV src fDst)
                else code)
         else
            -- OR src,src -> iDst is the integer register-to-register move
            ((dsts, offset), if isFixed register then
                code . mkSeqInstr (OR src (RIReg src) iDst)
                else code))

    -- Once we have run out of argument registers, we move to the
    -- stack...

    get_arg ([], offset) arg
      = getRegister arg                 `thenNat` \ register ->
        getNewRegNat (registerRep register)
                                        `thenNat` \ tmp ->
        let
            code = registerCode register tmp
            src  = registerName register tmp
            pk   = registerRep register
            sz   = primRepToSize pk
        in
        -- store at the current offset; stack offsets count in words here
        return (([], offset + 1), code . mkSeqInstr (ST sz src (spRel offset)))
3015
3016 #endif /* alpha_TARGET_ARCH */
3017
3018 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3019
3020 #if i386_TARGET_ARCH
3021
genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
        -- write barrier compiles to no code on x86/x86-64;
        -- we keep it this long in order to prevent earlier optimisations.

-- we only cope with a single result for foreign calls
-- Trig/sqrt CallishMachOps are inlined as x87 instructions where
-- possible; everything else goes out of line to the C math library.
genCCall (CmmPrim op) [(r,_)] args = do
  case op of
        MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args
        MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args

        MO_F32_Sin  -> actuallyInlineFloatOp F32 (GSIN F32) args
        MO_F64_Sin  -> actuallyInlineFloatOp F64 (GSIN F64) args

        MO_F32_Cos  -> actuallyInlineFloatOp F32 (GCOS F32) args
        MO_F64_Cos  -> actuallyInlineFloatOp F64 (GCOS F64) args

        MO_F32_Tan  -> actuallyInlineFloatOp F32 (GTAN F32) args
        MO_F64_Tan  -> actuallyInlineFloatOp F64 (GTAN F64) args

        other_op    -> outOfLineFloatOp op r args
 where
  -- compute the unary FP op into a fresh register, then move the
  -- result into r's register (only a single argument is supported)
  actuallyInlineFloatOp rep instr [(x,_)]
        = do res <- trivialUFCode rep instr x
             any <- anyReg res
             return (any (getRegisterReg (CmmLocal r)))
3047
-- C call (i386): push arguments right-to-left on the C stack, call,
-- pop the arguments (unless stdcall), and move results out of
-- eax/edx/fake0.  Stack-pointer changes are tracked with DELTA
-- pseudo-instructions throughout.
genCCall target dest_regs args = do
    let
        sizes               = map (arg_size . cmmExprRep . fst) (reverse args)
#if !darwin_TARGET_OS
        tot_arg_size        = sum sizes
#else
        -- Darwin: keep the stack 16-byte aligned by padding the args
        raw_arg_size        = sum sizes
        tot_arg_size        = roundTo 16 raw_arg_size
        arg_pad_size        = tot_arg_size - raw_arg_size
    delta0 <- getDeltaNat
    setDeltaNat (delta0 - arg_pad_size)
#endif

    push_codes <- mapM push_arg (reverse args)
    delta <- getDeltaNat

    -- in
    -- deal with static vs dynamic call targets
    (callinsns,cconv) <-
      case target of
        -- CmmPrim -> ...
        CmmForeignCall (CmmLit (CmmLabel lbl)) conv
           -> -- ToDo: stdcall arg sizes
              return (unitOL (CALL (Left fn_imm) []), conv)
           where fn_imm = ImmCLbl lbl
        CmmForeignCall expr conv
           -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr
                 ASSERT(dyn_rep == I32)
                  return (dyn_c `snocOL` CALL (Right dyn_r) [], conv)

    let push_code
#if darwin_TARGET_OS
            -- emit the alignment padding before the pushed arguments
            | arg_pad_size /= 0
            = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
                    DELTA (delta0 - arg_pad_size)]
              `appOL` concatOL push_codes
            | otherwise
#endif
            = concatOL push_codes
        call = callinsns `appOL`
               toOL (
                        -- Deallocate parameters after call for ccall;
                        -- but not for stdcall (callee does it)
                  (if cconv == StdCallConv || tot_arg_size==0 then [] else
                   [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)])
                  ++
                  [DELTA (delta + tot_arg_size)]
               )
    -- in
    setDeltaNat (delta + tot_arg_size)

    let
        -- assign the results, if necessary
        assign_code []     = nilOL
        assign_code [(dest,_hint)] =
          case rep of
                -- 64-bit results come back split across eax (low) / edx (high)
                I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest),
                             MOV I32 (OpReg edx) (OpReg r_dest_hi)]
                -- FP results come back on the x87 stack (fake0)
                F32 -> unitOL (GMOV fake0 r_dest)
                F64 -> unitOL (GMOV fake0 r_dest)
                rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest))
          where
                r_dest_hi = getHiVRegFromLo r_dest
                rep = localRegRep dest
                r_dest = getRegisterReg (CmmLocal dest)
        assign_code many = panic "genCCall.assign_code many"

    return (push_code `appOL`
            call `appOL`
            assign_code dest_regs)

  where
    -- bytes each rep occupies on the C stack
    arg_size F64 = 8
    arg_size F32 = 4
    arg_size I64 = 8
    arg_size _   = 4

    roundTo a x | x `mod` a == 0 = x
                | otherwise = x + a - (x `mod` a)


    push_arg :: (CmmExpr,MachHint){-current argument-}
                    -> NatM InstrBlock  -- code

    push_arg (arg,_hint) -- we don't need the hints on x86
      | arg_rep == I64 = do
        -- 64-bit args are pushed as two 32-bit words, high word first
        ChildCode64 code r_lo <- iselExpr64 arg
        delta <- getDeltaNat
        setDeltaNat (delta - 8)
        let
            r_hi = getHiVRegFromLo r_lo
        -- in
        -- NOTE(review): the final DELTA (delta-8) duplicates the one
        -- already attached to the low-word push; redundant but harmless.
        return (       code `appOL`
                       toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4),
                             PUSH I32 (OpReg r_lo), DELTA (delta - 8),
                             DELTA (delta-8)]
            )

      | otherwise = do
        (code, reg, sz) <- get_op arg
        delta <- getDeltaNat
        let size = arg_size sz
        setDeltaNat (delta-size)
        -- FP values cannot be PUSHed from the x87 stack: make room on
        -- the C stack and GST-store the value at the new top instead
        if (case sz of F64 -> True; F32 -> True; _ -> False)
           then return (code `appOL`
                        toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp),
                              DELTA (delta-size),
                              GST sz reg (AddrBaseIndex (EABaseReg esp)
                                                        EAIndexNone
                                                        (ImmInt 0))]
                       )
           else return (code `snocOL`
                        PUSH I32 (OpReg reg) `snocOL`
                        DELTA (delta-size)
                       )
      where
         arg_rep = cmmExprRep arg

    ------------
    get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size
    get_op op = do
        (reg,code) <- getSomeReg op
        return (code, reg, cmmExprRep op)
3171
3172 #endif /* i386_TARGET_ARCH */
3173
3174 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
3175
-- Implement a floating-point CallishMachOp by calling the corresponding
-- C math-library routine (e.g. MO_F64_Sin -> "sin", MO_F32_Sin -> "sinf").
--
-- If the destination is F64 we call directly into it; otherwise the call
-- computes into a fresh F64 temporary which is then assigned (narrowed)
-- to the real destination.  The target label goes through
-- cmmMakeDynamicReference so PIC code gets the right reference kind.
outOfLineFloatOp :: CallishMachOp -> CmmFormal -> CmmActuals
  -> NatM InstrBlock
outOfLineFloatOp mop res args
  = do
      targetExpr <- cmmMakeDynamicReference addImportNat CallReference lbl
      let target = CmmForeignCall targetExpr CCallConv

      if localRegRep res == F64
        then
          stmtToInstrs (CmmCall target [(res,FloatHint)] args NoC_SRT)
        else do
          uq <- getUniqueNat
          let
            tmp = LocalReg uq F64 KindNonPtr
          -- in
          code1 <- stmtToInstrs (CmmCall target [(tmp,FloatHint)] args NoC_SRT)
          code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
          return (code1 `appOL` code2)
  where
      lbl = mkForeignLabel fn Nothing False

      fn = case mop of
              MO_F32_Sqrt  -> FSLIT("sqrtf")
              MO_F32_Sin   -> FSLIT("sinf")
              MO_F32_Cos   -> FSLIT("cosf")
              MO_F32_Tan   -> FSLIT("tanf")
              MO_F32_Exp   -> FSLIT("expf")
              MO_F32_Log   -> FSLIT("logf")

              MO_F32_Asin  -> FSLIT("asinf")
              MO_F32_Acos  -> FSLIT("acosf")
              MO_F32_Atan  -> FSLIT("atanf")

              MO_F32_Sinh  -> FSLIT("sinhf")
              MO_F32_Cosh  -> FSLIT("coshf")
              MO_F32_Tanh  -> FSLIT("tanhf")
              MO_F32_Pwr   -> FSLIT("powf")

              MO_F64_Sqrt  -> FSLIT("sqrt")
              MO_F64_Sin   -> FSLIT("sin")
              MO_F64_Cos   -> FSLIT("cos")
              MO_F64_Tan   -> FSLIT("tan")
              MO_F64_Exp   -> FSLIT("exp")
              MO_F64_Log   -> FSLIT("log")

              MO_F64_Asin  -> FSLIT("asin")
              MO_F64_Acos  -> FSLIT("acos")
              MO_F64_Atan  -> FSLIT("atan")

              MO_F64_Sinh  -> FSLIT("sinh")
              MO_F64_Cosh  -> FSLIT("cosh")
              MO_F64_Tanh  -> FSLIT("tanh")
              MO_F64_Pwr   -> FSLIT("pow")

              -- Previously an unsupported op died with an uninformative
              -- pattern-match failure; panic with the offending op
              -- instead, matching the sparc/ppc versions of this function.
              other        -> pprPanic "outOfLineFloatOp(x86): unknown callish op"
                                       (pprCallishMachOp other)
3230 #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */
3231
3232 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3233
3234 #if x86_64_TARGET_ARCH
3235
genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
        -- write barrier compiles to no code on x86/x86-64;
        -- we keep it this long in order to prevent earlier optimisations.

-- all other CallishMachOps go out of line to the C math library;
-- only a single result is supported
genCCall (CmmPrim op) [(r,_)] args =
  outOfLineFloatOp op r args
3242
-- C call (x86_64): pass as many arguments as possible in the integer
-- and SSE argument registers, push the rest on the stack (right to
-- left), keep %rsp 16-byte aligned, set %al to the number of SSE regs
-- used, call, and move results out of rax/xmm0.
genCCall target dest_regs args = do

        -- load up the register arguments
    (stack_args, aregs, fregs, load_args_code)
         <- load_args args allArgRegs allFPArgRegs nilOL

    let
        -- the argument registers actually consumed (leftovers dropped)
        fp_regs_used  = reverse (drop (length fregs) (reverse allFPArgRegs))
        int_regs_used = reverse (drop (length aregs) (reverse allArgRegs))
        arg_regs = [eax] ++ int_regs_used ++ fp_regs_used
                -- for annotating the call instruction with

        sse_regs = length fp_regs_used

        tot_arg_size = arg_size * length stack_args

        -- On entry to the called function, %rsp should be aligned
        -- on a 16-byte boundary +8 (i.e. the first stack arg after
        -- the return address is 16-byte aligned).  In STG land
        -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
        -- need to make sure we push a multiple of 16-bytes of args,
        -- plus the return address, to get the correct alignment.
        -- Urg, this is hard.  We need to feed the delta back into
        -- the arg pushing code.
    (real_size, adjust_rsp) <-
        if tot_arg_size `rem` 16 == 0
            then return (tot_arg_size, nilOL)
            else do -- we need to adjust...
                delta <- getDeltaNat
                setDeltaNat (delta-8)
                return (tot_arg_size+8, toOL [
                                SUB I64 (OpImm (ImmInt 8)) (OpReg rsp),
                                DELTA (delta-8)
                        ])

        -- push the stack args, right to left
    push_code <- push_args (reverse stack_args) nilOL
    delta <- getDeltaNat

    -- deal with static vs dynamic call targets
    (callinsns,cconv) <-
      case target of
        -- CmmPrim -> ...
        CmmForeignCall (CmmLit (CmmLabel lbl)) conv
           -> -- ToDo: stdcall arg sizes
              return (unitOL (CALL (Left fn_imm) arg_regs), conv)
           where fn_imm = ImmCLbl lbl
        CmmForeignCall expr conv
           -> do (dyn_r, dyn_c) <- getSomeReg expr
                 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)

    let
        -- The x86_64 ABI requires us to set %al to the number of SSE
        -- registers that contain arguments, if the called routine
        -- is a varargs function.  We don't know whether it's a
        -- varargs function or not, so we have to assume it is.
        --
        -- It's not safe to omit this assignment, even if the number
        -- of SSE regs in use is zero.  If %al is larger than 8
        -- on entry to a varargs function, seg faults ensue.
        assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax))

    -- NOTE(review): the deallocating ADD below targets `esp` while the
    -- allocation above used `rsp` -- presumably aliases of the same
    -- register in this backend; verify against MachRegs.
    let call = callinsns `appOL`
               toOL (
                        -- Deallocate parameters after call for ccall;
                        -- but not for stdcall (callee does it)
                  (if cconv == StdCallConv || real_size==0 then [] else
                   [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)])
                  ++
                  [DELTA (delta + real_size)]
               )
    -- in
    setDeltaNat (delta + real_size)

    let
        -- assign the results, if necessary
        assign_code []     = nilOL
        assign_code [(dest,_hint)] =
          case rep of
                -- FP results come back in xmm0; everything else in rax
                F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
                F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest))
                rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest))
          where
                rep = cmmRegRep dest
                r_dest = getRegisterReg dest
        assign_code many = panic "genCCall.assign_code many"

    return (load_args_code      `appOL`
            adjust_rsp          `appOL`
            push_code           `appOL`
            assign_eax sse_regs `appOL`
            call                `appOL`
            assign_code dest_regs)

  where
    arg_size = 8 -- always, at the mo

    -- Walk the arguments, loading each into the next free int or FP
    -- argument register; arguments that don't fit are returned for
    -- pushing on the stack.
    load_args :: [(CmmExpr,MachHint)]
              -> [Reg]                  -- int regs avail for args
              -> [Reg]                  -- FP regs avail for args
              -> InstrBlock
              -> NatM ([(CmmExpr,MachHint)],[Reg],[Reg],InstrBlock)
    load_args args [] [] code     =  return (args, [], [], code)
        -- no more regs to use
    load_args [] aregs fregs code =  return ([], aregs, fregs, code)
        -- no more args to push
    load_args ((arg,hint) : rest) aregs fregs code
        | isFloatingRep arg_rep =
            case fregs of
              [] -> push_this_arg
              (r:rs) -> do
                 arg_code <- getAnyReg arg
                 load_args rest aregs rs (code `appOL` arg_code r)
        | otherwise =
            case aregs of
              [] -> push_this_arg
              (r:rs) -> do
                 arg_code <- getAnyReg arg
                 load_args rest rs fregs (code `appOL` arg_code r)
        where
          arg_rep = cmmExprRep arg

          -- defer this argument to the stack-push phase
          push_this_arg = do
            (args',ars,frs,code') <- load_args rest aregs fregs code
            return ((arg,hint):args', ars, frs, code')

    -- Push the overflow arguments; FP values are stored via SUB+MOV
    -- rather than PUSH.
    push_args [] code = return code
    push_args ((arg,hint):rest) code
       | isFloatingRep arg_rep = do
         (arg_reg, arg_code) <- getSomeReg arg
         delta <- getDeltaNat
         setDeltaNat (delta-arg_size)
         let code' = code `appOL` arg_code `appOL` toOL [
                        SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) ,
                        DELTA (delta-arg_size),
                        MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))]
         push_args rest code'

       | otherwise = do
       -- we only ever generate word-sized function arguments.  Promotion
       -- has already happened: our Int8# type is kept sign-extended
       -- in an Int#, for example.
         ASSERT(arg_rep == I64) return ()
         (arg_op, arg_code) <- getOperand arg
         delta <- getDeltaNat
         setDeltaNat (delta-arg_size)
         let code' = code `appOL` toOL [PUSH I64 arg_op,
                                        DELTA (delta-arg_size)]
         push_args rest code'
        where
          arg_rep = cmmExprRep arg
3394 #endif
3395
3396 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3397
3398 #if sparc_TARGET_ARCH
3399 {-
3400 The SPARC calling convention is an absolute
3401 nightmare. The first 6x32 bits of arguments are mapped into
3402 %o0 through %o5, and the remaining arguments are dumped to the
3403 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
3404
3405 If we have to put args on the stack, move %o6==%sp down by
3406 the number of words to go on the stack, to ensure there's enough space.
3407
3408 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
3409 16 words above the stack pointer is a word for the address of
3410 a structure return value. I use this as a temporary location
3411 for moving values from float to int regs. Certainly it isn't
3412 safe to put anything in the 16 words starting at %sp, since
3413 this area can get trashed at any time due to window overflows
3414 caused by signal handlers.
3415
3416 A final complication (if the above isn't enough) is that
3417 we can't blithely calculate the arguments one by one into
3418 %o0 .. %o5. Consider the following nested calls:
3419
3420 fff a (fff b c)
3421
3422 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
3423 the inner call will itself use %o0, which trashes the value put there
3424 in preparation for the outer call. Upshot: we need to calculate the
3425 args into temporary regs, and move those to arg regs or onto the
3426 stack only immediately prior to the call proper. Sigh.
3427 -}
3428
-- C call (sparc): see the long comment above.  Every argument is first
-- computed into one or two fresh integer vregs (so nested calls cannot
-- clobber the real arg regs); only immediately before the call are they
-- moved into %o0..%o5 / the stack by transfer_code.
genCCall target dest_regs argsAndHints = do
    let
        args = map fst argsAndHints
    argcode_and_vregs <- mapM arg_to_int_vregs args
    let
        (argcodes, vregss) = unzip argcode_and_vregs
        n_argRegs          = length allArgRegs
        n_argRegs_used     = min (length vregs) n_argRegs
        vregs              = concat vregss
    -- deal with static vs dynamic call targets
    callinsns <- (case target of
        CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> do
                return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
        CmmForeignCall expr conv -> do
                -- a dynamic target is itself computed into a vreg
                (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
                return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
        CmmPrim mop -> do
                  (res, reduce) <- outOfLineFloatOp mop
                  lblOrMopExpr <- case res of
                       Left lbl -> do
                            return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
                       Right mopExpr -> do
                            (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
                            return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
                  -- F32 ops would need narrowing of the F64 result,
                  -- which this backend does not implement
                  if reduce then panic "genCCall(sparc): can not reduce" else return lblOrMopExpr

        )
    let
        argcode = concatOL argcodes
        -- make room on the stack for the arguments that overflow the
        -- six arg regs, rounded up to keep 8-byte alignment
        (move_sp_down, move_sp_up)
           = let diff = length vregs - n_argRegs
                 nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
             in  if   nn <= 0
                 then (nilOL, nilOL)
                 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
        transfer_code
           = toOL (move_final vregs allArgRegs eXTRA_STK_ARGS_HERE)
    -- NOP after the call: presumably the (unfilled) branch delay slot
    return (argcode       `appOL`
            move_sp_down  `appOL`
            transfer_code `appOL`
            callinsns     `appOL`
            unitOL NOP    `appOL`
            move_sp_up)
  where
    -- move args from the integer vregs into which they have been
    -- marshalled, into %o0 .. %o5, and the rest onto the stack.
    move_final :: [Reg] -> [Reg] -> Int -> [Instr]

    move_final [] _ offset          -- all args done
       = []

    move_final (v:vs) [] offset     -- out of aregs; move to stack
       = ST I32 v (spRel offset)
         : move_final vs [] (offset+1)

    move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg
       = OR False g0 (RIReg v) a
         : move_final vs az offset

    -- generate code to calculate an argument, and move it into one
    -- or two integer vregs.
    arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
    arg_to_int_vregs arg
       -- 64-bit args are already split into hi/lo vregs
       | (cmmExprRep arg) == I64
       = do
          (ChildCode64 code r_lo) <- iselExpr64 arg
          let
              r_hi = getHiVRegFromLo r_lo
          return (code, [r_hi, r_lo])
       | otherwise
       = do
          (src, code) <- getSomeReg arg
          -- NOTE(review): `tmp` is bound but never used below
          tmp <- getNewRegNat (cmmExprRep arg)
          let
              pk = cmmExprRep arg
          case pk of
             -- FP values are bounced through the scratch stack slot at
             -- [%sp+16] (see the comment above) to get them into int vregs
             F64 -> do
                      v1 <- getNewRegNat I32
                      v2 <- getNewRegNat I32
                      return (
                         code                          `snocOL`
                         FMOV F64 src f0               `snocOL`
                         ST   F32  f0 (spRel 16)       `snocOL`
                         LD   I32  (spRel 16) v1       `snocOL`
                         ST   F32  (fPair f0) (spRel 16) `snocOL`
                         LD   I32  (spRel 16) v2
                         ,
                         [v1,v2]
                      )
             F32 -> do
                      v1 <- getNewRegNat I32
                      return (
                         code                    `snocOL`
                         ST   F32  src (spRel 16)  `snocOL`
                         LD   I32  (spRel 16) v1
                         ,
                         [v1]
                      )
             other -> do
                        v1 <- getNewRegNat I32
                        return (
                          code `snocOL` OR False g0 (RIReg src) v1
                          ,
                          [v1]
                        )
    -- resolve a CallishMachOp to its C math-library label (or a
    -- dynamic expression under PIC); `reduce` means the op is F32 and
    -- would need its F64 result narrowed (unsupported, see panic above)
    outOfLineFloatOp mop =
        do
          mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                        mkForeignLabel functionName Nothing True
          let mopLabelOrExpr = case mopExpr of
                  CmmLit (CmmLabel lbl) -> Left lbl
                  _ -> Right mopExpr
          return (mopLabelOrExpr, reduce)
      where
        (reduce, functionName) = case mop of
          MO_F32_Exp    -> (True,  FSLIT("exp"))
          MO_F32_Log    -> (True,  FSLIT("log"))
          MO_F32_Sqrt   -> (True,  FSLIT("sqrt"))

          MO_F32_Sin    -> (True,  FSLIT("sin"))
          MO_F32_Cos    -> (True,  FSLIT("cos"))
          MO_F32_Tan    -> (True,  FSLIT("tan"))

          MO_F32_Asin   -> (True,  FSLIT("asin"))
          MO_F32_Acos   -> (True,  FSLIT("acos"))
          MO_F32_Atan   -> (True,  FSLIT("atan"))

          MO_F32_Sinh   -> (True,  FSLIT("sinh"))
          MO_F32_Cosh   -> (True,  FSLIT("cosh"))
          MO_F32_Tanh   -> (True,  FSLIT("tanh"))

          MO_F64_Exp    -> (False, FSLIT("exp"))
          MO_F64_Log    -> (False, FSLIT("log"))
          MO_F64_Sqrt   -> (False, FSLIT("sqrt"))

          MO_F64_Sin    -> (False, FSLIT("sin"))
          MO_F64_Cos    -> (False, FSLIT("cos"))
          MO_F64_Tan    -> (False, FSLIT("tan"))

          MO_F64_Asin   -> (False, FSLIT("asin"))
          MO_F64_Acos   -> (False, FSLIT("acos"))
          MO_F64_Atan   -> (False, FSLIT("atan"))

          MO_F64_Sinh   -> (False, FSLIT("sinh"))
          MO_F64_Cosh   -> (False, FSLIT("cosh"))
          MO_F64_Tanh   -> (False, FSLIT("tanh"))

          other -> pprPanic "outOfLineFloatOp(sparc) "
                            (pprCallishMachOp mop)
3578
3579 #endif /* sparc_TARGET_ARCH */
3580
3581 #if powerpc_TARGET_ARCH
3582
3583 #if darwin_TARGET_OS || linux_TARGET_OS
3584 {-
3585 The PowerPC calling convention for Darwin/Mac OS X
3586 is described in Apple's document
3587 "Inside Mac OS X - Mach-O Runtime Architecture".
3588
3589 PowerPC Linux uses the System V Release 4 Calling Convention
3590 for PowerPC. It is described in the
3591 "System V Application Binary Interface PowerPC Processor Supplement".
3592
3593 Both conventions are similar:
3594 Parameters may be passed in general-purpose registers starting at r3, in
3595 floating point registers starting at f1, or on the stack.
3596
3597 But there are substantial differences:
3598 * The number of registers used for parameter passing and the exact set of
3599 nonvolatile registers differs (see MachRegs.lhs).
3600 * On Darwin, stack space is always reserved for parameters, even if they are
3601 passed in registers. The called routine may choose to save parameters from
3602 registers to the corresponding space on the stack.
3603 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
3604 parameter is passed in an FPR.
3605 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
3606 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
3607 Darwin just treats an I64 like two separate I32s (high word first).
3608 * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only
3609 4-byte aligned like everything else on Darwin.
3610 * The SysV spec claims that F32 is represented as F64 on the stack. GCC on
3611 PowerPC Linux does not agree, so neither do we.
3612
According to both conventions, the parameter area should be part of the
3614 caller's stack frame, allocated in the caller's prologue code (large enough
3615 to hold the parameter lists for all called routines). The NCG already
3616 uses the stack for register spilling, leaving 64 bytes free at the top.
3617 If we need a larger parameter area than that, we just allocate a new stack
3618 frame just before ccalling.
3619 -}
3620
3621
-- A write barrier on PowerPC is a single LWSYNC; results and arguments
-- are ignored.
-- Fix: genCCall takes THREE arguments (target, results, args) per the
-- signature above and the sibling clause below; the extra fourth
-- wildcard that was here made this clause an arity mismatch.
genCCall (CmmPrim MO_WriteBarrier) _ _
 = return $ unitOL LWSYNC
3624
-- C call (powerpc Darwin/Linux): marshal arguments into GPRs/FPRs and
-- the caller's parameter area per the two ABIs described above, grow
-- the stack frame if the 64 spill-free bytes aren't enough, branch
-- (BL for static targets, MTCTR+BCTRL for dynamic ones), and move
-- results out of r3/r4/f1.
genCCall target dest_regs argsAndHints
  = ASSERT (not $ any (`elem` [I8,I16]) argReps)
        -- we rely on argument promotion in the codeGen
    do
        (finalStack,passArgumentsCode,usedRegs) <- passArguments
                                                        (zip args argReps)
                                                        allArgRegs allFPArgRegs
                                                        initialStackOffset
                                                        (toOL []) []

        (labelOrExpr, reduceToF32) <- case target of
            CmmForeignCall (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
            CmmForeignCall expr conv -> return  (Right expr, False)
            CmmPrim mop -> outOfLineFloatOp mop

        let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
            codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32

        case labelOrExpr of
            Left lbl -> do
                return (         codeBefore
                        `snocOL` BL lbl usedRegs
                        `appOL`  codeAfter)
            Right dyn -> do
                -- dynamic target: load into CTR before pushing args
                (dynReg, dynCode) <- getSomeReg dyn
                return (         dynCode
                        `snocOL` MTCTR dynReg
                        `appOL`  codeBefore
                        `snocOL` BCTRL usedRegs
                        `appOL`  codeAfter)
    where
#if darwin_TARGET_OS
        initialStackOffset = 24
            -- size of linkage area + size of arguments, in bytes
        stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
                                 map machRepByteWidth argReps
#elif linux_TARGET_OS
        initialStackOffset = 8
        stackDelta finalStack = roundTo 16 finalStack
#endif
        args = map fst argsAndHints
        argReps = map cmmExprRep args

        roundTo a x | x `mod` a == 0 = x
                    | otherwise = x + a - (x `mod` a)

        -- allocate/deallocate a fresh frame only when the parameter
        -- area exceeds the 64 bytes the NCG keeps free (see above)
        move_sp_down finalStack
               | delta > 64 =
                        toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))),
                              DELTA (-delta)]
               | otherwise = nilOL
               where delta = stackDelta finalStack
        move_sp_up finalStack
               | delta > 64 =
                        toOL [ADD sp sp (RIImm (ImmInt delta)),
                              DELTA 0]
               | otherwise = nilOL
               where delta = stackDelta finalStack


        -- accumulate (final stack offset, marshalling code, regs used)
        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
        passArguments ((arg,I64):args) gprs fprs stackOffset
               accumCode accumUsed =
            do
                ChildCode64 code vr_lo <- iselExpr64 arg
                let vr_hi = getHiVRegFromLo vr_lo

#if darwin_TARGET_OS
                -- Darwin: an I64 is simply two I32s, high word first,
                -- and stack space is reserved even for register args
                passArguments args
                              (drop 2 gprs)
                              fprs
                              (stackOffset+8)
                              (accumCode `appOL` code
                                    `snocOL` storeWord vr_hi gprs stackOffset
                                    `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
                              ((take 2 gprs) ++ accumUsed)
            where
                storeWord vr (gpr:_) offset = MR gpr vr
                storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset))

#elif linux_TARGET_OS
                -- SysV: I64 goes in an odd/even GPR pair (skipping a
                -- GPR if needed) or 8-byte aligned on the stack
                let stackOffset' = roundTo 8 stackOffset
                    stackCode = accumCode `appOL` code
                        `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
                        `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
                    regCode hireg loreg =
                        accumCode `appOL` code
                            `snocOL` MR hireg vr_hi
                            `snocOL` MR loreg vr_lo

                case gprs of
                    hireg : loreg : regs | even (length gprs) ->
                        passArguments args regs fprs stackOffset
                                      (regCode hireg loreg) (hireg : loreg : accumUsed)
                    _skipped : hireg : loreg : regs ->
                        passArguments args regs fprs stackOffset
                                      (regCode hireg loreg) (hireg : loreg : accumUsed)
                    _ -> -- only one or no regs left
                        passArguments args [] fprs (stackOffset'+8)
                                      stackCode accumUsed
#endif

        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
            | reg : _ <- regs = do
                register <- getRegister arg
                let code = case register of
                            Fixed _ freg fcode -> fcode `snocOL` MR reg freg
                            Any _ acode -> acode reg
                passArguments args
                              (drop nGprs gprs)
                              (drop nFprs fprs)
#if darwin_TARGET_OS
        -- The Darwin ABI requires that we reserve stack slots for register parameters
                              (stackOffset + stackBytes)
#elif linux_TARGET_OS
        -- ... the SysV ABI doesn't.
                              stackOffset
#endif
                              (accumCode `appOL` code)
                              (reg : accumUsed)
            | otherwise = do
                -- no suitable register left: pass on the stack
                (vr, code) <- getSomeReg arg
                passArguments args
                              (drop nGprs gprs)
                              (drop nFprs fprs)
                              (stackOffset' + stackBytes)
                              (accumCode `appOL` code `snocOL` ST rep vr stackSlot)
                              accumUsed
            where
#if darwin_TARGET_OS
        -- stackOffset is at least 4-byte aligned
        -- The Darwin ABI is happy with that.
                stackOffset' = stackOffset
#else
        -- ... the SysV ABI requires 8-byte alignment for doubles.
                stackOffset' | rep == F64 = roundTo 8 stackOffset
                             | otherwise  =           stackOffset
#endif
                stackSlot = AddrRegImm sp (ImmInt stackOffset')
                -- how many GPRs/FPRs and stack bytes this rep consumes,
                -- and which register pool it draws from
                (nGprs, nFprs, stackBytes, regs) = case rep of
                    I32 -> (1, 0, 4, gprs)
#if darwin_TARGET_OS
        -- The Darwin ABI requires that we skip a corresponding number of GPRs when
        -- we use the FPRs.
                    F32 -> (1, 1, 4, fprs)
                    F64 -> (2, 1, 8, fprs)
#elif linux_TARGET_OS
        -- ... the SysV ABI doesn't.
                    F32 -> (0, 1, 4, fprs)
                    F64 -> (0, 1, 8, fprs)
#endif

        -- move the call result into the destination register(s):
        -- FP in f1 (optionally narrowed via FRSP), I64 in r3/r4,
        -- everything else in r3
        moveResult reduceToF32 =
            case dest_regs of
                [] -> nilOL
                [(dest, _hint)]
                    | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1)
                    | rep == F32 || rep == F64 -> unitOL (MR r_dest f1)
                    | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3,
                                          MR r_dest r4]
                    | otherwise -> unitOL (MR r_dest r3)
                    where rep = cmmRegRep dest
                          r_dest = getRegisterReg dest

        -- resolve a CallishMachOp to its C math-library label (or a
        -- dynamic expression under PIC); `reduce` is True for F32 ops,
        -- whose F64 result must be narrowed by FRSP in moveResult
        outOfLineFloatOp mop =
            do
                mopExpr <- cmmMakeDynamicReference addImportNat CallReference $
                              mkForeignLabel functionName Nothing True
                let mopLabelOrExpr = case mopExpr of
                        CmmLit (CmmLabel lbl) -> Left lbl
                        _ -> Right mopExpr
                return (mopLabelOrExpr, reduce)
            where
                (functionName, reduce) = case mop of
                    MO_F32_Exp   -> (FSLIT("exp"), True)
                    MO_F32_Log   -> (FSLIT("log"), True)
                    MO_F32_Sqrt  -> (FSLIT("sqrt"), True)

                    MO_F32_Sin   -> (FSLIT("sin"), True)
                    MO_F32_Cos   -> (FSLIT("cos"), True)
                    MO_F32_Tan   -> (FSLIT("tan"), True)

                    MO_F32_Asin  -> (FSLIT("asin"), True)
                    MO_F32_Acos  -> (FSLIT("acos"), True)
                    MO_F32_Atan  -> (FSLIT("atan"), True)

                    MO_F32_Sinh  -> (FSLIT("sinh"), True)
                    MO_F32_Cosh  -> (FSLIT("cosh"), True)
                    MO_F32_Tanh  -> (FSLIT("tanh"), True)
                    MO_F32_Pwr   -> (FSLIT("pow"), True)

                    MO_F64_Exp   -> (FSLIT("exp"), False)
                    MO_F64_Log   -> (FSLIT("log"), False)
                    MO_F64_Sqrt  -> (FSLIT("sqrt"), False)

                    MO_F64_Sin   -> (FSLIT("sin"), False)
                    MO_F64_Cos   -> (FSLIT("cos"), False)
                    MO_F64_Tan   -> (FSLIT("tan"), False)

                    MO_F64_Asin  -> (FSLIT("asin"), False)
                    MO_F64_Acos  -> (FSLIT("acos"), False)
                    MO_F64_Atan  -> (FSLIT("atan"), False)

                    MO_F64_Sinh  -> (FSLIT("sinh"), False)
                    MO_F64_Cosh  -> (FSLIT("cosh"), False)
                    MO_F64_Tanh  -> (FSLIT("tanh"), False)
                    MO_F64_Pwr   -> (FSLIT("pow"), False)
                    other -> pprPanic "genCCall(ppc): unknown callish op"
                                    (pprCallishMachOp other)
3834
3835 #endif /* darwin_TARGET_OS || linux_TARGET_OS */
3836
3837 #endif /* powerpc_TARGET_ARCH */
3838
3839
3840 -- -----------------------------------------------------------------------------
3841 -- Generating a table-branch
3842
3843 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
3844
3845 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
<