Follow vreg/hreg patch in PPC NCG
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# OPTIONS -w #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, (b) the type signatures, and
13 -- (c) the #if blah_TARGET_ARCH things, the
14 -- structure should not be too overwhelming.
15
16 module PPC.CodeGen (
17 cmmTopCodeGen,
18 InstrBlock
19 )
20
21 where
22
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
25 #include "MachDeps.h"
26
27 -- NCG stuff:
28 import PPC.Instr
29 import PPC.Cond
30 import PPC.Regs
31 import PPC.RegInfo
32 import NCGMonad
33 import Instruction
34 import PIC
35 import Size
36 import RegClass
37 import Reg
38 import TargetReg
39 import Platform
40
41 -- Our intermediate code:
42 import BlockId
43 import PprCmm ( pprExpr )
44 import Cmm
45 import CLabel
46
47 -- The rest:
48 import StaticFlags ( opt_PIC )
49 import OrdList
50 import qualified Outputable as O
51 import Outputable
52 import DynFlags
53
54 import Control.Monad ( mapAndUnzipM )
55 import Data.Bits
56 import Data.Int
57 import Data.Word
58
59 #if darwin_TARGET_OS || linux_TARGET_OS
60 import BasicTypes
61 import FastString
62 #endif
63
64 -- -----------------------------------------------------------------------------
65 -- Top-level of the instruction selector
66
67 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
68 -- They are really trees of insns to facilitate fast appending, where a
69 -- left-to-right traversal (pre-order?) yields the insns in the correct
70 -- order.
71
-- | Run instruction selection over a single top-level Cmm declaration.
--
-- Data sections are passed through untouched.  For a procedure, every
-- basic block is translated, the resulting blocks and any statics emitted
-- along the way are collected, and, if a PIC base register was requested
-- during selection, the result is handed to 'initializePicBase_ppc'.
cmmTopCodeGen :: DynFlags -> RawCmmTop -> NatM [NatCmmTop Instr]
cmmTopCodeGen dflags top = case top of
    -- no translation, we just use CmmStatic
    CmmData sec dat ->
        return [CmmData sec dat]

    CmmProc info lab params (ListGraph blocks) -> do
        (natBlockss, staticss) <- mapAndUnzipM basicBlockCodeGen blocks
        mbPicBase              <- getPicBaseMaybeNat
        let natProc  = CmmProc info lab params (ListGraph (concat natBlockss))
            allTops  = natProc : concat staticss
            targetOS = platformOS (targetPlatform dflags)
        maybe (return allTops)
              (\picBase -> initializePicBase_ppc ArchPPC targetOS picBase allTops)
              mbPicBase
89
-- | Translate one Cmm basic block.
--
-- Code generation may introduce new basic block boundaries, which are
-- marked in the instruction stream by 'NEWBLOCK'.  The stream is split
-- back into basic blocks here, and 'LDATA' pseudo-instructions are pulled
-- out and returned separately as data sections.
basicBlockCodeGen
    :: CmmBasicBlock
    -> NatM ( [NatBasicBlock Instr]
            , [NatCmmTop Instr] )
basicBlockCodeGen (BasicBlock bid stmts) = do
    selected <- stmtsToInstrs stmts
    let (entryInstrs, laterBlocks, dataTops)
            = foldrOL split ([], [], []) selected
    return (BasicBlock bid entryInstrs : laterBlocks, dataTops)
  where
    -- Right fold step: 'pending' holds the instructions seen so far that
    -- belong to the block currently being assembled.
    split instr (pending, done, datas) = case instr of
        NEWBLOCK newId -> ([], BasicBlock newId pending : done, datas)
        LDATA sec dat  -> (pending, done, CmmData sec dat : datas)
        _              -> (instr : pending, done, datas)
112
-- | Translate a list of statements, concatenating the per-statement code
-- in order.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts = mapM stmtToInstrs stmts >>= return . concatOL
117
-- | Translate a single Cmm statement.  Assignments and stores are
-- dispatched on the type of the value being moved: floating point,
-- 64-bit integer (only when the word size is 32 bits), or plain integer.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs CmmNop         = return nilOL
stmtToInstrs (CmmComment s) = return (unitOL (COMMENT s))

stmtToInstrs (CmmAssign reg src)
    | isFloatType ty = assignReg_FltCode size reg src
#if WORD_SIZE_IN_BITS==32
    | isWord64 ty    = assignReg_I64Code reg src
#endif
    | otherwise      = assignReg_IntCode size reg src
  where
    ty   = cmmRegType reg
    size = cmmTypeSize ty

stmtToInstrs (CmmStore addr src)
    | isFloatType ty = assignMem_FltCode size addr src
#if WORD_SIZE_IN_BITS==32
    | isWord64 ty    = assignMem_I64Code addr src
#endif
    | otherwise      = assignMem_IntCode size addr src
  where
    ty   = cmmExprType src
    size = cmmTypeSize ty

stmtToInstrs (CmmCall target result_regs args _ _)
    = genCCall target result_regs args

stmtToInstrs (CmmBranch target)         = genBranch target
stmtToInstrs (CmmCondBranch arg target) = genCondJump target arg
stmtToInstrs (CmmSwitch arg ids)        = genSwitch arg ids
stmtToInstrs (CmmJump arg _params)      = genJump arg
stmtToInstrs (CmmReturn _params)
    = panic "stmtToInstrs: return statement should have been cps'd away"
150
151
152 --------------------------------------------------------------------------------
-- | The instruction sequences produced by the instruction selectors.
-- They are kept as an 'OrdList' (a tree of instructions) so that appending
-- is cheap; a left-to-right traversal yields the instructions in the
-- correct order.
type InstrBlock = OrdList Instr
159
160
-- | Registers passed up the tree.
--
-- * 'Fixed': the value is forced to live in a pre-decided register; the
--   code block computes it into that register.
--
-- * 'Any': the parent may choose the destination; the function builds the
--   code for whichever register it is given.
data Register
    = Fixed Size Reg InstrBlock
    | Any   Size (Reg -> InstrBlock)
169
170
-- | Replace the 'Size' recorded in a 'Register', leaving the register and
-- code untouched.
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep register size = case register of
    Fixed _ reg code -> Fixed size reg code
    Any _ codefn     -> Any size codefn
174
175
-- | Grab the 'Reg' for a 'CmmReg'.
--
-- Local registers become virtual registers sized after their Cmm type.
-- By this stage, the only global registers remaining should be the ones
-- which map to a real machine register on this platform; anything that
-- maps to memory is a panic.
getRegisterReg :: CmmReg -> Reg
getRegisterReg cmmReg = case cmmReg of
    CmmLocal (LocalReg u pk) ->
        RegVirtual (mkVirtualReg u (cmmTypeSize pk))

    CmmGlobal mid ->
        case get_GlobalReg_reg_or_addr mid of
            Left reg -> reg
            _other   -> pprPanic "getRegisterReg-memory" (ppr (CmmGlobal mid))
189
190
191 {-
192 Now, given a tree (the argument to an CmmLoad) that references memory,
193 produce a suitable addressing mode.
194
195 A Rule of the Game (tm) for Amodes: use of the addr bit must
196 immediately follow use of the code part, since the code part puts
197 values in registers which the addr then refers to. So you can't put
198 anything in between, lest it overwrite some of those registers. If
199 you need to do some other computation between the code part and use of
200 the addr bit, first store the effective address from the amode in a
201 temporary, then do the other computation, and then use the temporary:
202
203 code
204 LEA amode, tmp
205 ... other computation ...
206 ... (tmp) ...
207 -}
208
209
-- | Check whether an integer will fit in 32 bits.
--
-- A CmmInt is intended to be truncated to the appropriate number of bits,
-- so the value is first truncated to 'Int64' and only then range-checked.
-- This matters because e.g. -1 as a CmmInt might be represented either as
-- -1 or as 18446744073709551615.
is32BitInteger :: Integer -> Bool
is32BitInteger i = lowest <= truncated && truncated <= highest
  where
    truncated = fromIntegral i :: Int64
    lowest    = -0x80000000
    highest   = 0x7fffffff
219
220
-- | Convert an optional 'BlockId' to a jump table entry: a word-sized
-- zero when there is no target, otherwise the assembler temp label of
-- the block.
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry mbTarget = CmmStaticLit $ case mbTarget of
    Nothing             -> CmmInt 0 wordWidth
    Just (BlockId uniq) -> CmmLabel (mkAsmTempLabel uniq)
226
227
228
229 -- -----------------------------------------------------------------------------
230 -- General things for putting together code sequences
231
-- | Expand a 'CmmRegOff' into an explicit addition of the register and a
-- literal offset of the register's width.  Any other expression is a
-- panic.
--
-- ToDo: should we do it this way around, or convert CmmExprs into
-- CmmRegOff?
mangleIndexTree :: CmmExpr -> CmmExpr
mangleIndexTree expr = case expr of
    CmmRegOff reg off ->
        let width  = typeWidth (cmmRegType reg)
            offLit = CmmLit (CmmInt (fromIntegral off) width)
        in  CmmMachOp (MO_Add width) [CmmReg reg, offLit]

    _ -> panic "PPC.CodeGen.mangleIndexTree: no match"
241
242 -- -----------------------------------------------------------------------------
243 -- Code gen for 64-bit arithmetic on 32-bit platforms
244
245 {-
246 Simple support for generating 64-bit code (ie, 64 bit values and 64
247 bit assignments) on 32-bit platforms. Unlike the main code generator
248 we merely shoot for generating working code as simply as possible, and
249 pay little attention to code quality. Specifically, there is no
250 attempt to deal cleverly with the fixed-vs-floating register
251 distinction; all values are generated into (pairs of) floating
252 registers, even if this would mean some redundant reg-reg moves as a
253 result. Only one of the VRegUniques is returned, since it will be
254 of the VRegUniqueLo form, and the upper-half VReg can be determined
255 by applying getHiVRegFromLo to it.
256 -}
257
-- | Result of selecting code for a 64-bit expression (a.k.a. "Register64").
data ChildCode64
    = ChildCode64
        InstrBlock  -- ^ code computing the value
        Reg         -- ^ the lower 32-bit temporary which contains the
                    --   result; use getHiVRegFromLo to find the register
                    --   holding the upper half.  Rules of this simplified
                    --   insn selection game are therefore that the
                    --   returned Reg may be modified.
266
267
-- | Compute an expression into a register, without caring which one.
-- A 'Fixed' result is used where it is; for an 'Any' result a fresh
-- register of the right size is allocated and the code is instantiated
-- for it.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = getRegister expr >>= \register -> case register of
    Fixed _ reg code -> return (reg, code)
    Any rep mkCode   -> do
        tmp <- getNewRegNat rep
        return (tmp, mkCode tmp)
279
-- | Produce addressing modes for the two 32-bit halves of a 64-bit memory
-- operand (first result at the given address, second at offset 4),
-- together with the code that sets them up.  If the addressing mode
-- obtained from 'getAmode' cannot be offset by 4, the address is computed
-- into a register instead and plain register+immediate modes are used.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
    Amode hiAddr addrCode <- getAmode addrTree
    maybe viaRegister
          (\loAddr -> return (hiAddr, loAddr, addrCode))
          (addrOffset hiAddr 4)
  where
    viaRegister = do
        (ptr, code) <- getSomeReg addrTree
        return ( AddrRegImm ptr (ImmInt 0)
               , AddrRegImm ptr (ImmInt 4)
               , code )
289
290
-- | Store a 64-bit value to memory as two 32-bit stores (big-endian: the
-- high word goes to the first address returned by 'getI64Amodes').  The
-- value code runs first, then the address code, then the low and high
-- stores.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
    (hiAddr, loAddr, addrCode) <- getI64Amodes addrTree
    ChildCode64 valueCode rlo  <- iselExpr64 valueTree
    let rhi    = getHiVRegFromLo rlo
        stores = toOL [ ST II32 rlo loAddr
                      , ST II32 rhi hiAddr ]
    return (valueCode `appOL` addrCode `appOL` stores)
303
304
-- | Assign a 64-bit value to a local register by moving both 32-bit
-- halves into the destination's low/high virtual register pair.  Only
-- local registers are accepted; anything else is a panic.
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code lvalue valueTree = case lvalue of
    CmmLocal (LocalReg u_dst _pk) -> do
        ChildCode64 valueCode srcLo <- iselExpr64 valueTree
        let dstLo = RegVirtual (mkVirtualReg u_dst II32)
            dstHi = getHiVRegFromLo dstLo
            srcHi = getHiVRegFromLo srcLo
            moves = toOL [ MR dstLo srcLo
                         , MR dstHi srcHi ]
        return (valueCode `appOL` moves)

    _ -> panic "assignReg_I64Code(powerpc): invalid lvalue"
321
322
-- | Select instructions for a 64-bit expression on a 32-bit target.
--
-- The result is the code plus the register holding the low 32 bits; the
-- register holding the high 32 bits is obtained from it with
-- getHiVRegFromLo.  Only loads, local registers, integer literals,
-- additions and zero-extension from 32 bits are supported; any other
-- expression is a panic.
iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat II32
    let mov_hi = LD II32 rhi hi_addr
        mov_lo = LD II32 rlo lo_addr
    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                         rlo

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
    = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

iselExpr64 (CmmLit (CmmInt i _)) = do
    (rlo, rhi) <- getNewRegPairNat II32
    let
        -- Extract the 16-bit field of the literal that starts at the given
        -- bit position.  The shift must be applied to the Integer itself:
        -- shifting after truncating to Word16 would always yield 0 for any
        -- shift amount of 16 or more.
        halfAt :: Int -> Int
        halfAt bit = fromIntegral (fromIntegral (i `shiftR` bit) :: Word16)

        half0 = halfAt 0
        half1 = halfAt 16
        half2 = halfAt 32
        half3 = halfAt 48

        -- Each 32-bit register is built from two halves: LIS supplies the
        -- upper one and OR merges in the lower one.  The low word uses
        -- half1:half0, the high word half3:half2.
        code = toOL [
                LIS rlo (ImmInt half1),
                OR rlo rlo (RIImm $ ImmInt half0),
                LIS rhi (ImmInt half3),
                OR rhi rhi (RIImm $ ImmInt half2)
                ]
    return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
    ChildCode64 code1 r1lo <- iselExpr64 e1
    ChildCode64 code2 r2lo <- iselExpr64 e2
    (rlo, rhi) <- getNewRegPairNat II32
    let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        -- low words are added with ADDC, high words with ADDE
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ ADDC rlo r1lo r2lo,
                       ADDE rhi r1hi r2hi ]
    return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
    (expr_reg, expr_code) <- getSomeReg expr
    (rlo, rhi) <- getNewRegPairNat II32
    -- zero extension: the high word is simply 0
    let mov_hi = LI rhi (ImmInt 0)
        mov_lo = MR rlo expr_reg
    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
                         rlo

iselExpr64 expr
    = pprPanic "iselExpr64(powerpc)" (ppr expr)
375
376
377
-- | Select code that computes an expression into a register.
getRegister :: CmmExpr -> NatM Register

-- A register read needs no code: the value is already in a known register.
getRegister (CmmReg reg) = return (Fixed size machineReg nilOL)
  where
    size       = cmmTypeSize (cmmRegType reg)
    machineReg = getRegisterReg reg

-- Register-plus-offset is expanded into an explicit addition first.
getRegister regOff@(CmmRegOff _ _)
    = getRegister $ mangleIndexTree regOff
386
387
#if WORD_SIZE_IN_BITS==32
    -- for 32-bit architectures, support some 64 -> 32 bit conversions:
    -- TO_W_(x), TO_W_(x >> 32)

-- Narrowing of (x >> 32): the answer is the high register of the pair.
getRegister (CmmMachOp (MO_UU_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]]) = do
    ChildCode64 pairCode lo <- iselExpr64 x
    return (Fixed II32 (getHiVRegFromLo lo) pairCode)

getRegister (CmmMachOp (MO_SS_Conv W64 W32)
             [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]]) = do
    ChildCode64 pairCode lo <- iselExpr64 x
    return (Fixed II32 (getHiVRegFromLo lo) pairCode)

-- Plain narrowing: the answer is the low register of the pair.
getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
    ChildCode64 pairCode lo <- iselExpr64 x
    return (Fixed II32 lo pairCode)

getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
    ChildCode64 pairCode lo <- iselExpr64 x
    return (Fixed II32 lo pairCode)

#endif
411
412
-- Loads of anything except 64-bit integers: compute the addressing mode,
-- then load into whichever register the parent picks.  The assertion
-- checks that a floating point load targets a register of class RcDouble
-- and an integer load does not.
getRegister (CmmLoad mem pk)
  | not (isWord64 pk)
  = do
        Amode addr addrCode <- getAmode mem
        let loadInto dst = ASSERT((targetClassOfReg dst == RcDouble) == isFloatType pk)
                           addrCode `snocOL` LD size dst addr
        return (Any size loadInto)
  where
    size = cmmTypeSize pk
421
-- catch simple cases of zero- or sign-extended load
getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
    Amode addr addrCode <- getAmode mem
    let loadInto dst = addrCode `snocOL` LD II8 dst addr
    return (Any II32 loadInto)

-- Note: there is no Load Byte Arithmetic instruction, so no signed case here

getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
    Amode addr addrCode <- getAmode mem
    let loadInto dst = addrCode `snocOL` LD II16 dst addr
    return (Any II32 loadInto)

getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
    Amode addr addrCode <- getAmode mem
    let loadInto dst = addrCode `snocOL` LA II16 dst addr
    return (Any II32 loadInto)
436
-- unary MachOps
getRegister (CmmMachOp mop [x])
  = case mop of
      MO_Not rep         -> intUnary rep NOT

      MO_F_Neg w         -> floatUnary w FNEG
      MO_S_Neg w         -> intUnary w NEG

      MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
      MO_FF_Conv W32 W64 -> retag FF64

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_SS_Conv from to
        | from == to     -> retag (intSize to)

      -- narrowing is a nop: we treat the high bits as undefined
      MO_SS_Conv W32 to  -> retag (intSize to)
      MO_SS_Conv W16 W8  -> retag II8
      MO_SS_Conv W8  to  -> intUnary to (EXTS II8)
      MO_SS_Conv W16 to  -> intUnary to (EXTS II16)

      MO_UU_Conv from to
        | from == to     -> retag (intSize to)

      -- narrowing is a nop: we treat the high bits as undefined
      MO_UU_Conv W32 to  -> retag (intSize to)
      MO_UU_Conv W16 W8  -> retag II8
      MO_UU_Conv W8 to   -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
      MO_UU_Conv W16 to  -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
      _                  -> panic "PPC.CodeGen.getRegister: no match"

  where
    -- one-operand instruction at the integer / float size of a width
    intUnary   width instr = trivialUCode (intSize width) instr x
    floatUnary width instr = trivialUCode (floatSize width) instr x

    -- conversion that needs no code: evaluate the operand and just
    -- relabel the result with the new size
    retag newSize
        = getRegister x >>= \operand ->
            return (swizzleRegisterRep operand newSize)
475
-- dyadic PrimOps
--
-- Comparisons go through condFltReg / condIntReg; integer operands that
-- are narrower than 32 bits are first wrapped in a sign- or zero-extending
-- conversion (extendSExpr / extendUExpr) as appropriate for the operation.
getRegister (CmmMachOp mop [x, y])
  = case mop of
      MO_F_Eq w -> condFltReg EQQ x y
      MO_F_Ne w -> condFltReg NE  x y
      MO_F_Gt w -> condFltReg GTT x y
      MO_F_Ge w -> condFltReg GE  x y
      MO_F_Lt w -> condFltReg LTT x y
      MO_F_Le w -> condFltReg LE  x y

      MO_Eq rep -> condIntReg EQQ  (extendUExpr rep x) (extendUExpr rep y)
      MO_Ne rep -> condIntReg NE   (extendUExpr rep x) (extendUExpr rep y)

      MO_S_Gt rep -> condIntReg GTT  (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Ge rep -> condIntReg GE   (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Lt rep -> condIntReg LTT  (extendSExpr rep x) (extendSExpr rep y)
      MO_S_Le rep -> condIntReg LE   (extendSExpr rep x) (extendSExpr rep y)

      MO_U_Gt rep -> condIntReg GU   (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Ge rep -> condIntReg GEU  (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Lt rep -> condIntReg LU   (extendUExpr rep x) (extendUExpr rep y)
      MO_U_Le rep -> condIntReg LEU  (extendUExpr rep x) (extendUExpr rep y)

      MO_F_Add w  -> triv_float w FADD
      MO_F_Sub w  -> triv_float w FSUB
      MO_F_Mul w  -> triv_float w FMUL
      MO_F_Quot w -> triv_float w FDIV

         -- optimize addition with 32-bit immediate
         -- (needed for PIC)
      MO_Add W32 ->
        case y of
          -- The literal is added as-is, so it is the literal itself (not
          -- its negation, as in the MO_Sub case below) that has to fit
          -- the signed immediate field.
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
          -- Any other literal is split into a high part added with ADDIS
          -- and a low part added with ADD.
          CmmLit lit
            -> do
                (src, srcCode) <- getSomeReg x
                let imm = litToImm lit
                    code dst = srcCode `appOL` toOL [
                                    ADDIS dst src (HA imm),
                                    ADD dst dst (RIImm (LO imm))
                                ]
                return (Any II32 code)
          _ -> trivialCode W32 True ADD x y

      MO_Add rep -> trivialCode rep True ADD x y
      MO_Sub rep ->
        case y of    -- subfi ('subtract from' with immediate) doesn't exist
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
          -- SUBF takes its operands in the opposite order, hence y x
          _ -> trivialCodeNoImm' (intSize rep) SUBF y x

      MO_Mul rep -> trivialCode rep True MULLW x y

      MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y

      MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented"
      MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented"

      MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
      MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)

      MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
      MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)

      MO_And rep   -> trivialCode rep False AND x y
      MO_Or rep    -> trivialCode rep False OR x y
      MO_Xor rep   -> trivialCode rep False XOR x y

      MO_Shl rep   -> trivialCode rep False SLW x y
      MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
      MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
      _            -> panic "PPC.CodeGen.getRegister: no match"

  where
    -- three-register floating point instruction at the float size of
    -- the given width
    triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
    triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
552
-- Integer literal that fits an immediate: a single load-immediate.
getRegister (CmmLit (CmmInt i rep))
  | Just imm <- makeImmediate rep True i
  = return (Any (intSize rep) (\dst -> unitOL (LI dst imm)))

-- Floating point literal: emit it as read-only data under a fresh label
-- and load it through a (possibly dynamic) reference to that label.
getRegister (CmmLit (CmmFloat f frep)) = do
    lbl    <- getNewLabelNat
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addrCode <- getAmode dynRef
    let size     = floatSize frep
        constant = LDATA ReadOnlyData [ CmmDataLabel lbl
                                      , CmmStaticLit (CmmFloat f frep) ]
        loadInto dst = constant `consOL` (addrCode `snocOL` LD size dst addr)
    return (Any size loadInto)

-- Any other literal: built from its high part (LIS) and low part (ADD).
getRegister (CmmLit lit) = return (Any (cmmTypeSize rep) build)
  where
    rep = cmmLitType lit
    imm = litToImm lit
    build dst = toOL [ LIS dst (HA imm)
                     , ADD dst dst (RIImm (LO imm)) ]

getRegister other = pprPanic "getRegister(ppc)" (pprExpr other)
582
-- extend?Expr: wrap an integer expression of width rep in a signed (S) or
-- unsigned (U) conversion to 32 bits; a 32-bit expression is returned
-- unchanged.
extendSExpr :: Width -> CmmExpr -> CmmExpr
extendSExpr rep x = case rep of
    W32 -> x
    _   -> CmmMachOp (MO_SS_Conv rep W32) [x]

extendUExpr :: Width -> CmmExpr -> CmmExpr
extendUExpr rep x = case rep of
    W32 -> x
    _   -> CmmMachOp (MO_UU_Conv rep W32) [x]
589
590 -- -----------------------------------------------------------------------------
-- | Memory addressing modes passed up the tree: the addressing mode
-- itself, plus the code that must run before it can be used.
data Amode = Amode AddrMode InstrBlock
595
596 {-
597 Now, given a tree (the argument to an CmmLoad) that references memory,
598 produce a suitable addressing mode.
599
600 A Rule of the Game (tm) for Amodes: use of the addr bit must
601 immediately follow use of the code part, since the code part puts
602 values in registers which the addr then refers to. So you can't put
603 anything in between, lest it overwrite some of those registers. If
604 you need to do some other computation between the code part and use of
605 the addr bit, first store the effective address from the amode in a
606 temporary, then do the other computation, and then use the temporary:
607
608 code
609 LEA amode, tmp
610 ... other computation ...
611 ... (tmp) ...
612 -}
613
-- | Produce an addressing mode (and its set-up code) for an address
-- expression.
getAmode :: CmmExpr -> NatM Amode
getAmode regOff@(CmmRegOff _ _) = getAmode $ mangleIndexTree regOff

-- base - literal, where the negated literal fits an immediate
getAmode (CmmMachOp (MO_Sub W32) [base, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W32 True (-i)
  = getSomeReg base >>= \(reg, code) ->
        return (Amode (AddrRegImm reg off) code)

-- base + literal, where the literal fits an immediate
getAmode (CmmMachOp (MO_Add W32) [base, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W32 True i
  = getSomeReg base >>= \(reg, code) ->
        return (Amode (AddrRegImm reg off) code)

   -- optimize addition with 32-bit immediate
   -- (needed for PIC)
getAmode (CmmMachOp (MO_Add W32) [base, CmmLit lit]) = do
    tmp              <- getNewRegNat II32
    (src, baseCode)  <- getSomeReg base
    let imm = litToImm lit
    return (Amode (AddrRegImm tmp (LO imm))
                  (baseCode `snocOL` ADDIS tmp src (HA imm)))

-- bare literal: high part into a temporary, low part as the displacement
getAmode (CmmLit lit) = do
    tmp <- getNewRegNat II32
    let imm = litToImm lit
    return (Amode (AddrRegImm tmp (LO imm))
                  (unitOL (LIS tmp (HA imm))))

-- general addition: register + register
getAmode (CmmMachOp (MO_Add W32) [x, y]) = do
    (regX, codeX) <- getSomeReg x
    (regY, codeY) <- getSomeReg y
    return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))

-- anything else: compute the address into a register, displacement 0
getAmode other
    = getSomeReg other >>= \(reg, code) ->
        return (Amode (AddrRegImm reg (ImmInt 0)) code)
659
660
661
-- | Condition codes passed up the tree: a flag (set to True by the
-- floating point comparison and False by the integer ones), the condition
-- to test, and the code that performs the comparison.
data CondCode = CondCode Bool Cond InstrBlock
665
666 -- Set up a condition code for a conditional branch.
667
-- | Set up a condition code for a conditional branch.
--
-- Almost the same as everywhere else, but small integers need to be
-- extended to 32 bits first.  Only two-operand comparison MachOps are
-- accepted; anything else is a panic.
getCondCode :: CmmExpr -> NatM CondCode
getCondCode (CmmMachOp mop [x, y])
  = case mop of
      MO_F_Eq W32 -> fltCmp EQQ
      MO_F_Ne W32 -> fltCmp NE
      MO_F_Gt W32 -> fltCmp GTT
      MO_F_Ge W32 -> fltCmp GE
      MO_F_Lt W32 -> fltCmp LTT
      MO_F_Le W32 -> fltCmp LE

      MO_F_Eq W64 -> fltCmp EQQ
      MO_F_Ne W64 -> fltCmp NE
      MO_F_Gt W64 -> fltCmp GTT
      MO_F_Ge W64 -> fltCmp GE
      MO_F_Lt W64 -> fltCmp LTT
      MO_F_Le W64 -> fltCmp LE

      MO_Eq rep   -> unsignedCmp rep EQQ
      MO_Ne rep   -> unsignedCmp rep NE

      MO_S_Gt rep -> signedCmp rep GTT
      MO_S_Ge rep -> signedCmp rep GE
      MO_S_Lt rep -> signedCmp rep LTT
      MO_S_Le rep -> signedCmp rep LE

      MO_U_Gt rep -> unsignedCmp rep GU
      MO_U_Ge rep -> unsignedCmp rep GEU
      MO_U_Lt rep -> unsignedCmp rep LU
      MO_U_Le rep -> unsignedCmp rep LEU

      other -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
  where
    fltCmp cond          = condFltCode cond x y
    -- integer comparisons on operands zero-/sign-extended to 32 bits
    unsignedCmp rep cond = condIntCode cond (extendUExpr rep x) (extendUExpr rep y)
    signedCmp   rep cond = condIntCode cond (extendSExpr rep x) (extendSExpr rep y)

getCondCode other = panic "getCondCode(2)(powerpc)"
705
706
707
-- @cond(Int|Flt)Code@: turn a comparison of two expressions into a
-- condition code, to be passed back up the tree.

condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

--  ###FIXME: I16 and I8!
-- Comparison against a literal that fits an immediate (signed or unsigned
-- according to the condition).
condIntCode cond x (CmmLit (CmmInt y rep))
  | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
  = do
        (src1, code) <- getSomeReg x
        let compare32 = if condUnsigned cond then CMPL else CMP
        return (CondCode False cond
                    (code `snocOL` compare32 II32 src1 (RIImm src2)))

-- General case: both operands in registers.
condIntCode cond x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let compare32 = if condUnsigned cond then CMPL else CMP
    return (CondCode False cond
                (code1 `appOL` code2 `snocOL` compare32 II32 src1 (RIReg src2)))

condFltCode cond x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    let compared = code1 `appOL` code2 `snocOL` FCMP src1 src2
        -- condition register bit numbers used below
        ltbit = 0
        gtbit = 1
        eqbit = 2
        -- twiddle CR to handle unordered case
        adjusted = case cond of
            GE -> compared `snocOL` CRNOR ltbit eqbit gtbit
            LE -> compared `snocOL` CRNOR gtbit eqbit ltbit
            _  -> compared
    return (CondCode True cond adjusted)
743
744
745
746 -- -----------------------------------------------------------------------------
747 -- Generating assignments
748
749 -- Assignments are really at the heart of the whole code generation
750 -- business. Almost all top-level nodes of any real importance are
751 -- assignments, which correspond to loads, stores, or register
752 -- transfers. If we're really lucky, some of the register transfers
753 -- will go away, because we can use the destination register to
754 -- complete the code generation for the right hand side. This only
755 -- fails when the right hand side is forced into a fixed register
756 -- (e.g. the result of a call).
757
assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock

-- Store to memory: value code first, then address code, then the store.
assignMem_IntCode pk addr src = do
    (valueReg, valueCode) <- getSomeReg src
    Amode dstAddr addrCode <- getAmode addr
    return (valueCode `appOL` addrCode `snocOL` ST pk valueReg dstAddr)

-- dst is a reg, but src could be anything.  If the source lets us pick
-- the register, it is computed straight into dst; otherwise the fixed
-- register is copied into dst afterwards.
assignReg_IntCode _ reg src
    = getRegister src >>= \register -> return $ case register of
        Any _ mkCode         -> mkCode dst
        Fixed _ fixedReg code -> code `snocOL` MR dst fixedReg
  where
    dst = getRegisterReg reg

-- Easy, isn't it?  The float variants are the integer ones.
assignMem_FltCode = assignMem_IntCode
assignReg_FltCode = assignReg_IntCode
784
785
786
-- | Generate a jump.  A literal label becomes a direct 'JMP'; any other
-- target is computed into a register and reached via MTCTR / BCTR.
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
genJump tree = case tree of
    CmmLit (CmmLabel lbl) ->
        return (unitOL (JMP lbl))
    _ -> do
        (target, code) <- getSomeReg tree
        return (code `appOL` toOL [MTCTR target, BCTR []])
796
797
798 -- -----------------------------------------------------------------------------
799 -- Unconditional branches
-- | Unconditional branch to a local block, built from 'mkJumpInstr'.
genBranch :: BlockId -> NatM InstrBlock
genBranch target = return (toOL (mkJumpInstr target))
802
803
804 -- -----------------------------------------------------------------------------
805 -- Conditional jumps
806
807 {-
808 Conditional jumps are always to local labels, so we can use branch
809 instructions. We peek at the arguments to decide what kind of
810 comparison to do.
811
812 SPARC: First, we have to ensure that the condition codes are set
813 according to the supplied comparison operation. We generate slightly
814 different code for floating point comparisons, because a floating
815 point operation cannot directly precede a @BF@. We assume the worst
816 and fill that slot with a @NOP@.
817
818 SPARC: Do not fill the delay slots here; you will confuse the register
819 allocator.
820 -}
821
822
-- | Conditional jump: evaluate the condition with 'getCondCode', then
-- emit a 'BCC' on the resulting condition to the target block.
genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock
genCondJump target condition = do
    CondCode _ cond setup <- getCondCode condition
    return (setup `snocOL` BCC cond target)
831
832
833
834 -- -----------------------------------------------------------------------------
835 -- Generating C calls
836
837 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
838 -- @get_arg@, which moves the arguments to the correct registers/stack
839 -- locations. Apart from that, the code is easy.
840 --
841 -- (If applicable) Do not fill the delay slots here; you will confuse the
842 -- register allocator.
843
844 genCCall
845 :: CmmCallTarget -- function to call
846 -> HintedCmmFormals -- where to put the result
847 -> HintedCmmActuals -- arguments (of mixed type)
848 -> NatM InstrBlock
849
850
851 #if darwin_TARGET_OS || linux_TARGET_OS
852 {-
853 The PowerPC calling convention for Darwin/Mac OS X
854 is described in Apple's document
855 "Inside Mac OS X - Mach-O Runtime Architecture".
856
857 PowerPC Linux uses the System V Release 4 Calling Convention
858 for PowerPC. It is described in the
859 "System V Application Binary Interface PowerPC Processor Supplement".
860
861 Both conventions are similar:
862 Parameters may be passed in general-purpose registers starting at r3, in
863 floating point registers starting at f1, or on the stack.
864
865 But there are substantial differences:
866 * The number of registers used for parameter passing and the exact set of
867 nonvolatile registers differs (see MachRegs.lhs).
868 * On Darwin, stack space is always reserved for parameters, even if they are
869 passed in registers. The called routine may choose to save parameters from
870 registers to the corresponding space on the stack.
871 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
872 parameter is passed in an FPR.
873 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
874 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
875 Darwin just treats an I64 like two separate II32s (high word first).
876 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
877 4-byte aligned like everything else on Darwin.
878 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
879 PowerPC Linux does not agree, so neither do we.
880
881 According to both conventions, The parameter area should be part of the
882 caller's stack frame, allocated in the caller's prologue code (large enough
883 to hold the parameter lists for all called routines). The NCG already
884 uses the stack for register spilling, leaving 64 bytes free at the top.
885 If we need a larger parameter area than that, we just allocate a new stack
886 frame just before ccalling.
887 -}
888
889
-- | Generate the instruction sequence for a C call.
--
-- A write-barrier primop is emitted as a single LWSYNC.  For every other
-- target the arguments are marshalled by 'passArguments', the callee is
-- invoked either with BL (statically known label) or through the counter
-- register with MTCTR/BCTRL (computed address), and the result, if any, is
-- copied into the destination register by 'moveResult'.
890 genCCall (CmmPrim MO_WriteBarrier) _ _
891 = return $ unitOL LWSYNC
892
893 genCCall target dest_regs argsAndHints
894 = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps)
895 -- we rely on argument promotion in the codeGen
896 do
897 (finalStack,passArgumentsCode,usedRegs) <- passArguments
898 (zip args argReps)
899 allArgRegs allFPArgRegs
900 initialStackOffset
901 (toOL []) []
902
903 (labelOrExpr, reduceToFF32) <- case target of
904 CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False)
905 CmmCallee expr conv -> return (Right expr, False)
906 CmmPrim mop -> outOfLineFloatOp mop
907
-- codeBefore: optional stack-frame allocation followed by the argument setup.
-- codeAfter: optional stack-frame release followed by the result move.
908 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
909 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
910
911 case labelOrExpr of
912 Left lbl -> do
913 return ( codeBefore
914 `snocOL` BL lbl usedRegs
915 `appOL` codeAfter)
-- Computed callee: its address is moved into CTR before the argument code
-- runs, and the call is made through CTR afterwards.
916 Right dyn -> do
917 (dynReg, dynCode) <- getSomeReg dyn
918 return ( dynCode
919 `snocOL` MTCTR dynReg
920 `appOL` codeBefore
921 `snocOL` BCTRL usedRegs
922 `appOL` codeAfter)
923 where
924 #if darwin_TARGET_OS
925 initialStackOffset = 24
926 -- size of linkage area + size of arguments, in bytes
927 stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $
928 map (widthInBytes . typeWidth) argReps
929 #elif linux_TARGET_OS
930 initialStackOffset = 8
931 stackDelta finalStack = roundTo 16 finalStack
932 #endif
933 args = map hintlessCmm argsAndHints
934 argReps = map cmmExprType args
935
-- Round x up to the next multiple of a (x itself if already a multiple).
936 roundTo a x | x `mod` a == 0 = x
937 | otherwise = x + a - (x `mod` a)
938
-- The stack pointer is only adjusted when the computed stackDelta exceeds
-- 64 bytes; otherwise no instructions are emitted.  move_sp_up undoes what
-- move_sp_down did and resets the recorded DELTA to 0.
939 move_sp_down finalStack
940 | delta > 64 =
941 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
942 DELTA (-delta)]
943 | otherwise = nilOL
944 where delta = stackDelta finalStack
945 move_sp_up finalStack
946 | delta > 64 =
947 toOL [ADD sp sp (RIImm (ImmInt delta)),
948 DELTA 0]
949 | otherwise = nilOL
950 where delta = stackDelta finalStack
951
952
-- passArguments walks the (argument, type) list while threading the
-- remaining GPR and FPR argument registers, the current stack offset, the
-- code accumulated so far and the list of argument registers used so far.
-- It returns (final stack offset, accumulated code, used registers).
-- The first non-empty equation handles 64-bit words, the second one
-- everything else.
953 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
954 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
955 accumCode accumUsed | isWord64 arg_ty =
956 do
957 ChildCode64 code vr_lo <- iselExpr64 arg
958 let vr_hi = getHiVRegFromLo vr_lo
959
960 #if darwin_TARGET_OS
961 passArguments args
962 (drop 2 gprs)
963 fprs
964 (stackOffset+8)
965 (accumCode `appOL` code
966 `snocOL` storeWord vr_hi gprs stackOffset
967 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
968 ((take 2 gprs) ++ accumUsed)
969 where
970 storeWord vr (gpr:_) offset = MR gpr vr
971 storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset))
972
973 #elif linux_TARGET_OS
974 let stackOffset' = roundTo 8 stackOffset
975 stackCode = accumCode `appOL` code
976 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
977 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
978 regCode hireg loreg =
979 accumCode `appOL` code
980 `snocOL` MR hireg vr_hi
981 `snocOL` MR loreg vr_lo
982
983 case gprs of
984 hireg : loreg : regs | even (length gprs) ->
985 passArguments args regs fprs stackOffset
986 (regCode hireg loreg) (hireg : loreg : accumUsed)
987 _skipped : hireg : loreg : regs ->
988 passArguments args regs fprs stackOffset
989 (regCode hireg loreg) (hireg : loreg : accumUsed)
990 _ -> -- only one or no regs left
991 passArguments args [] fprs (stackOffset'+8)
992 stackCode accumUsed
993 #endif
994
995 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
996 | reg : _ <- regs = do
997 register <- getRegister arg
998 let code = case register of
999 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1000 Any _ acode -> acode reg
1001 passArguments args
1002 (drop nGprs gprs)
1003 (drop nFprs fprs)
1004 #if darwin_TARGET_OS
1005 -- The Darwin ABI requires that we reserve stack slots for register parameters
1006 (stackOffset + stackBytes)
1007 #elif linux_TARGET_OS
1008 -- ... the SysV ABI doesn't.
1009 stackOffset
1010 #endif
1011 (accumCode `appOL` code)
1012 (reg : accumUsed)
1013 | otherwise = do
1014 (vr, code) <- getSomeReg arg
1015 passArguments args
1016 (drop nGprs gprs)
1017 (drop nFprs fprs)
1018 (stackOffset' + stackBytes)
1019 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
1020 accumUsed
1021 where
1022 #if darwin_TARGET_OS
1023 -- stackOffset is at least 4-byte aligned
1024 -- The Darwin ABI is happy with that.
1025 stackOffset' = stackOffset
1026 #else
1027 -- ... the SysV ABI requires 8-byte alignment for doubles.
1028 stackOffset' | isFloatType rep && typeWidth rep == W64 =
1029 roundTo 8 stackOffset
1030 | otherwise = stackOffset
1031 #endif
1032 stackSlot = AddrRegImm sp (ImmInt stackOffset')
-- Per argument size: (GPRs consumed, FPRs consumed, bytes of stack,
-- register list the argument is taken from).
1033 (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of
1034 II32 -> (1, 0, 4, gprs)
1035 #if darwin_TARGET_OS
1036 -- The Darwin ABI requires that we skip a corresponding number of GPRs when
1037 -- we use the FPRs.
1038 FF32 -> (1, 1, 4, fprs)
1039 FF64 -> (2, 1, 8, fprs)
1040 #elif linux_TARGET_OS
1041 -- ... the SysV ABI doesn't.
1042 FF32 -> (0, 1, 4, fprs)
1043 FF64 -> (0, 1, 8, fprs)
1044 #endif
1045
-- moveResult copies the call result into the single destination register:
-- from f1 for floating-point results (using FRSP when reduceToFF32 is set
-- and the destination is a 32-bit float), from r3 (high word) and r4 (low
-- word) for 64-bit words, and from r3 otherwise.  No destination means no
-- code.
1046 moveResult reduceToFF32 =
1047 case dest_regs of
1048 [] -> nilOL
1049 [CmmHinted dest _hint]
1050 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1051 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1052 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
1053 MR r_dest r4]
1054 | otherwise -> unitOL (MR r_dest r3)
1055 where rep = cmmRegType (CmmLocal dest)
1056 r_dest = getRegisterReg (CmmLocal dest)
1057
-- outOfLineFloatOp maps a floating-point CallishMachOp to a reference to a
-- foreign function plus a flag.  The F32 and F64 variants of an operation
-- use the same function name; the flag is True exactly for the F32 variants
-- and is what genCCall passes to moveResult as reduceToFF32.
1058 outOfLineFloatOp mop =
1059 do
1060 dflags <- getDynFlagsNat
1061 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
1062 mkForeignLabel functionName Nothing True IsFunction
1063 let mopLabelOrExpr = case mopExpr of
1064 CmmLit (CmmLabel lbl) -> Left lbl
1065 _ -> Right mopExpr
1066 return (mopLabelOrExpr, reduce)
1067 where
1068 (functionName, reduce) = case mop of
1069 MO_F32_Exp -> (fsLit "exp", True)
1070 MO_F32_Log -> (fsLit "log", True)
1071 MO_F32_Sqrt -> (fsLit "sqrt", True)
1072
1073 MO_F32_Sin -> (fsLit "sin", True)
1074 MO_F32_Cos -> (fsLit "cos", True)
1075 MO_F32_Tan -> (fsLit "tan", True)
1076
1077 MO_F32_Asin -> (fsLit "asin", True)
1078 MO_F32_Acos -> (fsLit "acos", True)
1079 MO_F32_Atan -> (fsLit "atan", True)
1080
1081 MO_F32_Sinh -> (fsLit "sinh", True)
1082 MO_F32_Cosh -> (fsLit "cosh", True)
1083 MO_F32_Tanh -> (fsLit "tanh", True)
1084 MO_F32_Pwr -> (fsLit "pow", True)
1085
1086 MO_F64_Exp -> (fsLit "exp", False)
1087 MO_F64_Log -> (fsLit "log", False)
1088 MO_F64_Sqrt -> (fsLit "sqrt", False)
1089
1090 MO_F64_Sin -> (fsLit "sin", False)
1091 MO_F64_Cos -> (fsLit "cos", False)
1092 MO_F64_Tan -> (fsLit "tan", False)
1093
1094 MO_F64_Asin -> (fsLit "asin", False)
1095 MO_F64_Acos -> (fsLit "acos", False)
1096 MO_F64_Atan -> (fsLit "atan", False)
1097
1098 MO_F64_Sinh -> (fsLit "sinh", False)
1099 MO_F64_Cosh -> (fsLit "cosh", False)
1100 MO_F64_Tanh -> (fsLit "tanh", False)
1101 MO_F64_Pwr -> (fsLit "pow", False)
1102 other -> pprPanic "genCCall(ppc): unknown callish op"
1103 (pprCallishMachOp other)
1104
1105 #else /* darwin_TARGET_OS || linux_TARGET_OS */
-- On any other operating system genCCall is a stub that panics when used.
1106 genCCall = panic "PPC.CodeGen.genCCall: not defined for this os"
1107 #endif
1108
1109
1110 -- -----------------------------------------------------------------------------
1111 -- Generating a table-branch
1112
-- | Emit a jump-table dispatch on the value of @expr@.
--
-- The table is placed in read-only data under a fresh label.  The scrutinee
-- is scaled by four (shift left by 2) to index the table, the selected entry
-- is loaded, moved into the counter register and branched to with BCTR.
--
-- When 'opt_PIC' is set the table holds label differences relative to the
-- table label (0 for missing targets), the table address is obtained through
-- a dynamic data reference, and it is added back to the loaded entry before
-- the jump.  Otherwise the entries come from 'jumpTableEntry' and the table
-- is addressed with the HA/LO halves of its label.
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
genSwitch expr ids = do
    (idxReg, idxCode) <- getSomeReg expr
    scratch           <- getNewRegNat II32
    tableLbl          <- getNewLabelNat
    let targets  = [ blk | Just blk <- ids ]
        scaleIdx = SLW scratch idxReg (RIImm (ImmInt 2))
    if opt_PIC
        then do
            dflags   <- getDynFlagsNat
            tableRef <- cmmMakeDynamicReference dflags addImportNat
                            DataReference tableLbl
            (tableReg, tableCode) <- getSomeReg tableRef
            let relEntry Nothing
                    = CmmStaticLit (CmmInt 0 wordWidth)
                relEntry (Just (BlockId uniq))
                    = CmmStaticLit
                        (CmmLabelDiffOff (mkAsmTempLabel uniq) tableLbl 0)
            return $ idxCode `appOL` tableCode `appOL` toOL
                [ LDATA ReadOnlyData (CmmDataLabel tableLbl : map relEntry ids)
                , scaleIdx
                , LD II32 scratch (AddrRegReg tableReg scratch)
                , ADD scratch scratch (RIReg tableReg)
                , MTCTR scratch
                , BCTR targets
                ]
        else
            return $ idxCode `appOL` toOL
                [ LDATA ReadOnlyData
                    (CmmDataLabel tableLbl : map jumpTableEntry ids)
                , scaleIdx
                , ADDIS scratch scratch (HA (ImmCLbl tableLbl))
                , LD II32 scratch (AddrRegImm scratch (LO (ImmCLbl tableLbl)))
                , MTCTR scratch
                , BCTR targets
                ]
1158
1159
1160 -- -----------------------------------------------------------------------------
1161 -- 'condIntReg' and 'condFltReg': condition codes into registers
1162
1163 -- Turn those condition codes into integers now (when they appear on
1164 -- the right hand side of an assignment).
1165 --
1166 -- (If applicable) Do not fill the delay slots here; you will confuse the
1167 -- register allocator.
1168
condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register

-- | Turn the outcome of a comparison into a value in an integer register.
--
-- The comparison code is run first.  Each supported condition maps to one
-- condition-register bit (0, 1 or 2) plus a flag saying whether that bit has
-- to be inverted (with CRNOR on itself) before it is read.  The condition
-- register is then copied to the destination with MFCR and RLWINM is used to
-- isolate the selected bit.  Conditions without a mapping cause a panic when
-- the generated code is demanded.
condReg :: NatM CondCode -> NatM Register
condReg getCond = do
    CondCode _ cond cond_code <- getCond
    let
        -- (condition-register bit to read, invert it first?)
        (crBit, invert) = case cond of
            LTT -> (0, False)
            GE  -> (0, True)
            GTT -> (1, False)
            LE  -> (1, True)
            EQQ -> (2, False)
            NE  -> (2, True)

            LU  -> (0, False)
            GEU -> (0, True)
            GU  -> (1, False)
            LEU -> (1, True)
            _   -> panic "PPC.CodeGen.codeReg: no match"

        invertCode
            | invert    = unitOL (CRNOR crBit crBit crBit)
            | otherwise = nilOL

        materialise dst = cond_code `appOL` invertCode `appOL` toOL
            [ MFCR dst
            , RLWINM dst dst (crBit + 1) 31 31
            ]

    return (Any II32 materialise)

-- Integer and floating-point front ends: both only differ in the comparison
-- code handed to 'condReg'.
condIntReg cond x y = condReg $ condIntCode cond x y
condFltReg cond x y = condReg $ condFltCode cond x y
1213
1214
1215
1216 -- -----------------------------------------------------------------------------
1217 -- 'trivial*Code': deal with trivial instructions
1218
1219 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1220 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1221 -- Only look for constants on the right hand side, because that's
1222 -- where the generic optimizer will have put them.
1223
1224 -- Similarly, for unary instructions, we don't have to worry about
1225 -- matching an StInt as the argument, because genericOpt will already
1226 -- have handled the constant-folding.
1227
1228
1229
1230 {-
1231 Wolfgang's PowerPC version of The Rules:
1232
1233 A slightly modified version of The Rules to take advantage of the fact
1234 that PowerPC instructions work on all registers and don't implicitly
1235 clobber any fixed registers.
1236
1237 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1238
1239 * If getRegister returns Any, then the code it generates may modify only:
1240 (a) fresh temporaries
1241 (b) the destination register
1242 It may *not* modify global registers, unless the global
1243 register happens to be the destination register.
1244 It may not clobber any other registers. In fact, only ccalls clobber any
1245 fixed registers.
1246 Also, it may not modify the counter register (used by genCCall).
1247
1248 Corollary: If a getRegister for a subexpression returns Fixed, you need
1249 not move it to a fresh temporary before evaluating the next subexpression.
1250 The Fixed register won't be modified.
1251 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1252
1253 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1254 the value of the destination register.
1255 -}
1256
-- | Code for a dyadic integer instruction whose second operand may be a
-- register or an immediate.
--
-- If the right operand is an integer literal that 'makeImmediate' accepts
-- for the given width and signedness, only the left operand is evaluated and
-- the immediate form of the instruction is emitted.  Otherwise both operands
-- are evaluated (left first) and the register form is used.
trivialCode
    :: Width
    -> Bool
    -> (Reg -> Reg -> RI -> Instr)
    -> CmmExpr
    -> CmmExpr
    -> NatM Register
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y = do
        (lhs, lhsCode) <- getSomeReg x
        return $ Any (intSize rep) $ \dst ->
            lhsCode `snocOL` instr dst lhs (RIImm imm)
trivialCode rep _ instr x y = do
    (lhs, lhsCode) <- getSomeReg x
    (rhs, rhsCode) <- getSomeReg y
    return $ Any (intSize rep) $ \dst ->
        lhsCode `appOL` rhsCode `snocOL` instr dst lhs (RIReg rhs)
1277
-- | Code for a dyadic instruction that only exists in a three-register
-- form: both operands are evaluated into registers (left first) and the
-- instruction is applied to the destination and the two operand registers.
trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' size instr x y = do
    (lhs, lhsCode) <- getSomeReg x
    (rhs, rhsCode) <- getSomeReg y
    return $ Any size $ \dst ->
        lhsCode `appOL` rhsCode `snocOL` instr dst lhs rhs
1285
-- | Like @trivialCodeNoImm'@, for instruction constructors that also take
-- the operand size as their first argument.
trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm size instr = trivialCodeNoImm' size (instr size)
1289
1290
-- | Code for a unary instruction: the operand is evaluated into a register
-- and the instruction is applied to the destination and that register.
trivialUCode
    :: Size
    -> (Reg -> Reg -> Instr)
    -> CmmExpr
    -> NatM Register
trivialUCode rep instr x = do
    (operand, operandCode) <- getSomeReg x
    return $ Any rep $ \dst -> operandCode `snocOL` instr dst operand
1300
-- There is no "remainder" instruction on the PPC, so we have to do
-- it the hard way: divide, multiply the quotient by the divisor and
-- subtract the product from the dividend.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)
--
-- The quotient and the product are built in a fresh temporary, not in the
-- destination register.  getSomeReg may hand back the destination register
-- itself as src1 or src2 (e.g. for @x = x `rem` y@), and the rules for this
-- code generator allow subexpressions to depend on the destination; writing
-- intermediate results to dst would then destroy an operand that is still
-- needed by the following instructions.  dst is written exactly once, by the
-- final SUBF, after both operands have been read for the last time.
remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
              -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    (src1, code1) <- getSomeReg x
    (src2, code2) <- getSomeReg y
    tmp <- getNewRegNat (intSize rep)
    let code dst = code1 `appOL` code2 `appOL` toOL [
                -- tmp := src1 / src2
                div tmp src1 src2,
                -- tmp := tmp * src2
                MULLW tmp tmp (RIReg src2),
                -- dst := src1 - tmp
                SUBF dst tmp src1
            ]
    return (Any (intSize rep) code)
1316
1317
-- | Convert an integer of width @fromRep@ to a floating-point value of width
-- @toRep@.
--
-- The generated code builds a 64-bit pattern in two adjacent stack words
-- (spRel 2 holds 0x43300000, spRel 3 holds the operand with its top bit
-- flipped by XORIS 0x8000), loads it as an FF64 value, and subtracts from it
-- the constant 0x43300000_80000000 that is emitted into read-only data under
-- a fresh label.  For a 32-bit result the difference is rounded with FRSP.
--
-- 8- and 16-bit operands are sign-extended first.  The extension is written
-- to the scratch register @itmp@ rather than back into @src@: @src@ may be a
-- Fixed register belonging to the operand expression, and generated code is
-- only allowed to modify fresh temporaries and the destination register.
--
-- Panics (when the code is demanded) for any other source or target width.
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat II32
    ftmp <- getNewRegNat FF64
    dflags <- getDynFlagsNat
    dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
    Amode addr addr_code <- getAmode dynRef
    let
        code' dst = code `appOL` maybe_exts `appOL` toOL [
                LDATA ReadOnlyData
                                [CmmDataLabel lbl,
                                 CmmStaticLit (CmmInt 0x43300000 W32),
                                 CmmStaticLit (CmmInt 0x80000000 W32)],
                XORIS itmp ext_src (ImmInt 0x8000),
                ST II32 itmp (spRel 3),
                LIS itmp (ImmInt 0x4330),
                ST II32 itmp (spRel 2),
                LD FF64 ftmp (spRel 2)
            ] `appOL` addr_code `appOL` toOL [
                LD FF64 dst addr,
                FSUB FF64 dst ftmp dst
            ] `appOL` maybe_frsp dst

        -- Sign-extension code (if any) and the register that holds the
        -- full-width operand afterwards.
        (maybe_exts, ext_src) = case fromRep of
            W8  -> (unitOL $ EXTS II8 itmp src, itmp)
            W16 -> (unitOL $ EXTS II16 itmp src, itmp)
            W32 -> (nilOL, src)
            _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

        maybe_frsp dst
            = case toRep of
                W32 -> unitOL $ FRSP dst dst
                W64 -> nilOL
                _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

    return (Any (floatSize toRep) code')
1356
-- | Convert a floating-point value to an integer of width @toRep@.
--
-- The source width is ignored.  The operand is converted inside a fresh FF64
-- register with FCTIWZ, that register is stored as 64 bits to the stack at
-- spRel 2, and the word at spRel 3 (the low word; the high word is
-- undefined) is loaded into the destination.
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int _ toRep x = do
    -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
    (fpSrc, srcCode) <- getSomeReg x
    fpTmp <- getNewRegNat FF64
    return $ Any (intSize toRep) $ \dst ->
        srcCode `appOL` toOL
            [ FCTIWZ fpTmp fpSrc          -- convert to int in FP reg
            , ST FF64 fpTmp (spRel 2)     -- spill all 64 bits to the stack
            , LD II32 dst (spRel 3)       -- reload only the low word
            ]