More CPP removal: pprDynamicLinkerAsmLabel in CLabel
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1
2 -----------------------------------------------------------------------------
3 --
4 -- Generating machine code (instruction selection)
5 --
6 -- (c) The University of Glasgow 1996-2004
7 --
8 -----------------------------------------------------------------------------
9
10 -- This is a big module, but, if you pay attention to
11 -- (a) the sectioning, (b) the type signatures, and
12 -- (c) the #if blah_TARGET_ARCH} things, the
13 -- structure should not be too overwhelming.
14
15 module PPC.CodeGen (
16 cmmTopCodeGen,
17 generateJumpTableForInstr,
18 InstrBlock
19 )
20
21 where
22
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
25 #include "../includes/MachDeps.h"
26
27 -- NCG stuff:
28 import PPC.Instr
29 import PPC.Cond
30 import PPC.Regs
31 import CPrim
32 import NCGMonad
33 import Instruction
34 import PIC
35 import Size
36 import RegClass
37 import Reg
38 import TargetReg
39 import Platform
40
41 -- Our intermediate code:
42 import BlockId
43 import PprCmm ( pprExpr )
44 import OldCmm
45 import CLabel
46
47 -- The rest:
48 import StaticFlags ( opt_PIC )
49 import OrdList
50 import Outputable
51 import Unique
52 import DynFlags
53
54 import Control.Monad ( mapAndUnzipM )
55 import Data.Bits
56 import Data.Word
57
58 import BasicTypes
59 import FastString
60
61 -- -----------------------------------------------------------------------------
62 -- Top-level of the instruction selector
63
64 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
65 -- They are really trees of insns to facilitate fast appending, where a
66 -- left-to-right traversal (pre-order?) yields the insns in the correct
67 -- order.
68
69 cmmTopCodeGen
70 :: RawCmmDecl
71 -> NatM [NatCmmDecl CmmStatics Instr]
72
73 cmmTopCodeGen (CmmProc info lab (ListGraph blocks)) = do
74 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
75 picBaseMb <- getPicBaseMaybeNat
76 dflags <- getDynFlagsNat
77 let proc = CmmProc info lab (ListGraph $ concat nat_blocks)
78 tops = proc : concat statics
79 os = platformOS $ targetPlatform dflags
80 case picBaseMb of
81 Just picBase -> initializePicBase_ppc ArchPPC os picBase tops
82 Nothing -> return tops
83
84 cmmTopCodeGen (CmmData sec dat) = do
85 return [CmmData sec dat] -- no translation, we just use CmmStatic
86
87 basicBlockCodeGen
88 :: CmmBasicBlock
89 -> NatM ( [NatBasicBlock Instr]
90 , [NatCmmDecl CmmStatics Instr])
91
92 basicBlockCodeGen (BasicBlock id stmts) = do
93 instrs <- stmtsToInstrs stmts
94 -- code generation may introduce new basic block boundaries, which
95 -- are indicated by the NEWBLOCK instruction. We must split up the
96 -- instruction stream into basic blocks again. Also, we extract
97 -- LDATAs here too.
98 let
99 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
100
101 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
102 = ([], BasicBlock id instrs : blocks, statics)
103 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
104 = (instrs, blocks, CmmData sec dat:statics)
105 mkBlocks instr (instrs,blocks,statics)
106 = (instr:instrs, blocks, statics)
107 -- in
108 return (BasicBlock id top : other_blocks, statics)
109
110 stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
111 stmtsToInstrs stmts
112 = do instrss <- mapM stmtToInstrs stmts
113 return (concatOL instrss)
114
115 stmtToInstrs :: CmmStmt -> NatM InstrBlock
116 stmtToInstrs stmt = do
117 dflags <- getDynFlagsNat
118 case stmt of
119 CmmNop -> return nilOL
120 CmmComment s -> return (unitOL (COMMENT s))
121
122 CmmAssign reg src
123 | isFloatType ty -> assignReg_FltCode size reg src
124 | target32Bit (targetPlatform dflags) &&
125 isWord64 ty -> assignReg_I64Code reg src
126 | otherwise -> assignReg_IntCode size reg src
127 where ty = cmmRegType reg
128 size = cmmTypeSize ty
129
130 CmmStore addr src
131 | isFloatType ty -> assignMem_FltCode size addr src
132 | target32Bit (targetPlatform dflags) &&
133 isWord64 ty -> assignMem_I64Code addr src
134 | otherwise -> assignMem_IntCode size addr src
135 where ty = cmmExprType src
136 size = cmmTypeSize ty
137
138 CmmCall target result_regs args _ _
139 -> genCCall target result_regs args
140
141 CmmBranch id -> genBranch id
142 CmmCondBranch arg id -> genCondJump id arg
143 CmmSwitch arg ids -> genSwitch arg ids
144 CmmJump arg _ -> genJump arg
145 CmmReturn _ ->
146 panic "stmtToInstrs: return statement should have been cps'd away"
147
148
149 --------------------------------------------------------------------------------
150 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
151 -- They are really trees of insns to facilitate fast appending, where a
152 -- left-to-right traversal yields the insns in the correct order.
153 --
154 type InstrBlock
155 = OrdList Instr
156
157
158 -- | Register's passed up the tree. If the stix code forces the register
159 -- to live in a pre-decided machine register, it comes out as @Fixed@;
160 -- otherwise, it comes out as @Any@, and the parent can decide which
161 -- register to put it in.
162 --
163 data Register
164 = Fixed Size Reg InstrBlock
165 | Any Size (Reg -> InstrBlock)
166
167
168 swizzleRegisterRep :: Register -> Size -> Register
169 swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
170 swizzleRegisterRep (Any _ codefn) size = Any size codefn
171
172
173 -- | Grab the Reg for a CmmReg
174 getRegisterReg :: CmmReg -> Reg
175
176 getRegisterReg (CmmLocal (LocalReg u pk))
177 = RegVirtual $ mkVirtualReg u (cmmTypeSize pk)
178
179 getRegisterReg (CmmGlobal mid)
180 = case globalRegMaybe mid of
181 Just reg -> reg
182 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
183 -- By this stage, the only MagicIds remaining should be the
184 -- ones which map to a real machine register on this
185 -- platform. Hence ...
186
187
188 {-
189 Now, given a tree (the argument to an CmmLoad) that references memory,
190 produce a suitable addressing mode.
191
192 A Rule of the Game (tm) for Amodes: use of the addr bit must
193 immediately follow use of the code part, since the code part puts
194 values in registers which the addr then refers to. So you can't put
195 anything in between, lest it overwrite some of those registers. If
196 you need to do some other computation between the code part and use of
197 the addr bit, first store the effective address from the amode in a
198 temporary, then do the other computation, and then use the temporary:
199
200 code
201 LEA amode, tmp
202 ... other computation ...
203 ... (tmp) ...
204 -}
205
206
207 -- | Convert a BlockId to some CmmStatic data
208 jumpTableEntry :: Maybe BlockId -> CmmStatic
209 jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth)
210 jumpTableEntry (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
211 where blockLabel = mkAsmTempLabel (getUnique blockid)
212
213
214
215 -- -----------------------------------------------------------------------------
216 -- General things for putting together code sequences
217
218 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
219 -- CmmExprs into CmmRegOff?
220 mangleIndexTree :: CmmExpr -> CmmExpr
221 mangleIndexTree (CmmRegOff reg off)
222 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
223 where width = typeWidth (cmmRegType reg)
224
225 mangleIndexTree _
226 = panic "PPC.CodeGen.mangleIndexTree: no match"
227
228 -- -----------------------------------------------------------------------------
229 -- Code gen for 64-bit arithmetic on 32-bit platforms
230
231 {-
232 Simple support for generating 64-bit code (ie, 64 bit values and 64
233 bit assignments) on 32-bit platforms. Unlike the main code generator
234 we merely shoot for generating working code as simply as possible, and
235 pay little attention to code quality. Specifically, there is no
236 attempt to deal cleverly with the fixed-vs-floating register
237 distinction; all values are generated into (pairs of) floating
238 registers, even if this would mean some redundant reg-reg moves as a
239 result. Only one of the VRegUniques is returned, since it will be
240 of the VRegUniqueLo form, and the upper-half VReg can be determined
241 by applying getHiVRegFromLo to it.
242 -}
243
244 data ChildCode64 -- a.k.a "Register64"
245 = ChildCode64
246 InstrBlock -- code
247 Reg -- the lower 32-bit temporary which contains the
248 -- result; use getHiVRegFromLo to find the other
249 -- VRegUnique. Rules of this simplified insn
250 -- selection game are therefore that the returned
251 -- Reg may be modified
252
253
254 -- | The dual to getAnyReg: compute an expression into a register, but
255 -- we don't mind which one it is.
256 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
257 getSomeReg expr = do
258 r <- getRegister expr
259 case r of
260 Any rep code -> do
261 tmp <- getNewRegNat rep
262 return (tmp, code tmp)
263 Fixed _ reg code ->
264 return (reg, code)
265
266 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
267 getI64Amodes addrTree = do
268 Amode hi_addr addr_code <- getAmode addrTree
269 case addrOffset hi_addr 4 of
270 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
271 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
272 return (AddrRegImm hi_ptr (ImmInt 0),
273 AddrRegImm hi_ptr (ImmInt 4),
274 code)
275
276
277 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
278 assignMem_I64Code addrTree valueTree = do
279 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
280 ChildCode64 vcode rlo <- iselExpr64 valueTree
281 let
282 rhi = getHiVRegFromLo rlo
283
284 -- Big-endian store
285 mov_hi = ST II32 rhi hi_addr
286 mov_lo = ST II32 rlo lo_addr
287 -- in
288 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
289
290
291 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
292 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
293 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
294 let
295 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
296 r_dst_hi = getHiVRegFromLo r_dst_lo
297 r_src_hi = getHiVRegFromLo r_src_lo
298 mov_lo = MR r_dst_lo r_src_lo
299 mov_hi = MR r_dst_hi r_src_hi
300 -- in
301 return (
302 vcode `snocOL` mov_lo `snocOL` mov_hi
303 )
304
305 assignReg_I64Code _ _
306 = panic "assignReg_I64Code(powerpc): invalid lvalue"
307
308
309 iselExpr64 :: CmmExpr -> NatM ChildCode64
310 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
311 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
312 (rlo, rhi) <- getNewRegPairNat II32
313 let mov_hi = LD II32 rhi hi_addr
314 mov_lo = LD II32 rlo lo_addr
315 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
316 rlo
317
318 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
319 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
320
321 iselExpr64 (CmmLit (CmmInt i _)) = do
322 (rlo,rhi) <- getNewRegPairNat II32
323 let
324 half0 = fromIntegral (fromIntegral i :: Word16)
325 half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16)
326 half2 = fromIntegral ((fromIntegral i `shiftR` 32) :: Word16)
327 half3 = fromIntegral ((fromIntegral i `shiftR` 48) :: Word16)
328
329 code = toOL [
330 LIS rlo (ImmInt half1),
331 OR rlo rlo (RIImm $ ImmInt half0),
332 LIS rhi (ImmInt half3),
333 OR rlo rlo (RIImm $ ImmInt half2)
334 ]
335 -- in
336 return (ChildCode64 code rlo)
337
338 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
339 ChildCode64 code1 r1lo <- iselExpr64 e1
340 ChildCode64 code2 r2lo <- iselExpr64 e2
341 (rlo,rhi) <- getNewRegPairNat II32
342 let
343 r1hi = getHiVRegFromLo r1lo
344 r2hi = getHiVRegFromLo r2lo
345 code = code1 `appOL`
346 code2 `appOL`
347 toOL [ ADDC rlo r1lo r2lo,
348 ADDE rhi r1hi r2hi ]
349 -- in
350 return (ChildCode64 code rlo)
351
352 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
353 (expr_reg,expr_code) <- getSomeReg expr
354 (rlo, rhi) <- getNewRegPairNat II32
355 let mov_hi = LI rhi (ImmInt 0)
356 mov_lo = MR rlo expr_reg
357 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
358 rlo
359 iselExpr64 expr
360 = do dflags <- getDynFlagsNat
361 pprPanic "iselExpr64(powerpc)" (pprPlatform (targetPlatform dflags) expr)
362
363
364
365 getRegister :: CmmExpr -> NatM Register
366 getRegister e = do dflags <- getDynFlagsNat
367 getRegister' dflags e
368
369 getRegister' :: DynFlags -> CmmExpr -> NatM Register
370
371 getRegister' _ (CmmReg (CmmGlobal PicBaseReg))
372 = do
373 reg <- getPicBaseNat archWordSize
374 return (Fixed archWordSize reg nilOL)
375
376 getRegister' _ (CmmReg reg)
377 = return (Fixed (cmmTypeSize (cmmRegType reg))
378 (getRegisterReg reg) nilOL)
379
380 getRegister' dflags tree@(CmmRegOff _ _)
381 = getRegister' dflags (mangleIndexTree tree)
382
383 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
384 -- TO_W_(x), TO_W_(x >> 32)
385
386 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
387 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
388 | target32Bit (targetPlatform dflags) = do
389 ChildCode64 code rlo <- iselExpr64 x
390 return $ Fixed II32 (getHiVRegFromLo rlo) code
391
392 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
393 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
394 | target32Bit (targetPlatform dflags) = do
395 ChildCode64 code rlo <- iselExpr64 x
396 return $ Fixed II32 (getHiVRegFromLo rlo) code
397
398 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
399 | target32Bit (targetPlatform dflags) = do
400 ChildCode64 code rlo <- iselExpr64 x
401 return $ Fixed II32 rlo code
402
403 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
404 | target32Bit (targetPlatform dflags) = do
405 ChildCode64 code rlo <- iselExpr64 x
406 return $ Fixed II32 rlo code
407
408 getRegister' dflags (CmmLoad mem pk)
409 | not (isWord64 pk)
410 = do
411 let platform = targetPlatform dflags
412 Amode addr addr_code <- getAmode mem
413 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
414 addr_code `snocOL` LD size dst addr
415 return (Any size code)
416 where size = cmmTypeSize pk
417
418 -- catch simple cases of zero- or sign-extended load
419 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
420 Amode addr addr_code <- getAmode mem
421 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
422
423 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
424
425 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
426 Amode addr addr_code <- getAmode mem
427 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
428
429 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
430 Amode addr addr_code <- getAmode mem
431 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
432
433 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
434 = case mop of
435 MO_Not rep -> triv_ucode_int rep NOT
436
437 MO_F_Neg w -> triv_ucode_float w FNEG
438 MO_S_Neg w -> triv_ucode_int w NEG
439
440 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
441 MO_FF_Conv W32 W64 -> conversionNop FF64 x
442
443 MO_FS_Conv from to -> coerceFP2Int from to x
444 MO_SF_Conv from to -> coerceInt2FP from to x
445
446 MO_SS_Conv from to
447 | from == to -> conversionNop (intSize to) x
448
449 -- narrowing is a nop: we treat the high bits as undefined
450 MO_SS_Conv W32 to -> conversionNop (intSize to) x
451 MO_SS_Conv W16 W8 -> conversionNop II8 x
452 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
453 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
454
455 MO_UU_Conv from to
456 | from == to -> conversionNop (intSize to) x
457 -- narrowing is a nop: we treat the high bits as undefined
458 MO_UU_Conv W32 to -> conversionNop (intSize to) x
459 MO_UU_Conv W16 W8 -> conversionNop II8 x
460 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
461 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
462 _ -> panic "PPC.CodeGen.getRegister: no match"
463
464 where
465 triv_ucode_int width instr = trivialUCode (intSize width) instr x
466 triv_ucode_float width instr = trivialUCode (floatSize width) instr x
467
468 conversionNop new_size expr
469 = do e_code <- getRegister' dflags expr
470 return (swizzleRegisterRep e_code new_size)
471
472 getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
473 = case mop of
474 MO_F_Eq _ -> condFltReg EQQ x y
475 MO_F_Ne _ -> condFltReg NE x y
476 MO_F_Gt _ -> condFltReg GTT x y
477 MO_F_Ge _ -> condFltReg GE x y
478 MO_F_Lt _ -> condFltReg LTT x y
479 MO_F_Le _ -> condFltReg LE x y
480
481 MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y)
482 MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y)
483
484 MO_S_Gt rep -> condIntReg GTT (extendSExpr rep x) (extendSExpr rep y)
485 MO_S_Ge rep -> condIntReg GE (extendSExpr rep x) (extendSExpr rep y)
486 MO_S_Lt rep -> condIntReg LTT (extendSExpr rep x) (extendSExpr rep y)
487 MO_S_Le rep -> condIntReg LE (extendSExpr rep x) (extendSExpr rep y)
488
489 MO_U_Gt rep -> condIntReg GU (extendUExpr rep x) (extendUExpr rep y)
490 MO_U_Ge rep -> condIntReg GEU (extendUExpr rep x) (extendUExpr rep y)
491 MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y)
492 MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y)
493
494 MO_F_Add w -> triv_float w FADD
495 MO_F_Sub w -> triv_float w FSUB
496 MO_F_Mul w -> triv_float w FMUL
497 MO_F_Quot w -> triv_float w FDIV
498
499 -- optimize addition with 32-bit immediate
500 -- (needed for PIC)
501 MO_Add W32 ->
502 case y of
503 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
504 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
505 CmmLit lit
506 -> do
507 (src, srcCode) <- getSomeReg x
508 let imm = litToImm lit
509 code dst = srcCode `appOL` toOL [
510 ADDIS dst src (HA imm),
511 ADD dst dst (RIImm (LO imm))
512 ]
513 return (Any II32 code)
514 _ -> trivialCode W32 True ADD x y
515
516 MO_Add rep -> trivialCode rep True ADD x y
517 MO_Sub rep ->
518 case y of -- subfi ('substract from' with immediate) doesn't exist
519 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
520 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
521 _ -> trivialCodeNoImm' (intSize rep) SUBF y x
522
523 MO_Mul rep -> trivialCode rep True MULLW x y
524
525 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
526
527 MO_S_MulMayOflo _ -> panic "S_MulMayOflo (rep /= II32): not implemented"
528 MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
529
530 MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y)
531 MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y)
532
533 MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y)
534 MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y)
535
536 MO_And rep -> trivialCode rep False AND x y
537 MO_Or rep -> trivialCode rep False OR x y
538 MO_Xor rep -> trivialCode rep False XOR x y
539
540 MO_Shl rep -> trivialCode rep False SLW x y
541 MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y
542 MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y
543 _ -> panic "PPC.CodeGen.getRegister: no match"
544
545 where
546 triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register
547 triv_float width instr = trivialCodeNoImm (floatSize width) instr x y
548
549 getRegister' _ (CmmLit (CmmInt i rep))
550 | Just imm <- makeImmediate rep True i
551 = let
552 code dst = unitOL (LI dst imm)
553 in
554 return (Any (intSize rep) code)
555
556 getRegister' _ (CmmLit (CmmFloat f frep)) = do
557 lbl <- getNewLabelNat
558 dflags <- getDynFlagsNat
559 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
560 Amode addr addr_code <- getAmode dynRef
561 let size = floatSize frep
562 code dst =
563 LDATA ReadOnlyData (Statics lbl
564 [CmmStaticLit (CmmFloat f frep)])
565 `consOL` (addr_code `snocOL` LD size dst addr)
566 return (Any size code)
567
568 getRegister' _ (CmmLit lit)
569 = let rep = cmmLitType lit
570 imm = litToImm lit
571 code dst = toOL [
572 LIS dst (HA imm),
573 ADD dst dst (RIImm (LO imm))
574 ]
575 in return (Any (cmmTypeSize rep) code)
576
577 getRegister' dflags other = pprPanic "getRegister(ppc)" (pprExpr (targetPlatform dflags) other)
578
579 -- extend?Rep: wrap integer expression of type rep
580 -- in a conversion to II32
581 extendSExpr :: Width -> CmmExpr -> CmmExpr
582 extendSExpr W32 x = x
583 extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x]
584
585 extendUExpr :: Width -> CmmExpr -> CmmExpr
586 extendUExpr W32 x = x
587 extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x]
588
589 -- -----------------------------------------------------------------------------
590 -- The 'Amode' type: Memory addressing modes passed up the tree.
591
592 data Amode
593 = Amode AddrMode InstrBlock
594
595 {-
596 Now, given a tree (the argument to an CmmLoad) that references memory,
597 produce a suitable addressing mode.
598
599 A Rule of the Game (tm) for Amodes: use of the addr bit must
600 immediately follow use of the code part, since the code part puts
601 values in registers which the addr then refers to. So you can't put
602 anything in between, lest it overwrite some of those registers. If
603 you need to do some other computation between the code part and use of
604 the addr bit, first store the effective address from the amode in a
605 temporary, then do the other computation, and then use the temporary:
606
607 code
608 LEA amode, tmp
609 ... other computation ...
610 ... (tmp) ...
611 -}
612
613 getAmode :: CmmExpr -> NatM Amode
614 getAmode tree@(CmmRegOff _ _) = getAmode (mangleIndexTree tree)
615
616 getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
617 | Just off <- makeImmediate W32 True (-i)
618 = do
619 (reg, code) <- getSomeReg x
620 return (Amode (AddrRegImm reg off) code)
621
622
623 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
624 | Just off <- makeImmediate W32 True i
625 = do
626 (reg, code) <- getSomeReg x
627 return (Amode (AddrRegImm reg off) code)
628
629 -- optimize addition with 32-bit immediate
630 -- (needed for PIC)
631 getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit])
632 = do
633 tmp <- getNewRegNat II32
634 (src, srcCode) <- getSomeReg x
635 let imm = litToImm lit
636 code = srcCode `snocOL` ADDIS tmp src (HA imm)
637 return (Amode (AddrRegImm tmp (LO imm)) code)
638
639 getAmode (CmmLit lit)
640 = do
641 tmp <- getNewRegNat II32
642 let imm = litToImm lit
643 code = unitOL (LIS tmp (HA imm))
644 return (Amode (AddrRegImm tmp (LO imm)) code)
645
646 getAmode (CmmMachOp (MO_Add W32) [x, y])
647 = do
648 (regX, codeX) <- getSomeReg x
649 (regY, codeY) <- getSomeReg y
650 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
651
652 getAmode other
653 = do
654 (reg, code) <- getSomeReg other
655 let
656 off = ImmInt 0
657 return (Amode (AddrRegImm reg off) code)
658
659
660
661 -- The 'CondCode' type: Condition codes passed up the tree.
662 data CondCode
663 = CondCode Bool Cond InstrBlock
664
665 -- Set up a condition code for a conditional branch.
666
667 getCondCode :: CmmExpr -> NatM CondCode
668
669 -- almost the same as everywhere else - but we need to
670 -- extend small integers to 32 bit first
671
672 getCondCode (CmmMachOp mop [x, y])
673 = case mop of
674 MO_F_Eq W32 -> condFltCode EQQ x y
675 MO_F_Ne W32 -> condFltCode NE x y
676 MO_F_Gt W32 -> condFltCode GTT x y
677 MO_F_Ge W32 -> condFltCode GE x y
678 MO_F_Lt W32 -> condFltCode LTT x y
679 MO_F_Le W32 -> condFltCode LE x y
680
681 MO_F_Eq W64 -> condFltCode EQQ x y
682 MO_F_Ne W64 -> condFltCode NE x y
683 MO_F_Gt W64 -> condFltCode GTT x y
684 MO_F_Ge W64 -> condFltCode GE x y
685 MO_F_Lt W64 -> condFltCode LTT x y
686 MO_F_Le W64 -> condFltCode LE x y
687
688 MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y)
689 MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y)
690
691 MO_S_Gt rep -> condIntCode GTT (extendSExpr rep x) (extendSExpr rep y)
692 MO_S_Ge rep -> condIntCode GE (extendSExpr rep x) (extendSExpr rep y)
693 MO_S_Lt rep -> condIntCode LTT (extendSExpr rep x) (extendSExpr rep y)
694 MO_S_Le rep -> condIntCode LE (extendSExpr rep x) (extendSExpr rep y)
695
696 MO_U_Gt rep -> condIntCode GU (extendUExpr rep x) (extendUExpr rep y)
697 MO_U_Ge rep -> condIntCode GEU (extendUExpr rep x) (extendUExpr rep y)
698 MO_U_Lt rep -> condIntCode LU (extendUExpr rep x) (extendUExpr rep y)
699 MO_U_Le rep -> condIntCode LEU (extendUExpr rep x) (extendUExpr rep y)
700
701 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
702
703 getCondCode _ = panic "getCondCode(2)(powerpc)"
704
705
706
707 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
708 -- passed back up the tree.
709
710 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
711
712 -- ###FIXME: I16 and I8!
713 condIntCode cond x (CmmLit (CmmInt y rep))
714 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
715 = do
716 (src1, code) <- getSomeReg x
717 let
718 code' = code `snocOL`
719 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2)
720 return (CondCode False cond code')
721
722 condIntCode cond x y = do
723 (src1, code1) <- getSomeReg x
724 (src2, code2) <- getSomeReg y
725 let
726 code' = code1 `appOL` code2 `snocOL`
727 (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2)
728 return (CondCode False cond code')
729
730 condFltCode cond x y = do
731 (src1, code1) <- getSomeReg x
732 (src2, code2) <- getSomeReg y
733 let
734 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
735 code'' = case cond of -- twiddle CR to handle unordered case
736 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
737 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
738 _ -> code'
739 where
740 ltbit = 0 ; eqbit = 2 ; gtbit = 1
741 return (CondCode True cond code'')
742
743
744
745 -- -----------------------------------------------------------------------------
746 -- Generating assignments
747
748 -- Assignments are really at the heart of the whole code generation
749 -- business. Almost all top-level nodes of any real importance are
750 -- assignments, which correspond to loads, stores, or register
751 -- transfers. If we're really lucky, some of the register transfers
752 -- will go away, because we can use the destination register to
753 -- complete the code generation for the right hand side. This only
754 -- fails when the right hand side is forced into a fixed register
755 -- (e.g. the result of a call).
756
757 assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
758 assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
759
760 assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
761 assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
762
763 assignMem_IntCode pk addr src = do
764 (srcReg, code) <- getSomeReg src
765 Amode dstAddr addr_code <- getAmode addr
766 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
767
768 -- dst is a reg, but src could be anything
769 assignReg_IntCode _ reg src
770 = do
771 r <- getRegister src
772 return $ case r of
773 Any _ code -> code dst
774 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
775 where
776 dst = getRegisterReg reg
777
778
779
780 -- Easy, isn't it?
781 assignMem_FltCode = assignMem_IntCode
782 assignReg_FltCode = assignReg_IntCode
783
784
785
786 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
787
788 genJump (CmmLit (CmmLabel lbl))
789 = return (unitOL $ JMP lbl)
790
791 genJump tree
792 = do
793 (target,code) <- getSomeReg tree
794 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
795
796
797 -- -----------------------------------------------------------------------------
798 -- Unconditional branches
799 genBranch :: BlockId -> NatM InstrBlock
800 genBranch = return . toOL . mkJumpInstr
801
802
803 -- -----------------------------------------------------------------------------
804 -- Conditional jumps
805
806 {-
807 Conditional jumps are always to local labels, so we can use branch
808 instructions. We peek at the arguments to decide what kind of
809 comparison to do.
810
811 SPARC: First, we have to ensure that the condition codes are set
812 according to the supplied comparison operation. We generate slightly
813 different code for floating point comparisons, because a floating
814 point operation cannot directly precede a @BF@. We assume the worst
815 and fill that slot with a @NOP@.
816
817 SPARC: Do not fill the delay slots here; you will confuse the register
818 allocator.
819 -}
820
821
822 genCondJump
823 :: BlockId -- the branch target
824 -> CmmExpr -- the condition on which to branch
825 -> NatM InstrBlock
826
827 genCondJump id bool = do
828 CondCode _ cond code <- getCondCode bool
829 return (code `snocOL` BCC cond id)
830
831
832
833 -- -----------------------------------------------------------------------------
834 -- Generating C calls
835
836 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
837 -- @get_arg@, which moves the arguments to the correct registers/stack
838 -- locations. Apart from that, the code is easy.
839 --
840 -- (If applicable) Do not fill the delay slots here; you will confuse the
841 -- register allocator.
842
843 genCCall :: CmmCallTarget -- function to call
844 -> [HintedCmmFormal] -- where to put the result
845 -> [HintedCmmActual] -- arguments (of mixed type)
846 -> NatM InstrBlock
847 genCCall target dest_regs argsAndHints
848 = do dflags <- getDynFlagsNat
849 case platformOS (targetPlatform dflags) of
850 OSLinux -> genCCall' GCPLinux target dest_regs argsAndHints
851 OSDarwin -> genCCall' GCPDarwin target dest_regs argsAndHints
852 OSSolaris2 -> panic "PPC.CodeGen.genCCall: not defined for this os"
853 OSMinGW32 -> panic "PPC.CodeGen.genCCall: not defined for this os"
854 OSFreeBSD -> panic "PPC.CodeGen.genCCall: not defined for this os"
855 OSOpenBSD -> panic "PPC.CodeGen.genCCall: not defined for this os"
856 OSUnknown -> panic "PPC.CodeGen.genCCall: not defined for this os"
857
858 data GenCCallPlatform = GCPLinux | GCPDarwin
859
860 genCCall'
861 :: GenCCallPlatform
862 -> CmmCallTarget -- function to call
863 -> [HintedCmmFormal] -- where to put the result
864 -> [HintedCmmActual] -- arguments (of mixed type)
865 -> NatM InstrBlock
866
867 {-
868 The PowerPC calling convention for Darwin/Mac OS X
869 is described in Apple's document
870 "Inside Mac OS X - Mach-O Runtime Architecture".
871
872 PowerPC Linux uses the System V Release 4 Calling Convention
873 for PowerPC. It is described in the
874 "System V Application Binary Interface PowerPC Processor Supplement".
875
876 Both conventions are similar:
877 Parameters may be passed in general-purpose registers starting at r3, in
878 floating point registers starting at f1, or on the stack.
879
880 But there are substantial differences:
881 * The number of registers used for parameter passing and the exact set of
882 nonvolatile registers differs (see MachRegs.lhs).
883 * On Darwin, stack space is always reserved for parameters, even if they are
884 passed in registers. The called routine may choose to save parameters from
885 registers to the corresponding space on the stack.
886 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
887 parameter is passed in an FPR.
888 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
889 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
890 Darwin just treats an I64 like two separate II32s (high word first).
891 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
892 4-byte aligned like everything else on Darwin.
893 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
894 PowerPC Linux does not agree, so neither do we.
895
896 According to both conventions, The parameter area should be part of the
897 caller's stack frame, allocated in the caller's prologue code (large enough
898 to hold the parameter lists for all called routines). The NCG already
899 uses the stack for register spilling, leaving 64 bytes free at the top.
900 If we need a larger parameter area than that, we just allocate a new stack
901 frame just before ccalling.
902 -}
903
904
905 genCCall' _ (CmmPrim MO_WriteBarrier) _ _
906 = return $ unitOL LWSYNC
907
908 genCCall' gcp target dest_regs argsAndHints
909 = ASSERT (not $ any (`elem` [II16]) $ map cmmTypeSize argReps)
910 -- we rely on argument promotion in the codeGen
911 do
912 (finalStack,passArgumentsCode,usedRegs) <- passArguments
913 (zip args argReps)
914 allArgRegs allFPArgRegs
915 initialStackOffset
916 (toOL []) []
917
918 (labelOrExpr, reduceToFF32) <- case target of
919 CmmCallee (CmmLit (CmmLabel lbl)) _ -> return (Left lbl, False)
920 CmmCallee expr _ -> return (Right expr, False)
921 CmmPrim mop -> outOfLineMachOp mop
922
923 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
924 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
925
926 case labelOrExpr of
927 Left lbl -> do
928 return ( codeBefore
929 `snocOL` BL lbl usedRegs
930 `appOL` codeAfter)
931 Right dyn -> do
932 (dynReg, dynCode) <- getSomeReg dyn
933 return ( dynCode
934 `snocOL` MTCTR dynReg
935 `appOL` codeBefore
936 `snocOL` BCTRL usedRegs
937 `appOL` codeAfter)
938 where
939 initialStackOffset = case gcp of
940 GCPDarwin -> 24
941 GCPLinux -> 8
942 -- size of linkage area + size of arguments, in bytes
943 stackDelta finalStack = case gcp of
944 GCPDarwin ->
945 roundTo 16 $ (24 +) $ max 32 $ sum $
946 map (widthInBytes . typeWidth) argReps
947 GCPLinux -> roundTo 16 finalStack
948
949 -- need to remove alignment information
950 argsAndHints' | (CmmPrim mop) <- target,
951 (mop == MO_Memcpy ||
952 mop == MO_Memset ||
953 mop == MO_Memmove)
954 = init argsAndHints
955
956 | otherwise
957 = argsAndHints
958
959 args = map hintlessCmm argsAndHints'
960 argReps = map cmmExprType args
961
962 roundTo a x | x `mod` a == 0 = x
963 | otherwise = x + a - (x `mod` a)
964
965 move_sp_down finalStack
966 | delta > 64 =
967 toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))),
968 DELTA (-delta)]
969 | otherwise = nilOL
970 where delta = stackDelta finalStack
971 move_sp_up finalStack
972 | delta > 64 =
973 toOL [ADD sp sp (RIImm (ImmInt delta)),
974 DELTA 0]
975 | otherwise = nilOL
976 where delta = stackDelta finalStack
977
978
979 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
980 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
981 accumCode accumUsed | isWord64 arg_ty =
982 do
983 ChildCode64 code vr_lo <- iselExpr64 arg
984 let vr_hi = getHiVRegFromLo vr_lo
985
986 case gcp of
987 GCPDarwin ->
988 do let storeWord vr (gpr:_) _ = MR gpr vr
989 storeWord vr [] offset
990 = ST II32 vr (AddrRegImm sp (ImmInt offset))
991 passArguments args
992 (drop 2 gprs)
993 fprs
994 (stackOffset+8)
995 (accumCode `appOL` code
996 `snocOL` storeWord vr_hi gprs stackOffset
997 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
998 ((take 2 gprs) ++ accumUsed)
999 GCPLinux ->
1000 do let stackOffset' = roundTo 8 stackOffset
1001 stackCode = accumCode `appOL` code
1002 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1003 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1004 regCode hireg loreg =
1005 accumCode `appOL` code
1006 `snocOL` MR hireg vr_hi
1007 `snocOL` MR loreg vr_lo
1008
1009 case gprs of
1010 hireg : loreg : regs | even (length gprs) ->
1011 passArguments args regs fprs stackOffset
1012 (regCode hireg loreg) (hireg : loreg : accumUsed)
1013 _skipped : hireg : loreg : regs ->
1014 passArguments args regs fprs stackOffset
1015 (regCode hireg loreg) (hireg : loreg : accumUsed)
1016 _ -> -- only one or no regs left
1017 passArguments args [] fprs (stackOffset'+8)
1018 stackCode accumUsed
1019
1020 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1021 | reg : _ <- regs = do
1022 register <- getRegister arg
1023 let code = case register of
1024 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1025 Any _ acode -> acode reg
1026 stackOffsetRes = case gcp of
1027 -- The Darwin ABI requires that we reserve
1028 -- stack slots for register parameters
1029 GCPDarwin -> stackOffset + stackBytes
1030 -- ... the SysV ABI doesn't.
1031 GCPLinux -> stackOffset
1032 passArguments args
1033 (drop nGprs gprs)
1034 (drop nFprs fprs)
1035 stackOffsetRes
1036 (accumCode `appOL` code)
1037 (reg : accumUsed)
1038 | otherwise = do
1039 (vr, code) <- getSomeReg arg
1040 passArguments args
1041 (drop nGprs gprs)
1042 (drop nFprs fprs)
1043 (stackOffset' + stackBytes)
1044 (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot)
1045 accumUsed
1046 where
1047 stackOffset' = case gcp of
1048 GCPDarwin ->
1049 -- stackOffset is at least 4-byte aligned
1050 -- The Darwin ABI is happy with that.
1051 stackOffset
1052 GCPLinux
1053 -- ... the SysV ABI requires 8-byte
1054 -- alignment for doubles.
1055 | isFloatType rep && typeWidth rep == W64 ->
1056 roundTo 8 stackOffset
1057 | otherwise ->
1058 stackOffset
1059 stackSlot = AddrRegImm sp (ImmInt stackOffset')
1060 (nGprs, nFprs, stackBytes, regs)
1061 = case gcp of
1062 GCPDarwin ->
1063 case cmmTypeSize rep of
1064 II8 -> (1, 0, 4, gprs)
1065 II32 -> (1, 0, 4, gprs)
1066 -- The Darwin ABI requires that we skip a
1067 -- corresponding number of GPRs when we use
1068 -- the FPRs.
1069 FF32 -> (1, 1, 4, fprs)
1070 FF64 -> (2, 1, 8, fprs)
1071 II16 -> panic "genCCall' passArguments II16"
1072 II64 -> panic "genCCall' passArguments II64"
1073 FF80 -> panic "genCCall' passArguments FF80"
1074 GCPLinux ->
1075 case cmmTypeSize rep of
1076 II8 -> (1, 0, 4, gprs)
1077 II32 -> (1, 0, 4, gprs)
1078 -- ... the SysV ABI doesn't.
1079 FF32 -> (0, 1, 4, fprs)
1080 FF64 -> (0, 1, 8, fprs)
1081 II16 -> panic "genCCall' passArguments II16"
1082 II64 -> panic "genCCall' passArguments II64"
1083 FF80 -> panic "genCCall' passArguments FF80"
1084
1085 moveResult reduceToFF32 =
1086 case dest_regs of
1087 [] -> nilOL
1088 [CmmHinted dest _hint]
1089 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1090 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1091 | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3,
1092 MR r_dest r4]
1093 | otherwise -> unitOL (MR r_dest r3)
1094 where rep = cmmRegType (CmmLocal dest)
1095 r_dest = getRegisterReg (CmmLocal dest)
1096 _ -> panic "genCCall' moveResult: Bad dest_regs"
1097
1098 outOfLineMachOp mop =
1099 do
1100 dflags <- getDynFlagsNat
1101 mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference $
1102 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1103 let mopLabelOrExpr = case mopExpr of
1104 CmmLit (CmmLabel lbl) -> Left lbl
1105 _ -> Right mopExpr
1106 return (mopLabelOrExpr, reduce)
1107 where
1108 (functionName, reduce) = case mop of
1109 MO_F32_Exp -> (fsLit "exp", True)
1110 MO_F32_Log -> (fsLit "log", True)
1111 MO_F32_Sqrt -> (fsLit "sqrt", True)
1112
1113 MO_F32_Sin -> (fsLit "sin", True)
1114 MO_F32_Cos -> (fsLit "cos", True)
1115 MO_F32_Tan -> (fsLit "tan", True)
1116
1117 MO_F32_Asin -> (fsLit "asin", True)
1118 MO_F32_Acos -> (fsLit "acos", True)
1119 MO_F32_Atan -> (fsLit "atan", True)
1120
1121 MO_F32_Sinh -> (fsLit "sinh", True)
1122 MO_F32_Cosh -> (fsLit "cosh", True)
1123 MO_F32_Tanh -> (fsLit "tanh", True)
1124 MO_F32_Pwr -> (fsLit "pow", True)
1125
1126 MO_F64_Exp -> (fsLit "exp", False)
1127 MO_F64_Log -> (fsLit "log", False)
1128 MO_F64_Sqrt -> (fsLit "sqrt", False)
1129
1130 MO_F64_Sin -> (fsLit "sin", False)
1131 MO_F64_Cos -> (fsLit "cos", False)
1132 MO_F64_Tan -> (fsLit "tan", False)
1133
1134 MO_F64_Asin -> (fsLit "asin", False)
1135 MO_F64_Acos -> (fsLit "acos", False)
1136 MO_F64_Atan -> (fsLit "atan", False)
1137
1138 MO_F64_Sinh -> (fsLit "sinh", False)
1139 MO_F64_Cosh -> (fsLit "cosh", False)
1140 MO_F64_Tanh -> (fsLit "tanh", False)
1141 MO_F64_Pwr -> (fsLit "pow", False)
1142
1143 MO_Memcpy -> (fsLit "memcpy", False)
1144 MO_Memset -> (fsLit "memset", False)
1145 MO_Memmove -> (fsLit "memmove", False)
1146
1147 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
1148
1149 other -> pprPanic "genCCall(ppc): unknown callish op"
1150 (pprCallishMachOp other)
1151
1152
1153 -- -----------------------------------------------------------------------------
1154 -- Generating a table-branch
1155
1156 genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
1157 genSwitch expr ids
1158 | opt_PIC
1159 = do
1160 (reg,e_code) <- getSomeReg expr
1161 tmp <- getNewRegNat II32
1162 lbl <- getNewLabelNat
1163 dflags <- getDynFlagsNat
1164 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1165 (tableReg,t_code) <- getSomeReg $ dynRef
1166 let code = e_code `appOL` t_code `appOL` toOL [
1167 SLW tmp reg (RIImm (ImmInt 2)),
1168 LD II32 tmp (AddrRegReg tableReg tmp),
1169 ADD tmp tmp (RIReg tableReg),
1170 MTCTR tmp,
1171 BCTR ids (Just lbl)
1172 ]
1173 return code
1174 | otherwise
1175 = do
1176 (reg,e_code) <- getSomeReg expr
1177 tmp <- getNewRegNat II32
1178 lbl <- getNewLabelNat
1179 let code = e_code `appOL` toOL [
1180 SLW tmp reg (RIImm (ImmInt 2)),
1181 ADDIS tmp tmp (HA (ImmCLbl lbl)),
1182 LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1183 MTCTR tmp,
1184 BCTR ids (Just lbl)
1185 ]
1186 return code
1187
1188 generateJumpTableForInstr :: Instr -> Maybe (NatCmmDecl CmmStatics Instr)
1189 generateJumpTableForInstr (BCTR ids (Just lbl)) =
1190 let jumpTable
1191 | opt_PIC = map jumpTableEntryRel ids
1192 | otherwise = map jumpTableEntry ids
1193 where jumpTableEntryRel Nothing
1194 = CmmStaticLit (CmmInt 0 wordWidth)
1195 jumpTableEntryRel (Just blockid)
1196 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1197 where blockLabel = mkAsmTempLabel (getUnique blockid)
1198 in Just (CmmData ReadOnlyData (Statics lbl jumpTable))
1199 generateJumpTableForInstr _ = Nothing
1200
1201 -- -----------------------------------------------------------------------------
1202 -- 'condIntReg' and 'condFltReg': condition codes into registers
1203
1204 -- Turn those condition codes into integers now (when they appear on
1205 -- the right hand side of an assignment).
1206 --
1207 -- (If applicable) Do not fill the delay slots here; you will confuse the
1208 -- register allocator.
1209
1210 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1211
1212 condReg :: NatM CondCode -> NatM Register
1213 condReg getCond = do
1214 CondCode _ cond cond_code <- getCond
1215 let
1216 {- code dst = cond_code `appOL` toOL [
1217 BCC cond lbl1,
1218 LI dst (ImmInt 0),
1219 BCC ALWAYS lbl2,
1220 NEWBLOCK lbl1,
1221 LI dst (ImmInt 1),
1222 BCC ALWAYS lbl2,
1223 NEWBLOCK lbl2
1224 ]-}
1225 code dst = cond_code
1226 `appOL` negate_code
1227 `appOL` toOL [
1228 MFCR dst,
1229 RLWINM dst dst (bit + 1) 31 31
1230 ]
1231
1232 negate_code | do_negate = unitOL (CRNOR bit bit bit)
1233 | otherwise = nilOL
1234
1235 (bit, do_negate) = case cond of
1236 LTT -> (0, False)
1237 LE -> (1, True)
1238 EQQ -> (2, False)
1239 GE -> (0, True)
1240 GTT -> (1, False)
1241
1242 NE -> (2, True)
1243
1244 LU -> (0, False)
1245 LEU -> (1, True)
1246 GEU -> (0, True)
1247 GU -> (1, False)
1248 _ -> panic "PPC.CodeGen.codeReg: no match"
1249
1250 return (Any II32 code)
1251
1252 condIntReg cond x y = condReg (condIntCode cond x y)
1253 condFltReg cond x y = condReg (condFltCode cond x y)
1254
1255
1256
1257 -- -----------------------------------------------------------------------------
1258 -- 'trivial*Code': deal with trivial instructions
1259
1260 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1261 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1262 -- Only look for constants on the right hand side, because that's
1263 -- where the generic optimizer will have put them.
1264
1265 -- Similarly, for unary instructions, we don't have to worry about
1266 -- matching an StInt as the argument, because genericOpt will already
1267 -- have handled the constant-folding.
1268
1269
1270
1271 {-
1272 Wolfgang's PowerPC version of The Rules:
1273
1274 A slightly modified version of The Rules to take advantage of the fact
1275 that PowerPC instructions work on all registers and don't implicitly
1276 clobber any fixed registers.
1277
1278 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1279
1280 * If getRegister returns Any, then the code it generates may modify only:
1281 (a) fresh temporaries
1282 (b) the destination register
1283 It may *not* modify global registers, unless the global
1284 register happens to be the destination register.
1285 It may not clobber any other registers. In fact, only ccalls clobber any
1286 fixed registers.
1287 Also, it may not modify the counter register (used by genCCall).
1288
1289 Corollary: If a getRegister for a subexpression returns Fixed, you need
1290 not move it to a fresh temporary before evaluating the next subexpression.
1291 The Fixed register won't be modified.
1292 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1293
1294 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1295 the value of the destination register.
1296 -}
1297
1298 trivialCode
1299 :: Width
1300 -> Bool
1301 -> (Reg -> Reg -> RI -> Instr)
1302 -> CmmExpr
1303 -> CmmExpr
1304 -> NatM Register
1305
1306 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1307 | Just imm <- makeImmediate rep signed y
1308 = do
1309 (src1, code1) <- getSomeReg x
1310 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1311 return (Any (intSize rep) code)
1312
1313 trivialCode rep _ instr x y = do
1314 (src1, code1) <- getSomeReg x
1315 (src2, code2) <- getSomeReg y
1316 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1317 return (Any (intSize rep) code)
1318
1319 trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr)
1320 -> CmmExpr -> CmmExpr -> NatM Register
1321 trivialCodeNoImm' size instr x y = do
1322 (src1, code1) <- getSomeReg x
1323 (src2, code2) <- getSomeReg y
1324 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1325 return (Any size code)
1326
1327 trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr)
1328 -> CmmExpr -> CmmExpr -> NatM Register
1329 trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y
1330
1331
1332 trivialUCode
1333 :: Size
1334 -> (Reg -> Reg -> Instr)
1335 -> CmmExpr
1336 -> NatM Register
1337 trivialUCode rep instr x = do
1338 (src, code) <- getSomeReg x
1339 let code' dst = code `snocOL` instr dst src
1340 return (Any rep code')
1341
1342 -- There is no "remainder" instruction on the PPC, so we have to do
1343 -- it the hard way.
1344 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1345
1346 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1347 -> CmmExpr -> CmmExpr -> NatM Register
1348 remainderCode rep div x y = do
1349 (src1, code1) <- getSomeReg x
1350 (src2, code2) <- getSomeReg y
1351 let code dst = code1 `appOL` code2 `appOL` toOL [
1352 div dst src1 src2,
1353 MULLW dst dst (RIReg src2),
1354 SUBF dst dst src1
1355 ]
1356 return (Any (intSize rep) code)
1357
1358
1359 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1360 coerceInt2FP fromRep toRep x = do
1361 (src, code) <- getSomeReg x
1362 lbl <- getNewLabelNat
1363 itmp <- getNewRegNat II32
1364 ftmp <- getNewRegNat FF64
1365 dflags <- getDynFlagsNat
1366 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
1367 Amode addr addr_code <- getAmode dynRef
1368 let
1369 code' dst = code `appOL` maybe_exts `appOL` toOL [
1370 LDATA ReadOnlyData $ Statics lbl
1371 [CmmStaticLit (CmmInt 0x43300000 W32),
1372 CmmStaticLit (CmmInt 0x80000000 W32)],
1373 XORIS itmp src (ImmInt 0x8000),
1374 ST II32 itmp (spRel 3),
1375 LIS itmp (ImmInt 0x4330),
1376 ST II32 itmp (spRel 2),
1377 LD FF64 ftmp (spRel 2)
1378 ] `appOL` addr_code `appOL` toOL [
1379 LD FF64 dst addr,
1380 FSUB FF64 dst ftmp dst
1381 ] `appOL` maybe_frsp dst
1382
1383 maybe_exts = case fromRep of
1384 W8 -> unitOL $ EXTS II8 src src
1385 W16 -> unitOL $ EXTS II16 src src
1386 W32 -> nilOL
1387 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1388
1389 maybe_frsp dst
1390 = case toRep of
1391 W32 -> unitOL $ FRSP dst dst
1392 W64 -> nilOL
1393 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1394
1395 return (Any (floatSize toRep) code')
1396
1397 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1398 coerceFP2Int _ toRep x = do
1399 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1400 (src, code) <- getSomeReg x
1401 tmp <- getNewRegNat FF64
1402 let
1403 code' dst = code `appOL` toOL [
1404 -- convert to int in FP reg
1405 FCTIWZ tmp src,
1406 -- store value (64bit) from FP to stack
1407 ST FF64 tmp (spRel 2),
1408 -- read low word of value (high word is undefined)
1409 LD II32 dst (spRel 3)]
1410 return (Any (intSize toRep) code')