e2d86a93aa367900e39b6220425e733d15d881de
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, (b) the type signatures, and
13 -- (c) the #if blah_TARGET_ARCH} things, the
14 -- structure should not be too overwhelming.
15
16 module PPC.CodeGen (
17 cmmTopCodeGen,
18 generateJumpTableForInstr,
19 InstrBlock
20 )
21
22 where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26 #include "../includes/MachDeps.h"
27
28 -- NCG stuff:
29 import CodeGen.Platform
30 import PPC.Instr
31 import PPC.Cond
32 import PPC.Regs
33 import CPrim
34 import NCGMonad
35 import Instruction
36 import PIC
37 import Format
38 import RegClass
39 import Reg
40 import TargetReg
41 import Platform
42
43 -- Our intermediate code:
44 import BlockId
45 import PprCmm ( pprExpr )
46 import Cmm
47 import CmmUtils
48 import CmmSwitch
49 import CLabel
50 import Hoopl
51
52 -- The rest:
53 import OrdList
54 import Outputable
55 import Unique
56 import DynFlags
57
58 import Control.Monad ( mapAndUnzipM, when )
59 import Data.Bits
60 import Data.Word
61
62 import BasicTypes
63 import FastString
64 import Util
65
66 -- -----------------------------------------------------------------------------
67 -- Top-level of the instruction selector
68
69 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
70 -- They are really trees of insns to facilitate fast appending, where a
71 -- left-to-right traversal (pre-order?) yields the insns in the correct
72 -- order.
73
74 cmmTopCodeGen
75 :: RawCmmDecl
76 -> NatM [NatCmmDecl CmmStatics Instr]
77
78 cmmTopCodeGen (CmmProc info lab live graph) = do
79 let blocks = toBlockListEntryFirst graph
80 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
81 dflags <- getDynFlags
82 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
83 tops = proc : concat statics
84 os = platformOS $ targetPlatform dflags
85 arch = platformArch $ targetPlatform dflags
86 case arch of
87 ArchPPC -> do
88 picBaseMb <- getPicBaseMaybeNat
89 case picBaseMb of
90 Just picBase -> initializePicBase_ppc arch os picBase tops
91 Nothing -> return tops
92 ArchPPC_64 ELF_V1 -> return tops
93 -- generating function descriptor is handled in
94 -- pretty printer
95 ArchPPC_64 ELF_V2 -> return tops
96 -- generating function prologue is handled in
97 -- pretty printer
98 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
99
100 cmmTopCodeGen (CmmData sec dat) = do
101 return [CmmData sec dat] -- no translation, we just use CmmStatic
102
103 basicBlockCodeGen
104 :: Block CmmNode C C
105 -> NatM ( [NatBasicBlock Instr]
106 , [NatCmmDecl CmmStatics Instr])
107
108 basicBlockCodeGen block = do
109 let (_, nodes, tail) = blockSplit block
110 id = entryLabel block
111 stmts = blockToList nodes
112 mid_instrs <- stmtsToInstrs stmts
113 tail_instrs <- stmtToInstrs tail
114 let instrs = mid_instrs `appOL` tail_instrs
115 -- code generation may introduce new basic block boundaries, which
116 -- are indicated by the NEWBLOCK instruction. We must split up the
117 -- instruction stream into basic blocks again. Also, we extract
118 -- LDATAs here too.
119 let
120 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
121
122 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
123 = ([], BasicBlock id instrs : blocks, statics)
124 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
125 = (instrs, blocks, CmmData sec dat:statics)
126 mkBlocks instr (instrs,blocks,statics)
127 = (instr:instrs, blocks, statics)
128 return (BasicBlock id top : other_blocks, statics)
129
130 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
131 stmtsToInstrs stmts
132 = do instrss <- mapM stmtToInstrs stmts
133 return (concatOL instrss)
134
135 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
136 stmtToInstrs stmt = do
137 dflags <- getDynFlags
138 case stmt of
139 CmmComment s -> return (unitOL (COMMENT s))
140 CmmTick {} -> return nilOL
141 CmmUnwind {} -> return nilOL
142
143 CmmAssign reg src
144 | isFloatType ty -> assignReg_FltCode format reg src
145 | target32Bit (targetPlatform dflags) &&
146 isWord64 ty -> assignReg_I64Code reg src
147 | otherwise -> assignReg_IntCode format reg src
148 where ty = cmmRegType dflags reg
149 format = cmmTypeFormat ty
150
151 CmmStore addr src
152 | isFloatType ty -> assignMem_FltCode format addr src
153 | target32Bit (targetPlatform dflags) &&
154 isWord64 ty -> assignMem_I64Code addr src
155 | otherwise -> assignMem_IntCode format addr src
156 where ty = cmmExprType dflags src
157 format = cmmTypeFormat ty
158
159 CmmUnsafeForeignCall target result_regs args
160 -> genCCall target result_regs args
161
162 CmmBranch id -> genBranch id
163 CmmCondBranch arg true false _ -> do
164 b1 <- genCondJump true arg
165 b2 <- genBranch false
166 return (b1 `appOL` b2)
167 CmmSwitch arg ids -> do dflags <- getDynFlags
168 genSwitch dflags arg ids
169 CmmCall { cml_target = arg } -> genJump arg
170 _ ->
171 panic "stmtToInstrs: statement should have been cps'd away"
172
173
174 --------------------------------------------------------------------------------
175 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
176 -- They are really trees of insns to facilitate fast appending, where a
177 -- left-to-right traversal yields the insns in the correct order.
178 --
179 type InstrBlock
180 = OrdList Instr
181
182
183 -- | Register's passed up the tree. If the stix code forces the register
184 -- to live in a pre-decided machine register, it comes out as @Fixed@;
185 -- otherwise, it comes out as @Any@, and the parent can decide which
186 -- register to put it in.
187 --
188 data Register
189 = Fixed Format Reg InstrBlock
190 | Any Format (Reg -> InstrBlock)
191
192
193 swizzleRegisterRep :: Register -> Format -> Register
194 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
195 swizzleRegisterRep (Any _ codefn) format = Any format codefn
196
197
198 -- | Grab the Reg for a CmmReg
199 getRegisterReg :: Platform -> CmmReg -> Reg
200
201 getRegisterReg _ (CmmLocal (LocalReg u pk))
202 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
203
204 getRegisterReg platform (CmmGlobal mid)
205 = case globalRegMaybe platform mid of
206 Just reg -> RegReal reg
207 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
208 -- By this stage, the only MagicIds remaining should be the
209 -- ones which map to a real machine register on this
210 -- platform. Hence ...
211
212 -- | Convert a BlockId to some CmmStatic data
213 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
214 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
215 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
216 where blockLabel = mkAsmTempLabel (getUnique blockid)
217
218
219
220 -- -----------------------------------------------------------------------------
221 -- General things for putting together code sequences
222
223 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
224 -- CmmExprs into CmmRegOff?
225 mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
226 mangleIndexTree dflags (CmmRegOff reg off)
227 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
228 where width = typeWidth (cmmRegType dflags reg)
229
230 mangleIndexTree _ _
231 = panic "PPC.CodeGen.mangleIndexTree: no match"
232
233 -- -----------------------------------------------------------------------------
234 -- Code gen for 64-bit arithmetic on 32-bit platforms
235
236 {-
237 Simple support for generating 64-bit code (ie, 64 bit values and 64
238 bit assignments) on 32-bit platforms. Unlike the main code generator
239 we merely shoot for generating working code as simply as possible, and
240 pay little attention to code quality. Specifically, there is no
241 attempt to deal cleverly with the fixed-vs-floating register
242 distinction; all values are generated into (pairs of) floating
243 registers, even if this would mean some redundant reg-reg moves as a
244 result. Only one of the VRegUniques is returned, since it will be
245 of the VRegUniqueLo form, and the upper-half VReg can be determined
246 by applying getHiVRegFromLo to it.
247 -}
248
249 data ChildCode64 -- a.k.a "Register64"
250 = ChildCode64
251 InstrBlock -- code
252 Reg -- the lower 32-bit temporary which contains the
253 -- result; use getHiVRegFromLo to find the other
254 -- VRegUnique. Rules of this simplified insn
255 -- selection game are therefore that the returned
256 -- Reg may be modified
257
258
259 -- | Compute an expression into a register, but
260 -- we don't mind which one it is.
261 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
262 getSomeReg expr = do
263 r <- getRegister expr
264 case r of
265 Any rep code -> do
266 tmp <- getNewRegNat rep
267 return (tmp, code tmp)
268 Fixed _ reg code ->
269 return (reg, code)
270
271 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
272 getI64Amodes addrTree = do
273 Amode hi_addr addr_code <- getAmode D addrTree
274 case addrOffset hi_addr 4 of
275 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
276 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
277 return (AddrRegImm hi_ptr (ImmInt 0),
278 AddrRegImm hi_ptr (ImmInt 4),
279 code)
280
281
282 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
283 assignMem_I64Code addrTree valueTree = do
284 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
285 ChildCode64 vcode rlo <- iselExpr64 valueTree
286 let
287 rhi = getHiVRegFromLo rlo
288
289 -- Big-endian store
290 mov_hi = ST II32 rhi hi_addr
291 mov_lo = ST II32 rlo lo_addr
292 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
293
294
295 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
296 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
297 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
298 let
299 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
300 r_dst_hi = getHiVRegFromLo r_dst_lo
301 r_src_hi = getHiVRegFromLo r_src_lo
302 mov_lo = MR r_dst_lo r_src_lo
303 mov_hi = MR r_dst_hi r_src_hi
304 return (
305 vcode `snocOL` mov_lo `snocOL` mov_hi
306 )
307
308 assignReg_I64Code _ _
309 = panic "assignReg_I64Code(powerpc): invalid lvalue"
310
311
312 iselExpr64 :: CmmExpr -> NatM ChildCode64
313 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
314 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
315 (rlo, rhi) <- getNewRegPairNat II32
316 let mov_hi = LD II32 rhi hi_addr
317 mov_lo = LD II32 rlo lo_addr
318 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
319 rlo
320
321 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
322 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
323
324 iselExpr64 (CmmLit (CmmInt i _)) = do
325 (rlo,rhi) <- getNewRegPairNat II32
326 let
327 half0 = fromIntegral (fromIntegral i :: Word16)
328 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
329 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
330 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
331
332 code = toOL [
333 LIS rlo (ImmInt half1),
334 OR rlo rlo (RIImm $ ImmInt half0),
335 LIS rhi (ImmInt half3),
336 OR rhi rhi (RIImm $ ImmInt half2)
337 ]
338 return (ChildCode64 code rlo)
339
340 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
341 ChildCode64 code1 r1lo <- iselExpr64 e1
342 ChildCode64 code2 r2lo <- iselExpr64 e2
343 (rlo,rhi) <- getNewRegPairNat II32
344 let
345 r1hi = getHiVRegFromLo r1lo
346 r2hi = getHiVRegFromLo r2lo
347 code = code1 `appOL`
348 code2 `appOL`
349 toOL [ ADDC rlo r1lo r2lo,
350 ADDE rhi r1hi r2hi ]
351 return (ChildCode64 code rlo)
352
353 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
354 ChildCode64 code1 r1lo <- iselExpr64 e1
355 ChildCode64 code2 r2lo <- iselExpr64 e2
356 (rlo,rhi) <- getNewRegPairNat II32
357 let
358 r1hi = getHiVRegFromLo r1lo
359 r2hi = getHiVRegFromLo r2lo
360 code = code1 `appOL`
361 code2 `appOL`
362 toOL [ SUBFC rlo r2lo r1lo,
363 SUBFE rhi r2hi r1hi ]
364 return (ChildCode64 code rlo)
365
366 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
367 (expr_reg,expr_code) <- getSomeReg expr
368 (rlo, rhi) <- getNewRegPairNat II32
369 let mov_hi = LI rhi (ImmInt 0)
370 mov_lo = MR rlo expr_reg
371 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
372 rlo
373 iselExpr64 expr
374 = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
375
376
377
378 getRegister :: CmmExpr -> NatM Register
379 getRegister e = do dflags <- getDynFlags
380 getRegister' dflags e
381
382 getRegister' :: DynFlags -> CmmExpr -> NatM Register
383
384 getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
385 | target32Bit (targetPlatform dflags) = do
386 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
387 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
388 reg nilOL)
389 | otherwise = return (Fixed II64 toc nilOL)
390
391 getRegister' dflags (CmmReg reg)
392 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
393 (getRegisterReg (targetPlatform dflags) reg) nilOL)
394
395 getRegister' dflags tree@(CmmRegOff _ _)
396 = getRegister' dflags (mangleIndexTree dflags tree)
397
398 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
399 -- TO_W_(x), TO_W_(x >> 32)
400
401 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
402 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
403 | target32Bit (targetPlatform dflags) = do
404 ChildCode64 code rlo <- iselExpr64 x
405 return $ Fixed II32 (getHiVRegFromLo rlo) code
406
407 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
408 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
409 | target32Bit (targetPlatform dflags) = do
410 ChildCode64 code rlo <- iselExpr64 x
411 return $ Fixed II32 (getHiVRegFromLo rlo) code
412
413 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
414 | target32Bit (targetPlatform dflags) = do
415 ChildCode64 code rlo <- iselExpr64 x
416 return $ Fixed II32 rlo code
417
418 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
419 | target32Bit (targetPlatform dflags) = do
420 ChildCode64 code rlo <- iselExpr64 x
421 return $ Fixed II32 rlo code
422
423 getRegister' dflags (CmmLoad mem pk)
424 | not (isWord64 pk) = do
425 let platform = targetPlatform dflags
426 Amode addr addr_code <- getAmode D mem
427 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
428 addr_code `snocOL` LD format dst addr
429 return (Any format code)
430 | not (target32Bit (targetPlatform dflags)) = do
431 Amode addr addr_code <- getAmode DS mem
432 let code dst = addr_code `snocOL` LD II64 dst addr
433 return (Any II64 code)
434
435 where format = cmmTypeFormat pk
436
437 -- catch simple cases of zero- or sign-extended load
438 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
439 Amode addr addr_code <- getAmode D mem
440 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
441
442 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
443 Amode addr addr_code <- getAmode D mem
444 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
445
446 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
447
448 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
449 Amode addr addr_code <- getAmode D mem
450 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
451
452 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
453 Amode addr addr_code <- getAmode D mem
454 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
455
456 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
457 Amode addr addr_code <- getAmode D mem
458 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
459
460 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
461 Amode addr addr_code <- getAmode D mem
462 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
463
464 getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
465 Amode addr addr_code <- getAmode D mem
466 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
467
468 getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
469 Amode addr addr_code <- getAmode D mem
470 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
471
472 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
473 = case mop of
474 MO_Not rep -> triv_ucode_int rep NOT
475
476 MO_F_Neg w -> triv_ucode_float w FNEG
477 MO_S_Neg w -> triv_ucode_int w NEG
478
479 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
480 MO_FF_Conv W32 W64 -> conversionNop FF64 x
481
482 MO_FS_Conv from to -> coerceFP2Int from to x
483 MO_SF_Conv from to -> coerceInt2FP from to x
484
485 MO_SS_Conv from to
486 | from == to -> conversionNop (intFormat to) x
487
488 -- narrowing is a nop: we treat the high bits as undefined
489 MO_SS_Conv W64 to
490 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register"
491 | otherwise -> conversionNop (intFormat to) x
492 MO_SS_Conv W32 to
493 | arch32 -> conversionNop (intFormat to) x
494 | otherwise -> case to of
495 W64 -> triv_ucode_int to (EXTS II32)
496 W16 -> conversionNop II16 x
497 W8 -> conversionNop II8 x
498 _ -> panic "PPC.CodeGen.getRegister: no match"
499 MO_SS_Conv W16 W8 -> conversionNop II8 x
500 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
501 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
502
503 MO_UU_Conv from to
504 | from == to -> conversionNop (intFormat to) x
505 -- narrowing is a nop: we treat the high bits as undefined
506 MO_UU_Conv W64 to
507 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target"
508 | otherwise -> conversionNop (intFormat to) x
509 MO_UU_Conv W32 to
510 | arch32 -> conversionNop (intFormat to) x
511 | otherwise ->
512 case to of
513 W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
514 W16 -> conversionNop II16 x
515 W8 -> conversionNop II8 x
516 _ -> panic "PPC.CodeGen.getRegister: no match"
517 MO_UU_Conv W16 W8 -> conversionNop II8 x
518 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
519 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
520 _ -> panic "PPC.CodeGen.getRegister: no match"
521
522 where
523 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
524 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
525
526 conversionNop new_format expr
527 = do e_code <- getRegister' dflags expr
528 return (swizzleRegisterRep e_code new_format)
529 arch32 = target32Bit $ targetPlatform dflags
530
531 getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
532 = case mop of
533 MO_F_Eq _ -> condFltReg EQQ x y
534 MO_F_Ne _ -> condFltReg NE x y
535 MO_F_Gt _ -> condFltReg GTT x y
536 MO_F_Ge _ -> condFltReg GE x y
537 MO_F_Lt _ -> condFltReg LTT x y
538 MO_F_Le _ -> condFltReg LE x y
539
540 MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
541 (extendUExpr dflags rep y)
542 MO_Ne rep -> condIntReg NE (extendUExpr dflags rep x)
543 (extendUExpr dflags rep y)
544
545 MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
546 (extendSExpr dflags rep y)
547 MO_S_Ge rep -> condIntReg GE (extendSExpr dflags rep x)
548 (extendSExpr dflags rep y)
549 MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
550 (extendSExpr dflags rep y)
551 MO_S_Le rep -> condIntReg LE (extendSExpr dflags rep x)
552 (extendSExpr dflags rep y)
553
554 MO_U_Gt rep -> condIntReg GU (extendUExpr dflags rep x)
555 (extendUExpr dflags rep y)
556 MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
557 (extendUExpr dflags rep y)
558 MO_U_Lt rep -> condIntReg LU (extendUExpr dflags rep x)
559 (extendUExpr dflags rep y)
560 MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
561 (extendUExpr dflags rep y)
562
563 MO_F_Add w -> triv_float w FADD
564 MO_F_Sub w -> triv_float w FSUB
565 MO_F_Mul w -> triv_float w FMUL
566 MO_F_Quot w -> triv_float w FDIV
567
568 -- optimize addition with 32-bit immediate
569 -- (needed for PIC)
570 MO_Add W32 ->
571 case y of
572 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
573 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
574 CmmLit lit
575 -> do
576 (src, srcCode) <- getSomeReg x
577 let imm = litToImm lit
578 code dst = srcCode `appOL` toOL [
579 ADDIS dst src (HA imm),
580 ADD dst dst (RIImm (LO imm))
581 ]
582 return (Any II32 code)
583 _ -> trivialCode W32 True ADD x y
584
585 MO_Add rep -> trivialCode rep True ADD x y
586 MO_Sub rep ->
587 case y of -- subfi ('substract from' with immediate) doesn't exist
588 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
589 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
590 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
591
592 MO_Mul rep
593 | arch32 -> trivialCode rep True MULLW x y
594 | otherwise -> trivialCode rep True MULLD x y
595
596 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
597 MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
598
599 MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
600 MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
601
602 MO_S_Quot rep
603 | arch32 -> trivialCodeNoImm' (intFormat rep) DIVW
604 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
605 | otherwise -> trivialCodeNoImm' (intFormat rep) DIVD
606 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
607 MO_U_Quot rep
608 | arch32 -> trivialCodeNoImm' (intFormat rep) DIVWU
609 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
610 | otherwise -> trivialCodeNoImm' (intFormat rep) DIVDU
611 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
612
613 MO_S_Rem rep
614 | arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x)
615 (extendSExpr dflags rep y)
616 | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
617 (extendSExpr dflags rep y)
618 MO_U_Rem rep
619 | arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x)
620 (extendSExpr dflags rep y)
621 | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
622 (extendSExpr dflags rep y)
623
624 MO_And rep -> trivialCode rep False AND x y
625 MO_Or rep -> trivialCode rep False OR x y
626 MO_Xor rep -> trivialCode rep False XOR x y
627
628 MO_Shl rep -> shiftCode rep SL x y
629 MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
630 MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
631 _ -> panic "PPC.CodeGen.getRegister: no match"
632
633 where
634 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
635 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
636
637 arch32 = target32Bit $ targetPlatform dflags
638
639 getRegister' _ (CmmLit (CmmInt i rep))
640 | Just imm <- makeImmediate rep True i
641 = let
642 code dst = unitOL (LI dst imm)
643 in
644 return (Any (intFormat rep) code)
645
646 getRegister' _ (CmmLit (CmmFloat f frep)) = do
647 lbl <- getNewLabelNat
648 dflags <- getDynFlags
649 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
650 Amode addr addr_code <- getAmode D dynRef
651 let format = floatFormat frep
652 code dst =
653 LDATA ReadOnlyData (Statics lbl
654 [CmmStaticLit (CmmFloat f frep)])
655 `consOL` (addr_code `snocOL` LD format dst addr)
656 return (Any format code)
657
658 getRegister' dflags (CmmLit lit)
659 | target32Bit (targetPlatform dflags)
660 = let rep = cmmLitType dflags lit
661 imm = litToImm lit
662 code dst = toOL [
663 LIS dst (HA imm),
664 ADD dst dst (RIImm (LO imm))
665 ]
666 in return (Any (cmmTypeFormat rep) code)
667 | otherwise
668 = do lbl <- getNewLabelNat
669 dflags <- getDynFlags
670 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
671 Amode addr addr_code <- getAmode D dynRef
672 let rep = cmmLitType dflags lit
673 format = cmmTypeFormat rep
674 code dst =
675 LDATA ReadOnlyData (Statics lbl
676 [CmmStaticLit lit])
677 `consOL` (addr_code `snocOL` LD format dst addr)
678 return (Any format code)
679
680 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
681
682 -- extend?Rep: wrap integer expression of type rep
683 -- in a conversion to II32 or II64 resp.
684 extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
685 extendSExpr dflags W32 x
686 | target32Bit (targetPlatform dflags) = x
687
688 extendSExpr dflags W64 x
689 | not (target32Bit (targetPlatform dflags)) = x
690
691 extendSExpr dflags rep x =
692 let size = if target32Bit $ targetPlatform dflags
693 then W32
694 else W64
695 in CmmMachOp (MO_SS_Conv rep size) [x]
696
697 extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
698 extendUExpr dflags W32 x
699 | target32Bit (targetPlatform dflags) = x
700 extendUExpr dflags W64 x
701 | not (target32Bit (targetPlatform dflags)) = x
702 extendUExpr dflags rep x =
703 let size = if target32Bit $ targetPlatform dflags
704 then W32
705 else W64
706 in CmmMachOp (MO_UU_Conv rep size) [x]
707
708 -- -----------------------------------------------------------------------------
709 -- The 'Amode' type: Memory addressing modes passed up the tree.
710
711 data Amode
712 = Amode AddrMode InstrBlock
713
714 {-
715 Now, given a tree (the argument to an CmmLoad) that references memory,
716 produce a suitable addressing mode.
717
718 A Rule of the Game (tm) for Amodes: use of the addr bit must
719 immediately follow use of the code part, since the code part puts
720 values in registers which the addr then refers to. So you can't put
721 anything in between, lest it overwrite some of those registers. If
722 you need to do some other computation between the code part and use of
723 the addr bit, first store the effective address from the amode in a
724 temporary, then do the other computation, and then use the temporary:
725
726 code
727 LEA amode, tmp
728 ... other computation ...
729 ... (tmp) ...
730 -}
731
732 data InstrForm = D | DS
733
734 getAmode :: InstrForm -> CmmExpr -> NatM Amode
735 getAmode inf tree@(CmmRegOff _ _)
736 = do dflags <- getDynFlags
737 getAmode inf (mangleIndexTree dflags tree)
738
739 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
740 | Just off <- makeImmediate W32 True (-i)
741 = do
742 (reg, code) <- getSomeReg x
743 return (Amode (AddrRegImm reg off) code)
744
745
746 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
747 | Just off <- makeImmediate W32 True i
748 = do
749 (reg, code) <- getSomeReg x
750 return (Amode (AddrRegImm reg off) code)
751
752 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
753 | Just off <- makeImmediate W64 True (-i)
754 = do
755 (reg, code) <- getSomeReg x
756 return (Amode (AddrRegImm reg off) code)
757
758
759 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
760 | Just off <- makeImmediate W64 True i
761 = do
762 (reg, code) <- getSomeReg x
763 return (Amode (AddrRegImm reg off) code)
764
765 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
766 | Just off <- makeImmediate W64 True (-i)
767 = do
768 (reg, code) <- getSomeReg x
769 (reg', off', code') <-
770 if i `mod` 4 == 0
771 then do return (reg, off, code)
772 else do
773 tmp <- getNewRegNat II64
774 return (tmp, ImmInt 0,
775 code `snocOL` ADD tmp reg (RIImm off))
776 return (Amode (AddrRegImm reg' off') code')
777
778 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
779 | Just off <- makeImmediate W64 True i
780 = do
781 (reg, code) <- getSomeReg x
782 (reg', off', code') <-
783 if i `mod` 4 == 0
784 then do return (reg, off, code)
785 else do
786 tmp <- getNewRegNat II64
787 return (tmp, ImmInt 0,
788 code `snocOL` ADD tmp reg (RIImm off))
789 return (Amode (AddrRegImm reg' off') code')
790
791 -- optimize addition with 32-bit immediate
792 -- (needed for PIC)
793 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
794 = do
795 tmp <- getNewRegNat II32
796 (src, srcCode) <- getSomeReg x
797 let imm = litToImm lit
798 code = srcCode `snocOL` ADDIS tmp src (HA imm)
799 return (Amode (AddrRegImm tmp (LO imm)) code)
800
801 getAmode _ (CmmLit lit)
802 = do
803 dflags <- getDynFlags
804 case platformArch $ targetPlatform dflags of
805 ArchPPC -> do
806 tmp <- getNewRegNat II32
807 let imm = litToImm lit
808 code = unitOL (LIS tmp (HA imm))
809 return (Amode (AddrRegImm tmp (LO imm)) code)
810 _ -> do -- TODO: Load from TOC,
811 -- see getRegister' _ (CmmLit lit)
812 tmp <- getNewRegNat II64
813 let imm = litToImm lit
814 code = toOL [
815 LIS tmp (HIGHESTA imm),
816 OR tmp tmp (RIImm (HIGHERA imm)),
817 SL II64 tmp tmp (RIImm (ImmInt 32)),
818 ORIS tmp tmp (HA imm)
819 ]
820 return (Amode (AddrRegImm tmp (LO imm)) code)
821
822 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
823 = do
824 (regX, codeX) <- getSomeReg x
825 (regY, codeY) <- getSomeReg y
826 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
827
828 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
829 = do
830 (regX, codeX) <- getSomeReg x
831 (regY, codeY) <- getSomeReg y
832 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
833
834 getAmode _ other
835 = do
836 (reg, code) <- getSomeReg other
837 let
838 off = ImmInt 0
839 return (Amode (AddrRegImm reg off) code)
840
841
842 -- The 'CondCode' type: Condition codes passed up the tree.
843 data CondCode
844 = CondCode Bool Cond InstrBlock
845
846 -- Set up a condition code for a conditional branch.
847
848 getCondCode :: CmmExpr -> NatM CondCode
849
850 -- almost the same as everywhere else - but we need to
851 -- extend small integers to 32 bit or 64 bit first
852
853 getCondCode (CmmMachOp mop [x, y])
854 = do
855 dflags <- getDynFlags
856 case mop of
857 MO_F_Eq W32 -> condFltCode EQQ x y
858 MO_F_Ne W32 -> condFltCode NE x y
859 MO_F_Gt W32 -> condFltCode GTT x y
860 MO_F_Ge W32 -> condFltCode GE x y
861 MO_F_Lt W32 -> condFltCode LTT x y
862 MO_F_Le W32 -> condFltCode LE x y
863
864 MO_F_Eq W64 -> condFltCode EQQ x y
865 MO_F_Ne W64 -> condFltCode NE x y
866 MO_F_Gt W64 -> condFltCode GTT x y
867 MO_F_Ge W64 -> condFltCode GE x y
868 MO_F_Lt W64 -> condFltCode LTT x y
869 MO_F_Le W64 -> condFltCode LE x y
870
871 MO_Eq rep -> condIntCode EQQ (extendUExpr dflags rep x)
872 (extendUExpr dflags rep y)
873 MO_Ne rep -> condIntCode NE (extendUExpr dflags rep x)
874 (extendUExpr dflags rep y)
875
876 MO_S_Gt rep -> condIntCode GTT (extendSExpr dflags rep x)
877 (extendSExpr dflags rep y)
878 MO_S_Ge rep -> condIntCode GE (extendSExpr dflags rep x)
879 (extendSExpr dflags rep y)
880 MO_S_Lt rep -> condIntCode LTT (extendSExpr dflags rep x)
881 (extendSExpr dflags rep y)
882 MO_S_Le rep -> condIntCode LE (extendSExpr dflags rep x)
883 (extendSExpr dflags rep y)
884
885 MO_U_Gt rep -> condIntCode GU (extendSExpr dflags rep x)
886 (extendSExpr dflags rep y)
887 MO_U_Ge rep -> condIntCode GEU (extendSExpr dflags rep x)
888 (extendSExpr dflags rep y)
889 MO_U_Lt rep -> condIntCode LU (extendSExpr dflags rep x)
890 (extendSExpr dflags rep y)
891 MO_U_Le rep -> condIntCode LEU (extendSExpr dflags rep x)
892 (extendSExpr dflags rep y)
893
894 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
895
896 getCondCode _ = panic "getCondCode(2)(powerpc)"
897
898
899
900 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
901 -- passed back up the tree.
902
903 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
904
905 -- ###FIXME: I16 and I8!
906 -- TODO: Is this still an issue? All arguments are extend?Expr'd.
907 condIntCode cond x (CmmLit (CmmInt y rep))
908 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
909 = do
910 (src1, code) <- getSomeReg x
911 dflags <- getDynFlags
912 let format = archWordFormat $ target32Bit $ targetPlatform dflags
913 code' = code `snocOL`
914 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
915 return (CondCode False cond code')
916
917 condIntCode cond x y = do
918 (src1, code1) <- getSomeReg x
919 (src2, code2) <- getSomeReg y
920 dflags <- getDynFlags
921 let format = archWordFormat $ target32Bit $ targetPlatform dflags
922 code' = code1 `appOL` code2 `snocOL`
923 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
924 return (CondCode False cond code')
925
926 condFltCode cond x y = do
927 (src1, code1) <- getSomeReg x
928 (src2, code2) <- getSomeReg y
929 let
930 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
931 code'' = case cond of -- twiddle CR to handle unordered case
932 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
933 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
934 _ -> code'
935 where
936 ltbit = 0 ; eqbit = 2 ; gtbit = 1
937 return (CondCode True cond code'')
938
939
940
941 -- -----------------------------------------------------------------------------
942 -- Generating assignments
943
944 -- Assignments are really at the heart of the whole code generation
945 -- business. Almost all top-level nodes of any real importance are
946 -- assignments, which correspond to loads, stores, or register
947 -- transfers. If we're really lucky, some of the register transfers
948 -- will go away, because we can use the destination register to
949 -- complete the code generation for the right hand side. This only
950 -- fails when the right hand side is forced into a fixed register
951 -- (e.g. the result of a call).
952
953 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
954 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
955
956 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
957 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
958
959 assignMem_IntCode pk addr src = do
960 (srcReg, code) <- getSomeReg src
961 Amode dstAddr addr_code <- case pk of
962 II64 -> getAmode DS addr
963 _ -> getAmode D addr
964 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
965
966 -- dst is a reg, but src could be anything
967 assignReg_IntCode _ reg src
968 = do
969 dflags <- getDynFlags
970 let dst = getRegisterReg (targetPlatform dflags) reg
971 r <- getRegister src
972 return $ case r of
973 Any _ code -> code dst
974 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
975
976
977
978 -- Easy, isn't it?
979 assignMem_FltCode = assignMem_IntCode
980 assignReg_FltCode = assignReg_IntCode
981
982
983
984 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
985
986 genJump (CmmLit (CmmLabel lbl))
987 = return (unitOL $ JMP lbl)
988
989 genJump tree
990 = do
991 dflags <- getDynFlags
992 let platform = targetPlatform dflags
993 case platformOS platform of
994 OSLinux -> case platformArch platform of
995 ArchPPC -> genJump' tree GCPLinux
996 ArchPPC_64 ELF_V1 -> genJump' tree (GCPLinux64ELF 1)
997 ArchPPC_64 ELF_V2 -> genJump' tree (GCPLinux64ELF 2)
998 _ -> panic "PPC.CodeGen.genJump: Unknown Linux"
999 OSDarwin -> genJump' tree GCPDarwin
1000 _ -> panic "PPC.CodeGen.genJump: not defined for this os"
1001
1002
1003 genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
1004
1005 genJump' tree (GCPLinux64ELF 1)
1006 = do
1007 (target,code) <- getSomeReg tree
1008 return (code
1009 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1010 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1011 `snocOL` MTCTR r11
1012 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1013 `snocOL` BCTR [] Nothing)
1014
1015 genJump' tree (GCPLinux64ELF 2)
1016 = do
1017 (target,code) <- getSomeReg tree
1018 return (code
1019 `snocOL` MR r12 target
1020 `snocOL` MTCTR r12
1021 `snocOL` BCTR [] Nothing)
1022
1023 genJump' tree _
1024 = do
1025 (target,code) <- getSomeReg tree
1026 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
1027
1028 -- -----------------------------------------------------------------------------
1029 -- Unconditional branches
1030 genBranch :: BlockId -> NatM InstrBlock
1031 genBranch = return . toOL . mkJumpInstr
1032
1033
1034 -- -----------------------------------------------------------------------------
1035 -- Conditional jumps
1036
1037 {-
1038 Conditional jumps are always to local labels, so we can use branch
1039 instructions. We peek at the arguments to decide what kind of
1040 comparison to do.
1041 -}
1042
1043
1044 genCondJump
1045 :: BlockId -- the branch target
1046 -> CmmExpr -- the condition on which to branch
1047 -> NatM InstrBlock
1048
1049 genCondJump id bool = do
1050 CondCode _ cond code <- getCondCode bool
1051 return (code `snocOL` BCC cond id)
1052
1053
1054
1055 -- -----------------------------------------------------------------------------
1056 -- Generating C calls
1057
1058 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1059 -- @get_arg@, which moves the arguments to the correct registers/stack
1060 -- locations. Apart from that, the code is easy.
1061 --
1062 -- (If applicable) Do not fill the delay slots here; you will confuse the
1063 -- register allocator.
1064
1065 genCCall :: ForeignTarget -- function to call
1066 -> [CmmFormal] -- where to put the result
1067 -> [CmmActual] -- arguments (of mixed type)
1068 -> NatM InstrBlock
1069 genCCall target dest_regs argsAndHints
1070 = do dflags <- getDynFlags
1071 let platform = targetPlatform dflags
1072 case platformOS platform of
1073 OSLinux -> case platformArch platform of
1074 ArchPPC -> genCCall' dflags GCPLinux
1075 target dest_regs argsAndHints
1076 ArchPPC_64 ELF_V1 -> genCCall' dflags (GCPLinux64ELF 1)
1077 target dest_regs argsAndHints
1078 ArchPPC_64 ELF_V2 -> genCCall' dflags (GCPLinux64ELF 2)
1079 target dest_regs argsAndHints
1080 _ -> panic "PPC.CodeGen.genCCall: Unknown Linux"
1081 OSDarwin -> genCCall' dflags GCPDarwin target dest_regs argsAndHints
1082 _ -> panic "PPC.CodeGen.genCCall: not defined for this os"
1083
1084 data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF Int
1085
1086 genCCall'
1087 :: DynFlags
1088 -> GenCCallPlatform
1089 -> ForeignTarget -- function to call
1090 -> [CmmFormal] -- where to put the result
1091 -> [CmmActual] -- arguments (of mixed type)
1092 -> NatM InstrBlock
1093
1094 {-
1095 The PowerPC calling convention for Darwin/Mac OS X
1096 is described in Apple's document
1097 "Inside Mac OS X - Mach-O Runtime Architecture".
1098
1099 PowerPC Linux uses the System V Release 4 Calling Convention
1100 for PowerPC. It is described in the
1101 "System V Application Binary Interface PowerPC Processor Supplement".
1102
1103 Both conventions are similar:
1104 Parameters may be passed in general-purpose registers starting at r3, in
1105 floating point registers starting at f1, or on the stack.
1106
1107 But there are substantial differences:
1108 * The number of registers used for parameter passing and the exact set of
1109 nonvolatile registers differs (see MachRegs.hs).
1110 * On Darwin, stack space is always reserved for parameters, even if they are
1111 passed in registers. The called routine may choose to save parameters from
1112 registers to the corresponding space on the stack.
1113 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
1114 parameter is passed in an FPR.
1115 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1116 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1117 Darwin just treats an I64 like two separate II32s (high word first).
1118 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1119 4-byte aligned like everything else on Darwin.
1120 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1121 PowerPC Linux does not agree, so neither do we.
1122
1123 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1124 64-bit PowerPC. It is specified in
1125 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9".
1126
1127 According to all conventions, the parameter area should be part of the
1128 caller's stack frame, allocated in the caller's prologue code (large enough
1129 to hold the parameter lists for all called routines). The NCG already
1130 uses the stack for register spilling, leaving 64 bytes free at the top.
1131 If we need a larger parameter area than that, we just allocate a new stack
1132 frame just before ccalling.
1133 -}
1134
1135
1136 genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
1137 = return $ unitOL LWSYNC
1138
1139 genCCall' _ _ (PrimTarget MO_Touch) _ _
1140 = return $ nilOL
1141
1142 genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
1143 = return $ nilOL
1144
1145 genCCall' dflags gcp target dest_regs args
1146 = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
1147 -- we rely on argument promotion in the codeGen
1148 do
1149 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1150 (zip args argReps)
1151 allArgRegs
1152 (allFPArgRegs platform)
1153 initialStackOffset
1154 (toOL []) []
1155
1156 (labelOrExpr, reduceToFF32) <- case target of
1157 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1158 uses_pic_base_implicitly
1159 return (Left lbl, False)
1160 ForeignTarget expr _ -> do
1161 uses_pic_base_implicitly
1162 return (Right expr, False)
1163 PrimTarget mop -> outOfLineMachOp mop
1164
1165 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1166 `appOL` toc_before
1167 codeAfter = toc_after labelOrExpr `appOL` move_sp_up finalStack
1168 `appOL` moveResult reduceToFF32
1169
1170 case labelOrExpr of
1171 Left lbl -> do -- the linker does all the work for us
1172 return ( codeBefore
1173 `snocOL` BL lbl usedRegs
1174 `appOL` codeAfter)
1175 Right dyn -> do -- implement call through function pointer
1176 (dynReg, dynCode) <- getSomeReg dyn
1177 case gcp of
1178 GCPLinux64ELF 1 -> return ( dynCode
1179 `appOL` codeBefore
1180 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1181 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1182 `snocOL` MTCTR r11
1183 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1184 `snocOL` BCTRL usedRegs
1185 `appOL` codeAfter)
1186 GCPLinux64ELF 2 -> return ( dynCode
1187 `appOL` codeBefore
1188 `snocOL` MR r12 dynReg
1189 `snocOL` MTCTR r12
1190 `snocOL` BCTRL usedRegs
1191 `appOL` codeAfter)
1192 _ -> return ( dynCode
1193 `snocOL` MTCTR dynReg
1194 `appOL` codeBefore
1195 `snocOL` BCTRL usedRegs
1196 `appOL` codeAfter)
1197 where
1198 platform = targetPlatform dflags
1199
1200 uses_pic_base_implicitly = do
1201 -- See Note [implicit register in PPC PIC code]
1202 -- on why we claim to use PIC register here
1203 when (gopt Opt_PIC dflags && target32Bit platform) $ do
1204 _ <- getPicBaseNat $ archWordFormat True
1205 return ()
1206
1207 initialStackOffset = case gcp of
1208 GCPDarwin -> 24
1209 GCPLinux -> 8
1210 GCPLinux64ELF 1 -> 48
1211 GCPLinux64ELF 2 -> 32
1212 _ -> panic "genCall': unknown calling convention"
1213 -- size of linkage area + size of arguments, in bytes
1214 stackDelta finalStack = case gcp of
1215 GCPDarwin ->
1216 roundTo 16 $ (24 +) $ max 32 $ sum $
1217 map (widthInBytes . typeWidth) argReps
1218 GCPLinux -> roundTo 16 finalStack
1219 GCPLinux64ELF 1 ->
1220 roundTo 16 $ (48 +) $ max 64 $ sum $
1221 map (widthInBytes . typeWidth) argReps
1222 GCPLinux64ELF 2 ->
1223 roundTo 16 $ (32 +) $ max 64 $ sum $
1224 map (widthInBytes . typeWidth) argReps
1225 _ -> panic "genCall': unknown calling conv."
1226
1227 argReps = map (cmmExprType dflags) args
1228
1229 roundTo a x | x `mod` a == 0 = x
1230 | otherwise = x + a - (x `mod` a)
1231
1232 spFormat = if target32Bit platform then II32 else II64
1233
1234 move_sp_down finalStack
1235 | delta > 64 =
1236 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1237 DELTA (-delta)]
1238 | otherwise = nilOL
1239 where delta = stackDelta finalStack
1240 toc_before = case gcp of
1241 GCPLinux64ELF 1 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 40))
1242 GCPLinux64ELF 2 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 24))
1243 _ -> nilOL
1244 toc_after labelOrExpr = case gcp of
1245 GCPLinux64ELF 1 -> case labelOrExpr of
1246 Left _ -> toOL [ NOP ]
1247 Right _ -> toOL [ LD spFormat toc
1248 (AddrRegImm sp
1249 (ImmInt 40))
1250 ]
1251 GCPLinux64ELF 2 -> case labelOrExpr of
1252 Left _ -> toOL [ NOP ]
1253 Right _ -> toOL [ LD spFormat toc
1254 (AddrRegImm sp
1255 (ImmInt 24))
1256 ]
1257 _ -> nilOL
1258 move_sp_up finalStack
1259 | delta > 64 = -- TODO: fix-up stack back-chain
1260 toOL [ADD sp sp (RIImm (ImmInt delta)),
1261 DELTA 0]
1262 | otherwise = nilOL
1263 where delta = stackDelta finalStack
1264
1265
1266 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1267 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
1268 accumCode accumUsed | isWord64 arg_ty
1269 && target32Bit (targetPlatform dflags) =
1270 do
1271 ChildCode64 code vr_lo <- iselExpr64 arg
1272 let vr_hi = getHiVRegFromLo vr_lo
1273
1274 case gcp of
1275 GCPDarwin ->
1276 do let storeWord vr (gpr:_) _ = MR gpr vr
1277 storeWord vr [] offset
1278 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1279 passArguments args
1280 (drop 2 gprs)
1281 fprs
1282 (stackOffset+8)
1283 (accumCode `appOL` code
1284 `snocOL` storeWord vr_hi gprs stackOffset
1285 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1286 ((take 2 gprs) ++ accumUsed)
1287 GCPLinux ->
1288 do let stackOffset' = roundTo 8 stackOffset
1289 stackCode = accumCode `appOL` code
1290 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1291 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1292 regCode hireg loreg =
1293 accumCode `appOL` code
1294 `snocOL` MR hireg vr_hi
1295 `snocOL` MR loreg vr_lo
1296
1297 case gprs of
1298 hireg : loreg : regs | even (length gprs) ->
1299 passArguments args regs fprs stackOffset
1300 (regCode hireg loreg) (hireg : loreg : accumUsed)
1301 _skipped : hireg : loreg : regs ->
1302 passArguments args regs fprs stackOffset
1303 (regCode hireg loreg) (hireg : loreg : accumUsed)
1304 _ -> -- only one or no regs left
1305 passArguments args [] fprs (stackOffset'+8)
1306 stackCode accumUsed
1307 GCPLinux64ELF _ -> panic "passArguments: 32 bit code"
1308
1309 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1310 | reg : _ <- regs = do
1311 register <- getRegister arg
1312 let code = case register of
1313 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1314 Any _ acode -> acode reg
1315 stackOffsetRes = case gcp of
1316 -- The Darwin ABI requires that we reserve
1317 -- stack slots for register parameters
1318 GCPDarwin -> stackOffset + stackBytes
1319 -- ... the SysV ABI 32-bit doesn't.
1320 GCPLinux -> stackOffset
1321 -- ... but SysV ABI 64-bit does.
1322 GCPLinux64ELF _ -> stackOffset + stackBytes
1323 passArguments args
1324 (drop nGprs gprs)
1325 (drop nFprs fprs)
1326 stackOffsetRes
1327 (accumCode `appOL` code)
1328 (reg : accumUsed)
1329 | otherwise = do
1330 (vr, code) <- getSomeReg arg
1331 passArguments args
1332 (drop nGprs gprs)
1333 (drop nFprs fprs)
1334 (stackOffset' + stackBytes)
1335 (accumCode `appOL` code `snocOL` ST (cmmTypeFormat rep) vr stackSlot)
1336 accumUsed
1337 where
1338 stackOffset' = case gcp of
1339 GCPDarwin ->
1340 -- stackOffset is at least 4-byte aligned
1341 -- The Darwin ABI is happy with that.
1342 stackOffset
1343 GCPLinux
1344 -- ... the SysV ABI requires 8-byte
1345 -- alignment for doubles.
1346 | isFloatType rep && typeWidth rep == W64 ->
1347 roundTo 8 stackOffset
1348 | otherwise ->
1349 stackOffset
1350 GCPLinux64ELF _ ->
1351 -- everything on the stack is 8-byte
1352 -- aligned on a 64 bit system
1353 -- (except vector status, not used now)
1354 stackOffset
1355 stackSlot = AddrRegImm sp (ImmInt stackOffset')
1356 (nGprs, nFprs, stackBytes, regs)
1357 = case gcp of
1358 GCPDarwin ->
1359 case cmmTypeFormat rep of
1360 II8 -> (1, 0, 4, gprs)
1361 II16 -> (1, 0, 4, gprs)
1362 II32 -> (1, 0, 4, gprs)
1363 -- The Darwin ABI requires that we skip a
1364 -- corresponding number of GPRs when we use
1365 -- the FPRs.
1366 FF32 -> (1, 1, 4, fprs)
1367 FF64 -> (2, 1, 8, fprs)
1368 II64 -> panic "genCCall' passArguments II64"
1369 FF80 -> panic "genCCall' passArguments FF80"
1370 GCPLinux ->
1371 case cmmTypeFormat rep of
1372 II8 -> (1, 0, 4, gprs)
1373 II16 -> (1, 0, 4, gprs)
1374 II32 -> (1, 0, 4, gprs)
1375 -- ... the SysV ABI doesn't.
1376 FF32 -> (0, 1, 4, fprs)
1377 FF64 -> (0, 1, 8, fprs)
1378 II64 -> panic "genCCall' passArguments II64"
1379 FF80 -> panic "genCCall' passArguments FF80"
1380 GCPLinux64ELF _ ->
1381 case cmmTypeFormat rep of
1382 II8 -> (1, 0, 8, gprs)
1383 II16 -> (1, 0, 8, gprs)
1384 II32 -> (1, 0, 8, gprs)
1385 II64 -> (1, 0, 8, gprs)
1386 -- The ELFv1 ABI requires that we skip a
1387 -- corresponding number of GPRs when we use
1388 -- the FPRs.
1389 FF32 -> (1, 1, 8, fprs)
1390 FF64 -> (1, 1, 8, fprs)
1391 FF80 -> panic "genCCall' passArguments FF80"
1392
1393 moveResult reduceToFF32 =
1394 case dest_regs of
1395 [] -> nilOL
1396 [dest]
1397 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1398 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1399 | isWord64 rep && target32Bit (targetPlatform dflags)
1400 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1401 MR r_dest r4]
1402 | otherwise -> unitOL (MR r_dest r3)
1403 where rep = cmmRegType dflags (CmmLocal dest)
1404 r_dest = getRegisterReg platform (CmmLocal dest)
1405 _ -> panic "genCCall' moveResult: Bad dest_regs"
1406
1407 outOfLineMachOp mop =
1408 do
1409 dflags <- getDynFlags
1410 mopExpr <- cmmMakeDynamicReference dflags CallReference $
1411 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1412 let mopLabelOrExpr = case mopExpr of
1413 CmmLit (CmmLabel lbl) -> Left lbl
1414 _ -> Right mopExpr
1415 return (mopLabelOrExpr, reduce)
1416 where
1417 (functionName, reduce) = case mop of
1418 MO_F32_Exp -> (fsLit "exp", True)
1419 MO_F32_Log -> (fsLit "log", True)
1420 MO_F32_Sqrt -> (fsLit "sqrt", True)
1421
1422 MO_F32_Sin -> (fsLit "sin", True)
1423 MO_F32_Cos -> (fsLit "cos", True)
1424 MO_F32_Tan -> (fsLit "tan", True)
1425
1426 MO_F32_Asin -> (fsLit "asin", True)
1427 MO_F32_Acos -> (fsLit "acos", True)
1428 MO_F32_Atan -> (fsLit "atan", True)
1429
1430 MO_F32_Sinh -> (fsLit "sinh", True)
1431 MO_F32_Cosh -> (fsLit "cosh", True)
1432 MO_F32_Tanh -> (fsLit "tanh", True)
1433 MO_F32_Pwr -> (fsLit "pow", True)
1434
1435 MO_F64_Exp -> (fsLit "exp", False)
1436 MO_F64_Log -> (fsLit "log", False)
1437 MO_F64_Sqrt -> (fsLit "sqrt", False)
1438
1439 MO_F64_Sin -> (fsLit "sin", False)
1440 MO_F64_Cos -> (fsLit "cos", False)
1441 MO_F64_Tan -> (fsLit "tan", False)
1442
1443 MO_F64_Asin -> (fsLit "asin", False)
1444 MO_F64_Acos -> (fsLit "acos", False)
1445 MO_F64_Atan -> (fsLit "atan", False)
1446
1447 MO_F64_Sinh -> (fsLit "sinh", False)
1448 MO_F64_Cosh -> (fsLit "cosh", False)
1449 MO_F64_Tanh -> (fsLit "tanh", False)
1450 MO_F64_Pwr -> (fsLit "pow", False)
1451
1452 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
1453
1454 MO_Memcpy _ -> (fsLit "memcpy", False)
1455 MO_Memset _ -> (fsLit "memset", False)
1456 MO_Memmove _ -> (fsLit "memmove", False)
1457
1458 MO_BSwap w -> (fsLit $ bSwapLabel w, False)
1459 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
1460 MO_Clz w -> (fsLit $ clzLabel w, False)
1461 MO_Ctz w -> (fsLit $ ctzLabel w, False)
1462 MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
1463 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
1464 MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
1465 MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
1466
1467 MO_S_QuotRem {} -> unsupported
1468 MO_U_QuotRem {} -> unsupported
1469 MO_U_QuotRem2 {} -> unsupported
1470 MO_Add2 {} -> unsupported
1471 MO_SubWordC {} -> unsupported
1472 MO_AddIntC {} -> unsupported
1473 MO_SubIntC {} -> unsupported
1474 MO_U_Mul2 {} -> unsupported
1475 MO_WriteBarrier -> unsupported
1476 MO_Touch -> unsupported
1477 (MO_Prefetch_Data _ ) -> unsupported
1478 unsupported = panic ("outOfLineCmmOp: " ++ show mop
1479 ++ " not supported")
1480
1481 -- -----------------------------------------------------------------------------
1482 -- Generating a table-branch
1483
1484 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
1485 genSwitch dflags expr targets
1486 | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags)
1487 = do
1488 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1489 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1490 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1491 tmp <- getNewRegNat fmt
1492 lbl <- getNewLabelNat
1493 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1494 (tableReg,t_code) <- getSomeReg $ dynRef
1495 let code = e_code `appOL` t_code `appOL` toOL [
1496 SL fmt tmp reg (RIImm (ImmInt sha)),
1497 LD fmt tmp (AddrRegReg tableReg tmp),
1498 ADD tmp tmp (RIReg tableReg),
1499 MTCTR tmp,
1500 BCTR ids (Just lbl)
1501 ]
1502 return code
1503 | otherwise
1504 = do
1505 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1506 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1507 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1508 tmp <- getNewRegNat fmt
1509 lbl <- getNewLabelNat
1510 let code = e_code `appOL` toOL [
1511 SL fmt tmp reg (RIImm (ImmInt sha)),
1512 ADDIS tmp tmp (HA (ImmCLbl lbl)),
1513 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1514 MTCTR tmp,
1515 BCTR ids (Just lbl)
1516 ]
1517 return code
1518 where (offset, ids) = switchTargetsToTable targets
1519
1520 generateJumpTableForInstr :: DynFlags -> Instr
1521 -> Maybe (NatCmmDecl CmmStatics Instr)
1522 generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
1523 let jumpTable
1524 | (gopt Opt_PIC dflags)
1525 || (not $ target32Bit $ targetPlatform dflags)
1526 = map jumpTableEntryRel ids
1527 | otherwise = map (jumpTableEntry dflags) ids
1528 where jumpTableEntryRel Nothing
1529 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
1530 jumpTableEntryRel (Just blockid)
1531 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1532 where blockLabel = mkAsmTempLabel (getUnique blockid)
1533 in Just (CmmData ReadOnlyData (Statics lbl jumpTable))
1534 generateJumpTableForInstr _ _ = Nothing
1535
1536 -- -----------------------------------------------------------------------------
1537 -- 'condIntReg' and 'condFltReg': condition codes into registers
1538
1539 -- Turn those condition codes into integers now (when they appear on
1540 -- the right hand side of an assignment).
1541
1542 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1543
1544 condReg :: NatM CondCode -> NatM Register
1545 condReg getCond = do
1546 CondCode _ cond cond_code <- getCond
1547 dflags <- getDynFlags
1548 let
1549 code dst = cond_code
1550 `appOL` negate_code
1551 `appOL` toOL [
1552 MFCR dst,
1553 RLWINM dst dst (bit + 1) 31 31
1554 ]
1555
1556 negate_code | do_negate = unitOL (CRNOR bit bit bit)
1557 | otherwise = nilOL
1558
1559 (bit, do_negate) = case cond of
1560 LTT -> (0, False)
1561 LE -> (1, True)
1562 EQQ -> (2, False)
1563 GE -> (0, True)
1564 GTT -> (1, False)
1565
1566 NE -> (2, True)
1567
1568 LU -> (0, False)
1569 LEU -> (1, True)
1570 GEU -> (0, True)
1571 GU -> (1, False)
1572 _ -> panic "PPC.CodeGen.codeReg: no match"
1573
1574 format = archWordFormat $ target32Bit $ targetPlatform dflags
1575 return (Any format code)
1576
1577 condIntReg cond x y = condReg (condIntCode cond x y)
1578 condFltReg cond x y = condReg (condFltCode cond x y)
1579
1580
1581
1582 -- -----------------------------------------------------------------------------
1583 -- 'trivial*Code': deal with trivial instructions
1584
1585 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1586 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1587 -- Only look for constants on the right hand side, because that's
1588 -- where the generic optimizer will have put them.
1589
1590 -- Similarly, for unary instructions, we don't have to worry about
1591 -- matching an StInt as the argument, because genericOpt will already
1592 -- have handled the constant-folding.
1593
1594
1595
1596 {-
1597 Wolfgang's PowerPC version of The Rules:
1598
1599 A slightly modified version of The Rules to take advantage of the fact
1600 that PowerPC instructions work on all registers and don't implicitly
1601 clobber any fixed registers.
1602
1603 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1604
1605 * If getRegister returns Any, then the code it generates may modify only:
1606 (a) fresh temporaries
1607 (b) the destination register
1608 It may *not* modify global registers, unless the global
1609 register happens to be the destination register.
1610 It may not clobber any other registers. In fact, only ccalls clobber any
1611 fixed registers.
1612 Also, it may not modify the counter register (used by genCCall).
1613
1614 Corollary: If a getRegister for a subexpression returns Fixed, you need
1615 not move it to a fresh temporary before evaluating the next subexpression.
1616 The Fixed register won't be modified.
1617 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1618
1619 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1620 the value of the destination register.
1621 -}
1622
1623 trivialCode
1624 :: Width
1625 -> Bool
1626 -> (Reg -> Reg -> RI -> Instr)
1627 -> CmmExpr
1628 -> CmmExpr
1629 -> NatM Register
1630
1631 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1632 | Just imm <- makeImmediate rep signed y
1633 = do
1634 (src1, code1) <- getSomeReg x
1635 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1636 return (Any (intFormat rep) code)
1637
1638 trivialCode rep _ instr x y = do
1639 (src1, code1) <- getSomeReg x
1640 (src2, code2) <- getSomeReg y
1641 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1642 return (Any (intFormat rep) code)
1643
1644 shiftCode
1645 :: Width
1646 -> (Format-> Reg -> Reg -> RI -> Instr)
1647 -> CmmExpr
1648 -> CmmExpr
1649 -> NatM Register
1650 shiftCode width instr x (CmmLit (CmmInt y _))
1651 | Just imm <- makeImmediate width False y
1652 = do
1653 (src1, code1) <- getSomeReg x
1654 let format = intFormat width
1655 let code dst = code1 `snocOL` instr format dst src1 (RIImm imm)
1656 return (Any format code)
1657
1658 shiftCode width instr x y = do
1659 (src1, code1) <- getSomeReg x
1660 (src2, code2) <- getSomeReg y
1661 let format = intFormat width
1662 let code dst = code1 `appOL` code2 `snocOL` instr format dst src1 (RIReg src2)
1663 return (Any format code)
1664
1665 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
1666 -> CmmExpr -> CmmExpr -> NatM Register
1667 trivialCodeNoImm' format instr x y = do
1668 (src1, code1) <- getSomeReg x
1669 (src2, code2) <- getSomeReg y
1670 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1671 return (Any format code)
1672
1673 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
1674 -> CmmExpr -> CmmExpr -> NatM Register
1675 trivialCodeNoImm format instr x y = trivialCodeNoImm' format (instr format) x y
1676
1677
1678 trivialUCode
1679 :: Format
1680 -> (Reg -> Reg -> Instr)
1681 -> CmmExpr
1682 -> NatM Register
1683 trivialUCode rep instr x = do
1684 (src, code) <- getSomeReg x
1685 let code' dst = code `snocOL` instr dst src
1686 return (Any rep code')
1687
1688 -- There is no "remainder" instruction on the PPC, so we have to do
1689 -- it the hard way.
1690 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1691
1692 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1693 -> CmmExpr -> CmmExpr -> NatM Register
1694 remainderCode rep div x y = do
1695 dflags <- getDynFlags
1696 let mull_instr = if target32Bit $ targetPlatform dflags then MULLW
1697 else MULLD
1698 (src1, code1) <- getSomeReg x
1699 (src2, code2) <- getSomeReg y
1700 let code dst = code1 `appOL` code2 `appOL` toOL [
1701 div dst src1 src2,
1702 mull_instr dst dst (RIReg src2),
1703 SUBF dst dst src1
1704 ]
1705 return (Any (intFormat rep) code)
1706
1707 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1708 coerceInt2FP fromRep toRep x = do
1709 dflags <- getDynFlags
1710 let arch = platformArch $ targetPlatform dflags
1711 coerceInt2FP' arch fromRep toRep x
1712
1713 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
1714 coerceInt2FP' ArchPPC fromRep toRep x = do
1715 (src, code) <- getSomeReg x
1716 lbl <- getNewLabelNat
1717 itmp <- getNewRegNat II32
1718 ftmp <- getNewRegNat FF64
1719 dflags <- getDynFlags
1720 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1721 Amode addr addr_code <- getAmode D dynRef
1722 let
1723 code' dst = code `appOL` maybe_exts `appOL` toOL [
1724 LDATA ReadOnlyData $ Statics lbl
1725 [CmmStaticLit (CmmInt 0x43300000 W32),
1726 CmmStaticLit (CmmInt 0x80000000 W32)],
1727 XORIS itmp src (ImmInt 0x8000),
1728 ST II32 itmp (spRel dflags 3),
1729 LIS itmp (ImmInt 0x4330),
1730 ST II32 itmp (spRel dflags 2),
1731 LD FF64 ftmp (spRel dflags 2)
1732 ] `appOL` addr_code `appOL` toOL [
1733 LD FF64 dst addr,
1734 FSUB FF64 dst ftmp dst
1735 ] `appOL` maybe_frsp dst
1736
1737 maybe_exts = case fromRep of
1738 W8 -> unitOL $ EXTS II8 src src
1739 W16 -> unitOL $ EXTS II16 src src
1740 W32 -> nilOL
1741 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1742
1743 maybe_frsp dst
1744 = case toRep of
1745 W32 -> unitOL $ FRSP dst dst
1746 W64 -> nilOL
1747 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1748
1749 return (Any (floatFormat toRep) code')
1750
1751 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
1752 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
1753 -- set right before a call and restored right after return from the call.
1754 -- So it is fine.
1755 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
1756 (src, code) <- getSomeReg x
1757 dflags <- getDynFlags
1758 let
1759 code' dst = code `appOL` maybe_exts `appOL` toOL [
1760 ST II64 src (spRel dflags 3),
1761 LD FF64 dst (spRel dflags 3),
1762 FCFID dst dst
1763 ] `appOL` maybe_frsp dst
1764
1765 maybe_exts = case fromRep of
1766 W8 -> unitOL $ EXTS II8 src src
1767 W16 -> unitOL $ EXTS II16 src src
1768 W32 -> unitOL $ EXTS II32 src src
1769 W64 -> nilOL
1770 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1771
1772 maybe_frsp dst
1773 = case toRep of
1774 W32 -> unitOL $ FRSP dst dst
1775 W64 -> nilOL
1776 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1777
1778 return (Any (floatFormat toRep) code')
1779
1780 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
1781
1782
1783 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1784 coerceFP2Int fromRep toRep x = do
1785 dflags <- getDynFlags
1786 let arch = platformArch $ targetPlatform dflags
1787 coerceFP2Int' arch fromRep toRep x
1788
1789 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
1790 coerceFP2Int' ArchPPC _ toRep x = do
1791 dflags <- getDynFlags
1792 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1793 (src, code) <- getSomeReg x
1794 tmp <- getNewRegNat FF64
1795 let
1796 code' dst = code `appOL` toOL [
1797 -- convert to int in FP reg
1798 FCTIWZ tmp src,
1799 -- store value (64bit) from FP to stack
1800 ST FF64 tmp (spRel dflags 2),
1801 -- read low word of value (high word is undefined)
1802 LD II32 dst (spRel dflags 3)]
1803 return (Any (intFormat toRep) code')
1804
1805 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
1806 dflags <- getDynFlags
1807 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
1808 (src, code) <- getSomeReg x
1809 tmp <- getNewRegNat FF64
1810 let
1811 code' dst = code `appOL` toOL [
1812 -- convert to int in FP reg
1813 FCTIDZ tmp src,
1814 -- store value (64bit) from FP to compiler word on stack
1815 ST FF64 tmp (spRel dflags 3),
1816 LD II64 dst (spRel dflags 3)]
1817 return (Any (intFormat toRep) code')
1818
1819 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
1820
1821 -- Note [.LCTOC1 in PPC PIC code]
1822 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
1823 -- to make the most of the PPC's 16-bit displacements.
1824 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
1825 -- first element will have '-32768' offset against .LCTOC1.
1826
1827 -- Note [implicit register in PPC PIC code]
1828 -- PPC generates calls by labels in assembly
1829 -- in form of:
1830 -- bl puts+32768@plt
1831 -- in this form it's not seen directly (by GHC NCG)
1832 -- that r30 (PicBaseReg) is used,
1833 -- but r30 is a required part of PLT code setup:
1834 -- puts+32768@plt:
1835 -- lwz r11,-30484(r30) ; offset in .LCTOC1
1836 -- mtctr r11
1837 -- bctr