56025f44acda05bd07e4aaee8d70137adadf8710
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, (b) the type signatures, and
13 -- (c) the #if blah_TARGET_ARCH things, the
14 -- structure should not be too overwhelming.
15
16 module PPC.CodeGen (
17 cmmTopCodeGen,
18 generateJumpTableForInstr,
19 InstrBlock
20 )
21
22 where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26 #include "../includes/MachDeps.h"
27
28 -- NCG stuff:
29 import CodeGen.Platform
30 import PPC.Instr
31 import PPC.Cond
32 import PPC.Regs
33 import CPrim
34 import NCGMonad
35 import Instruction
36 import PIC
37 import Format
38 import RegClass
39 import Reg
40 import TargetReg
41 import Platform
42
43 -- Our intermediate code:
44 import BlockId
45 import PprCmm ( pprExpr )
46 import Cmm
47 import CmmUtils
48 import CmmSwitch
49 import CLabel
50 import Hoopl
51
52 -- The rest:
53 import OrdList
54 import Outputable
55 import Unique
56 import DynFlags
57
58 import Control.Monad ( mapAndUnzipM, when )
59 import Data.Bits
60 import Data.Word
61
62 import BasicTypes
63 import FastString
64 import Util
65
66 -- -----------------------------------------------------------------------------
67 -- Top-level of the instruction selector
68
69 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
70 -- They are really trees of insns to facilitate fast appending, where a
71 -- left-to-right traversal (pre-order?) yields the insns in the correct
72 -- order.
73
-- | Run instruction selection over one top-level Cmm declaration.
--
-- A procedure is translated block by block; the statics emitted while
-- translating its blocks are returned as extra declarations after the
-- procedure itself.  Data declarations are passed through untouched.
cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl CmmStatics Instr]
cmmTopCodeGen (CmmProc info lab live graph) = do
  (natBlockss, staticss) <-
      mapAndUnzipM basicBlockCodeGen (toBlockListEntryFirst graph)
  dflags <- getDynFlags
  let platform = targetPlatform dflags
      arch     = platformArch platform
      decls    = CmmProc info lab live (ListGraph (concat natBlockss))
                   : concat staticss
  case arch of
    -- 32-bit PowerPC: if a PIC base register was requested while
    -- translating the blocks, let initializePicBase_ppc process the
    -- declarations.
    ArchPPC ->
      getPicBaseMaybeNat >>=
        maybe (return decls)
              (\picBase ->
                 initializePicBase_ppc arch (platformOS platform) picBase decls)
    -- generating the function descriptor is handled in the pretty printer
    ArchPPC_64 ELF_V1 -> return decls
    -- generating the function prologue is handled in the pretty printer
    ArchPPC_64 ELF_V2 -> return decls
    _ -> panic "PPC.cmmTopCodeGen: unknown arch"

-- no translation, we just use CmmStatic
cmmTopCodeGen (CmmData sec dat) = return [CmmData sec dat]
102
-- | Translate one Cmm basic block.
--
-- Returns the native basic blocks it expands to (the first one carries
-- the entry label of the Cmm block) together with any data declarations
-- produced along the way.
basicBlockCodeGen
  :: Block CmmNode C C
  -> NatM ( [NatBasicBlock Instr]
          , [NatCmmDecl CmmStatics Instr] )
basicBlockCodeGen block = do
  let (_, middle, lastNode) = blockSplit block
  midCode  <- stmtsToInstrs (blockToList middle)
  lastCode <- stmtToInstrs lastNode
  -- Code generation may introduce new basic block boundaries, which are
  -- indicated by the NEWBLOCK instruction.  The instruction stream is
  -- split into basic blocks again here, and LDATAs are pulled out too.
  let (entryInstrs, laterBlocks, dataDecls) =
        foldrOL splitInstr ([], [], []) (midCode `appOL` lastCode)
  return (BasicBlock (entryLabel block) entryInstrs : laterBlocks, dataDecls)
  where
    -- Right fold step: 'pending' holds the instructions seen so far that
    -- belong to the block currently being assembled.
    splitInstr instr (pending, done, dataDecls) = case instr of
      NEWBLOCK lbl  -> ([], BasicBlock lbl pending : done, dataDecls)
      LDATA sec dat -> (pending, done, CmmData sec dat : dataDecls)
      _             -> (instr : pending, done, dataDecls)
129
-- | Translate a list of Cmm nodes and concatenate the resulting code
-- in order.
stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts = fmap concatOL (mapM stmtToInstrs stmts)
134
-- | Translate a single Cmm node by dispatching on its constructor.
--
-- Assignments and stores are routed to the float, 64-bit-on-32-bit or
-- plain integer code generators depending on the type involved.
stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  let is32Bit = target32Bit (targetPlatform dflags)
  case stmt of
    CmmComment s -> return (unitOL (COMMENT s))
    CmmTick {}   -> return nilOL
    CmmUnwind {} -> return nilOL

    CmmAssign reg src
      | isFloatType regTy         -> assignReg_FltCode fmt reg src
      | is32Bit && isWord64 regTy -> assignReg_I64Code reg src
      | otherwise                 -> assignReg_IntCode fmt reg src
      where regTy = cmmRegType dflags reg
            fmt   = cmmTypeFormat regTy

    CmmStore addr src
      | isFloatType srcTy         -> assignMem_FltCode fmt addr src
      | is32Bit && isWord64 srcTy -> assignMem_I64Code addr src
      | otherwise                 -> assignMem_IntCode fmt addr src
      where srcTy = cmmExprType dflags src
            fmt   = cmmTypeFormat srcTy

    CmmUnsafeForeignCall target result_regs args
      -> genCCall target result_regs args

    CmmBranch target -> genBranch target

    -- conditional jump to the true target, then an unconditional
    -- branch to the false target
    CmmCondBranch cond ifTrue ifFalse _ -> do
      condJump <- genCondJump ifTrue cond
      fallThru <- genBranch ifFalse
      return (condJump `appOL` fallThru)

    CmmSwitch scrut targets -> genSwitch dflags scrut targets

    CmmCall { cml_target = callee } -> genJump callee

    _ -> panic "stmtToInstrs: statement should have been cps'd away"
172
173
174 --------------------------------------------------------------------------------
-- | The instruction sequences produced by the instruction selectors.
--   An 'InstrBlock' is really a tree of instructions, which makes
--   appending fast; traversing it left to right yields the
--   instructions in the correct order.
--
type InstrBlock = OrdList Instr
181
182
-- | Registers passed up the tree.  If the stix code forces the register
--   to live in a pre-decided machine register, it comes out as @Fixed@;
--   otherwise, it comes out as @Any@, and the parent can decide which
--   register to put it in.
--
data Register
  = Fixed Format Reg InstrBlock
    -- ^ the result is in the given register once the code has run
  | Any Format (Reg -> InstrBlock)
    -- ^ the code is parameterised over the destination register
191
192
-- | Replace the 'Format' recorded in a 'Register', leaving the register
-- and its code untouched.
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep register newFormat = case register of
  Fixed _ reg code -> Fixed newFormat reg code
  Any _ mkCode     -> Any newFormat mkCode
196
197
-- | Grab the 'Reg' for a 'CmmReg'.
--
-- Local registers become virtual registers named after their unique;
-- global registers must map to a real machine register.
getRegisterReg :: Platform -> CmmReg -> Reg
getRegisterReg platform cmmReg = case cmmReg of
  CmmLocal (LocalReg uniq ty) ->
    RegVirtual (mkVirtualReg uniq (cmmTypeFormat ty))
  CmmGlobal mid ->
    -- By this stage, the only MagicIds remaining should be the ones
    -- which map to a real machine register on this platform; anything
    -- else is a compiler bug.
    maybe (pprPanic "getRegisterReg-memory" (ppr (CmmGlobal mid)))
          RegReal
          (globalRegMaybe platform mid)
211
-- | Convert a 'BlockId' to some 'CmmStatic' data: the block's assembler
-- temp label, or a word-sized zero when there is no block.
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags entry = CmmStaticLit $ case entry of
  Nothing      -> CmmInt 0 (wordWidth dflags)
  Just blockid -> CmmLabel (mkAsmTempLabel (getUnique blockid))
217
218
219
220 -- -----------------------------------------------------------------------------
221 -- General things for putting together code sequences
222
-- Expand CmmRegOff into an explicit addition of the register and the
-- offset, at the register's width.  ToDo: should we do it this way
-- around, or convert CmmExprs into CmmRegOff?
--
-- Panics on any expression other than CmmRegOff.
mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
mangleIndexTree dflags expr = case expr of
  CmmRegOff reg off ->
    let w = typeWidth (cmmRegType dflags reg)
    in  CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) w)]
  _ -> panic "PPC.CodeGen.mangleIndexTree: no match"
232
233 -- -----------------------------------------------------------------------------
234 -- Code gen for 64-bit arithmetic on 32-bit platforms
235
236 {-
237 Simple support for generating 64-bit code (ie, 64 bit values and 64
238 bit assignments) on 32-bit platforms. Unlike the main code generator
239 we merely shoot for generating working code as simply as possible, and
240 pay little attention to code quality. Specifically, there is no
241 attempt to deal cleverly with the fixed-vs-floating register
242 distinction; all values are generated into (pairs of) floating
243 registers, even if this would mean some redundant reg-reg moves as a
244 result. Only one of the VRegUniques is returned, since it will be
245 of the VRegUniqueLo form, and the upper-half VReg can be determined
246 by applying getHiVRegFromLo to it.
247 -}
248
-- | Result of selecting code for a 64-bit expression on a 32-bit
-- platform (a.k.a. "Register64").
data ChildCode64
  = ChildCode64
      InstrBlock
        -- ^ code
      Reg
        -- ^ the lower 32-bit temporary which contains the result; use
        -- getHiVRegFromLo to find the other VRegUnique.  Rules of this
        -- simplified insn selection game are therefore that the
        -- returned Reg may be modified
257
258
-- | Compute an expression into a register, but we don't mind which one
-- it is.  A fresh register is allocated only when the expression does
-- not already dictate one.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = getRegister expr >>= \register -> case register of
  Fixed _ reg code -> return (reg, code)
  Any fmt mkCode   -> do
    tmp <- getNewRegNat fmt
    return (tmp, mkCode tmp)
270
-- | Addressing modes for the two 32-bit halves of a 64-bit memory
-- operand: the first at the given address, the second 4 bytes after
-- it, plus the code that sets them up.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
  Amode hiAddr addrCode <- getAmode D addrTree
  maybe viaBaseReg
        (\loAddr -> return (hiAddr, loAddr, addrCode))
        (addrOffset hiAddr 4)
  where
    -- The amode could not be offset by 4: compute the address into a
    -- register and use explicit 0/4 displacements instead.
    viaBaseReg = do
      (base, baseCode) <- getSomeReg addrTree
      return ( AddrRegImm base (ImmInt 0)
             , AddrRegImm base (ImmInt 4)
             , baseCode )
280
281
-- | Store a 64-bit value to memory on a 32-bit platform as two 32-bit
-- stores.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  (hiAddr, loAddr, addrCode) <- getI64Amodes addrTree
  ChildCode64 valueCode loReg <- iselExpr64 valueTree
  let hiReg  = getHiVRegFromLo loReg
      -- Big-endian store: high word at the lower address.
      stores = toOL [ ST II32 loReg loAddr
                    , ST II32 hiReg hiAddr ]
  return (valueCode `appOL` addrCode `appOL` stores)
293
294
-- | Assign a 64-bit value to a local register on a 32-bit platform by
-- moving both halves of the register pair.  Only local registers are
-- valid destinations; anything else panics.
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
  ChildCode64 valueCode srcLo <- iselExpr64 valueTree
  let dstLo = RegVirtual (mkVirtualReg u_dst II32)
      moves = toOL [ MR dstLo srcLo
                   , MR (getHiVRegFromLo dstLo) (getHiVRegFromLo srcLo) ]
  return (valueCode `appOL` moves)
assignReg_I64Code _ _
  = panic "assignReg_I64Code(powerpc): invalid lvalue"
310
311
-- | Instruction selection for 64-bit expressions on 32-bit platforms.
--
-- The result lives in a pair of 32-bit registers; only the low one is
-- returned, the high one is found with getHiVRegFromLo.  Expression
-- forms not listed here cause a panic.
iselExpr64 :: CmmExpr -> NatM ChildCode64

-- 64-bit load: two 32-bit loads.
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
  (hiAddr, loAddr, addrCode) <- getI64Amodes addrTree
  (loReg, hiReg) <- getNewRegPairNat II32
  let loads = toOL [ LD II32 loReg loAddr
                   , LD II32 hiReg hiAddr ]
  return (ChildCode64 (addrCode `appOL` loads) loReg)

-- 64-bit local register: no code, just name the low half.
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
  = return (ChildCode64 nilOL (RegVirtual (mkVirtualReg vu II32)))

-- Integer literal: build each 32-bit half from two 16-bit pieces.
iselExpr64 (CmmLit (CmmInt i _)) = do
  (loReg, hiReg) <- getNewRegPairNat II32
  let -- the 16 bits of the literal starting at bit position n
      halfword n = fromIntegral (fromIntegral (i `shiftR` n) :: Word16)
      code = toOL
        [ LIS loReg (ImmInt (halfword 16))
        , OR  loReg loReg (RIImm (ImmInt (halfword 0)))
        , LIS hiReg (ImmInt (halfword 48))
        , OR  hiReg hiReg (RIImm (ImmInt (halfword 32)))
        ]
  return (ChildCode64 code loReg)

-- Addition: ADDC on the low halves, ADDE on the high halves.
iselExpr64 (CmmMachOp (MO_Add _) [e1, e2]) = do
  ChildCode64 lhsCode lhsLo <- iselExpr64 e1
  ChildCode64 rhsCode rhsLo <- iselExpr64 e2
  (sumLo, sumHi) <- getNewRegPairNat II32
  let addPair = toOL
        [ ADDC sumLo lhsLo rhsLo
        , ADDE sumHi (getHiVRegFromLo lhsLo) (getHiVRegFromLo rhsLo) ]
  return (ChildCode64 (lhsCode `appOL` rhsCode `appOL` addPair) sumLo)

-- Subtraction: SUBFC/SUBFE, with the second operand of the Cmm
-- subtraction passed first to the "subtract from" instructions.
iselExpr64 (CmmMachOp (MO_Sub _) [e1, e2]) = do
  ChildCode64 lhsCode lhsLo <- iselExpr64 e1
  ChildCode64 rhsCode rhsLo <- iselExpr64 e2
  (diffLo, diffHi) <- getNewRegPairNat II32
  let subPair = toOL
        [ SUBFC diffLo rhsLo lhsLo
        , SUBFE diffHi (getHiVRegFromLo rhsLo) (getHiVRegFromLo lhsLo) ]
  return (ChildCode64 (lhsCode `appOL` rhsCode `appOL` subPair) diffLo)

-- Unsigned 32 -> 64 widening: copy the value, zero the high half.
iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
  (srcReg, srcCode) <- getSomeReg expr
  (loReg, hiReg) <- getNewRegPairNat II32
  let widen = toOL [ MR loReg srcReg
                   , LI hiReg (ImmInt 0) ]
  return (ChildCode64 (srcCode `appOL` widen) loReg)

iselExpr64 expr
  = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
375
376
377
-- | Select code that computes an expression into a register, fetching
-- the 'DynFlags' and delegating to getRegister'.
getRegister :: CmmExpr -> NatM Register
getRegister e = getDynFlags >>= \dflags -> getRegister' dflags e
381
-- | Worker for 'getRegister'; the equations are tried in order.
getRegister' :: DynFlags -> CmmExpr -> NatM Register

-- The PIC base: obtained from the monad on 32-bit targets, the fixed
-- register 'toc' otherwise.
getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
  | is32Bit   = do
      reg <- getPicBaseNat wordFormat
      return (Fixed wordFormat reg nilOL)
  | otherwise = return (Fixed II64 toc nilOL)
  where
    is32Bit    = target32Bit (targetPlatform dflags)
    wordFormat = archWordFormat is32Bit

-- Any other register is already where it needs to be: no code.
getRegister' dflags (CmmReg reg)
  = return (Fixed fmt machineReg nilOL)
  where
    fmt        = cmmTypeFormat (cmmRegType dflags reg)
    machineReg = getRegisterReg (targetPlatform dflags) reg

-- Register plus offset: rewrite as an explicit addition first.
getRegister' dflags tree@(CmmRegOff _ _)
  = getRegister' dflags (mangleIndexTree dflags tree)
397
-- For 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

-- High word of a 64-bit value (unsigned narrowing of x >> 32).
getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]])
  | target32Bit (targetPlatform dflags) =
      iselExpr64 x >>= \(ChildCode64 code rlo) ->
        return (Fixed II32 (getHiVRegFromLo rlo) code)

-- High word of a 64-bit value (signed narrowing of x >> 32).
getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]])
  | target32Bit (targetPlatform dflags) =
      iselExpr64 x >>= \(ChildCode64 code rlo) ->
        return (Fixed II32 (getHiVRegFromLo rlo) code)

-- Low word of a 64-bit value (unsigned narrowing).
getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
  | target32Bit (targetPlatform dflags) =
      iselExpr64 x >>= \(ChildCode64 code rlo) ->
        return (Fixed II32 rlo code)

-- Low word of a 64-bit value (signed narrowing).
getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
  | target32Bit (targetPlatform dflags) =
      iselExpr64 x >>= \(ChildCode64 code rlo) ->
        return (Fixed II32 rlo code)
422
-- Memory loads.  Anything that is not a 64-bit word uses a D-form
-- address; 64-bit words are only handled here on 64-bit targets and
-- request a DS-form address.  (A 64-bit load on a 32-bit target
-- matches neither guard and falls through to later equations.)
getRegister' dflags (CmmLoad mem pk)
  | not (isWord64 pk) = do
      Amode addr addr_code <- getAmode D mem
      let platform = targetPlatform dflags
          load dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
                     addr_code `snocOL` LD format dst addr
      return (Any format load)
  | not (target32Bit (targetPlatform dflags)) = do
      Amode addr addr_code <- getAmode DS mem
      return (Any II64 (\dst -> addr_code `snocOL` LD II64 dst addr))
  where
    format = cmmTypeFormat pk
436
-- Catch simple cases of a zero- or sign-extended load: the conversion
-- is folded into the load itself (LD for unsigned, LA for signed).

getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
  Amode addr addr_code <- getAmode D mem
  let load dst = addr_code `snocOL` LD II8 dst addr
  return (Any II32 load)

getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
  Amode addr addr_code <- getAmode D mem
  let load dst = addr_code `snocOL` LD II8 dst addr
  return (Any II64 load)

-- Note: there is no Load Byte Arithmetic instruction, so no signed
-- case for W8 here.

getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
  Amode addr addr_code <- getAmode D mem
  let load dst = addr_code `snocOL` LD II16 dst addr
  return (Any II32 load)

getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
  Amode addr addr_code <- getAmode D mem
  let load dst = addr_code `snocOL` LA II16 dst addr
  return (Any II32 load)

getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
  Amode addr addr_code <- getAmode D mem
  let load dst = addr_code `snocOL` LD II16 dst addr
  return (Any II64 load)

getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
  Amode addr addr_code <- getAmode D mem
  let load dst = addr_code `snocOL` LA II16 dst addr
  return (Any II64 load)

getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
  Amode addr addr_code <- getAmode D mem
  let load dst = addr_code `snocOL` LD II32 dst addr
  return (Any II64 load)

getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
  Amode addr addr_code <- getAmode D mem
  let load dst = addr_code `snocOL` LA II32 dst addr
  return (Any II64 load)
471
-- Unary MachOps.  The alternatives are order-sensitive: a guarded
-- alternative whose guards all fail falls through to the next one.
getRegister' dflags (CmmMachOp mop [x])
  = case mop of
      MO_Not rep -> intUnary rep NOT

      MO_F_Neg w -> floatUnary w FNEG
      MO_S_Neg w -> intUnary w NEG

      MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
      MO_FF_Conv W32 W64 -> retagAs FF64

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      -- signed integer conversions
      MO_SS_Conv from to
        | from == to -> retagAs (intFormat to)

      -- narrowing is a nop: we treat the high bits as undefined
      MO_SS_Conv W64 to
        | is32Bit   -> panic "PPC.CodeGen.getRegister no 64 bit int register"
        | otherwise -> retagAs (intFormat to)
      MO_SS_Conv W32 to
        | is32Bit   -> retagAs (intFormat to)
        | otherwise -> case to of
            W64 -> intUnary to (EXTS II32)
            W16 -> retagAs II16
            W8  -> retagAs II8
            _   -> panic "PPC.CodeGen.getRegister: no match"
      MO_SS_Conv W16 W8 -> retagAs II8
      MO_SS_Conv W8 to  -> intUnary to (EXTS II8)
      MO_SS_Conv W16 to -> intUnary to (EXTS II16)

      -- unsigned integer conversions
      MO_UU_Conv from to
        | from == to -> retagAs (intFormat to)
      -- narrowing is a nop: we treat the high bits as undefined
      MO_UU_Conv W64 to
        | is32Bit   -> panic "PPC.CodeGen.getRegister no 64 bit target"
        | otherwise -> retagAs (intFormat to)
      MO_UU_Conv W32 to
        | is32Bit   -> retagAs (intFormat to)
        | otherwise -> case to of
            W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
            W16 -> retagAs II16
            W8  -> retagAs II8
            _   -> panic "PPC.CodeGen.getRegister: no match"
      MO_UU_Conv W16 W8 -> retagAs II8
      -- widening from W8/W16 masks off everything above the source width
      MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
      MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
      _ -> panic "PPC.CodeGen.getRegister: no match"

  where
    is32Bit = target32Bit (targetPlatform dflags)

    -- one-operand instruction on x, at an integer / float format
    intUnary width instr   = trivialUCode (intFormat width) instr x
    floatUnary width instr = trivialUCode (floatFormat width) instr x

    -- conversion that needs no code: compute x, then just relabel the
    -- resulting register with the new format
    retagAs newFormat = do
      operand <- getRegister' dflags x
      return (swizzleRegisterRep operand newFormat)
530
-- Dyadic MachOps.
--
-- Operands narrower than the machine word are widened before compares,
-- divisions, remainders and right shifts: sign-extended (extendSExpr)
-- for the signed operations and zero-extended (extendUExpr) for the
-- unsigned ones, so that the full-width instruction sees the value the
-- narrow operand denotes.
getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
  = case mop of
      MO_F_Eq _ -> condFltReg EQQ x y
      MO_F_Ne _ -> condFltReg NE  x y
      MO_F_Gt _ -> condFltReg GTT x y
      MO_F_Ge _ -> condFltReg GE  x y
      MO_F_Lt _ -> condFltReg LTT x y
      MO_F_Le _ -> condFltReg LE  x y

      MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
                                  (extendUExpr dflags rep y)
      MO_Ne rep -> condIntReg NE  (extendUExpr dflags rep x)
                                  (extendUExpr dflags rep y)

      MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
                                    (extendSExpr dflags rep y)
      MO_S_Ge rep -> condIntReg GE  (extendSExpr dflags rep x)
                                    (extendSExpr dflags rep y)
      MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
                                    (extendSExpr dflags rep y)
      MO_S_Le rep -> condIntReg LE  (extendSExpr dflags rep x)
                                    (extendSExpr dflags rep y)

      MO_U_Gt rep -> condIntReg GU  (extendUExpr dflags rep x)
                                    (extendUExpr dflags rep y)
      MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
                                    (extendUExpr dflags rep y)
      MO_U_Lt rep -> condIntReg LU  (extendUExpr dflags rep x)
                                    (extendUExpr dflags rep y)
      MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
                                    (extendUExpr dflags rep y)

      MO_F_Add w  -> triv_float w FADD
      MO_F_Sub w  -> triv_float w FSUB
      MO_F_Mul w  -> triv_float w FMUL
      MO_F_Quot w -> triv_float w FDIV

      -- optimize addition with 32-bit immediate
      -- (needed for PIC)
      MO_Add W32 ->
        case y of
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
          CmmLit lit
            -> do
                (src, srcCode) <- getSomeReg x
                let imm = litToImm lit
                    -- add the high-adjusted half first, then the low half
                    code dst = srcCode `appOL` toOL [
                                    ADDIS dst src (HA imm),
                                    ADD dst dst (RIImm (LO imm))
                                ]
                return (Any II32 code)
          _ -> trivialCode W32 True ADD x y

      MO_Add rep -> trivialCode rep True ADD x y
      MO_Sub rep ->
        case y of    -- subfi ('subtract from' with immediate) doesn't exist
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
          _ -> trivialCodeNoImm' (intFormat rep) SUBF y x

      MO_Mul rep
       | arch32    -> trivialCode rep True MULLW x y
       | otherwise -> trivialCode rep True MULLD x y

      MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
      MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y

      MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
      MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"

      MO_S_Quot rep
       | arch32     -> trivialCodeNoImm' (intFormat rep) DIVW
                (extendSExpr dflags rep x) (extendSExpr dflags rep y)
       | otherwise  -> trivialCodeNoImm' (intFormat rep) DIVD
                (extendSExpr dflags rep x) (extendSExpr dflags rep y)
      MO_U_Quot rep
       | arch32     -> trivialCodeNoImm' (intFormat rep) DIVWU
                (extendUExpr dflags rep x) (extendUExpr dflags rep y)
       | otherwise  -> trivialCodeNoImm' (intFormat rep) DIVDU
                (extendUExpr dflags rep x) (extendUExpr dflags rep y)

      MO_S_Rem rep
       | arch32    -> remainderCode rep DIVW (extendSExpr dflags rep x)
                                             (extendSExpr dflags rep y)
       | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
                                             (extendSExpr dflags rep y)
      -- Unsigned remainder must zero-extend narrow operands, exactly as
      -- MO_U_Quot does: sign-extending them would feed the unsigned
      -- divide a different value whenever the top bit of the narrow
      -- operand is set.
      MO_U_Rem rep
       | arch32    -> remainderCode rep DIVWU (extendUExpr dflags rep x)
                                              (extendUExpr dflags rep y)
       | otherwise -> remainderCode rep DIVDU (extendUExpr dflags rep x)
                                              (extendUExpr dflags rep y)

      MO_And rep   -> trivialCode rep False AND x y
      MO_Or rep    -> trivialCode rep False OR x y
      MO_Xor rep   -> trivialCode rep False XOR x y

      MO_Shl rep   -> shiftCode rep SL x y
      MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
      MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
      _         -> panic "PPC.CodeGen.getRegister: no match"

  where
    -- three-register floating point instruction on x and y
    triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
    triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y

    arch32 = target32Bit $ targetPlatform dflags
638
-- Integer literal that fits an immediate: a single LI.
getRegister' _ (CmmLit (CmmInt i rep))
  | Just imm <- makeImmediate rep True i
  = return (Any (intFormat rep) (\dst -> unitOL (LI dst imm)))

-- Floating point literal: emit it as read-only data under a fresh
-- label and load it from there.
getRegister' _ (CmmLit (CmmFloat f frep)) = do
  lbl <- getNewLabelNat
  dflags <- getDynFlags
  dynRef <- cmmMakeDynamicReference dflags DataReference lbl
  Amode addr addr_code <- getAmode D dynRef
  let format   = floatFormat frep
      constant = LDATA (Section ReadOnlyData lbl)
                       (Statics lbl [CmmStaticLit (CmmFloat f frep)])
      code dst = constant `consOL` (addr_code `snocOL` LD format dst addr)
  return (Any format code)

-- Any other literal: built from its high/low halves on 32-bit targets,
-- otherwise emitted as read-only data and loaded like the float case.
getRegister' dflags (CmmLit lit)
  | target32Bit (targetPlatform dflags)
  = let imm = litToImm lit
        code dst = toOL [ LIS dst (HA imm)
                        , ADD dst dst (RIImm (LO imm)) ]
    in  return (Any format code)
  | otherwise
  = do lbl <- getNewLabelNat
       dynRef <- cmmMakeDynamicReference dflags DataReference lbl
       Amode addr addr_code <- getAmode D dynRef
       let constant = LDATA (Section ReadOnlyData lbl)
                            (Statics lbl [CmmStaticLit lit])
           code dst = constant `consOL` (addr_code `snocOL` LD format dst addr)
       return (Any format code)
  where
    format = cmmTypeFormat (cmmLitType dflags lit)

getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
680
-- extend?Expr: wrap an integer expression of width rep in a conversion
-- to the machine word width (W32 on 32-bit targets, W64 otherwise).
-- Expressions already at that width are returned unchanged.

-- | Sign-extending variant.
extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
extendSExpr dflags rep x
  | rep == nativeWidth = x
  | otherwise          = CmmMachOp (MO_SS_Conv rep nativeWidth) [x]
  where
    nativeWidth
      | target32Bit (targetPlatform dflags) = W32
      | otherwise                           = W64
695
-- | Wrap an integer expression of width rep in an unsigned conversion
-- to the machine word width (W32 on 32-bit targets, W64 otherwise).
-- Expressions already at that width are returned unchanged.
extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
extendUExpr dflags rep x
  | rep == nativeWidth = x
  | otherwise          = CmmMachOp (MO_UU_Conv rep nativeWidth) [x]
  where
    nativeWidth
      | target32Bit (targetPlatform dflags) = W32
      | otherwise                           = W64
706
707 -- -----------------------------------------------------------------------------
708 -- The 'Amode' type: Memory addressing modes passed up the tree.
709
-- | An addressing mode together with the code that must run before the
-- address can be used.
data Amode = Amode AddrMode InstrBlock
712
713 {-
714 Now, given a tree (the argument to an CmmLoad) that references memory,
715 produce a suitable addressing mode.
716
717 A Rule of the Game (tm) for Amodes: use of the addr bit must
718 immediately follow use of the code part, since the code part puts
719 values in registers which the addr then refers to. So you can't put
720 anything in between, lest it overwrite some of those registers. If
721 you need to do some other computation between the code part and use of
722 the addr bit, first store the effective address from the amode in a
723 temporary, then do the other computation, and then use the temporary:
724
725 code
726 LEA amode, tmp
727 ... other computation ...
728 ... (tmp) ...
729 -}
730
-- | The instruction form an address is going to be used with.  In this
-- module 'DS' is requested for 64-bit (II64) accesses, and for 'DS'
-- getAmode only keeps a W64 immediate displacement when it is a
-- multiple of 4; 'D' places no such restriction.
data InstrForm
  = D
  | DS
732
-- | Produce an addressing mode (and the code that sets it up) for an
-- address expression.  The equations are tried in order, from the most
-- specific shapes down to "compute it into a register, offset 0".
getAmode :: InstrForm -> CmmExpr -> NatM Amode

-- Register plus offset: rewrite as an explicit addition first.
getAmode inf tree@(CmmRegOff _ _) = do
  dflags <- getDynFlags
  getAmode inf (mangleIndexTree dflags tree)

-- 32-bit base minus / plus a literal that fits an immediate.
getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W32 True (-i) = do
      (base, code) <- getSomeReg x
      return (Amode (AddrRegImm base off) code)

getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W32 True i = do
      (base, code) <- getSomeReg x
      return (Amode (AddrRegImm base off) code)

-- 64-bit base minus / plus an immediate, D form.
getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True (-i) = do
      (base, code) <- getSomeReg x
      return (Amode (AddrRegImm base off) code)

getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True i = do
      (base, code) <- getSomeReg x
      return (Amode (AddrRegImm base off) code)

-- 64-bit base minus / plus an immediate, DS form: the displacement is
-- only used directly when the literal is a multiple of 4; otherwise the
-- sum is computed into a fresh register and a zero displacement used.
getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True (-i) = do
      (base, code) <- getSomeReg x
      if i `mod` 4 == 0
        then return (Amode (AddrRegImm base off) code)
        else do
          tmp <- getNewRegNat II64
          return (Amode (AddrRegImm tmp (ImmInt 0))
                        (code `snocOL` ADD tmp base (RIImm off)))

getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True i = do
      (base, code) <- getSomeReg x
      if i `mod` 4 == 0
        then return (Amode (AddrRegImm base off) code)
        else do
          tmp <- getNewRegNat II64
          return (Amode (AddrRegImm tmp (ImmInt 0))
                        (code `snocOL` ADD tmp base (RIImm off)))

-- optimize addition with 32-bit immediate
-- (needed for PIC)
getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit]) = do
  tmp <- getNewRegNat II32
  (src, srcCode) <- getSomeReg x
  let imm = litToImm lit
  -- high half added into tmp, low half left as the displacement
  return (Amode (AddrRegImm tmp (LO imm))
                (srcCode `snocOL` ADDIS tmp src (HA imm)))

-- A bare literal address.
getAmode _ (CmmLit lit) = do
  dflags <- getDynFlags
  let imm = litToImm lit
  case platformArch (targetPlatform dflags) of
    ArchPPC -> do
      tmp <- getNewRegNat II32
      return (Amode (AddrRegImm tmp (LO imm))
                    (unitOL (LIS tmp (HA imm))))
    _ -> do -- TODO: Load from TOC,
            -- see getRegister' _ (CmmLit lit)
      tmp <- getNewRegNat II64
      let buildUpper = toOL
            [ LIS tmp (HIGHESTA imm)
            , OR tmp tmp (RIImm (HIGHERA imm))
            , SL II64 tmp tmp (RIImm (ImmInt 32))
            , ORIS tmp tmp (HA imm)
            ]
      return (Amode (AddrRegImm tmp (LO imm)) buildUpper)

-- Sum of two arbitrary expressions: register + register.
getAmode _ (CmmMachOp (MO_Add W32) [x, y]) = do
  (regX, codeX) <- getSomeReg x
  (regY, codeY) <- getSomeReg y
  return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))

getAmode _ (CmmMachOp (MO_Add W64) [x, y]) = do
  (regX, codeX) <- getSomeReg x
  (regY, codeY) <- getSomeReg y
  return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))

-- Anything else: compute the address into a register, displacement 0.
getAmode _ other = do
  (reg, code) <- getSomeReg other
  return (Amode (AddrRegImm reg (ImmInt 0)) code)
839
840
-- | The 'CondCode' type: condition codes passed up the tree.  In this
-- module the 'Bool' is set to True by the floating point comparison
-- code and False by the integer comparison code.
data CondCode = CondCode Bool Cond InstrBlock
844
-- | Set up a condition code for a conditional branch.
--
-- Almost the same as everywhere else - but small integers need to be
-- extended to 32 bit or 64 bit first.  Signed comparisons sign-extend
-- their operands; equality tests and unsigned comparisons zero-extend
-- them, matching what getRegister' does for the same operators.
--
-- Panics on MachOps that are not comparisons and on expressions that
-- are not a dyadic MachOp application.
getCondCode :: CmmExpr -> NatM CondCode

getCondCode (CmmMachOp mop [x, y])
  = do
    dflags <- getDynFlags
    let -- integer compare on sign-extended operands
        signedCmp cond rep =
          condIntCode cond (extendSExpr dflags rep x)
                           (extendSExpr dflags rep y)
        -- integer compare on zero-extended operands
        unsignedCmp cond rep =
          condIntCode cond (extendUExpr dflags rep x)
                           (extendUExpr dflags rep y)
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      MO_Eq rep -> unsignedCmp EQQ rep
      MO_Ne rep -> unsignedCmp NE  rep

      MO_S_Gt rep -> signedCmp GTT rep
      MO_S_Ge rep -> signedCmp GE  rep
      MO_S_Lt rep -> signedCmp LTT rep
      MO_S_Le rep -> signedCmp LE  rep

      -- Unsigned conditions are compared with the unsigned compare
      -- instruction, so narrow operands are zero-extended, not
      -- sign-extended.
      MO_U_Gt rep -> unsignedCmp GU  rep
      MO_U_Ge rep -> unsignedCmp GEU rep
      MO_U_Lt rep -> unsignedCmp LU  rep
      MO_U_Le rep -> unsignedCmp LEU rep

      _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)

getCondCode _ = panic "getCondCode(2)(powerpc)"
896
897
898
-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- ###FIXME: I16 and I8!
-- TODO: Is this still an issue? All arguments are extend?Expr'd.

-- Second operand is a literal that fits an immediate (signed or
-- unsigned according to the condition): compare against the immediate.
condIntCode cond x (CmmLit (CmmInt y rep))
  | Just imm <- makeImmediate rep (not $ condUnsigned cond) y
  = do
      (lhs, lhsCode) <- getSomeReg x
      dflags <- getDynFlags
      let wordFormat = archWordFormat $ target32Bit $ targetPlatform dflags
          compare'   = if condUnsigned cond then CMPL else CMP
      return (CondCode False cond
                (lhsCode `snocOL` compare' wordFormat lhs (RIImm imm)))

-- General case: both operands in registers.
condIntCode cond x y = do
  (lhs, lhsCode) <- getSomeReg x
  (rhs, rhsCode) <- getSomeReg y
  dflags <- getDynFlags
  let wordFormat = archWordFormat $ target32Bit $ targetPlatform dflags
      compare'   = if condUnsigned cond then CMPL else CMP
  return (CondCode False cond
            (lhsCode `appOL` rhsCode `snocOL` compare' wordFormat lhs (RIReg rhs)))

-- Floating point compare.
condFltCode cond x y = do
  (lhs, lhsCode) <- getSomeReg x
  (rhs, rhsCode) <- getSomeReg y
  let ltbit = 0 ; eqbit = 2 ; gtbit = 1
      compared = lhsCode `appOL` rhsCode `snocOL` FCMP lhs rhs
      -- twiddle CR to handle unordered case
      fixedUp = case cond of
        GE -> compared `snocOL` CRNOR ltbit eqbit gtbit
        LE -> compared `snocOL` CRNOR gtbit eqbit ltbit
        _  -> compared
  return (CondCode True cond fixedUp)
937
938
939
940 -- -----------------------------------------------------------------------------
941 -- Generating assignments
942
943 -- Assignments are really at the heart of the whole code generation
944 -- business. Almost all top-level nodes of any real importance are
945 -- assignments, which correspond to loads, stores, or register
946 -- transfers. If we're really lucky, some of the register transfers
947 -- will go away, because we can use the destination register to
948 -- complete the code generation for the right hand side. This only
949 -- fails when the right hand side is forced into a fixed register
950 -- (e.g. the result of a call).
951
assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock

-- Store the value of @src@ at address @addr@ using format @pk@.
-- The source is evaluated first, then the address; II64 stores ask
-- 'getAmode' for a DS-form address, everything else for a D-form one.
assignMem_IntCode pk addr src = do
    (valReg, valCode)     <- getSomeReg src
    Amode target addrCode <- getAmode addrForm addr
    return (valCode `appOL` addrCode `snocOL` ST pk valReg target)
  where
    addrForm = case pk of
        II64 -> DS
        _    -> D

-- The destination is a register, but the source could be anything.
-- The format argument is ignored.
assignReg_IntCode _ reg src = do
    dflags <- getDynFlags
    let target = getRegisterReg (targetPlatform dflags) reg
    rhs <- getRegister src
    return (moveInto target rhs)
  where
    -- 'Any' code can compute straight into the destination;
    -- a 'Fixed' result needs a register move afterwards.
    moveInto target (Any _ mkCode)           = mkCode target
    moveInto target (Fixed _ srcReg srcCode) = srcCode `snocOL` MR target srcReg



-- Easy, isn't it? Floating point assignments share the integer code paths.
assignMem_FltCode pk addr src = assignMem_IntCode pk addr src
assignReg_FltCode pk reg src  = assignReg_IntCode pk reg src
980
981
982
-- | Generate an unconditional jump to an arbitrary target expression.
-- A literal label becomes a direct 'JMP'; anything else is dispatched
-- to 'genJump'' with the calling convention of the target platform.
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock

genJump (CmmLit (CmmLabel lbl))
  = return (unitOL $ JMP lbl)

genJump tree = do
    dflags <- getDynFlags
    let platform = targetPlatform dflags
    case (platformOS platform, platformArch platform) of
        (OSLinux,  ArchPPC)           -> genJump' tree GCPLinux
        (OSLinux,  ArchPPC_64 ELF_V1) -> genJump' tree (GCPLinux64ELF 1)
        (OSLinux,  ArchPPC_64 ELF_V2) -> genJump' tree (GCPLinux64ELF 2)
        (OSLinux,  _)                 -> panic "PPC.CodeGen.genJump: Unknown Linux"
        (OSDarwin, _)                 -> genJump' tree GCPDarwin
        _ -> panic "PPC.CodeGen.genJump: not defined for this os"
1000
1001
-- | Indirect jump through the count register, specialised by calling
-- convention.
genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock

-- 64-bit ELF v1: the target register is used as a base address from
-- which three doublewords are loaded (offset 0 into r11 and on to CTR,
-- offset 8 into toc, offset 16 into r11).
-- NOTE(review): presumably this is the ELF v1 function descriptor
-- (entry point, TOC pointer, environment) -- confirm against the ABI.
genJump' tree (GCPLinux64ELF 1) = do
    (target, code) <- getSomeReg tree
    return $ code `appOL` toOL
        [ LD II64 r11 (AddrRegImm target (ImmInt 0))
        , LD II64 toc (AddrRegImm target (ImmInt 8))
        , MTCTR r11
        , LD II64 r11 (AddrRegImm target (ImmInt 16))
        , BCTR [] Nothing
        ]

-- 64-bit ELF v2: the target address is copied to r12 before the jump.
genJump' tree (GCPLinux64ELF 2) = do
    (target, code) <- getSomeReg tree
    return $ code `appOL` toOL
        [ MR r12 target
        , MTCTR r12
        , BCTR [] Nothing
        ]

-- Everything else: jump straight through CTR.
genJump' tree _ = do
    (target, code) <- getSomeReg tree
    return $ code `appOL` toOL
        [ MTCTR target
        , BCTR [] Nothing
        ]
1026
1027 -- -----------------------------------------------------------------------------
1028 -- Unconditional branches
-- | Unconditional branch to a local block: wraps the instructions
-- produced by 'mkJumpInstr' in an 'InstrBlock'.
genBranch :: BlockId -> NatM InstrBlock
genBranch target = return (toOL (mkJumpInstr target))
1031
1032
1033 -- -----------------------------------------------------------------------------
1034 -- Conditional jumps
1035
1036 {-
1037 Conditional jumps are always to local labels, so we can use branch
1038 instructions. We peek at the arguments to decide what kind of
1039 comparison to do.
1040 -}
1041
1042
-- | Conditional branch: emit the code that evaluates the condition,
-- followed by a 'BCC' on that condition to the given block.
genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump id bool =
    getCondCode bool >>= \(CondCode _ cond code) ->
        return (code `snocOL` BCC cond id)
1051
1052
1053
1054 -- -----------------------------------------------------------------------------
1055 -- Generating C calls
1056
1057 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1058 -- @get_arg@, which moves the arguments to the correct registers/stack
1059 -- locations. Apart from that, the code is easy.
1060 --
1061 -- (If applicable) Do not fill the delay slots here; you will confuse the
1062 -- register allocator.
1063
-- | Entry point for foreign and primitive calls: picks the calling
-- convention from the target OS and architecture and hands over to
-- 'genCCall''. Panics for unsupported OS/architecture combinations.
genCCall :: ForeignTarget      -- function to call
         -> [CmmFormal]        -- where to put the result
         -> [CmmActual]        -- arguments (of mixed type)
         -> NatM InstrBlock
genCCall target dest_regs argsAndHints = do
    dflags <- getDynFlags
    let platform     = targetPlatform dflags
        callWith gcp = genCCall' dflags gcp target dest_regs argsAndHints
    case (platformOS platform, platformArch platform) of
        (OSLinux,  ArchPPC)           -> callWith GCPLinux
        (OSLinux,  ArchPPC_64 ELF_V1) -> callWith (GCPLinux64ELF 1)
        (OSLinux,  ArchPPC_64 ELF_V2) -> callWith (GCPLinux64ELF 2)
        (OSLinux,  _)                 -> panic "PPC.CodeGen.genCCall: Unknown Linux"
        (OSDarwin, _)                 -> callWith GCPDarwin
        _ -> panic "PPC.CodeGen.genCCall: not defined for this os"
1082
-- | The calling conventions the PowerPC code generator distinguishes.
data GenCCallPlatform
    = GCPLinux              -- ^ 32-bit PowerPC Linux
    | GCPDarwin             -- ^ Darwin
    | GCPLinux64ELF Int     -- ^ 64-bit PowerPC Linux; the 'Int' is the ELF
                            --   ABI version (1 for ELF_V1, 2 for ELF_V2)
1084
-- | Generate the code for a call under a given calling convention.
--
-- A few primitive targets are expanded inline (or to nothing); all other
-- primitive targets are turned into calls to out-of-line C functions.
-- For a real call the generated code, in order: makes room on the stack
-- if the parameter area is too large, moves the arguments into place,
-- saves the TOC pointer (64-bit ELF only), performs the call, restores
-- the TOC pointer where needed, releases the stack space and finally
-- moves the result into the destination register.
genCCall'
    :: DynFlags
    -> GenCCallPlatform
    -> ForeignTarget            -- function to call
    -> [CmmFormal]              -- where to put the result
    -> [CmmActual]              -- arguments (of mixed type)
    -> NatM InstrBlock

{-
    The PowerPC calling convention for Darwin/Mac OS X
    is described in Apple's document
    "Inside Mac OS X - Mach-O Runtime Architecture".

    PowerPC Linux uses the System V Release 4 Calling Convention
    for PowerPC. It is described in the
    "System V Application Binary Interface PowerPC Processor Supplement".

    Both conventions are similar:
    Parameters may be passed in general-purpose registers starting at r3, in
    floating point registers starting at f1, or on the stack.

    But there are substantial differences:
    * The number of registers used for parameter passing and the exact set of
      nonvolatile registers differs (see MachRegs.hs).
    * On Darwin, stack space is always reserved for parameters, even if they are
      passed in registers. The called routine may choose to save parameters from
      registers to the corresponding space on the stack.
    * On Darwin, a corresponding amount of GPRs is skipped when a floating point
      parameter is passed in an FPR.
    * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
      starting with an odd-numbered GPR. It may skip a GPR to achieve this.
      Darwin just treats an I64 like two separate II32s (high word first).
    * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
      4-byte aligned like everything else on Darwin.
    * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
      PowerPC Linux does not agree, so neither do we.

    PowerPC 64 Linux uses the System V Release 4 Calling Convention for
    64-bit PowerPC. It is specified in
    "64-bit PowerPC ELF Application Binary Interface Supplement 1.9".

    According to all conventions, the parameter area should be part of the
    caller's stack frame, allocated in the caller's prologue code (large enough
    to hold the parameter lists for all called routines). The NCG already
    uses the stack for register spilling, leaving 64 bytes free at the top.
    If we need a larger parameter area than that, we just allocate a new stack
    frame just before ccalling.
-}


-- A write barrier is a single LWSYNC instruction.
genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
 = return $ unitOL LWSYNC

-- Touch generates no code.
genCCall' _ _ (PrimTarget MO_Touch) _ _
 = return $ nilOL

-- Prefetch hints generate no code.
genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
 = return $ nilOL

genCCall' dflags gcp target dest_regs args
  = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
        -- we rely on argument promotion in the codeGen
    do
        (finalStack,passArgumentsCode,usedRegs) <- passArguments
                                                   (zip args argReps)
                                                   allArgRegs
                                                   (allFPArgRegs platform)
                                                   initialStackOffset
                                                   (toOL []) []

        -- Left: call a label directly; Right: call through a pointer.
        (labelOrExpr, reduceToFF32) <- case target of
            ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
                uses_pic_base_implicitly
                return (Left lbl, False)
            ForeignTarget expr _ -> do
                uses_pic_base_implicitly
                return (Right expr, False)
            PrimTarget mop -> outOfLineMachOp mop

        let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
                         `appOL` toc_before
            codeAfter = toc_after labelOrExpr `appOL` move_sp_up finalStack
                        `appOL` moveResult reduceToFF32

        case labelOrExpr of
            Left lbl -> do -- the linker does all the work for us
                return (         codeBefore
                        `snocOL` BL lbl usedRegs
                        `appOL`  codeAfter)
            Right dyn -> do -- implement call through function pointer
                (dynReg, dynCode) <- getSomeReg dyn
                case gcp of
                     -- ELF v1: dynReg is used as a base address; offset 0
                     -- goes to CTR via r11, offset 8 to toc, offset 16 to r11
                     GCPLinux64ELF 1 -> return ( dynCode
                       `appOL`  codeBefore
                       `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
                       `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
                       `snocOL` MTCTR r11
                       `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
                       `snocOL` BCTRL usedRegs
                       `appOL`  codeAfter)
                     -- ELF v2: the target address is copied to r12 first
                     GCPLinux64ELF 2 -> return ( dynCode
                       `appOL`  codeBefore
                       `snocOL` MR r12 dynReg
                       `snocOL` MTCTR r12
                       `snocOL` BCTRL usedRegs
                       `appOL`  codeAfter)
                     _ -> return ( dynCode
                       `snocOL` MTCTR dynReg
                       `appOL`  codeBefore
                       `snocOL` BCTRL usedRegs
                       `appOL`  codeAfter)
    where
        platform = targetPlatform dflags

        uses_pic_base_implicitly = do
            -- See Note [implicit register in PPC PIC code]
            -- on why we claim to use PIC register here
            when (gopt Opt_PIC dflags && target32Bit platform) $ do
                _ <- getPicBaseNat $ archWordFormat True
                return ()

        -- stack offset at which the first stack-passed argument goes
        initialStackOffset = case gcp of
                             GCPDarwin       -> 24
                             GCPLinux        -> 8
                             GCPLinux64ELF 1 -> 48
                             GCPLinux64ELF 2 -> 32
                             _ -> panic "genCall': unknown calling convention"
            -- size of linkage area + size of arguments, in bytes
        stackDelta finalStack = case gcp of
                                GCPDarwin ->
                                    roundTo 16 $ (24 +) $ max 32 $ sum $
                                    map (widthInBytes . typeWidth) argReps
                                GCPLinux -> roundTo 16 finalStack
                                -- On 64-bit ELF passArguments advances the
                                -- stack offset by 8 bytes for every argument,
                                -- whatever its width, so each argument has to
                                -- be counted as a full doubleword here too.
                                GCPLinux64ELF 1 ->
                                    roundTo 16 $ (48 +) $ max 64 $ sum $
                                    map (roundTo 8 . widthInBytes . typeWidth)
                                        argReps
                                GCPLinux64ELF 2 ->
                                    roundTo 16 $ (32 +) $ max 64 $ sum $
                                    map (roundTo 8 . widthInBytes . typeWidth)
                                        argReps
                                _ -> panic "genCall': unknown calling conv."

        argReps = map (cmmExprType dflags) args

        -- round x up to the next multiple of a
        roundTo a x | x `mod` a == 0 = x
                    | otherwise = x + a - (x `mod` a)

        spFormat = if target32Bit platform then II32 else II64

        -- Allocate a new stack frame, but only if the parameter area
        -- does not fit into the 64 bytes that are always available.
        move_sp_down finalStack
               | delta > 64 =
                        toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
                              DELTA (-delta)]
               | otherwise = nilOL
               where delta = stackDelta finalStack
        -- 64-bit ELF: save the TOC pointer in its stack slot before the call
        toc_before = case gcp of
                     GCPLinux64ELF 1 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 40))
                     GCPLinux64ELF 2 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 24))
                     _               -> nilOL
        -- 64-bit ELF: a NOP follows a call to a label, a reload of the
        -- saved TOC pointer follows a call through a pointer
        toc_after labelOrExpr = case gcp of
                                GCPLinux64ELF 1 -> case labelOrExpr of
                                                   Left _ -> toOL [ NOP ]
                                                   Right _ -> toOL [ LD spFormat toc
                                                                         (AddrRegImm sp
                                                                             (ImmInt 40))
                                                                   ]
                                GCPLinux64ELF 2 -> case labelOrExpr of
                                                   Left _ -> toOL [ NOP ]
                                                   Right _ -> toOL [ LD spFormat toc
                                                                         (AddrRegImm sp
                                                                             (ImmInt 24))
                                                                   ]
                                _               -> nilOL
        -- Undo move_sp_down after the call.
        move_sp_up finalStack
               | delta > 64 =  -- TODO: fix-up stack back-chain
                        toOL [ADD sp sp (RIImm (ImmInt delta)),
                              DELTA 0]
               | otherwise = nilOL
               where delta = stackDelta finalStack


        -- passArguments walks the argument list, threading through the
        -- still unused GPRs and FPRs, the current stack offset, the code
        -- accumulated so far and the argument registers used so far. It
        -- returns the final stack offset, the code and the used registers.
        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
        -- 64-bit integer argument on a 32-bit target: passed as two words
        passArguments ((arg,arg_ty):args) gprs fprs stackOffset
               accumCode accumUsed | isWord64 arg_ty
                                     && target32Bit (targetPlatform dflags) =
            do
                ChildCode64 code vr_lo <- iselExpr64 arg
                let vr_hi = getHiVRegFromLo vr_lo

                case gcp of
                    GCPDarwin -> do
                        -- each word goes to a GPR if one is left,
                        -- to its stack slot otherwise
                        let storeWord vr (gpr:_) _ = MR gpr vr
                            storeWord vr [] offset
                                = ST II32 vr (AddrRegImm sp (ImmInt offset))
                        passArguments args
                                      (drop 2 gprs)
                                      fprs
                                      (stackOffset+8)
                                      (accumCode `appOL` code
                                            `snocOL` storeWord vr_hi gprs stackOffset
                                            `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
                                      ((take 2 gprs) ++ accumUsed)
                    GCPLinux -> do
                        let stackOffset' = roundTo 8 stackOffset
                            stackCode = accumCode `appOL` code
                                `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
                                `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
                            regCode hireg loreg =
                                accumCode `appOL` code
                                    `snocOL` MR hireg vr_hi
                                    `snocOL` MR loreg vr_lo

                        case gprs of
                            hireg : loreg : regs | even (length gprs) ->
                                passArguments args regs fprs stackOffset
                                              (regCode hireg loreg) (hireg : loreg : accumUsed)
                            _skipped : hireg : loreg : regs ->
                                passArguments args regs fprs stackOffset
                                              (regCode hireg loreg) (hireg : loreg : accumUsed)
                            _ -> -- only one or no regs left
                                passArguments args [] fprs (stackOffset'+8)
                                              stackCode accumUsed
                    GCPLinux64ELF _ -> panic "passArguments: 32 bit code"

        -- every other argument: in a register if one of the right class
        -- is left, on the stack otherwise
        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
            | reg : _ <- regs = do
                register <- getRegister arg
                let code = case register of
                            Fixed _ freg fcode -> fcode `snocOL` MR reg freg
                            Any _ acode -> acode reg
                    stackOffsetRes = case gcp of
                                     -- The Darwin ABI requires that we reserve
                                     -- stack slots for register parameters
                                     GCPDarwin -> stackOffset + stackBytes
                                     -- ... the SysV ABI 32-bit doesn't.
                                     GCPLinux -> stackOffset
                                     -- ... but SysV ABI 64-bit does.
                                     GCPLinux64ELF _ -> stackOffset + stackBytes
                passArguments args
                              (drop nGprs gprs)
                              (drop nFprs fprs)
                              stackOffsetRes
                              (accumCode `appOL` code)
                              (reg : accumUsed)
            | otherwise = do
                (vr, code) <- getSomeReg arg
                passArguments args
                              (drop nGprs gprs)
                              (drop nFprs fprs)
                              (stackOffset' + stackBytes)
                              (accumCode `appOL` code `snocOL` ST (cmmTypeFormat rep) vr stackSlot)
                              accumUsed
            where
                -- start of the stack slot for this argument
                stackOffset' = case gcp of
                               GCPDarwin ->
                                   -- stackOffset is at least 4-byte aligned
                                   -- The Darwin ABI is happy with that.
                                   stackOffset
                               GCPLinux
                                   -- ... the SysV ABI requires 8-byte
                                   -- alignment for doubles.
                                   | isFloatType rep && typeWidth rep == W64 ->
                                       roundTo 8 stackOffset
                                   | otherwise ->
                                       stackOffset
                               GCPLinux64ELF _ ->
                                   -- everything on the stack is 8-byte
                                   -- aligned on a 64 bit system
                                   -- (except vector status, not used now)
                                   stackOffset
                -- address within the slot at which the value is stored
                stackOffset''
                     | isFloatType rep && typeWidth rep == W32 =
                         case gcp of
                         -- The ELF v1 ABI (section 3.2.3) maps single
                         -- precision floating point values to the second
                         -- word of their doubleword.
                         GCPLinux64ELF 1 -> stackOffset' + 4
                         _               -> stackOffset'
                     | otherwise = stackOffset'
                stackSlot = AddrRegImm sp (ImmInt stackOffset'')
                -- (GPRs consumed, FPRs consumed, bytes of stack consumed,
                --  register pool the argument is taken from)
                (nGprs, nFprs, stackBytes, regs)
                    = case gcp of
                      GCPDarwin ->
                          case cmmTypeFormat rep of
                          II8  -> (1, 0, 4, gprs)
                          II16 -> (1, 0, 4, gprs)
                          II32 -> (1, 0, 4, gprs)
                          -- The Darwin ABI requires that we skip a
                          -- corresponding number of GPRs when we use
                          -- the FPRs.
                          FF32 -> (1, 1, 4, fprs)
                          FF64 -> (2, 1, 8, fprs)
                          II64 -> panic "genCCall' passArguments II64"
                          FF80 -> panic "genCCall' passArguments FF80"
                      GCPLinux ->
                          case cmmTypeFormat rep of
                          II8  -> (1, 0, 4, gprs)
                          II16 -> (1, 0, 4, gprs)
                          II32 -> (1, 0, 4, gprs)
                          -- ... the SysV ABI doesn't.
                          FF32 -> (0, 1, 4, fprs)
                          FF64 -> (0, 1, 8, fprs)
                          II64 -> panic "genCCall' passArguments II64"
                          FF80 -> panic "genCCall' passArguments FF80"
                      GCPLinux64ELF _ ->
                          case cmmTypeFormat rep of
                          II8  -> (1, 0, 8, gprs)
                          II16 -> (1, 0, 8, gprs)
                          II32 -> (1, 0, 8, gprs)
                          II64 -> (1, 0, 8, gprs)
                          -- The ELFv1 ABI requires that we skip a
                          -- corresponding number of GPRs when we use
                          -- the FPRs.
                          FF32 -> (1, 1, 8, fprs)
                          FF64 -> (1, 1, 8, fprs)
                          FF80 -> panic "genCCall' passArguments FF80"

        -- Move the call result from its fixed register (f1 for floats,
        -- r3/r4 for a 64-bit integer on a 32-bit target, r3 otherwise)
        -- into the destination; at most one destination is supported.
        moveResult reduceToFF32 =
            case dest_regs of
                [] -> nilOL
                [dest]
                    | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
                    | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
                    | isWord64 rep && target32Bit (targetPlatform dflags)
                       -> toOL [MR (getHiVRegFromLo r_dest) r3,
                                MR r_dest r4]
                    | otherwise -> unitOL (MR r_dest r3)
                    where rep = cmmRegType dflags (CmmLocal dest)
                          r_dest = getRegisterReg platform (CmmLocal dest)
                _ -> panic "genCCall' moveResult: Bad dest_regs"

        -- Map a primitive operation to the C function implementing it.
        -- The Bool is True for the single precision operations, which
        -- call the double precision function; moveResult then rounds
        -- the result back to single precision with FRSP.
        outOfLineMachOp mop =
            do
                dflags <- getDynFlags
                mopExpr <- cmmMakeDynamicReference dflags CallReference $
                              mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
                let mopLabelOrExpr = case mopExpr of
                        CmmLit (CmmLabel lbl) -> Left lbl
                        _ -> Right mopExpr
                return (mopLabelOrExpr, reduce)
            where
                (functionName, reduce) = case mop of
                    MO_F32_Exp   -> (fsLit "exp", True)
                    MO_F32_Log   -> (fsLit "log", True)
                    MO_F32_Sqrt  -> (fsLit "sqrt", True)

                    MO_F32_Sin   -> (fsLit "sin", True)
                    MO_F32_Cos   -> (fsLit "cos", True)
                    MO_F32_Tan   -> (fsLit "tan", True)

                    MO_F32_Asin  -> (fsLit "asin", True)
                    MO_F32_Acos  -> (fsLit "acos", True)
                    MO_F32_Atan  -> (fsLit "atan", True)

                    MO_F32_Sinh  -> (fsLit "sinh", True)
                    MO_F32_Cosh  -> (fsLit "cosh", True)
                    MO_F32_Tanh  -> (fsLit "tanh", True)
                    MO_F32_Pwr   -> (fsLit "pow", True)

                    MO_F64_Exp   -> (fsLit "exp", False)
                    MO_F64_Log   -> (fsLit "log", False)
                    MO_F64_Sqrt  -> (fsLit "sqrt", False)

                    MO_F64_Sin   -> (fsLit "sin", False)
                    MO_F64_Cos   -> (fsLit "cos", False)
                    MO_F64_Tan   -> (fsLit "tan", False)

                    MO_F64_Asin  -> (fsLit "asin", False)
                    MO_F64_Acos  -> (fsLit "acos", False)
                    MO_F64_Atan  -> (fsLit "atan", False)

                    MO_F64_Sinh  -> (fsLit "sinh", False)
                    MO_F64_Cosh  -> (fsLit "cosh", False)
                    MO_F64_Tanh  -> (fsLit "tanh", False)
                    MO_F64_Pwr   -> (fsLit "pow", False)

                    MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)

                    MO_Memcpy _  -> (fsLit "memcpy", False)
                    MO_Memset _  -> (fsLit "memset", False)
                    MO_Memmove _ -> (fsLit "memmove", False)

                    MO_BSwap w   -> (fsLit $ bSwapLabel w, False)
                    MO_PopCnt w  -> (fsLit $ popCntLabel w, False)
                    MO_Clz w     -> (fsLit $ clzLabel w, False)
                    MO_Ctz w     -> (fsLit $ ctzLabel w, False)
                    MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
                    MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
                    MO_AtomicRead w  -> (fsLit $ atomicReadLabel w, False)
                    MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)

                    MO_S_QuotRem {}  -> unsupported
                    MO_U_QuotRem {}  -> unsupported
                    MO_U_QuotRem2 {} -> unsupported
                    MO_Add2 {}       -> unsupported
                    MO_SubWordC {}   -> unsupported
                    MO_AddIntC {}    -> unsupported
                    MO_SubIntC {}    -> unsupported
                    MO_U_Mul2 {}     -> unsupported
                    MO_WriteBarrier  -> unsupported
                    MO_Touch         -> unsupported
                    (MO_Prefetch_Data _ ) -> unsupported
                unsupported = panic ("outOfLineCmmOp: " ++ show mop
                                  ++ " not supported")
1479
1480 -- -----------------------------------------------------------------------------
1481 -- Generating a table-branch
1482
-- | Generate a table branch. The scrutinee (adjusted by the offset that
-- 'switchTargetsToTable' reports) is scaled to a table index, the table
-- entry is loaded and control is transferred through the count register.
-- The final 'BCTR' carries the targets and the table label.
genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
genSwitch dflags expr targets
  -- PIC or 64-bit: the loaded entry is added to the address of the table
  | relativeTable
  = do
        (idxReg, idxCode) <- getSomeReg scrutinee
        scratch <- getNewRegNat wordFmt
        tableLbl <- getNewLabelNat
        tableRef <- cmmMakeDynamicReference dflags DataReference tableLbl
        (tableReg, tableCode) <- getSomeReg tableRef
        return $ idxCode `appOL` tableCode `appOL` toOL
            [ SL wordFmt scratch idxReg (RIImm (ImmInt shiftAmount))
            , LD wordFmt scratch (AddrRegReg tableReg scratch)
            , ADD scratch scratch (RIReg tableReg)
            , MTCTR scratch
            , BCTR ids (Just tableLbl)
            ]
  -- otherwise: the table is addressed through the high and low parts
  -- of its label and the loaded entry is used as it is
  | otherwise
  = do
        (idxReg, idxCode) <- getSomeReg scrutinee
        scratch <- getNewRegNat wordFmt
        tableLbl <- getNewLabelNat
        return $ idxCode `appOL` toOL
            [ SL wordFmt scratch idxReg (RIImm (ImmInt shiftAmount))
            , ADDIS scratch scratch (HA (ImmCLbl tableLbl))
            , LD wordFmt scratch (AddrRegImm scratch (LO (ImmCLbl tableLbl)))
            , MTCTR scratch
            , BCTR ids (Just tableLbl)
            ]
  where
    (offset, ids) = switchTargetsToTable targets
    is32Bit       = target32Bit (targetPlatform dflags)
    relativeTable = gopt Opt_PIC dflags || not is32Bit
    scrutinee     = cmmOffset dflags expr offset
    wordFmt       = archWordFormat is32Bit
    -- shift that scales an index to a byte offset (4 or 8 byte entries)
    shiftAmount   = if is32Bit then 2 else 3
1518
-- | Build the read-only jump table that belongs to a table branch.
-- Only a 'BCTR' that carries a table label gets one. With PIC or on a
-- 64-bit target the entries are differences between the block label and
-- the table label (a zero word for a missing target); otherwise each
-- entry comes from 'jumpTableEntry'.
generateJumpTableForInstr :: DynFlags -> Instr
                          -> Maybe (NatCmmDecl CmmStatics Instr)
generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
    Just (CmmData (Section ReadOnlyData lbl) (Statics lbl entries))
  where
    relative = gopt Opt_PIC dflags
            || not (target32Bit (targetPlatform dflags))

    entries
      | relative  = map relativeEntry ids
      | otherwise = map (jumpTableEntry dflags) ids

    relativeEntry Nothing
      = CmmStaticLit (CmmInt 0 (wordWidth dflags))
    relativeEntry (Just blockid)
      = CmmStaticLit
            (CmmLabelDiffOff (mkAsmTempLabel (getUnique blockid)) lbl 0)
generateJumpTableForInstr _ _ = Nothing
1534
1535 -- -----------------------------------------------------------------------------
1536 -- 'condIntReg' and 'condFltReg': condition codes into registers
1537
1538 -- Turn those condition codes into integers now (when they appear on
1539 -- the right hand side of an assignment).
1540
condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register

-- | Turn a condition code into a word-sized integer in a register.
-- After the comparison code the relevant condition register bit is
-- inverted in place where the condition calls for it (CRNOR of the bit
-- with itself); the condition register is then copied to the
-- destination with MFCR and RLWINM rotates by (bit + 1) under the mask
-- 31..31.
-- NOTE(review): this presumably leaves just the selected bit in the
-- least significant position, i.e. a 0/1 result -- confirm.
condReg :: NatM CondCode -> NatM Register
condReg getCond = do
    CondCode _ cond condCode <- getCond
    dflags <- getDynFlags
    let (crBit, invert) = bitFor cond

        flipBit | invert    = unitOL (CRNOR crBit crBit crBit)
                | otherwise = nilOL

        extract dst = toOL
            [ MFCR dst
            , RLWINM dst dst (crBit + 1) 31 31
            ]

        wordFmt = archWordFormat (target32Bit (targetPlatform dflags))
    return $ Any wordFmt $ \dst ->
        condCode `appOL` flipBit `appOL` extract dst
  where
    -- condition register bit to inspect, and whether to invert it first
    bitFor c = case c of
        LTT -> (0, False)
        LE  -> (1, True)
        EQQ -> (2, False)
        GE  -> (0, True)
        GTT -> (1, False)

        NE  -> (2, True)

        LU  -> (0, False)
        LEU -> (1, True)
        GEU -> (0, True)
        GU  -> (1, False)
        _   -> panic "PPC.CodeGen.codeReg: no match"

condIntReg cond x y = condReg (condIntCode cond x y)
condFltReg cond x y = condReg (condFltCode cond x y)
1578
1579
1580
1581 -- -----------------------------------------------------------------------------
1582 -- 'trivial*Code': deal with trivial instructions
1583
1584 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1585 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1586 -- Only look for constants on the right hand side, because that's
1587 -- where the generic optimizer will have put them.
1588
1589 -- Similarly, for unary instructions, we don't have to worry about
1590 -- matching an StInt as the argument, because genericOpt will already
1591 -- have handled the constant-folding.
1592
1593
1594
1595 {-
1596 Wolfgang's PowerPC version of The Rules:
1597
1598 A slightly modified version of The Rules to take advantage of the fact
1599 that PowerPC instructions work on all registers and don't implicitly
1600 clobber any fixed registers.
1601
1602 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1603
1604 * If getRegister returns Any, then the code it generates may modify only:
1605 (a) fresh temporaries
1606 (b) the destination register
1607 It may *not* modify global registers, unless the global
1608 register happens to be the destination register.
1609 It may not clobber any other registers. In fact, only ccalls clobber any
1610 fixed registers.
1611 Also, it may not modify the counter register (used by genCCall).
1612
1613 Corollary: If a getRegister for a subexpression returns Fixed, you need
1614 not move it to a fresh temporary before evaluating the next subexpression.
1615 The Fixed register won't be modified.
1616 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1617
1618 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1619 the value of the destination register.
1620 -}
1621
-- | Code for a dyadic integer instruction whose second operand may be
-- a register or an immediate. The 'Bool' says whether an immediate
-- operand is to be treated as signed when checking that it fits.
trivialCode
        :: Width
        -> Bool
        -> (Reg -> Reg -> RI -> Instr)
        -> CmmExpr
        -> CmmExpr
        -> NatM Register

-- literal right operand that fits the immediate field
trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
    = do
        (lhsReg, lhsCode) <- getSomeReg x
        return $ Any (intFormat rep) $ \dst ->
            lhsCode `snocOL` instr dst lhsReg (RIImm imm)

-- general case: both operands in registers
trivialCode rep _ instr x y = do
    (lhsReg, lhsCode) <- getSomeReg x
    (rhsReg, rhsCode) <- getSomeReg y
    return $ Any (intFormat rep) $ \dst ->
        lhsCode `appOL` rhsCode `snocOL` instr dst lhsReg (RIReg rhsReg)
1642
-- | Code for a shift instruction. Like 'trivialCode', except that the
-- instruction also takes the format derived from the width, and an
-- immediate shift amount is always checked as unsigned.
shiftCode
        :: Width
        -> (Format-> Reg -> Reg -> RI -> Instr)
        -> CmmExpr
        -> CmmExpr
        -> NatM Register
-- literal shift amount that fits the immediate field
shiftCode width instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate width False y
    = do
        (valReg, valCode) <- getSomeReg x
        return $ Any fmt $ \dst ->
            valCode `snocOL` instr fmt dst valReg (RIImm imm)
  where
    fmt = intFormat width

-- general case: shift amount in a register
shiftCode width instr x y = do
    (valReg, valCode) <- getSomeReg x
    (amtReg, amtCode) <- getSomeReg y
    return $ Any fmt $ \dst ->
        valCode `appOL` amtCode `snocOL` instr fmt dst valReg (RIReg amtReg)
  where
    fmt = intFormat width
1663
-- | Code for a dyadic instruction that only takes register operands:
-- both operands are evaluated into registers (left one first) and the
-- instruction is applied to the destination and the two sources.
trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' format instr x y = do
    (lhsReg, lhsCode) <- getSomeReg x
    (rhsReg, rhsCode) <- getSomeReg y
    return $ Any format $ \dst ->
        lhsCode `appOL` rhsCode `snocOL` instr dst lhsReg rhsReg
1671
-- | Variant of 'trivialCodeNoImm'' for instructions that take the
-- format as their first argument.
trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm format instr x y =
    trivialCodeNoImm' format formatted x y
  where
    formatted = instr format
1675
1676
-- | Code for a unary instruction: the operand is evaluated into a
-- register and the instruction is applied to the destination and
-- that register.
trivialUCode
        :: Format
        -> (Reg -> Reg -> Instr)
        -> CmmExpr
        -> NatM Register
trivialUCode rep instr x = do
    (argReg, argCode) <- getSomeReg x
    return $ Any rep $ \dst -> argCode `snocOL` instr dst argReg
1686
-- There is no "remainder" instruction on the PPC, so we have to do
-- it the hard way: divide, multiply the quotient by the divisor and
-- combine the product with the dividend using SUBF.
-- NOTE(review): presumably SUBF computes dividend minus product here.
-- The "div" parameter is the division instruction to use (DIVW or DIVWU)

remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
              -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    dflags <- getDynFlags
    -- the multiply has to match the word size of the target
    let multiply | target32Bit (targetPlatform dflags) = MULLW
                 | otherwise                           = MULLD
    (dividend, dividendCode) <- getSomeReg x
    (divisor,  divisorCode)  <- getSomeReg y
    let steps dst = toOL
            [ div dst dividend divisor
            , multiply dst dst (RIReg divisor)
            , SUBF dst dst dividend
            ]
    return $ Any (intFormat rep) $ \dst ->
        dividendCode `appOL` divisorCode `appOL` steps dst
1705
-- | Integer to floating point conversion; dispatches on the target
-- architecture.
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP fromRep toRep x =
    getDynFlags >>= \dflags ->
        coerceInt2FP' (platformArch (targetPlatform dflags)) fromRep toRep x
1711
-- | Architecture specific integer to floating point conversion.
coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register

-- 32-bit PowerPC: a double is assembled on the stack from the high word
-- 0x43300000 and the integer with its top bit flipped (XORIS 0x8000);
-- it is then subtracted from the constant 0x43300000:0x80000000 held in
-- a read-only data item.
-- NOTE(review): this looks like the usual "magic number" conversion --
-- confirm the operand order of FSUB.
coerceInt2FP' ArchPPC fromRep toRep x = do
    (src, srcCode) <- getSomeReg x
    constLbl <- getNewLabelNat
    intTmp <- getNewRegNat II32
    fltTmp <- getNewRegNat FF64
    dflags <- getDynFlags
    constRef <- cmmMakeDynamicReference dflags DataReference constLbl
    Amode constAddr constAddrCode <- getAmode D constRef
    let
        magicConst = LDATA (Section ReadOnlyData constLbl) $ Statics constLbl
                         [ CmmStaticLit (CmmInt 0x43300000 W32)
                         , CmmStaticLit (CmmInt 0x80000000 W32) ]

        -- build the double in the stack words 2 and 3 and load it
        assemble = toOL
            [ magicConst
            , XORIS intTmp src (ImmInt 0x8000)
            , ST II32 intTmp (spRel dflags 3)
            , LIS intTmp (ImmInt 0x4330)
            , ST II32 intTmp (spRel dflags 2)
            , LD FF64 fltTmp (spRel dflags 2)
            ]

        subtractConst dst = toOL
            [ LD FF64 dst constAddr
            , FSUB FF64 dst fltTmp dst
            ]

        convert dst = srcCode
            `appOL` signExtend src
            `appOL` assemble
            `appOL` constAddrCode
            `appOL` subtractConst dst
            `appOL` roundResult dst

    return (Any (floatFormat toRep) convert)
  where
    -- narrow sources are sign extended in place first
    signExtend src = case fromRep of
        W8  -> unitOL $ EXTS II8 src src
        W16 -> unitOL $ EXTS II16 src src
        W32 -> nilOL
        _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

    -- a single precision result gets an FRSP at the end
    roundResult dst = case toRep of
        W32 -> unitOL $ FRSP dst dst
        W64 -> nilOL
        _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

-- On ELF v1 Linux we use the compiler doubleword in the stack frame;
-- on ELF v2 Linux the same slot is the TOC pointer doubleword. The
-- latter is only set right before a call and restored right after
-- return from the call, so using it as scratch space here is fine.
coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
    (src, srcCode) <- getSomeReg x
    dflags <- getDynFlags
    let
        -- move the integer to a floating point register through the
        -- stack and convert it there
        viaStack dst = toOL
            [ ST II64 src (spRel dflags 3)
            , LD FF64 dst (spRel dflags 3)
            , FCFID dst dst
            ]

        convert dst = srcCode
            `appOL` signExtend src
            `appOL` viaStack dst
            `appOL` roundResult dst

    return (Any (floatFormat toRep) convert)
  where
    -- sources narrower than 64 bits are sign extended in place first
    signExtend src = case fromRep of
        W8  -> unitOL $ EXTS II8 src src
        W16 -> unitOL $ EXTS II16 src src
        W32 -> unitOL $ EXTS II32 src src
        W64 -> nilOL
        _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

    -- a single precision result gets an FRSP at the end
    roundResult dst = case toRep of
        W32 -> unitOL $ FRSP dst dst
        W64 -> nilOL
        _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
1780
1781
-- | Floating point to integer conversion; dispatches on the target
-- architecture.
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int fromRep toRep x =
    getDynFlags >>= \dflags ->
        coerceFP2Int' (platformArch (targetPlatform dflags)) fromRep toRep x
1787
-- | Architecture specific floating point to integer conversion. The
-- value is converted inside a floating point register and then moved
-- to an integer register through the stack. The source width is
-- ignored.
coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int' ArchPPC _ toRep x = do
    dflags <- getDynFlags
    -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
    (src, srcCode) <- getSomeReg x
    fpTmp <- getNewRegNat FF64
    let viaStack dst = toOL
            [ -- convert to int in FP reg
              FCTIWZ fpTmp src
              -- store value (64bit) from FP to stack
            , ST FF64 fpTmp (spRel dflags 2)
              -- read low word of value (high word is undefined)
            , LD II32 dst (spRel dflags 3)
            ]
    return $ Any (intFormat toRep) $ \dst -> srcCode `appOL` viaStack dst

coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
    dflags <- getDynFlags
    -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
    (src, srcCode) <- getSomeReg x
    fpTmp <- getNewRegNat FF64
    let viaStack dst = toOL
            [ -- convert to int in FP reg
              FCTIDZ fpTmp src
              -- store value (64bit) from FP to compiler word on stack
            , ST FF64 fpTmp (spRel dflags 3)
              -- and read it back as an integer
            , LD II64 dst (spRel dflags 3)
            ]
    return $ Any (intFormat toRep) $ \dst -> srcCode `appOL` viaStack dst

coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
1819
-- Note [.LCTOC1 in PPC PIC code]
-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
-- to make the most of the PPC's 16-bit displacements.
-- As a 16-bit signed offset is used (usually via addi/lwz instructions),
-- the first element has an offset of '-32768' relative to .LCTOC1.
1825
1826 -- Note [implicit register in PPC PIC code]
1827 -- PPC generates calls by labels in assembly
1828 -- in form of:
1829 -- bl puts+32768@plt
1830 -- in this form it's not seen directly (by GHC NCG)
1831 -- that r30 (PicBaseReg) is used,
1832 -- but r30 is a required part of PLT code setup:
1833 -- puts+32768@plt:
1834 -- lwz r11,-30484(r30) ; offset in .LCTOC1
1835 -- mtctr r11
1836 -- bctr