516a49aee3cca9ae3a62de7d3ad215e3eda5e29e
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, and (b) the type signatures,
13 -- the structure should not be too overwhelming.
14
15 module PPC.CodeGen (
16 cmmTopCodeGen,
17 generateJumpTableForInstr,
18 InstrBlock
19 )
20
21 where
22
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
25 #include "../includes/MachDeps.h"
26
27 -- NCG stuff:
28 import GhcPrelude
29
30 import CodeGen.Platform
31 import PPC.Instr
32 import PPC.Cond
33 import PPC.Regs
34 import CPrim
35 import NCGMonad ( NatM, getNewRegNat, getNewLabelNat
36 , getBlockIdNat, getPicBaseNat, getNewRegPairNat
37 , getPicBaseMaybeNat )
38 import Instruction
39 import PIC
40 import Format
41 import RegClass
42 import Reg
43 import TargetReg
44 import Platform
45
46 -- Our intermediate code:
47 import BlockId
48 import PprCmm ( pprExpr )
49 import Cmm
50 import CmmUtils
51 import CmmSwitch
52 import CLabel
53 import Hoopl.Block
54 import Hoopl.Graph
55
56 -- The rest:
57 import OrdList
58 import Outputable
59 import DynFlags
60
61 import Control.Monad ( mapAndUnzipM, when )
62 import Data.Bits
63 import Data.Word
64
65 import BasicTypes
66 import FastString
67 import Util
68
69 -- -----------------------------------------------------------------------------
70 -- Top-level of the instruction selector
71
72 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
73 -- They are really trees of insns to facilitate fast appending, where a
74 -- left-to-right traversal (pre-order?) yields the insns in the correct
75 -- order.
76
77 cmmTopCodeGen
78 :: RawCmmDecl
79 -> NatM [NatCmmDecl CmmStatics Instr]
80
81 cmmTopCodeGen (CmmProc info lab live graph) = do
82 let blocks = toBlockListEntryFirst graph
83 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
84 dflags <- getDynFlags
85 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
86 tops = proc : concat statics
87 os = platformOS $ targetPlatform dflags
88 arch = platformArch $ targetPlatform dflags
89 case arch of
90 ArchPPC | os == OSAIX -> return tops
91 | otherwise -> do
92 picBaseMb <- getPicBaseMaybeNat
93 case picBaseMb of
94 Just picBase -> initializePicBase_ppc arch os picBase tops
95 Nothing -> return tops
96 ArchPPC_64 ELF_V1 -> fixup_entry tops
97 -- generating function descriptor is handled in
98 -- pretty printer
99 ArchPPC_64 ELF_V2 -> fixup_entry tops
100 -- generating function prologue is handled in
101 -- pretty printer
102 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
103 where
104 fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
105 = do
106 let BasicBlock bID insns = entry
107 bID' <- if lab == (blockLbl bID)
108 then newBlockId
109 else return bID
110 let b' = BasicBlock bID' insns
111 return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
112 fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
113
114 cmmTopCodeGen (CmmData sec dat) = do
115 return [CmmData sec dat] -- no translation, we just use CmmStatic
116
117 basicBlockCodeGen
118 :: Block CmmNode C C
119 -> NatM ( [NatBasicBlock Instr]
120 , [NatCmmDecl CmmStatics Instr])
121
122 basicBlockCodeGen block = do
123 let (_, nodes, tail) = blockSplit block
124 id = entryLabel block
125 stmts = blockToList nodes
126 mid_instrs <- stmtsToInstrs stmts
127 tail_instrs <- stmtToInstrs tail
128 let instrs = mid_instrs `appOL` tail_instrs
129 -- code generation may introduce new basic block boundaries, which
130 -- are indicated by the NEWBLOCK instruction. We must split up the
131 -- instruction stream into basic blocks again. Also, we extract
132 -- LDATAs here too.
133 let
134 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
135
136 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
137 = ([], BasicBlock id instrs : blocks, statics)
138 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
139 = (instrs, blocks, CmmData sec dat:statics)
140 mkBlocks instr (instrs,blocks,statics)
141 = (instr:instrs, blocks, statics)
142 return (BasicBlock id top : other_blocks, statics)
143
144 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
145 stmtsToInstrs stmts
146 = do instrss <- mapM stmtToInstrs stmts
147 return (concatOL instrss)
148
149 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
150 stmtToInstrs stmt = do
151 dflags <- getDynFlags
152 case stmt of
153 CmmComment s -> return (unitOL (COMMENT s))
154 CmmTick {} -> return nilOL
155 CmmUnwind {} -> return nilOL
156
157 CmmAssign reg src
158 | isFloatType ty -> assignReg_FltCode format reg src
159 | target32Bit (targetPlatform dflags) &&
160 isWord64 ty -> assignReg_I64Code reg src
161 | otherwise -> assignReg_IntCode format reg src
162 where ty = cmmRegType dflags reg
163 format = cmmTypeFormat ty
164
165 CmmStore addr src
166 | isFloatType ty -> assignMem_FltCode format addr src
167 | target32Bit (targetPlatform dflags) &&
168 isWord64 ty -> assignMem_I64Code addr src
169 | otherwise -> assignMem_IntCode format addr src
170 where ty = cmmExprType dflags src
171 format = cmmTypeFormat ty
172
173 CmmUnsafeForeignCall target result_regs args
174 -> genCCall target result_regs args
175
176 CmmBranch id -> genBranch id
177 CmmCondBranch arg true false prediction -> do
178 b1 <- genCondJump true arg prediction
179 b2 <- genBranch false
180 return (b1 `appOL` b2)
181 CmmSwitch arg ids -> do dflags <- getDynFlags
182 genSwitch dflags arg ids
183 CmmCall { cml_target = arg } -> genJump arg
184 _ ->
185 panic "stmtToInstrs: statement should have been cps'd away"
186
187
188 --------------------------------------------------------------------------------
189 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
190 -- They are really trees of insns to facilitate fast appending, where a
191 -- left-to-right traversal yields the insns in the correct order.
192 --
193 type InstrBlock
194 = OrdList Instr
195
196
197 -- | Register's passed up the tree. If the stix code forces the register
198 -- to live in a pre-decided machine register, it comes out as @Fixed@;
199 -- otherwise, it comes out as @Any@, and the parent can decide which
200 -- register to put it in.
201 --
202 data Register
203 = Fixed Format Reg InstrBlock
204 | Any Format (Reg -> InstrBlock)
205
206
207 swizzleRegisterRep :: Register -> Format -> Register
208 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
209 swizzleRegisterRep (Any _ codefn) format = Any format codefn
210
211
212 -- | Grab the Reg for a CmmReg
213 getRegisterReg :: Platform -> CmmReg -> Reg
214
215 getRegisterReg _ (CmmLocal (LocalReg u pk))
216 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
217
218 getRegisterReg platform (CmmGlobal mid)
219 = case globalRegMaybe platform mid of
220 Just reg -> RegReal reg
221 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
222 -- By this stage, the only MagicIds remaining should be the
223 -- ones which map to a real machine register on this
224 -- platform. Hence ...
225
226 -- | Convert a BlockId to some CmmStatic data
227 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
228 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
229 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
230 where blockLabel = blockLbl blockid
231
232
233
234 -- -----------------------------------------------------------------------------
235 -- General things for putting together code sequences
236
237 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
238 -- CmmExprs into CmmRegOff?
239 mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
240 mangleIndexTree dflags (CmmRegOff reg off)
241 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
242 where width = typeWidth (cmmRegType dflags reg)
243
244 mangleIndexTree _ _
245 = panic "PPC.CodeGen.mangleIndexTree: no match"
246
247 -- -----------------------------------------------------------------------------
248 -- Code gen for 64-bit arithmetic on 32-bit platforms
249
250 {-
251 Simple support for generating 64-bit code (ie, 64 bit values and 64
252 bit assignments) on 32-bit platforms. Unlike the main code generator
253 we merely shoot for generating working code as simply as possible, and
254 pay little attention to code quality. Specifically, there is no
255 attempt to deal cleverly with the fixed-vs-floating register
256 distinction; all values are generated into (pairs of) floating
257 registers, even if this would mean some redundant reg-reg moves as a
258 result. Only one of the VRegUniques is returned, since it will be
259 of the VRegUniqueLo form, and the upper-half VReg can be determined
260 by applying getHiVRegFromLo to it.
261 -}
262
263 data ChildCode64 -- a.k.a "Register64"
264 = ChildCode64
265 InstrBlock -- code
266 Reg -- the lower 32-bit temporary which contains the
267 -- result; use getHiVRegFromLo to find the other
268 -- VRegUnique. Rules of this simplified insn
269 -- selection game are therefore that the returned
270 -- Reg may be modified
271
272
273 -- | Compute an expression into a register, but
274 -- we don't mind which one it is.
275 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
276 getSomeReg expr = do
277 r <- getRegister expr
278 case r of
279 Any rep code -> do
280 tmp <- getNewRegNat rep
281 return (tmp, code tmp)
282 Fixed _ reg code ->
283 return (reg, code)
284
285 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
286 getI64Amodes addrTree = do
287 Amode hi_addr addr_code <- getAmode D addrTree
288 case addrOffset hi_addr 4 of
289 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
290 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
291 return (AddrRegImm hi_ptr (ImmInt 0),
292 AddrRegImm hi_ptr (ImmInt 4),
293 code)
294
295
296 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
297 assignMem_I64Code addrTree valueTree = do
298 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
299 ChildCode64 vcode rlo <- iselExpr64 valueTree
300 let
301 rhi = getHiVRegFromLo rlo
302
303 -- Big-endian store
304 mov_hi = ST II32 rhi hi_addr
305 mov_lo = ST II32 rlo lo_addr
306 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
307
308
309 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
310 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
311 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
312 let
313 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
314 r_dst_hi = getHiVRegFromLo r_dst_lo
315 r_src_hi = getHiVRegFromLo r_src_lo
316 mov_lo = MR r_dst_lo r_src_lo
317 mov_hi = MR r_dst_hi r_src_hi
318 return (
319 vcode `snocOL` mov_lo `snocOL` mov_hi
320 )
321
322 assignReg_I64Code _ _
323 = panic "assignReg_I64Code(powerpc): invalid lvalue"
324
325
326 iselExpr64 :: CmmExpr -> NatM ChildCode64
327 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
328 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
329 (rlo, rhi) <- getNewRegPairNat II32
330 let mov_hi = LD II32 rhi hi_addr
331 mov_lo = LD II32 rlo lo_addr
332 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
333 rlo
334
335 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
336 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
337
338 iselExpr64 (CmmLit (CmmInt i _)) = do
339 (rlo,rhi) <- getNewRegPairNat II32
340 let
341 half0 = fromIntegral (fromIntegral i :: Word16)
342 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
343 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
344 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
345
346 code = toOL [
347 LIS rlo (ImmInt half1),
348 OR rlo rlo (RIImm $ ImmInt half0),
349 LIS rhi (ImmInt half3),
350 OR rhi rhi (RIImm $ ImmInt half2)
351 ]
352 return (ChildCode64 code rlo)
353
354 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
355 ChildCode64 code1 r1lo <- iselExpr64 e1
356 ChildCode64 code2 r2lo <- iselExpr64 e2
357 (rlo,rhi) <- getNewRegPairNat II32
358 let
359 r1hi = getHiVRegFromLo r1lo
360 r2hi = getHiVRegFromLo r2lo
361 code = code1 `appOL`
362 code2 `appOL`
363 toOL [ ADDC rlo r1lo r2lo,
364 ADDE rhi r1hi r2hi ]
365 return (ChildCode64 code rlo)
366
367 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
368 ChildCode64 code1 r1lo <- iselExpr64 e1
369 ChildCode64 code2 r2lo <- iselExpr64 e2
370 (rlo,rhi) <- getNewRegPairNat II32
371 let
372 r1hi = getHiVRegFromLo r1lo
373 r2hi = getHiVRegFromLo r2lo
374 code = code1 `appOL`
375 code2 `appOL`
376 toOL [ SUBFC rlo r2lo (RIReg r1lo),
377 SUBFE rhi r2hi r1hi ]
378 return (ChildCode64 code rlo)
379
380 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
381 (expr_reg,expr_code) <- getSomeReg expr
382 (rlo, rhi) <- getNewRegPairNat II32
383 let mov_hi = LI rhi (ImmInt 0)
384 mov_lo = MR rlo expr_reg
385 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
386 rlo
387
388 iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
389 (expr_reg,expr_code) <- getSomeReg expr
390 (rlo, rhi) <- getNewRegPairNat II32
391 let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
392 mov_lo = MR rlo expr_reg
393 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
394 rlo
395 iselExpr64 expr
396 = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
397
398
399
400 getRegister :: CmmExpr -> NatM Register
401 getRegister e = do dflags <- getDynFlags
402 getRegister' dflags e
403
404 getRegister' :: DynFlags -> CmmExpr -> NatM Register
405
406 getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
407 | OSAIX <- platformOS (targetPlatform dflags) = do
408 let code dst = toOL [ LD II32 dst tocAddr ]
409 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
410 return (Any II32 code)
411 | target32Bit (targetPlatform dflags) = do
412 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
413 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
414 reg nilOL)
415 | otherwise = return (Fixed II64 toc nilOL)
416
417 getRegister' dflags (CmmReg reg)
418 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
419 (getRegisterReg (targetPlatform dflags) reg) nilOL)
420
421 getRegister' dflags tree@(CmmRegOff _ _)
422 = getRegister' dflags (mangleIndexTree dflags tree)
423
424 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
425 -- TO_W_(x), TO_W_(x >> 32)
426
427 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
428 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
429 | target32Bit (targetPlatform dflags) = do
430 ChildCode64 code rlo <- iselExpr64 x
431 return $ Fixed II32 (getHiVRegFromLo rlo) code
432
433 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
434 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
435 | target32Bit (targetPlatform dflags) = do
436 ChildCode64 code rlo <- iselExpr64 x
437 return $ Fixed II32 (getHiVRegFromLo rlo) code
438
439 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
440 | target32Bit (targetPlatform dflags) = do
441 ChildCode64 code rlo <- iselExpr64 x
442 return $ Fixed II32 rlo code
443
444 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
445 | target32Bit (targetPlatform dflags) = do
446 ChildCode64 code rlo <- iselExpr64 x
447 return $ Fixed II32 rlo code
448
449 getRegister' dflags (CmmLoad mem pk)
450 | not (isWord64 pk) = do
451 let platform = targetPlatform dflags
452 Amode addr addr_code <- getAmode D mem
453 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
454 addr_code `snocOL` LD format dst addr
455 return (Any format code)
456 | not (target32Bit (targetPlatform dflags)) = do
457 Amode addr addr_code <- getAmode DS mem
458 let code dst = addr_code `snocOL` LD II64 dst addr
459 return (Any II64 code)
460
461 where format = cmmTypeFormat pk
462
463 -- catch simple cases of zero- or sign-extended load
464 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
465 Amode addr addr_code <- getAmode D mem
466 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
467
468 getRegister' _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do
469 Amode addr addr_code <- getAmode D mem
470 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
471
472 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
473 Amode addr addr_code <- getAmode D mem
474 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
475
476 getRegister' _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do
477 Amode addr addr_code <- getAmode D mem
478 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
479
480 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
481
482 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
483 Amode addr addr_code <- getAmode D mem
484 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
485
486 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
487 Amode addr addr_code <- getAmode D mem
488 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
489
490 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
491 Amode addr addr_code <- getAmode D mem
492 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
493
494 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
495 Amode addr addr_code <- getAmode D mem
496 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
497
498 getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
499 Amode addr addr_code <- getAmode D mem
500 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
501
502 getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
503 -- lwa is DS-form. See Note [Power instruction format]
504 Amode addr addr_code <- getAmode DS mem
505 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
506
507 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
508 = case mop of
509 MO_Not rep -> triv_ucode_int rep NOT
510
511 MO_F_Neg w -> triv_ucode_float w FNEG
512 MO_S_Neg w -> triv_ucode_int w NEG
513
514 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
515 MO_FF_Conv W32 W64 -> conversionNop FF64 x
516
517 MO_FS_Conv from to -> coerceFP2Int from to x
518 MO_SF_Conv from to -> coerceInt2FP from to x
519
520 MO_SS_Conv from to
521 | from >= to -> conversionNop (intFormat to) x
522 | otherwise -> triv_ucode_int to (EXTS (intFormat from))
523
524 MO_UU_Conv from to
525 | from >= to -> conversionNop (intFormat to) x
526 | otherwise -> clearLeft from to
527
528 MO_XX_Conv _ to -> conversionNop (intFormat to) x
529
530 _ -> panic "PPC.CodeGen.getRegister: no match"
531
532 where
533 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
534 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
535
536 conversionNop new_format expr
537 = do e_code <- getRegister' dflags expr
538 return (swizzleRegisterRep e_code new_format)
539
540 clearLeft from to
541 = do (src1, code1) <- getSomeReg x
542 let arch_fmt = intFormat (wordWidth dflags)
543 arch_bits = widthInBits (wordWidth dflags)
544 size = widthInBits from
545 code dst = code1 `snocOL`
546 CLRLI arch_fmt dst src1 (arch_bits - size)
547 return (Any (intFormat to) code)
548
549 getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
550 = case mop of
551 MO_F_Eq _ -> condFltReg EQQ x y
552 MO_F_Ne _ -> condFltReg NE x y
553 MO_F_Gt _ -> condFltReg GTT x y
554 MO_F_Ge _ -> condFltReg GE x y
555 MO_F_Lt _ -> condFltReg LTT x y
556 MO_F_Le _ -> condFltReg LE x y
557
558 MO_Eq rep -> condIntReg EQQ rep x y
559 MO_Ne rep -> condIntReg NE rep x y
560
561 MO_S_Gt rep -> condIntReg GTT rep x y
562 MO_S_Ge rep -> condIntReg GE rep x y
563 MO_S_Lt rep -> condIntReg LTT rep x y
564 MO_S_Le rep -> condIntReg LE rep x y
565
566 MO_U_Gt rep -> condIntReg GU rep x y
567 MO_U_Ge rep -> condIntReg GEU rep x y
568 MO_U_Lt rep -> condIntReg LU rep x y
569 MO_U_Le rep -> condIntReg LEU rep x y
570
571 MO_F_Add w -> triv_float w FADD
572 MO_F_Sub w -> triv_float w FSUB
573 MO_F_Mul w -> triv_float w FMUL
574 MO_F_Quot w -> triv_float w FDIV
575
576 -- optimize addition with 32-bit immediate
577 -- (needed for PIC)
578 MO_Add W32 ->
579 case y of
580 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
581 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
582 CmmLit lit
583 -> do
584 (src, srcCode) <- getSomeReg x
585 let imm = litToImm lit
586 code dst = srcCode `appOL` toOL [
587 ADDIS dst src (HA imm),
588 ADD dst dst (RIImm (LO imm))
589 ]
590 return (Any II32 code)
591 _ -> trivialCode W32 True ADD x y
592
593 MO_Add rep -> trivialCode rep True ADD x y
594 MO_Sub rep ->
595 case y of
596 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
597 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
598 _ -> case x of
599 CmmLit (CmmInt imm _)
600 | Just _ <- makeImmediate rep True imm
601 -- subfi ('substract from' with immediate) doesn't exist
602 -> trivialCode rep True SUBFC y x
603 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
604
605 MO_Mul rep -> shiftMulCode rep True MULL x y
606 MO_S_MulMayOflo rep -> do
607 (src1, code1) <- getSomeReg x
608 (src2, code2) <- getSomeReg y
609 let
610 format = intFormat rep
611 code dst = code1 `appOL` code2
612 `appOL` toOL [ MULLO format dst src1 src2
613 , MFOV format dst
614 ]
615 return (Any format code)
616
617 MO_S_Quot rep -> divCode rep True x y
618 MO_U_Quot rep -> divCode rep False x y
619
620 MO_S_Rem rep -> remainder rep True x y
621 MO_U_Rem rep -> remainder rep False x y
622
623 MO_And rep -> case y of
624 (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
625 -> do
626 (src, srcCode) <- getSomeReg x
627 let clear_mask = if imm == -4 then 2 else 3
628 fmt = intFormat rep
629 code dst = srcCode
630 `appOL` unitOL (CLRRI fmt dst src clear_mask)
631 return (Any fmt code)
632 _ -> trivialCode rep False AND x y
633 MO_Or rep -> trivialCode rep False OR x y
634 MO_Xor rep -> trivialCode rep False XOR x y
635
636 MO_Shl rep -> shiftMulCode rep False SL x y
637 MO_S_Shr rep -> srCode rep True SRA x y
638 MO_U_Shr rep -> srCode rep False SR x y
639 _ -> panic "PPC.CodeGen.getRegister: no match"
640
641 where
642 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
643 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
644
645 remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
646 remainder rep sgn x y = do
647 let fmt = intFormat rep
648 tmp <- getNewRegNat fmt
649 code <- remainderCode rep sgn tmp x y
650 return (Any fmt code)
651
652
653 getRegister' _ (CmmLit (CmmInt i rep))
654 | Just imm <- makeImmediate rep True i
655 = let
656 code dst = unitOL (LI dst imm)
657 in
658 return (Any (intFormat rep) code)
659
660 getRegister' _ (CmmLit (CmmFloat f frep)) = do
661 lbl <- getNewLabelNat
662 dflags <- getDynFlags
663 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
664 Amode addr addr_code <- getAmode D dynRef
665 let format = floatFormat frep
666 code dst =
667 LDATA (Section ReadOnlyData lbl)
668 (Statics lbl [CmmStaticLit (CmmFloat f frep)])
669 `consOL` (addr_code `snocOL` LD format dst addr)
670 return (Any format code)
671
672 getRegister' dflags (CmmLit lit)
673 | target32Bit (targetPlatform dflags)
674 = let rep = cmmLitType dflags lit
675 imm = litToImm lit
676 code dst = toOL [
677 LIS dst (HA imm),
678 ADD dst dst (RIImm (LO imm))
679 ]
680 in return (Any (cmmTypeFormat rep) code)
681 | otherwise
682 = do lbl <- getNewLabelNat
683 dflags <- getDynFlags
684 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
685 Amode addr addr_code <- getAmode D dynRef
686 let rep = cmmLitType dflags lit
687 format = cmmTypeFormat rep
688 code dst =
689 LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
690 `consOL` (addr_code `snocOL` LD format dst addr)
691 return (Any format code)
692
693 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
694
695 -- extend?Rep: wrap integer expression of type `from`
696 -- in a conversion to `to`
697 extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr
698 extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x]
699
700 extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr
701 extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x]
702
703 -- -----------------------------------------------------------------------------
704 -- The 'Amode' type: Memory addressing modes passed up the tree.
705
706 data Amode
707 = Amode AddrMode InstrBlock
708
709 {-
710 Now, given a tree (the argument to a CmmLoad) that references memory,
711 produce a suitable addressing mode.
712
713 A Rule of the Game (tm) for Amodes: use of the addr bit must
714 immediately follow use of the code part, since the code part puts
715 values in registers which the addr then refers to. So you can't put
716 anything in between, lest it overwrite some of those registers. If
717 you need to do some other computation between the code part and use of
718 the addr bit, first store the effective address from the amode in a
719 temporary, then do the other computation, and then use the temporary:
720
721 code
722 LEA amode, tmp
723 ... other computation ...
724 ... (tmp) ...
725 -}
726
727 {- Note [Power instruction format]
728 In some instructions the 16 bit offset must be a multiple of 4, i.e.
729 the two least significant bits must be zero. The "Power ISA" specification
730 calls these instruction formats "DS-FORM" and the instructions with
731 arbitrary 16 bit offsets are "D-FORM".
732
733 The Power ISA specification document can be obtained from www.power.org.
734 -}
735 data InstrForm = D | DS
736
737 getAmode :: InstrForm -> CmmExpr -> NatM Amode
738 getAmode inf tree@(CmmRegOff _ _)
739 = do dflags <- getDynFlags
740 getAmode inf (mangleIndexTree dflags tree)
741
742 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
743 | Just off <- makeImmediate W32 True (-i)
744 = do
745 (reg, code) <- getSomeReg x
746 return (Amode (AddrRegImm reg off) code)
747
748
749 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
750 | Just off <- makeImmediate W32 True i
751 = do
752 (reg, code) <- getSomeReg x
753 return (Amode (AddrRegImm reg off) code)
754
755 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
756 | Just off <- makeImmediate W64 True (-i)
757 = do
758 (reg, code) <- getSomeReg x
759 return (Amode (AddrRegImm reg off) code)
760
761
762 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
763 | Just off <- makeImmediate W64 True i
764 = do
765 (reg, code) <- getSomeReg x
766 return (Amode (AddrRegImm reg off) code)
767
768 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
769 | Just off <- makeImmediate W64 True (-i)
770 = do
771 (reg, code) <- getSomeReg x
772 (reg', off', code') <-
773 if i `mod` 4 == 0
774 then do return (reg, off, code)
775 else do
776 tmp <- getNewRegNat II64
777 return (tmp, ImmInt 0,
778 code `snocOL` ADD tmp reg (RIImm off))
779 return (Amode (AddrRegImm reg' off') code')
780
781 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
782 | Just off <- makeImmediate W64 True i
783 = do
784 (reg, code) <- getSomeReg x
785 (reg', off', code') <-
786 if i `mod` 4 == 0
787 then do return (reg, off, code)
788 else do
789 tmp <- getNewRegNat II64
790 return (tmp, ImmInt 0,
791 code `snocOL` ADD tmp reg (RIImm off))
792 return (Amode (AddrRegImm reg' off') code')
793
794 -- optimize addition with 32-bit immediate
795 -- (needed for PIC)
796 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
797 = do
798 dflags <- getDynFlags
799 (src, srcCode) <- getSomeReg x
800 let imm = litToImm lit
801 case () of
802 _ | OSAIX <- platformOS (targetPlatform dflags)
803 , isCmmLabelType lit ->
804 -- HA16/LO16 relocations on labels not supported on AIX
805 return (Amode (AddrRegImm src imm) srcCode)
806 | otherwise -> do
807 tmp <- getNewRegNat II32
808 let code = srcCode `snocOL` ADDIS tmp src (HA imm)
809 return (Amode (AddrRegImm tmp (LO imm)) code)
810 where
811 isCmmLabelType (CmmLabel {}) = True
812 isCmmLabelType (CmmLabelOff {}) = True
813 isCmmLabelType (CmmLabelDiffOff {}) = True
814 isCmmLabelType _ = False
815
816 getAmode _ (CmmLit lit)
817 = do
818 dflags <- getDynFlags
819 case platformArch $ targetPlatform dflags of
820 ArchPPC -> do
821 tmp <- getNewRegNat II32
822 let imm = litToImm lit
823 code = unitOL (LIS tmp (HA imm))
824 return (Amode (AddrRegImm tmp (LO imm)) code)
825 _ -> do -- TODO: Load from TOC,
826 -- see getRegister' _ (CmmLit lit)
827 tmp <- getNewRegNat II64
828 let imm = litToImm lit
829 code = toOL [
830 LIS tmp (HIGHESTA imm),
831 OR tmp tmp (RIImm (HIGHERA imm)),
832 SL II64 tmp tmp (RIImm (ImmInt 32)),
833 ORIS tmp tmp (HA imm)
834 ]
835 return (Amode (AddrRegImm tmp (LO imm)) code)
836
837 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
838 = do
839 (regX, codeX) <- getSomeReg x
840 (regY, codeY) <- getSomeReg y
841 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
842
843 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
844 = do
845 (regX, codeX) <- getSomeReg x
846 (regY, codeY) <- getSomeReg y
847 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
848
849 getAmode _ other
850 = do
851 (reg, code) <- getSomeReg other
852 let
853 off = ImmInt 0
854 return (Amode (AddrRegImm reg off) code)
855
856
857 -- The 'CondCode' type: Condition codes passed up the tree.
858 data CondCode
859 = CondCode Bool Cond InstrBlock
860
861 -- Set up a condition code for a conditional branch.
862
863 getCondCode :: CmmExpr -> NatM CondCode
864
865 -- almost the same as everywhere else - but we need to
866 -- extend small integers to 32 bit or 64 bit first
867
868 getCondCode (CmmMachOp mop [x, y])
869 = do
870 case mop of
871 MO_F_Eq W32 -> condFltCode EQQ x y
872 MO_F_Ne W32 -> condFltCode NE x y
873 MO_F_Gt W32 -> condFltCode GTT x y
874 MO_F_Ge W32 -> condFltCode GE x y
875 MO_F_Lt W32 -> condFltCode LTT x y
876 MO_F_Le W32 -> condFltCode LE x y
877
878 MO_F_Eq W64 -> condFltCode EQQ x y
879 MO_F_Ne W64 -> condFltCode NE x y
880 MO_F_Gt W64 -> condFltCode GTT x y
881 MO_F_Ge W64 -> condFltCode GE x y
882 MO_F_Lt W64 -> condFltCode LTT x y
883 MO_F_Le W64 -> condFltCode LE x y
884
885 MO_Eq rep -> condIntCode EQQ rep x y
886 MO_Ne rep -> condIntCode NE rep x y
887
888 MO_S_Gt rep -> condIntCode GTT rep x y
889 MO_S_Ge rep -> condIntCode GE rep x y
890 MO_S_Lt rep -> condIntCode LTT rep x y
891 MO_S_Le rep -> condIntCode LE rep x y
892
893 MO_U_Gt rep -> condIntCode GU rep x y
894 MO_U_Ge rep -> condIntCode GEU rep x y
895 MO_U_Lt rep -> condIntCode LU rep x y
896 MO_U_Le rep -> condIntCode LEU rep x y
897
898 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
899
900 getCondCode _ = panic "getCondCode(2)(powerpc)"
901
902
903 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
904 -- passed back up the tree.
905
906 condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
907 condIntCode cond width x y = do
908 dflags <- getDynFlags
909 condIntCode' (target32Bit (targetPlatform dflags)) cond width x y
910
911 condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
912
913 -- simple code for 64-bit on 32-bit platforms
914 condIntCode' True cond W64 x y
915 | condUnsigned cond
916 = do
917 ChildCode64 code_x x_lo <- iselExpr64 x
918 ChildCode64 code_y y_lo <- iselExpr64 y
919 let x_hi = getHiVRegFromLo x_lo
920 y_hi = getHiVRegFromLo y_lo
921 end_lbl <- getBlockIdNat
922 let code = code_x `appOL` code_y `appOL` toOL
923 [ CMPL II32 x_hi (RIReg y_hi)
924 , BCC NE end_lbl Nothing
925 , CMPL II32 x_lo (RIReg y_lo)
926 , BCC ALWAYS end_lbl Nothing
927
928 , NEWBLOCK end_lbl
929 ]
930 return (CondCode False cond code)
931 | otherwise
932 = do
933 ChildCode64 code_x x_lo <- iselExpr64 x
934 ChildCode64 code_y y_lo <- iselExpr64 y
935 let x_hi = getHiVRegFromLo x_lo
936 y_hi = getHiVRegFromLo y_lo
937 end_lbl <- getBlockIdNat
938 cmp_lo <- getBlockIdNat
939 let code = code_x `appOL` code_y `appOL` toOL
940 [ CMP II32 x_hi (RIReg y_hi)
941 , BCC NE end_lbl Nothing
942 , CMP II32 x_hi (RIImm (ImmInt 0))
943 , BCC LE cmp_lo Nothing
944 , CMPL II32 x_lo (RIReg y_lo)
945 , BCC ALWAYS end_lbl Nothing
946 , CMPL II32 y_lo (RIReg x_lo)
947 , BCC ALWAYS end_lbl Nothing
948
949 , NEWBLOCK end_lbl
950 ]
951 return (CondCode False cond code)
952
953 -- optimize pointer tag checks. Operation andi. sets condition register
954 -- so cmpi ..., 0 is redundant.
955 condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
956 (CmmLit (CmmInt 0 _))
957 | not $ condUnsigned cond,
958 Just src2 <- makeImmediate rep False imm
959 = do
960 (src1, code) <- getSomeReg x
961 let code' = code `snocOL` AND r0 src1 (RIImm src2)
962 return (CondCode False cond code')
963
964 condIntCode' _ cond width x (CmmLit (CmmInt y rep))
965 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
966 = do
967 let op_len = max W32 width
968 let extend = extendSExpr width op_len
969 (src1, code) <- getSomeReg (extend x)
970 let format = intFormat op_len
971 code' = code `snocOL`
972 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
973 return (CondCode False cond code')
974
975 condIntCode' _ cond width x y = do
976 let op_len = max W32 width
977 let extend = if condUnsigned cond then extendUExpr width op_len
978 else extendSExpr width op_len
979 (src1, code1) <- getSomeReg (extend x)
980 (src2, code2) <- getSomeReg (extend y)
981 let format = intFormat op_len
982 code' = code1 `appOL` code2 `snocOL`
983 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
984 return (CondCode False cond code')
985
986 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
987 condFltCode cond x y = do
988 (src1, code1) <- getSomeReg x
989 (src2, code2) <- getSomeReg y
990 let
991 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
992 code'' = case cond of -- twiddle CR to handle unordered case
993 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
994 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
995 _ -> code'
996 where
997 ltbit = 0 ; eqbit = 2 ; gtbit = 1
998 return (CondCode True cond code'')
999
1000
1001
1002 -- -----------------------------------------------------------------------------
1003 -- Generating assignments
1004
1005 -- Assignments are really at the heart of the whole code generation
1006 -- business. Almost all top-level nodes of any real importance are
1007 -- assignments, which correspond to loads, stores, or register
1008 -- transfers. If we're really lucky, some of the register transfers
1009 -- will go away, because we can use the destination register to
1010 -- complete the code generation for the right hand side. This only
1011 -- fails when the right hand side is forced into a fixed register
1012 -- (e.g. the result of a call).
1013
1014 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1015 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1016
1017 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1018 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1019
1020 assignMem_IntCode pk addr src = do
1021 (srcReg, code) <- getSomeReg src
1022 Amode dstAddr addr_code <- case pk of
1023 II64 -> getAmode DS addr
1024 _ -> getAmode D addr
1025 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
1026
1027 -- dst is a reg, but src could be anything
1028 assignReg_IntCode _ reg src
1029 = do
1030 dflags <- getDynFlags
1031 let dst = getRegisterReg (targetPlatform dflags) reg
1032 r <- getRegister src
1033 return $ case r of
1034 Any _ code -> code dst
1035 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
1036
1037
1038
1039 -- Easy, isn't it?
1040 assignMem_FltCode = assignMem_IntCode
1041 assignReg_FltCode = assignReg_IntCode
1042
1043
1044
1045 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1046
1047 genJump (CmmLit (CmmLabel lbl))
1048 = return (unitOL $ JMP lbl)
1049
1050 genJump tree
1051 = do
1052 dflags <- getDynFlags
1053 genJump' tree (platformToGCP (targetPlatform dflags))
1054
1055 genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
1056
1057 genJump' tree (GCP64ELF 1)
1058 = do
1059 (target,code) <- getSomeReg tree
1060 return (code
1061 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1062 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1063 `snocOL` MTCTR r11
1064 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1065 `snocOL` BCTR [] Nothing)
1066
1067 genJump' tree (GCP64ELF 2)
1068 = do
1069 (target,code) <- getSomeReg tree
1070 return (code
1071 `snocOL` MR r12 target
1072 `snocOL` MTCTR r12
1073 `snocOL` BCTR [] Nothing)
1074
1075 genJump' tree _
1076 = do
1077 (target,code) <- getSomeReg tree
1078 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
1079
1080 -- -----------------------------------------------------------------------------
1081 -- Unconditional branches
1082 genBranch :: BlockId -> NatM InstrBlock
1083 genBranch = return . toOL . mkJumpInstr
1084
1085
1086 -- -----------------------------------------------------------------------------
1087 -- Conditional jumps
1088
1089 {-
1090 Conditional jumps are always to local labels, so we can use branch
1091 instructions. We peek at the arguments to decide what kind of
1092 comparison to do.
1093 -}
1094
1095
1096 genCondJump
1097 :: BlockId -- the branch target
1098 -> CmmExpr -- the condition on which to branch
1099 -> Maybe Bool
1100 -> NatM InstrBlock
1101
1102 genCondJump id bool prediction = do
1103 CondCode _ cond code <- getCondCode bool
1104 return (code `snocOL` BCC cond id prediction)
1105
1106
1107
1108 -- -----------------------------------------------------------------------------
1109 -- Generating C calls
1110
1111 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1112 -- @get_arg@, which moves the arguments to the correct registers/stack
1113 -- locations. Apart from that, the code is easy.
1114
1115 genCCall :: ForeignTarget -- function to call
1116 -> [CmmFormal] -- where to put the result
1117 -> [CmmActual] -- arguments (of mixed type)
1118 -> NatM InstrBlock
1119 genCCall (PrimTarget MO_WriteBarrier) _ _
1120 = return $ unitOL LWSYNC
1121
1122 genCCall (PrimTarget MO_Touch) _ _
1123 = return $ nilOL
1124
1125 genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
1126 = return $ nilOL
1127
1128 genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
1129 = do dflags <- getDynFlags
1130 let platform = targetPlatform dflags
1131 fmt = intFormat width
1132 reg_dst = getRegisterReg platform (CmmLocal dst)
1133 (instr, n_code) <- case amop of
1134 AMO_Add -> getSomeRegOrImm ADD True reg_dst
1135 AMO_Sub -> case n of
1136 CmmLit (CmmInt i _)
1137 | Just imm <- makeImmediate width True (-i)
1138 -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
1139 _
1140 -> do
1141 (n_reg, n_code) <- getSomeReg n
1142 return (SUBF reg_dst n_reg reg_dst, n_code)
1143 AMO_And -> getSomeRegOrImm AND False reg_dst
1144 AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
1145 return (NAND reg_dst reg_dst n_reg, n_code)
1146 AMO_Or -> getSomeRegOrImm OR False reg_dst
1147 AMO_Xor -> getSomeRegOrImm XOR False reg_dst
1148 Amode addr_reg addr_code <- getAmodeIndex addr
1149 lbl_retry <- getBlockIdNat
1150 return $ n_code `appOL` addr_code
1151 `appOL` toOL [ HWSYNC
1152 , BCC ALWAYS lbl_retry Nothing
1153
1154 , NEWBLOCK lbl_retry
1155 , LDR fmt reg_dst addr_reg
1156 , instr
1157 , STC fmt reg_dst addr_reg
1158 , BCC NE lbl_retry (Just False)
1159 , ISYNC
1160 ]
1161 where
1162 getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
1163 = do
1164 (regX, codeX) <- getSomeReg x
1165 (regY, codeY) <- getSomeReg y
1166 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1167 getAmodeIndex other
1168 = do
1169 (reg, code) <- getSomeReg other
1170 return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
1171 getSomeRegOrImm op sign dst
1172 = case n of
1173 CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
1174 -> return (op dst dst (RIImm imm), nilOL)
1175 _
1176 -> do
1177 (n_reg, n_code) <- getSomeReg n
1178 return (op dst dst (RIReg n_reg), n_code)
1179
1180 genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
1181 = do dflags <- getDynFlags
1182 let platform = targetPlatform dflags
1183 fmt = intFormat width
1184 reg_dst = getRegisterReg platform (CmmLocal dst)
1185 form = if widthInBits width == 64 then DS else D
1186 Amode addr_reg addr_code <- getAmode form addr
1187 lbl_end <- getBlockIdNat
1188 return $ addr_code `appOL` toOL [ HWSYNC
1189 , LD fmt reg_dst addr_reg
1190 , CMP fmt reg_dst (RIReg reg_dst)
1191 , BCC NE lbl_end (Just False)
1192 , BCC ALWAYS lbl_end Nothing
1193 -- See Note [Seemingly useless cmp and bne]
1194 , NEWBLOCK lbl_end
1195 , ISYNC
1196 ]
1197
1198 -- Note [Seemingly useless cmp and bne]
1199 -- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
1200 -- the second paragraph says that isync may complete before storage accesses
1201 -- "associated" with a preceding instruction have been performed. The cmp
1202 -- operation and the following bne introduce a data and control dependency
1203 -- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
1204 -- Fetch).
1205 -- This is also what gcc does.
1206
1207
1208 genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1209 code <- assignMem_IntCode (intFormat width) addr val
1210 return $ unitOL(HWSYNC) `appOL` code
1211
1212 genCCall (PrimTarget (MO_Clz width)) [dst] [src]
1213 = do dflags <- getDynFlags
1214 let platform = targetPlatform dflags
1215 reg_dst = getRegisterReg platform (CmmLocal dst)
1216 if target32Bit platform && width == W64
1217 then do
1218 ChildCode64 code vr_lo <- iselExpr64 src
1219 lbl1 <- getBlockIdNat
1220 lbl2 <- getBlockIdNat
1221 lbl3 <- getBlockIdNat
1222 let vr_hi = getHiVRegFromLo vr_lo
1223 cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
1224 , BCC NE lbl2 Nothing
1225 , BCC ALWAYS lbl1 Nothing
1226
1227 , NEWBLOCK lbl1
1228 , CNTLZ II32 reg_dst vr_lo
1229 , ADD reg_dst reg_dst (RIImm (ImmInt 32))
1230 , BCC ALWAYS lbl3 Nothing
1231
1232 , NEWBLOCK lbl2
1233 , CNTLZ II32 reg_dst vr_hi
1234 , BCC ALWAYS lbl3 Nothing
1235
1236 , NEWBLOCK lbl3
1237 ]
1238 return $ code `appOL` cntlz
1239 else do
1240 let format = if width == W64 then II64 else II32
1241 (s_reg, s_code) <- getSomeReg src
1242 (pre, reg , post) <-
1243 case width of
1244 W64 -> return (nilOL, s_reg, nilOL)
1245 W32 -> return (nilOL, s_reg, nilOL)
1246 W16 -> do
1247 reg_tmp <- getNewRegNat format
1248 return
1249 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
1250 , reg_tmp
1251 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
1252 )
1253 W8 -> do
1254 reg_tmp <- getNewRegNat format
1255 return
1256 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
1257 , reg_tmp
1258 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
1259 )
1260 _ -> panic "genCall: Clz wrong format"
1261 let cntlz = unitOL (CNTLZ format reg_dst reg)
1262 return $ s_code `appOL` pre `appOL` cntlz `appOL` post
1263
1264 genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
1265 = do dflags <- getDynFlags
1266 let platform = targetPlatform dflags
1267 reg_dst = getRegisterReg platform (CmmLocal dst)
1268 if target32Bit platform && width == W64
1269 then do
1270 let format = II32
1271 ChildCode64 code vr_lo <- iselExpr64 src
1272 lbl1 <- getBlockIdNat
1273 lbl2 <- getBlockIdNat
1274 lbl3 <- getBlockIdNat
1275 x' <- getNewRegNat format
1276 x'' <- getNewRegNat format
1277 r' <- getNewRegNat format
1278 cnttzlo <- cnttz format reg_dst vr_lo
1279 let vr_hi = getHiVRegFromLo vr_lo
1280 cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
1281 , BCC NE lbl2 Nothing
1282 , BCC ALWAYS lbl1 Nothing
1283
1284 , NEWBLOCK lbl1
1285 , ADD x' vr_hi (RIImm (ImmInt (-1)))
1286 , ANDC x'' x' vr_hi
1287 , CNTLZ format r' x''
1288 -- 32 + (32 - clz(x''))
1289 , SUBFC reg_dst r' (RIImm (ImmInt 64))
1290 , BCC ALWAYS lbl3 Nothing
1291
1292 , NEWBLOCK lbl2
1293 ]
1294 `appOL` cnttzlo `appOL`
1295 toOL [ BCC ALWAYS lbl3 Nothing
1296
1297 , NEWBLOCK lbl3
1298 ]
1299 return $ code `appOL` cnttz64
1300 else do
1301 let format = if width == W64 then II64 else II32
1302 (s_reg, s_code) <- getSomeReg src
1303 (reg_ctz, pre_code) <-
1304 case width of
1305 W64 -> return (s_reg, nilOL)
1306 W32 -> return (s_reg, nilOL)
1307 W16 -> do
1308 reg_tmp <- getNewRegNat format
1309 return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
1310 W8 -> do
1311 reg_tmp <- getNewRegNat format
1312 return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
1313 _ -> panic "genCall: Ctz wrong format"
1314 ctz_code <- cnttz format reg_dst reg_ctz
1315 return $ s_code `appOL` pre_code `appOL` ctz_code
1316 where
1317 -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
1318 -- see Henry S. Warren, Hacker's Delight, p 107
1319 cnttz format dst src = do
1320 let format_bits = 8 * formatInBytes format
1321 x' <- getNewRegNat format
1322 x'' <- getNewRegNat format
1323 r' <- getNewRegNat format
1324 return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
1325 , ANDC x'' x' src
1326 , CNTLZ format r' x''
1327 , SUBFC dst r' (RIImm (ImmInt (format_bits)))
1328 ]
1329
1330 genCCall target dest_regs argsAndHints
1331 = do dflags <- getDynFlags
1332 let platform = targetPlatform dflags
1333 case target of
1334 PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width
1335 dest_regs argsAndHints
1336 PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width
1337 dest_regs argsAndHints
1338 PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
1339 argsAndHints
1340 PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
1341 argsAndHints
1342 PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
1343 PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
1344 PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
1345 PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
1346 dest_regs argsAndHints
1347 PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
1348 dest_regs argsAndHints
1349 PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
1350 PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
1351 _ -> genCCall' dflags (platformToGCP platform)
1352 target dest_regs argsAndHints
1353 where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
1354 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1355 reg_r = getRegisterReg platform (CmmLocal res_r)
1356 remainderCode width signed reg_q arg_x arg_y
1357 <*> pure reg_r
1358
1359 divOp1 _ _ _ _ _
1360 = panic "genCCall: Wrong number of arguments for divOp1"
1361 divOp2 platform width [res_q, res_r]
1362 [arg_x_high, arg_x_low, arg_y]
1363 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1364 reg_r = getRegisterReg platform (CmmLocal res_r)
1365 fmt = intFormat width
1366 half = 4 * (formatInBytes fmt)
1367 (xh_reg, xh_code) <- getSomeReg arg_x_high
1368 (xl_reg, xl_code) <- getSomeReg arg_x_low
1369 (y_reg, y_code) <- getSomeReg arg_y
1370 s <- getNewRegNat fmt
1371 b <- getNewRegNat fmt
1372 v <- getNewRegNat fmt
1373 vn1 <- getNewRegNat fmt
1374 vn0 <- getNewRegNat fmt
1375 un32 <- getNewRegNat fmt
1376 tmp <- getNewRegNat fmt
1377 un10 <- getNewRegNat fmt
1378 un1 <- getNewRegNat fmt
1379 un0 <- getNewRegNat fmt
1380 q1 <- getNewRegNat fmt
1381 rhat <- getNewRegNat fmt
1382 tmp1 <- getNewRegNat fmt
1383 q0 <- getNewRegNat fmt
1384 un21 <- getNewRegNat fmt
1385 again1 <- getBlockIdNat
1386 no1 <- getBlockIdNat
1387 then1 <- getBlockIdNat
1388 endif1 <- getBlockIdNat
1389 again2 <- getBlockIdNat
1390 no2 <- getBlockIdNat
1391 then2 <- getBlockIdNat
1392 endif2 <- getBlockIdNat
1393 return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
1394 -- see Hacker's Delight p 196 Figure 9-3
1395 toOL [ -- b = 2 ^ (bits_in_word / 2)
1396 LI b (ImmInt 1)
1397 , SL fmt b b (RIImm (ImmInt half))
1398 -- s = clz(y)
1399 , CNTLZ fmt s y_reg
1400 -- v = y << s
1401 , SL fmt v y_reg (RIReg s)
1402 -- vn1 = upper half of v
1403 , SR fmt vn1 v (RIImm (ImmInt half))
1404 -- vn0 = lower half of v
1405 , CLRLI fmt vn0 v half
1406 -- un32 = (u1 << s)
1407 -- | (u0 >> (bits_in_word - s))
1408 , SL fmt un32 xh_reg (RIReg s)
1409 , SUBFC tmp s
1410 (RIImm (ImmInt (8 * formatInBytes fmt)))
1411 , SR fmt tmp xl_reg (RIReg tmp)
1412 , OR un32 un32 (RIReg tmp)
1413 -- un10 = u0 << s
1414 , SL fmt un10 xl_reg (RIReg s)
1415 -- un1 = upper half of un10
1416 , SR fmt un1 un10 (RIImm (ImmInt half))
1417 -- un0 = lower half of un10
1418 , CLRLI fmt un0 un10 half
1419 -- q1 = un32/vn1
1420 , DIV fmt False q1 un32 vn1
1421 -- rhat = un32 - q1*vn1
1422 , MULL fmt tmp q1 (RIReg vn1)
1423 , SUBF rhat tmp un32
1424 , BCC ALWAYS again1 Nothing
1425
1426 , NEWBLOCK again1
1427 -- if (q1 >= b || q1*vn0 > b*rhat + un1)
1428 , CMPL fmt q1 (RIReg b)
1429 , BCC GEU then1 Nothing
1430 , BCC ALWAYS no1 Nothing
1431
1432 , NEWBLOCK no1
1433 , MULL fmt tmp q1 (RIReg vn0)
1434 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1435 , ADD tmp1 tmp1 (RIReg un1)
1436 , CMPL fmt tmp (RIReg tmp1)
1437 , BCC LEU endif1 Nothing
1438 , BCC ALWAYS then1 Nothing
1439
1440 , NEWBLOCK then1
1441 -- q1 = q1 - 1
1442 , ADD q1 q1 (RIImm (ImmInt (-1)))
1443 -- rhat = rhat + vn1
1444 , ADD rhat rhat (RIReg vn1)
1445 -- if (rhat < b) goto again1
1446 , CMPL fmt rhat (RIReg b)
1447 , BCC LTT again1 Nothing
1448 , BCC ALWAYS endif1 Nothing
1449
1450 , NEWBLOCK endif1
1451 -- un21 = un32*b + un1 - q1*v
1452 , SL fmt un21 un32 (RIImm (ImmInt half))
1453 , ADD un21 un21 (RIReg un1)
1454 , MULL fmt tmp q1 (RIReg v)
1455 , SUBF un21 tmp un21
1456 -- compute second quotient digit
1457 -- q0 = un21/vn1
1458 , DIV fmt False q0 un21 vn1
1459 -- rhat = un21- q0*vn1
1460 , MULL fmt tmp q0 (RIReg vn1)
1461 , SUBF rhat tmp un21
1462 , BCC ALWAYS again2 Nothing
1463
1464 , NEWBLOCK again2
1465 -- if (q0>b || q0*vn0 > b*rhat + un0)
1466 , CMPL fmt q0 (RIReg b)
1467 , BCC GEU then2 Nothing
1468 , BCC ALWAYS no2 Nothing
1469
1470 , NEWBLOCK no2
1471 , MULL fmt tmp q0 (RIReg vn0)
1472 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1473 , ADD tmp1 tmp1 (RIReg un0)
1474 , CMPL fmt tmp (RIReg tmp1)
1475 , BCC LEU endif2 Nothing
1476 , BCC ALWAYS then2 Nothing
1477
1478 , NEWBLOCK then2
1479 -- q0 = q0 - 1
1480 , ADD q0 q0 (RIImm (ImmInt (-1)))
1481 -- rhat = rhat + vn1
1482 , ADD rhat rhat (RIReg vn1)
1483 -- if (rhat<b) goto again2
1484 , CMPL fmt rhat (RIReg b)
1485 , BCC LTT again2 Nothing
1486 , BCC ALWAYS endif2 Nothing
1487
1488 , NEWBLOCK endif2
1489 -- compute remainder
1490 -- r = (un21*b + un0 - q0*v) >> s
1491 , SL fmt reg_r un21 (RIImm (ImmInt half))
1492 , ADD reg_r reg_r (RIReg un0)
1493 , MULL fmt tmp q0 (RIReg v)
1494 , SUBF reg_r tmp reg_r
1495 , SR fmt reg_r reg_r (RIReg s)
1496 -- compute quotient
1497 -- q = q1*b + q0
1498 , SL fmt reg_q q1 (RIImm (ImmInt half))
1499 , ADD reg_q reg_q (RIReg q0)
1500 ]
1501 divOp2 _ _ _ _
1502 = panic "genCCall: Wrong number of arguments for divOp2"
1503 multOp2 platform width [res_h, res_l] [arg_x, arg_y]
1504 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1505 reg_l = getRegisterReg platform (CmmLocal res_l)
1506 fmt = intFormat width
1507 (x_reg, x_code) <- getSomeReg arg_x
1508 (y_reg, y_code) <- getSomeReg arg_y
1509 return $ y_code `appOL` x_code
1510 `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
1511 , MULHU fmt reg_h x_reg y_reg
1512 ]
1513 multOp2 _ _ _ _
1514 = panic "genCall: Wrong number of arguments for multOp2"
1515 add2Op platform [res_h, res_l] [arg_x, arg_y]
1516 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1517 reg_l = getRegisterReg platform (CmmLocal res_l)
1518 (x_reg, x_code) <- getSomeReg arg_x
1519 (y_reg, y_code) <- getSomeReg arg_y
1520 return $ y_code `appOL` x_code
1521 `appOL` toOL [ LI reg_h (ImmInt 0)
1522 , ADDC reg_l x_reg y_reg
1523 , ADDZE reg_h reg_h
1524 ]
1525 add2Op _ _ _
1526 = panic "genCCall: Wrong number of arguments/results for add2"
1527
1528 addcOp platform [res_r, res_c] [arg_x, arg_y]
1529 = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
1530 addcOp _ _ _
1531 = panic "genCCall: Wrong number of arguments/results for addc"
1532
1533 -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
1534 -- which is 0 for borrow and 1 otherwise. We need 1 and 0
1535 -- so xor with 1.
1536 subcOp platform [res_r, res_c] [arg_x, arg_y]
1537 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1538 reg_c = getRegisterReg platform (CmmLocal res_c)
1539 (x_reg, x_code) <- getSomeReg arg_x
1540 (y_reg, y_code) <- getSomeReg arg_y
1541 return $ y_code `appOL` x_code
1542 `appOL` toOL [ LI reg_c (ImmInt 0)
1543 , SUBFC reg_r y_reg (RIReg x_reg)
1544 , ADDZE reg_c reg_c
1545 , XOR reg_c reg_c (RIImm (ImmInt 1))
1546 ]
1547 subcOp _ _ _
1548 = panic "genCCall: Wrong number of arguments/results for subc"
1549 addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
1550 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1551 reg_c = getRegisterReg platform (CmmLocal res_c)
1552 (x_reg, x_code) <- getSomeReg arg_x
1553 (y_reg, y_code) <- getSomeReg arg_y
1554 return $ y_code `appOL` x_code
1555 `appOL` toOL [ instr reg_r y_reg x_reg,
1556 -- SUBFO argument order reversed!
1557 MFOV (intFormat width) reg_c
1558 ]
1559 addSubCOp _ _ _ _ _
1560 = panic "genCall: Wrong number of arguments/results for addC"
1561 fabs platform [res] [arg]
1562 = do let res_r = getRegisterReg platform (CmmLocal res)
1563 (arg_reg, arg_code) <- getSomeReg arg
1564 return $ arg_code `snocOL` FABS res_r arg_reg
1565 fabs _ _ _
1566 = panic "genCall: Wrong number of arguments/results for fabs"
1567
1568 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
1569 data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX
1570
1571 platformToGCP :: Platform -> GenCCallPlatform
1572 platformToGCP platform
1573 = case platformOS platform of
1574 OSAIX -> GCPAIX
1575 _ -> case platformArch platform of
1576 ArchPPC -> GCP32ELF
1577 ArchPPC_64 ELF_V1 -> GCP64ELF 1
1578 ArchPPC_64 ELF_V2 -> GCP64ELF 2
1579 _ -> panic "platformToGCP: Not PowerPC"
1580
1581
1582 genCCall'
1583 :: DynFlags
1584 -> GenCCallPlatform
1585 -> ForeignTarget -- function to call
1586 -> [CmmFormal] -- where to put the result
1587 -> [CmmActual] -- arguments (of mixed type)
1588 -> NatM InstrBlock
1589
1590 {-
1591 PowerPC Linux uses the System V Release 4 Calling Convention
1592 for PowerPC. It is described in the
1593 "System V Application Binary Interface PowerPC Processor Supplement".
1594
1595 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1596 64-bit PowerPC. It is specified in
1597 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9"
1598 (PPC64 ELF v1.9).
1599
1600 PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit
1601 ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement"
1602 (PPC64 ELF v2).
1603
1604 AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian
1605 32-Bit Hardware Implementation"
1606
1607 All four conventions are similar:
1608 Parameters may be passed in general-purpose registers starting at r3, in
1609 floating point registers starting at f1, or on the stack.
1610
1611 But there are substantial differences:
1612 * The number of registers used for parameter passing and the exact set of
1613 nonvolatile registers differs (see MachRegs.hs).
1614 * On AIX and 64-bit ELF, stack space is always reserved for parameters,
1615 even if they are passed in registers. The called routine may choose to
1616 save parameters from registers to the corresponding space on the stack.
1617 * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when
1618 a floating point parameter is passed in an FPR.
1619 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1620 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1621 AIX just treats an I64 likt two separate I32s (high word first).
1622 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1623 4-byte aligned like everything else on AIX.
1624 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1625 PowerPC Linux does not agree, so neither do we.
1626
1627 According to all conventions, the parameter area should be part of the
1628 caller's stack frame, allocated in the caller's prologue code (large enough
1629 to hold the parameter lists for all called routines). The NCG already
1630 uses the stack for register spilling, leaving 64 bytes free at the top.
1631 If we need a larger parameter area than that, we increase the size
1632 of the stack frame just before ccalling.
1633 -}
1634
1635
1636 genCCall' dflags gcp target dest_regs args
1637 = do
1638 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1639 (zip3 args argReps argHints)
1640 allArgRegs
1641 (allFPArgRegs platform)
1642 initialStackOffset
1643 nilOL []
1644
1645 (labelOrExpr, reduceToFF32) <- case target of
1646 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1647 uses_pic_base_implicitly
1648 return (Left lbl, False)
1649 ForeignTarget expr _ -> do
1650 uses_pic_base_implicitly
1651 return (Right expr, False)
1652 PrimTarget mop -> outOfLineMachOp mop
1653
1654 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1655 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
1656
1657 case labelOrExpr of
1658 Left lbl -> do -- the linker does all the work for us
1659 return ( codeBefore
1660 `snocOL` BL lbl usedRegs
1661 `appOL` maybeNOP -- some ABI require a NOP after BL
1662 `appOL` codeAfter)
1663 Right dyn -> do -- implement call through function pointer
1664 (dynReg, dynCode) <- getSomeReg dyn
1665 case gcp of
1666 GCP64ELF 1 -> return ( dynCode
1667 `appOL` codeBefore
1668 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40))
1669 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1670 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1671 `snocOL` MTCTR r11
1672 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1673 `snocOL` BCTRL usedRegs
1674 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40))
1675 `appOL` codeAfter)
1676 GCP64ELF 2 -> return ( dynCode
1677 `appOL` codeBefore
1678 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24))
1679 `snocOL` MR r12 dynReg
1680 `snocOL` MTCTR r12
1681 `snocOL` BCTRL usedRegs
1682 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24))
1683 `appOL` codeAfter)
1684 GCPAIX -> return ( dynCode
1685 -- AIX/XCOFF follows the PowerOPEN ABI
1686 -- which is quite similiar to LinuxPPC64/ELFv1
1687 `appOL` codeBefore
1688 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20))
1689 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1690 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1691 `snocOL` MTCTR r11
1692 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1693 `snocOL` BCTRL usedRegs
1694 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20))
1695 `appOL` codeAfter)
1696 _ -> return ( dynCode
1697 `snocOL` MTCTR dynReg
1698 `appOL` codeBefore
1699 `snocOL` BCTRL usedRegs
1700 `appOL` codeAfter)
1701 where
1702 platform = targetPlatform dflags
1703
1704 uses_pic_base_implicitly = do
1705 -- See Note [implicit register in PPC PIC code]
1706 -- on why we claim to use PIC register here
1707 when (positionIndependent dflags && target32Bit platform) $ do
1708 _ <- getPicBaseNat $ archWordFormat True
1709 return ()
1710
1711 initialStackOffset = case gcp of
1712 GCPAIX -> 24
1713 GCP32ELF -> 8
1714 GCP64ELF 1 -> 48
1715 GCP64ELF 2 -> 32
1716 _ -> panic "genCall': unknown calling convention"
1717 -- size of linkage area + size of arguments, in bytes
1718 stackDelta finalStack = case gcp of
1719 GCPAIX ->
1720 roundTo 16 $ (24 +) $ max 32 $ sum $
1721 map (widthInBytes . typeWidth) argReps
1722 GCP32ELF -> roundTo 16 finalStack
1723 GCP64ELF 1 ->
1724 roundTo 16 $ (48 +) $ max 64 $ sum $
1725 map (roundTo 8 . widthInBytes . typeWidth)
1726 argReps
1727 GCP64ELF 2 ->
1728 roundTo 16 $ (32 +) $ max 64 $ sum $
1729 map (roundTo 8 . widthInBytes . typeWidth)
1730 argReps
1731 _ -> panic "genCall': unknown calling conv."
1732
1733 argReps = map (cmmExprType dflags) args
1734 (argHints, _) = foreignTargetHints target
1735
1736 roundTo a x | x `mod` a == 0 = x
1737 | otherwise = x + a - (x `mod` a)
1738
1739 spFormat = if target32Bit platform then II32 else II64
1740
1741 -- TODO: Do not create a new stack frame if delta is too large.
1742 move_sp_down finalStack
1743 | delta > stackFrameHeaderSize dflags =
1744 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1745 DELTA (-delta)]
1746 | otherwise = nilOL
1747 where delta = stackDelta finalStack
1748 move_sp_up finalStack
1749 | delta > stackFrameHeaderSize dflags =
1750 toOL [ADD sp sp (RIImm (ImmInt delta)),
1751 DELTA 0]
1752 | otherwise = nilOL
1753 where delta = stackDelta finalStack
1754
1755 -- A NOP instruction is required after a call (bl instruction)
1756 -- on AIX and 64-Bit Linux.
1757 -- If the call is to a function with a different TOC (r2) the
1758 -- link editor replaces the NOP instruction with a load of the TOC
1759 -- from the stack to restore the TOC.
1760 maybeNOP = case gcp of
1761 GCP32ELF -> nilOL
1762 -- See Section 3.9.4 of OpenPower ABI
1763 GCPAIX -> unitOL NOP
1764 -- See Section 3.5.11 of PPC64 ELF v1.9
1765 GCP64ELF 1 -> unitOL NOP
1766 -- See Section 2.3.6 of PPC64 ELF v2
1767 GCP64ELF 2 -> unitOL NOP
1768 _ -> panic "maybeNOP: Unknown PowerPC 64-bit ABI"
1769
1770 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1771 passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset
1772 accumCode accumUsed | isWord64 arg_ty
1773 && target32Bit (targetPlatform dflags) =
1774 do
1775 ChildCode64 code vr_lo <- iselExpr64 arg
1776 let vr_hi = getHiVRegFromLo vr_lo
1777
1778 case gcp of
1779 GCPAIX ->
1780 do let storeWord vr (gpr:_) _ = MR gpr vr
1781 storeWord vr [] offset
1782 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1783 passArguments args
1784 (drop 2 gprs)
1785 fprs
1786 (stackOffset+8)
1787 (accumCode `appOL` code
1788 `snocOL` storeWord vr_hi gprs stackOffset
1789 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1790 ((take 2 gprs) ++ accumUsed)
1791 GCP32ELF ->
1792 do let stackOffset' = roundTo 8 stackOffset
1793 stackCode = accumCode `appOL` code
1794 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1795 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1796 regCode hireg loreg =
1797 accumCode `appOL` code
1798 `snocOL` MR hireg vr_hi
1799 `snocOL` MR loreg vr_lo
1800
1801 case gprs of
1802 hireg : loreg : regs | even (length gprs) ->
1803 passArguments args regs fprs stackOffset
1804 (regCode hireg loreg) (hireg : loreg : accumUsed)
1805 _skipped : hireg : loreg : regs ->
1806 passArguments args regs fprs stackOffset
1807 (regCode hireg loreg) (hireg : loreg : accumUsed)
1808 _ -> -- only one or no regs left
1809 passArguments args [] fprs (stackOffset'+8)
1810 stackCode accumUsed
1811 GCP64ELF _ -> panic "passArguments: 32 bit code"
1812
1813 passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed
1814 | reg : _ <- regs = do
1815 register <- getRegister arg_pro
1816 let code = case register of
1817 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1818 Any _ acode -> acode reg
1819 stackOffsetRes = case gcp of
1820 -- The PowerOpen ABI requires that we
1821 -- reserve stack slots for register
1822 -- parameters
1823 GCPAIX -> stackOffset + stackBytes
1824 -- ... the SysV ABI 32-bit doesn't.
1825 GCP32ELF -> stackOffset
1826 -- ... but SysV ABI 64-bit does.
1827 GCP64ELF _ -> stackOffset + stackBytes
1828 passArguments args
1829 (drop nGprs gprs)
1830 (drop nFprs fprs)
1831 stackOffsetRes
1832 (accumCode `appOL` code)
1833 (reg : accumUsed)
1834 | otherwise = do
1835 (vr, code) <- getSomeReg arg_pro
1836 passArguments args
1837 (drop nGprs gprs)
1838 (drop nFprs fprs)
1839 (stackOffset' + stackBytes)
1840 (accumCode `appOL` code
1841 `snocOL` ST format_pro vr stackSlot)
1842 accumUsed
1843 where
1844 arg_pro
1845 | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth dflags)) [arg]
1846 | otherwise = arg
1847 format_pro
1848 | isBitsType rep = intFormat (wordWidth dflags)
1849 | otherwise = cmmTypeFormat rep
1850 conv_op = case hint of
1851 SignedHint -> MO_SS_Conv
1852 _ -> MO_UU_Conv
1853
1854 stackOffset' = case gcp of
1855 GCPAIX ->
1856 -- The 32bit PowerOPEN ABI is happy with
1857 -- 32bit-alignment ...
1858 stackOffset
1859 GCP32ELF
1860 -- ... the SysV ABI requires 8-byte
1861 -- alignment for doubles.
1862 | isFloatType rep && typeWidth rep == W64 ->
1863 roundTo 8 stackOffset
1864 | otherwise ->
1865 stackOffset
1866 GCP64ELF _ ->
1867 -- Everything on the stack is mapped to
1868 -- 8-byte aligned doublewords
1869 stackOffset
1870 stackOffset''
1871 | isFloatType rep && typeWidth rep == W32 =
1872 case gcp of
1873 -- The ELF v1 ABI Section 3.2.3 requires:
1874 -- "Single precision floating point values
1875 -- are mapped to the second word in a single
1876 -- doubleword"
1877 GCP64ELF 1 -> stackOffset' + 4
1878 _ -> stackOffset'
1879 | otherwise = stackOffset'
1880
1881 stackSlot = AddrRegImm sp (ImmInt stackOffset'')
1882 (nGprs, nFprs, stackBytes, regs)
1883 = case gcp of
1884 GCPAIX ->
1885 case cmmTypeFormat rep of
1886 II8 -> (1, 0, 4, gprs)
1887 II16 -> (1, 0, 4, gprs)
1888 II32 -> (1, 0, 4, gprs)
1889 -- The PowerOpen ABI requires that we skip a
1890 -- corresponding number of GPRs when we use
1891 -- the FPRs.
1892 --
1893 -- E.g. for a `double` two GPRs are skipped,
1894 -- whereas for a `float` one GPR is skipped
1895 -- when parameters are assigned to
1896 -- registers.
1897 --
1898 -- The PowerOpen ABI specification can be found at
1899 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1900 FF32 -> (1, 1, 4, fprs)
1901 FF64 -> (2, 1, 8, fprs)
1902 II64 -> panic "genCCall' passArguments II64"
1903 FF80 -> panic "genCCall' passArguments FF80"
1904 GCP32ELF ->
1905 case cmmTypeFormat rep of
1906 II8 -> (1, 0, 4, gprs)
1907 II16 -> (1, 0, 4, gprs)
1908 II32 -> (1, 0, 4, gprs)
1909 -- ... the SysV ABI doesn't.
1910 FF32 -> (0, 1, 4, fprs)
1911 FF64 -> (0, 1, 8, fprs)
1912 II64 -> panic "genCCall' passArguments II64"
1913 FF80 -> panic "genCCall' passArguments FF80"
1914 GCP64ELF _ ->
1915 case cmmTypeFormat rep of
1916 II8 -> (1, 0, 8, gprs)
1917 II16 -> (1, 0, 8, gprs)
1918 II32 -> (1, 0, 8, gprs)
1919 II64 -> (1, 0, 8, gprs)
1920 -- The ELFv1 ABI requires that we skip a
1921 -- corresponding number of GPRs when we use
1922 -- the FPRs.
1923 FF32 -> (1, 1, 8, fprs)
1924 FF64 -> (1, 1, 8, fprs)
1925 FF80 -> panic "genCCall' passArguments FF80"
1926
1927 moveResult reduceToFF32 =
1928 case dest_regs of
1929 [] -> nilOL
1930 [dest]
1931 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1932 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1933 | isWord64 rep && target32Bit (targetPlatform dflags)
1934 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1935 MR r_dest r4]
1936 | otherwise -> unitOL (MR r_dest r3)
1937 where rep = cmmRegType dflags (CmmLocal dest)
1938 r_dest = getRegisterReg platform (CmmLocal dest)
1939 _ -> panic "genCCall' moveResult: Bad dest_regs"
1940
1941 outOfLineMachOp mop =
1942 do
1943 dflags <- getDynFlags
1944 mopExpr <- cmmMakeDynamicReference dflags CallReference $
1945 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1946 let mopLabelOrExpr = case mopExpr of
1947 CmmLit (CmmLabel lbl) -> Left lbl
1948 _ -> Right mopExpr
1949 return (mopLabelOrExpr, reduce)
1950 where
1951 (functionName, reduce) = case mop of
1952 MO_F32_Exp -> (fsLit "exp", True)
1953 MO_F32_Log -> (fsLit "log", True)
1954 MO_F32_Sqrt -> (fsLit "sqrt", True)
1955 MO_F32_Fabs -> unsupported
1956
1957 MO_F32_Sin -> (fsLit "sin", True)
1958 MO_F32_Cos -> (fsLit "cos", True)
1959 MO_F32_Tan -> (fsLit "tan", True)
1960
1961 MO_F32_Asin -> (fsLit "asin", True)
1962 MO_F32_Acos -> (fsLit "acos", True)
1963 MO_F32_Atan -> (fsLit "atan", True)
1964
1965 MO_F32_Sinh -> (fsLit "sinh", True)
1966 MO_F32_Cosh -> (fsLit "cosh", True)
1967 MO_F32_Tanh -> (fsLit "tanh", True)
1968 MO_F32_Pwr -> (fsLit "pow", True)
1969
1970 MO_F32_Asinh -> (fsLit "asinh", True)
1971 MO_F32_Acosh -> (fsLit "acosh", True)
1972 MO_F32_Atanh -> (fsLit "atanh", True)
1973
1974 MO_F64_Exp -> (fsLit "exp", False)
1975 MO_F64_Log -> (fsLit "log", False)
1976 MO_F64_Sqrt -> (fsLit "sqrt", False)
1977 MO_F64_Fabs -> unsupported
1978
1979 MO_F64_Sin -> (fsLit "sin", False)
1980 MO_F64_Cos -> (fsLit "cos", False)
1981 MO_F64_Tan -> (fsLit "tan", False)
1982
1983 MO_F64_Asin -> (fsLit "asin", False)
1984 MO_F64_Acos -> (fsLit "acos", False)
1985 MO_F64_Atan -> (fsLit "atan", False)
1986
1987 MO_F64_Sinh -> (fsLit "sinh", False)
1988 MO_F64_Cosh -> (fsLit "cosh", False)
1989 MO_F64_Tanh -> (fsLit "tanh", False)
1990 MO_F64_Pwr -> (fsLit "pow", False)
1991
1992 MO_F64_Asinh -> (fsLit "asinh", False)
1993 MO_F64_Acosh -> (fsLit "acosh", False)
1994 MO_F64_Atanh -> (fsLit "atanh", False)
1995
1996 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
1997
1998 MO_Memcpy _ -> (fsLit "memcpy", False)
1999 MO_Memset _ -> (fsLit "memset", False)
2000 MO_Memmove _ -> (fsLit "memmove", False)
2001 MO_Memcmp _ -> (fsLit "memcmp", False)
2002
2003 MO_BSwap w -> (fsLit $ bSwapLabel w, False)
2004 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
2005 MO_Pdep w -> (fsLit $ pdepLabel w, False)
2006 MO_Pext w -> (fsLit $ pextLabel w, False)
2007 MO_Clz _ -> unsupported
2008 MO_Ctz _ -> unsupported
2009 MO_AtomicRMW {} -> unsupported
2010 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
2011 MO_AtomicRead _ -> unsupported
2012 MO_AtomicWrite _ -> unsupported
2013
2014 MO_S_QuotRem {} -> unsupported
2015 MO_U_QuotRem {} -> unsupported
2016 MO_U_QuotRem2 {} -> unsupported
2017 MO_Add2 {} -> unsupported
2018 MO_AddWordC {} -> unsupported
2019 MO_SubWordC {} -> unsupported
2020 MO_AddIntC {} -> unsupported
2021 MO_SubIntC {} -> unsupported
2022 MO_U_Mul2 {} -> unsupported
2023 MO_WriteBarrier -> unsupported
2024 MO_Touch -> unsupported
2025 MO_Prefetch_Data _ -> unsupported
2026 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2027 ++ " not supported")
2028
2029 -- -----------------------------------------------------------------------------
2030 -- Generating a table-branch
2031
2032 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2033 genSwitch dflags expr targets
2034 | OSAIX <- platformOS (targetPlatform dflags)
2035 = do
2036 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2037 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2038 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2039 tmp <- getNewRegNat fmt
2040 lbl <- getNewLabelNat
2041 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2042 (tableReg,t_code) <- getSomeReg $ dynRef
2043 let code = e_code `appOL` t_code `appOL` toOL [
2044 SL fmt tmp reg (RIImm (ImmInt sha)),
2045 LD fmt tmp (AddrRegReg tableReg tmp),
2046 MTCTR tmp,
2047 BCTR ids (Just lbl)
2048 ]
2049 return code
2050
2051 | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags)
2052 = do
2053 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2054 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2055 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2056 tmp <- getNewRegNat fmt
2057 lbl <- getNewLabelNat
2058 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2059 (tableReg,t_code) <- getSomeReg $ dynRef
2060 let code = e_code `appOL` t_code `appOL` toOL [
2061 SL fmt tmp reg (RIImm (ImmInt sha)),
2062 LD fmt tmp (AddrRegReg tableReg tmp),
2063 ADD tmp tmp (RIReg tableReg),
2064 MTCTR tmp,
2065 BCTR ids (Just lbl)
2066 ]
2067 return code
2068 | otherwise
2069 = do
2070 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2071 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2072 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2073 tmp <- getNewRegNat fmt
2074 lbl <- getNewLabelNat
2075 let code = e_code `appOL` toOL [
2076 SL fmt tmp reg (RIImm (ImmInt sha)),
2077 ADDIS tmp tmp (HA (ImmCLbl lbl)),
2078 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
2079 MTCTR tmp,
2080 BCTR ids (Just lbl)
2081 ]
2082 return code
2083 where (offset, ids) = switchTargetsToTable targets
2084
2085 generateJumpTableForInstr :: DynFlags -> Instr
2086 -> Maybe (NatCmmDecl CmmStatics Instr)
2087 generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
2088 let jumpTable
2089 | (positionIndependent dflags)
2090 || (not $ target32Bit $ targetPlatform dflags)
2091 = map jumpTableEntryRel ids
2092 | otherwise = map (jumpTableEntry dflags) ids
2093 where jumpTableEntryRel Nothing
2094 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2095 jumpTableEntryRel (Just blockid)
2096 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
2097 (wordWidth dflags))
2098 where blockLabel = blockLbl blockid
2099 in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
2100 generateJumpTableForInstr _ _ = Nothing
2101
2102 -- -----------------------------------------------------------------------------
2103 -- 'condIntReg' and 'condFltReg': condition codes into registers
2104
2105 -- Turn those condition codes into integers now (when they appear on
2106 -- the right hand side of an assignment).
2107
2108
2109
2110 condReg :: NatM CondCode -> NatM Register
2111 condReg getCond = do
2112 CondCode _ cond cond_code <- getCond
2113 dflags <- getDynFlags
2114 let
2115 code dst = cond_code
2116 `appOL` negate_code
2117 `appOL` toOL [
2118 MFCR dst,
2119 RLWINM dst dst (bit + 1) 31 31
2120 ]
2121
2122 negate_code | do_negate = unitOL (CRNOR bit bit bit)
2123 | otherwise = nilOL
2124
2125 (bit, do_negate) = case cond of
2126 LTT -> (0, False)
2127 LE -> (1, True)
2128 EQQ -> (2, False)
2129 GE -> (0, True)
2130 GTT -> (1, False)
2131
2132 NE -> (2, True)
2133
2134 LU -> (0, False)
2135 LEU -> (1, True)
2136 GEU -> (0, True)
2137 GU -> (1, False)
2138 _ -> panic "PPC.CodeGen.codeReg: no match"
2139
2140 format = archWordFormat $ target32Bit $ targetPlatform dflags
2141 return (Any format code)
2142
2143 condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register
2144 condIntReg cond width x y = condReg (condIntCode cond width x y)
2145 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2146 condFltReg cond x y = condReg (condFltCode cond x y)
2147
2148
2149
2150 -- -----------------------------------------------------------------------------
2151 -- 'trivial*Code': deal with trivial instructions
2152
2153 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2154 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2155 -- Only look for constants on the right hand side, because that's
2156 -- where the generic optimizer will have put them.
2157
2158 -- Similarly, for unary instructions, we don't have to worry about
2159 -- matching an StInt as the argument, because genericOpt will already
2160 -- have handled the constant-folding.
2161
2162
2163
2164 {-
2165 Wolfgang's PowerPC version of The Rules:
2166
2167 A slightly modified version of The Rules to take advantage of the fact
2168 that PowerPC instructions work on all registers and don't implicitly
2169 clobber any fixed registers.
2170
2171 * The only expression for which getRegister returns Fixed is (CmmReg reg).
2172
2173 * If getRegister returns Any, then the code it generates may modify only:
2174 (a) fresh temporaries
2175 (b) the destination register
2176 It may *not* modify global registers, unless the global
2177 register happens to be the destination register.
2178 It may not clobber any other registers. In fact, only ccalls clobber any
2179 fixed registers.
2180 Also, it may not modify the counter register (used by genCCall).
2181
2182 Corollary: If a getRegister for a subexpression returns Fixed, you need
2183 not move it to a fresh temporary before evaluating the next subexpression.
2184 The Fixed register won't be modified.
2185 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
2186
2187 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
2188 the value of the destination register.
2189 -}
2190
2191 trivialCode
2192 :: Width
2193 -> Bool
2194 -> (Reg -> Reg -> RI -> Instr)
2195 -> CmmExpr
2196 -> CmmExpr
2197 -> NatM Register
2198
2199 trivialCode rep signed instr x (CmmLit (CmmInt y _))
2200 | Just imm <- makeImmediate rep signed y
2201 = do
2202 (src1, code1) <- getSomeReg x
2203 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
2204 return (Any (intFormat rep) code)
2205
2206 trivialCode rep _ instr x y = do
2207 (src1, code1) <- getSomeReg x
2208 (src2, code2) <- getSomeReg y
2209 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
2210 return (Any (intFormat rep) code)
2211
2212 shiftMulCode
2213 :: Width
2214 -> Bool
2215 -> (Format-> Reg -> Reg -> RI -> Instr)
2216 -> CmmExpr
2217 -> CmmExpr
2218 -> NatM Register
2219 shiftMulCode width sign instr x (CmmLit (CmmInt y _))
2220 | Just imm <- makeImmediate width sign y
2221 = do
2222 (src1, code1) <- getSomeReg x
2223 let format = intFormat width
2224 let ins_fmt = intFormat (max W32 width)
2225 let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm)
2226 return (Any format code)
2227
2228 shiftMulCode width _ instr x y = do
2229 (src1, code1) <- getSomeReg x
2230 (src2, code2) <- getSomeReg y
2231 let format = intFormat width
2232 let ins_fmt = intFormat (max W32 width)
2233 let code dst = code1 `appOL` code2
2234 `snocOL` instr ins_fmt dst src1 (RIReg src2)
2235 return (Any format code)
2236
2237 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
2238 -> CmmExpr -> CmmExpr -> NatM Register
2239 trivialCodeNoImm' format instr x y = do
2240 (src1, code1) <- getSomeReg x
2241 (src2, code2) <- getSomeReg y
2242 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
2243 return (Any format code)
2244
2245 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
2246 -> CmmExpr -> CmmExpr -> NatM Register
2247 trivialCodeNoImm format instr x y
2248 = trivialCodeNoImm' format (instr format) x y
2249
2250 srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr)
2251 -> CmmExpr -> CmmExpr -> NatM Register
2252 srCode width sgn instr x (CmmLit (CmmInt y _))
2253 | Just imm <- makeImmediate width sgn y
2254 = do
2255 let op_len = max W32 width
2256 extend = if sgn then extendSExpr else extendUExpr
2257 (src1, code1) <- getSomeReg (extend width op_len x)
2258 let code dst = code1 `snocOL`
2259 instr (intFormat op_len) dst src1 (RIImm imm)
2260 return (Any (intFormat width) code)
2261
2262 srCode width sgn instr x y = do
2263 let op_len = max W32 width
2264 extend = if sgn then extendSExpr else extendUExpr
2265 (src1, code1) <- getSomeReg (extend width op_len x)
2266 (src2, code2) <- getSomeReg (extendUExpr width op_len y)
2267 -- Note: Shift amount `y` is unsigned
2268 let code dst = code1 `appOL` code2 `snocOL`
2269 instr (intFormat op_len) dst src1 (RIReg src2)
2270 return (Any (intFormat width) code)
2271
2272 divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
2273 divCode width sgn x y = do
2274 let op_len = max W32 width
2275 extend = if sgn then extendSExpr else extendUExpr
2276 (src1, code1) <- getSomeReg (extend width op_len x)
2277 (src2, code2) <- getSomeReg (extend width op_len y)
2278 let code dst = code1 `appOL` code2 `snocOL`
2279 DIV (intFormat op_len) sgn dst src1 src2
2280 return (Any (intFormat width) code)
2281
2282
2283 trivialUCode :: Format
2284 -> (Reg -> Reg -> Instr)
2285 -> CmmExpr
2286 -> NatM Register
2287 trivialUCode rep instr x = do
2288 (src, code) <- getSomeReg x
2289 let code' dst = code `snocOL` instr dst src
2290 return (Any rep code')
2291
2292 -- There is no "remainder" instruction on the PPC, so we have to do
2293 -- it the hard way.
2294 -- The "sgn" parameter is the signedness for the division instruction
2295
2296 remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr
2297 -> NatM (Reg -> InstrBlock)
2298 remainderCode rep sgn reg_q arg_x arg_y = do
2299 let op_len = max W32 rep
2300 fmt = intFormat op_len
2301 extend = if sgn then extendSExpr else extendUExpr
2302 (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x)
2303 (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y)
2304 return $ \reg_r -> y_code `appOL` x_code
2305 `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg
2306 , MULL fmt reg_r reg_q (RIReg y_reg)
2307 , SUBF reg_r reg_r x_reg
2308 ]
2309
2310
2311 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2312 coerceInt2FP fromRep toRep x = do
2313 dflags <- getDynFlags
2314 let arch = platformArch $ targetPlatform dflags
2315 coerceInt2FP' arch fromRep toRep x
2316
2317 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2318 coerceInt2FP' ArchPPC fromRep toRep x = do
2319 (src, code) <- getSomeReg x
2320 lbl <- getNewLabelNat
2321 itmp <- getNewRegNat II32
2322 ftmp <- getNewRegNat FF64
2323 dflags <- getDynFlags
2324 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2325 Amode addr addr_code <- getAmode D dynRef
2326 let
2327 code' dst = code `appOL` maybe_exts `appOL` toOL [
2328 LDATA (Section ReadOnlyData lbl) $ Statics lbl
2329 [CmmStaticLit (CmmInt 0x43300000 W32),
2330 CmmStaticLit (CmmInt 0x80000000 W32)],
2331 XORIS itmp src (ImmInt 0x8000),
2332 ST II32 itmp (spRel dflags 3),
2333 LIS itmp (ImmInt 0x4330),
2334 ST II32 itmp (spRel dflags 2),
2335 LD FF64 ftmp (spRel dflags 2)
2336 ] `appOL` addr_code `appOL` toOL [
2337 LD FF64 dst addr,
2338 FSUB FF64 dst ftmp dst
2339 ] `appOL` maybe_frsp dst
2340
2341 maybe_exts = case fromRep of
2342 W8 -> unitOL $ EXTS II8 src src
2343 W16 -> unitOL $ EXTS II16 src src
2344 W32 -> nilOL
2345 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2346
2347 maybe_frsp dst
2348 = case toRep of
2349 W32 -> unitOL $ FRSP dst dst
2350 W64 -> nilOL
2351 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2352
2353 return (Any (floatFormat toRep) code')
2354
2355 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
2356 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
2357 -- set right before a call and restored right after return from the call.
2358 -- So it is fine.
2359 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
2360 (src, code) <- getSomeReg x
2361 dflags <- getDynFlags
2362 let
2363 code' dst = code `appOL` maybe_exts `appOL` toOL [
2364 ST II64 src (spRel dflags 3),
2365 LD FF64 dst (spRel dflags 3),
2366 FCFID dst dst
2367 ] `appOL` maybe_frsp dst
2368
2369 maybe_exts = case fromRep of
2370 W8 -> unitOL $ EXTS II8 src src
2371 W16 -> unitOL $ EXTS II16 src src
2372 W32 -> unitOL $ EXTS II32 src src
2373 W64 -> nilOL
2374 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2375
2376 maybe_frsp dst
2377 = case toRep of
2378 W32 -> unitOL $ FRSP dst dst
2379 W64 -> nilOL
2380 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2381
2382 return (Any (floatFormat toRep) code')
2383
2384 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
2385
2386
2387 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2388 coerceFP2Int fromRep toRep x = do
2389 dflags <- getDynFlags
2390 let arch = platformArch $ targetPlatform dflags
2391 coerceFP2Int' arch fromRep toRep x
2392
2393 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2394 coerceFP2Int' ArchPPC _ toRep x = do
2395 dflags <- getDynFlags
2396 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
2397 (src, code) <- getSomeReg x
2398 tmp <- getNewRegNat FF64
2399 let
2400 code' dst = code `appOL` toOL [
2401 -- convert to int in FP reg
2402 FCTIWZ tmp src,
2403 -- store value (64bit) from FP to stack
2404 ST FF64 tmp (spRel dflags 2),
2405 -- read low word of value (high word is undefined)
2406 LD II32 dst (spRel dflags 3)]
2407 return (Any (intFormat toRep) code')
2408
2409 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
2410 dflags <- getDynFlags
2411 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
2412 (src, code) <- getSomeReg x
2413 tmp <- getNewRegNat FF64
2414 let
2415 code' dst = code `appOL` toOL [
2416 -- convert to int in FP reg
2417 FCTIDZ tmp src,
2418 -- store value (64bit) from FP to compiler word on stack
2419 ST FF64 tmp (spRel dflags 3),
2420 LD II64 dst (spRel dflags 3)]
2421 return (Any (intFormat toRep) code')
2422
2423 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
2424
2425 -- Note [.LCTOC1 in PPC PIC code]
2426 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
2427 -- to make the most of the PPC's 16-bit displacements.
2428 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
2429 -- first element will have '-32768' offset against .LCTOC1.
2430
2431 -- Note [implicit register in PPC PIC code]
2432 -- PPC generates calls by labels in assembly
2433 -- in form of:
2434 -- bl puts+32768@plt
2435 -- in this form it's not seen directly (by GHC NCG)
2436 -- that r30 (PicBaseReg) is used,
2437 -- but r30 is a required part of PLT code setup:
2438 -- puts+32768@plt:
2439 -- lwz r11,-30484(r30) ; offset in .LCTOC1
2440 -- mtctr r11
2441 -- bctr