PPC NCG: Use liveness information in CmmCall
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, and (b) the type signatures,
13 -- the structure should not be too overwhelming.
14
15 module PPC.CodeGen (
16 cmmTopCodeGen,
17 generateJumpTableForInstr,
18 InstrBlock
19 )
20
21 where
22
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
25 #include "../includes/MachDeps.h"
26
27 -- NCG stuff:
28 import GhcPrelude
29
30 import CodeGen.Platform
31 import PPC.Instr
32 import PPC.Cond
33 import PPC.Regs
34 import CPrim
35 import NCGMonad ( NatM, getNewRegNat, getNewLabelNat
36 , getBlockIdNat, getPicBaseNat, getNewRegPairNat
37 , getPicBaseMaybeNat )
38 import Instruction
39 import PIC
40 import Format
41 import RegClass
42 import Reg
43 import TargetReg
44 import Platform
45
46 -- Our intermediate code:
47 import BlockId
48 import PprCmm ( pprExpr )
49 import Cmm
50 import CmmUtils
51 import CmmSwitch
52 import CLabel
53 import Hoopl.Block
54 import Hoopl.Graph
55
56 -- The rest:
57 import OrdList
58 import Outputable
59 import DynFlags
60
61 import Control.Monad ( mapAndUnzipM, when )
62 import Data.Bits
63 import Data.Word
64
65 import BasicTypes
66 import FastString
67 import Util
68
69 -- -----------------------------------------------------------------------------
70 -- Top-level of the instruction selector
71
72 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
73 -- They are really trees of insns to facilitate fast appending, where a
74 -- left-to-right traversal (pre-order?) yields the insns in the correct
75 -- order.
76
77 cmmTopCodeGen
78 :: RawCmmDecl
79 -> NatM [NatCmmDecl CmmStatics Instr]
80
81 cmmTopCodeGen (CmmProc info lab live graph) = do
82 let blocks = toBlockListEntryFirst graph
83 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
84 dflags <- getDynFlags
85 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
86 tops = proc : concat statics
87 os = platformOS $ targetPlatform dflags
88 arch = platformArch $ targetPlatform dflags
89 case arch of
90 ArchPPC | os == OSAIX -> return tops
91 | otherwise -> do
92 picBaseMb <- getPicBaseMaybeNat
93 case picBaseMb of
94 Just picBase -> initializePicBase_ppc arch os picBase tops
95 Nothing -> return tops
96 ArchPPC_64 ELF_V1 -> fixup_entry tops
97 -- generating function descriptor is handled in
98 -- pretty printer
99 ArchPPC_64 ELF_V2 -> fixup_entry tops
100 -- generating function prologue is handled in
101 -- pretty printer
102 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
103 where
104 fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
105 = do
106 let BasicBlock bID insns = entry
107 bID' <- if lab == (blockLbl bID)
108 then newBlockId
109 else return bID
110 let b' = BasicBlock bID' insns
111 return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
112 fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
113
114 cmmTopCodeGen (CmmData sec dat) = do
115 return [CmmData sec dat] -- no translation, we just use CmmStatic
116
117 basicBlockCodeGen
118 :: Block CmmNode C C
119 -> NatM ( [NatBasicBlock Instr]
120 , [NatCmmDecl CmmStatics Instr])
121
122 basicBlockCodeGen block = do
123 let (_, nodes, tail) = blockSplit block
124 id = entryLabel block
125 stmts = blockToList nodes
126 mid_instrs <- stmtsToInstrs stmts
127 tail_instrs <- stmtToInstrs tail
128 let instrs = mid_instrs `appOL` tail_instrs
129 -- code generation may introduce new basic block boundaries, which
130 -- are indicated by the NEWBLOCK instruction. We must split up the
131 -- instruction stream into basic blocks again. Also, we extract
132 -- LDATAs here too.
133 let
134 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
135
136 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
137 = ([], BasicBlock id instrs : blocks, statics)
138 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
139 = (instrs, blocks, CmmData sec dat:statics)
140 mkBlocks instr (instrs,blocks,statics)
141 = (instr:instrs, blocks, statics)
142 return (BasicBlock id top : other_blocks, statics)
143
144 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
145 stmtsToInstrs stmts
146 = do instrss <- mapM stmtToInstrs stmts
147 return (concatOL instrss)
148
149 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
150 stmtToInstrs stmt = do
151 dflags <- getDynFlags
152 case stmt of
153 CmmComment s -> return (unitOL (COMMENT s))
154 CmmTick {} -> return nilOL
155 CmmUnwind {} -> return nilOL
156
157 CmmAssign reg src
158 | isFloatType ty -> assignReg_FltCode format reg src
159 | target32Bit (targetPlatform dflags) &&
160 isWord64 ty -> assignReg_I64Code reg src
161 | otherwise -> assignReg_IntCode format reg src
162 where ty = cmmRegType dflags reg
163 format = cmmTypeFormat ty
164
165 CmmStore addr src
166 | isFloatType ty -> assignMem_FltCode format addr src
167 | target32Bit (targetPlatform dflags) &&
168 isWord64 ty -> assignMem_I64Code addr src
169 | otherwise -> assignMem_IntCode format addr src
170 where ty = cmmExprType dflags src
171 format = cmmTypeFormat ty
172
173 CmmUnsafeForeignCall target result_regs args
174 -> genCCall target result_regs args
175
176 CmmBranch id -> genBranch id
177 CmmCondBranch arg true false prediction -> do
178 b1 <- genCondJump true arg prediction
179 b2 <- genBranch false
180 return (b1 `appOL` b2)
181 CmmSwitch arg ids -> do dflags <- getDynFlags
182 genSwitch dflags arg ids
183 CmmCall { cml_target = arg
184 , cml_args_regs = gregs } -> do
185 dflags <- getDynFlags
186 genJump arg (jumpRegs dflags gregs)
187 _ ->
188 panic "stmtToInstrs: statement should have been cps'd away"
189
190 jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
191 jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
192 where platform = targetPlatform dflags
193
194 --------------------------------------------------------------------------------
195 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
196 -- They are really trees of insns to facilitate fast appending, where a
197 -- left-to-right traversal yields the insns in the correct order.
198 --
199 type InstrBlock
200 = OrdList Instr
201
202
203 -- | Register's passed up the tree. If the stix code forces the register
204 -- to live in a pre-decided machine register, it comes out as @Fixed@;
205 -- otherwise, it comes out as @Any@, and the parent can decide which
206 -- register to put it in.
207 --
208 data Register
209 = Fixed Format Reg InstrBlock
210 | Any Format (Reg -> InstrBlock)
211
212
213 swizzleRegisterRep :: Register -> Format -> Register
214 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
215 swizzleRegisterRep (Any _ codefn) format = Any format codefn
216
217
218 -- | Grab the Reg for a CmmReg
219 getRegisterReg :: Platform -> CmmReg -> Reg
220
221 getRegisterReg _ (CmmLocal (LocalReg u pk))
222 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
223
224 getRegisterReg platform (CmmGlobal mid)
225 = case globalRegMaybe platform mid of
226 Just reg -> RegReal reg
227 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
228 -- By this stage, the only MagicIds remaining should be the
229 -- ones which map to a real machine register on this
230 -- platform. Hence ...
231
232 -- | Convert a BlockId to some CmmStatic data
233 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
234 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
235 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
236 where blockLabel = blockLbl blockid
237
238
239
240 -- -----------------------------------------------------------------------------
241 -- General things for putting together code sequences
242
243 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
244 -- CmmExprs into CmmRegOff?
245 mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
246 mangleIndexTree dflags (CmmRegOff reg off)
247 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
248 where width = typeWidth (cmmRegType dflags reg)
249
250 mangleIndexTree _ _
251 = panic "PPC.CodeGen.mangleIndexTree: no match"
252
253 -- -----------------------------------------------------------------------------
254 -- Code gen for 64-bit arithmetic on 32-bit platforms
255
256 {-
257 Simple support for generating 64-bit code (ie, 64 bit values and 64
258 bit assignments) on 32-bit platforms. Unlike the main code generator
259 we merely shoot for generating working code as simply as possible, and
260 pay little attention to code quality. Specifically, there is no
261 attempt to deal cleverly with the fixed-vs-floating register
262 distinction; all values are generated into (pairs of) floating
263 registers, even if this would mean some redundant reg-reg moves as a
264 result. Only one of the VRegUniques is returned, since it will be
265 of the VRegUniqueLo form, and the upper-half VReg can be determined
266 by applying getHiVRegFromLo to it.
267 -}
268
269 data ChildCode64 -- a.k.a "Register64"
270 = ChildCode64
271 InstrBlock -- code
272 Reg -- the lower 32-bit temporary which contains the
273 -- result; use getHiVRegFromLo to find the other
274 -- VRegUnique. Rules of this simplified insn
275 -- selection game are therefore that the returned
276 -- Reg may be modified
277
278
279 -- | Compute an expression into a register, but
280 -- we don't mind which one it is.
281 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
282 getSomeReg expr = do
283 r <- getRegister expr
284 case r of
285 Any rep code -> do
286 tmp <- getNewRegNat rep
287 return (tmp, code tmp)
288 Fixed _ reg code ->
289 return (reg, code)
290
291 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
292 getI64Amodes addrTree = do
293 Amode hi_addr addr_code <- getAmode D addrTree
294 case addrOffset hi_addr 4 of
295 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
296 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
297 return (AddrRegImm hi_ptr (ImmInt 0),
298 AddrRegImm hi_ptr (ImmInt 4),
299 code)
300
301
302 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
303 assignMem_I64Code addrTree valueTree = do
304 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
305 ChildCode64 vcode rlo <- iselExpr64 valueTree
306 let
307 rhi = getHiVRegFromLo rlo
308
309 -- Big-endian store
310 mov_hi = ST II32 rhi hi_addr
311 mov_lo = ST II32 rlo lo_addr
312 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
313
314
315 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
316 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
317 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
318 let
319 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
320 r_dst_hi = getHiVRegFromLo r_dst_lo
321 r_src_hi = getHiVRegFromLo r_src_lo
322 mov_lo = MR r_dst_lo r_src_lo
323 mov_hi = MR r_dst_hi r_src_hi
324 return (
325 vcode `snocOL` mov_lo `snocOL` mov_hi
326 )
327
328 assignReg_I64Code _ _
329 = panic "assignReg_I64Code(powerpc): invalid lvalue"
330
331
332 iselExpr64 :: CmmExpr -> NatM ChildCode64
333 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
334 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
335 (rlo, rhi) <- getNewRegPairNat II32
336 let mov_hi = LD II32 rhi hi_addr
337 mov_lo = LD II32 rlo lo_addr
338 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
339 rlo
340
341 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
342 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
343
344 iselExpr64 (CmmLit (CmmInt i _)) = do
345 (rlo,rhi) <- getNewRegPairNat II32
346 let
347 half0 = fromIntegral (fromIntegral i :: Word16)
348 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
349 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
350 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
351
352 code = toOL [
353 LIS rlo (ImmInt half1),
354 OR rlo rlo (RIImm $ ImmInt half0),
355 LIS rhi (ImmInt half3),
356 OR rhi rhi (RIImm $ ImmInt half2)
357 ]
358 return (ChildCode64 code rlo)
359
360 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
361 ChildCode64 code1 r1lo <- iselExpr64 e1
362 ChildCode64 code2 r2lo <- iselExpr64 e2
363 (rlo,rhi) <- getNewRegPairNat II32
364 let
365 r1hi = getHiVRegFromLo r1lo
366 r2hi = getHiVRegFromLo r2lo
367 code = code1 `appOL`
368 code2 `appOL`
369 toOL [ ADDC rlo r1lo r2lo,
370 ADDE rhi r1hi r2hi ]
371 return (ChildCode64 code rlo)
372
373 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
374 ChildCode64 code1 r1lo <- iselExpr64 e1
375 ChildCode64 code2 r2lo <- iselExpr64 e2
376 (rlo,rhi) <- getNewRegPairNat II32
377 let
378 r1hi = getHiVRegFromLo r1lo
379 r2hi = getHiVRegFromLo r2lo
380 code = code1 `appOL`
381 code2 `appOL`
382 toOL [ SUBFC rlo r2lo (RIReg r1lo),
383 SUBFE rhi r2hi r1hi ]
384 return (ChildCode64 code rlo)
385
386 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
387 (expr_reg,expr_code) <- getSomeReg expr
388 (rlo, rhi) <- getNewRegPairNat II32
389 let mov_hi = LI rhi (ImmInt 0)
390 mov_lo = MR rlo expr_reg
391 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
392 rlo
393
394 iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
395 (expr_reg,expr_code) <- getSomeReg expr
396 (rlo, rhi) <- getNewRegPairNat II32
397 let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
398 mov_lo = MR rlo expr_reg
399 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
400 rlo
401 iselExpr64 expr
402 = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
403
404
405
406 getRegister :: CmmExpr -> NatM Register
407 getRegister e = do dflags <- getDynFlags
408 getRegister' dflags e
409
410 getRegister' :: DynFlags -> CmmExpr -> NatM Register
411
412 getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
413 | OSAIX <- platformOS (targetPlatform dflags) = do
414 let code dst = toOL [ LD II32 dst tocAddr ]
415 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
416 return (Any II32 code)
417 | target32Bit (targetPlatform dflags) = do
418 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
419 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
420 reg nilOL)
421 | otherwise = return (Fixed II64 toc nilOL)
422
423 getRegister' dflags (CmmReg reg)
424 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
425 (getRegisterReg (targetPlatform dflags) reg) nilOL)
426
427 getRegister' dflags tree@(CmmRegOff _ _)
428 = getRegister' dflags (mangleIndexTree dflags tree)
429
430 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
431 -- TO_W_(x), TO_W_(x >> 32)
432
433 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
434 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
435 | target32Bit (targetPlatform dflags) = do
436 ChildCode64 code rlo <- iselExpr64 x
437 return $ Fixed II32 (getHiVRegFromLo rlo) code
438
439 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
440 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
441 | target32Bit (targetPlatform dflags) = do
442 ChildCode64 code rlo <- iselExpr64 x
443 return $ Fixed II32 (getHiVRegFromLo rlo) code
444
445 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
446 | target32Bit (targetPlatform dflags) = do
447 ChildCode64 code rlo <- iselExpr64 x
448 return $ Fixed II32 rlo code
449
450 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
451 | target32Bit (targetPlatform dflags) = do
452 ChildCode64 code rlo <- iselExpr64 x
453 return $ Fixed II32 rlo code
454
455 getRegister' dflags (CmmLoad mem pk)
456 | not (isWord64 pk) = do
457 let platform = targetPlatform dflags
458 Amode addr addr_code <- getAmode D mem
459 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
460 addr_code `snocOL` LD format dst addr
461 return (Any format code)
462 | not (target32Bit (targetPlatform dflags)) = do
463 Amode addr addr_code <- getAmode DS mem
464 let code dst = addr_code `snocOL` LD II64 dst addr
465 return (Any II64 code)
466
467 where format = cmmTypeFormat pk
468
469 -- catch simple cases of zero- or sign-extended load
470 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
471 Amode addr addr_code <- getAmode D mem
472 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
473
474 getRegister' _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do
475 Amode addr addr_code <- getAmode D mem
476 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
477
478 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
479 Amode addr addr_code <- getAmode D mem
480 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
481
482 getRegister' _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do
483 Amode addr addr_code <- getAmode D mem
484 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
485
486 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
487
488 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
489 Amode addr addr_code <- getAmode D mem
490 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
491
492 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
493 Amode addr addr_code <- getAmode D mem
494 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
495
496 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
497 Amode addr addr_code <- getAmode D mem
498 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
499
500 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
501 Amode addr addr_code <- getAmode D mem
502 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
503
504 getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
505 Amode addr addr_code <- getAmode D mem
506 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
507
508 getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
509 -- lwa is DS-form. See Note [Power instruction format]
510 Amode addr addr_code <- getAmode DS mem
511 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
512
513 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
514 = case mop of
515 MO_Not rep -> triv_ucode_int rep NOT
516
517 MO_F_Neg w -> triv_ucode_float w FNEG
518 MO_S_Neg w -> triv_ucode_int w NEG
519
520 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
521 MO_FF_Conv W32 W64 -> conversionNop FF64 x
522
523 MO_FS_Conv from to -> coerceFP2Int from to x
524 MO_SF_Conv from to -> coerceInt2FP from to x
525
526 MO_SS_Conv from to
527 | from >= to -> conversionNop (intFormat to) x
528 | otherwise -> triv_ucode_int to (EXTS (intFormat from))
529
530 MO_UU_Conv from to
531 | from >= to -> conversionNop (intFormat to) x
532 | otherwise -> clearLeft from to
533
534 MO_XX_Conv _ to -> conversionNop (intFormat to) x
535
536 _ -> panic "PPC.CodeGen.getRegister: no match"
537
538 where
539 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
540 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
541
542 conversionNop new_format expr
543 = do e_code <- getRegister' dflags expr
544 return (swizzleRegisterRep e_code new_format)
545
546 clearLeft from to
547 = do (src1, code1) <- getSomeReg x
548 let arch_fmt = intFormat (wordWidth dflags)
549 arch_bits = widthInBits (wordWidth dflags)
550 size = widthInBits from
551 code dst = code1 `snocOL`
552 CLRLI arch_fmt dst src1 (arch_bits - size)
553 return (Any (intFormat to) code)
554
555 getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
556 = case mop of
557 MO_F_Eq _ -> condFltReg EQQ x y
558 MO_F_Ne _ -> condFltReg NE x y
559 MO_F_Gt _ -> condFltReg GTT x y
560 MO_F_Ge _ -> condFltReg GE x y
561 MO_F_Lt _ -> condFltReg LTT x y
562 MO_F_Le _ -> condFltReg LE x y
563
564 MO_Eq rep -> condIntReg EQQ rep x y
565 MO_Ne rep -> condIntReg NE rep x y
566
567 MO_S_Gt rep -> condIntReg GTT rep x y
568 MO_S_Ge rep -> condIntReg GE rep x y
569 MO_S_Lt rep -> condIntReg LTT rep x y
570 MO_S_Le rep -> condIntReg LE rep x y
571
572 MO_U_Gt rep -> condIntReg GU rep x y
573 MO_U_Ge rep -> condIntReg GEU rep x y
574 MO_U_Lt rep -> condIntReg LU rep x y
575 MO_U_Le rep -> condIntReg LEU rep x y
576
577 MO_F_Add w -> triv_float w FADD
578 MO_F_Sub w -> triv_float w FSUB
579 MO_F_Mul w -> triv_float w FMUL
580 MO_F_Quot w -> triv_float w FDIV
581
582 -- optimize addition with 32-bit immediate
583 -- (needed for PIC)
584 MO_Add W32 ->
585 case y of
586 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
587 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
588 CmmLit lit
589 -> do
590 (src, srcCode) <- getSomeReg x
591 let imm = litToImm lit
592 code dst = srcCode `appOL` toOL [
593 ADDIS dst src (HA imm),
594 ADD dst dst (RIImm (LO imm))
595 ]
596 return (Any II32 code)
597 _ -> trivialCode W32 True ADD x y
598
599 MO_Add rep -> trivialCode rep True ADD x y
600 MO_Sub rep ->
601 case y of
602 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
603 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
604 _ -> case x of
605 CmmLit (CmmInt imm _)
606 | Just _ <- makeImmediate rep True imm
607 -- subfi ('substract from' with immediate) doesn't exist
608 -> trivialCode rep True SUBFC y x
609 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
610
611 MO_Mul rep -> shiftMulCode rep True MULL x y
612 MO_S_MulMayOflo rep -> do
613 (src1, code1) <- getSomeReg x
614 (src2, code2) <- getSomeReg y
615 let
616 format = intFormat rep
617 code dst = code1 `appOL` code2
618 `appOL` toOL [ MULLO format dst src1 src2
619 , MFOV format dst
620 ]
621 return (Any format code)
622
623 MO_S_Quot rep -> divCode rep True x y
624 MO_U_Quot rep -> divCode rep False x y
625
626 MO_S_Rem rep -> remainder rep True x y
627 MO_U_Rem rep -> remainder rep False x y
628
629 MO_And rep -> case y of
630 (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
631 -> do
632 (src, srcCode) <- getSomeReg x
633 let clear_mask = if imm == -4 then 2 else 3
634 fmt = intFormat rep
635 code dst = srcCode
636 `appOL` unitOL (CLRRI fmt dst src clear_mask)
637 return (Any fmt code)
638 _ -> trivialCode rep False AND x y
639 MO_Or rep -> trivialCode rep False OR x y
640 MO_Xor rep -> trivialCode rep False XOR x y
641
642 MO_Shl rep -> shiftMulCode rep False SL x y
643 MO_S_Shr rep -> srCode rep True SRA x y
644 MO_U_Shr rep -> srCode rep False SR x y
645 _ -> panic "PPC.CodeGen.getRegister: no match"
646
647 where
648 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
649 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
650
651 remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
652 remainder rep sgn x y = do
653 let fmt = intFormat rep
654 tmp <- getNewRegNat fmt
655 code <- remainderCode rep sgn tmp x y
656 return (Any fmt code)
657
658
659 getRegister' _ (CmmLit (CmmInt i rep))
660 | Just imm <- makeImmediate rep True i
661 = let
662 code dst = unitOL (LI dst imm)
663 in
664 return (Any (intFormat rep) code)
665
666 getRegister' _ (CmmLit (CmmFloat f frep)) = do
667 lbl <- getNewLabelNat
668 dflags <- getDynFlags
669 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
670 Amode addr addr_code <- getAmode D dynRef
671 let format = floatFormat frep
672 code dst =
673 LDATA (Section ReadOnlyData lbl)
674 (Statics lbl [CmmStaticLit (CmmFloat f frep)])
675 `consOL` (addr_code `snocOL` LD format dst addr)
676 return (Any format code)
677
678 getRegister' dflags (CmmLit lit)
679 | target32Bit (targetPlatform dflags)
680 = let rep = cmmLitType dflags lit
681 imm = litToImm lit
682 code dst = toOL [
683 LIS dst (HA imm),
684 ADD dst dst (RIImm (LO imm))
685 ]
686 in return (Any (cmmTypeFormat rep) code)
687 | otherwise
688 = do lbl <- getNewLabelNat
689 dflags <- getDynFlags
690 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
691 Amode addr addr_code <- getAmode D dynRef
692 let rep = cmmLitType dflags lit
693 format = cmmTypeFormat rep
694 code dst =
695 LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
696 `consOL` (addr_code `snocOL` LD format dst addr)
697 return (Any format code)
698
699 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
700
701 -- extend?Rep: wrap integer expression of type `from`
702 -- in a conversion to `to`
703 extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr
704 extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x]
705
706 extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr
707 extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x]
708
709 -- -----------------------------------------------------------------------------
710 -- The 'Amode' type: Memory addressing modes passed up the tree.
711
712 data Amode
713 = Amode AddrMode InstrBlock
714
715 {-
716 Now, given a tree (the argument to a CmmLoad) that references memory,
717 produce a suitable addressing mode.
718
719 A Rule of the Game (tm) for Amodes: use of the addr bit must
720 immediately follow use of the code part, since the code part puts
721 values in registers which the addr then refers to. So you can't put
722 anything in between, lest it overwrite some of those registers. If
723 you need to do some other computation between the code part and use of
724 the addr bit, first store the effective address from the amode in a
725 temporary, then do the other computation, and then use the temporary:
726
727 code
728 LEA amode, tmp
729 ... other computation ...
730 ... (tmp) ...
731 -}
732
733 {- Note [Power instruction format]
734 In some instructions the 16 bit offset must be a multiple of 4, i.e.
735 the two least significant bits must be zero. The "Power ISA" specification
736 calls these instruction formats "DS-FORM" and the instructions with
737 arbitrary 16 bit offsets are "D-FORM".
738
739 The Power ISA specification document can be obtained from www.power.org.
740 -}
741 data InstrForm = D | DS
742
743 getAmode :: InstrForm -> CmmExpr -> NatM Amode
744 getAmode inf tree@(CmmRegOff _ _)
745 = do dflags <- getDynFlags
746 getAmode inf (mangleIndexTree dflags tree)
747
748 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
749 | Just off <- makeImmediate W32 True (-i)
750 = do
751 (reg, code) <- getSomeReg x
752 return (Amode (AddrRegImm reg off) code)
753
754
755 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
756 | Just off <- makeImmediate W32 True i
757 = do
758 (reg, code) <- getSomeReg x
759 return (Amode (AddrRegImm reg off) code)
760
761 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
762 | Just off <- makeImmediate W64 True (-i)
763 = do
764 (reg, code) <- getSomeReg x
765 return (Amode (AddrRegImm reg off) code)
766
767
768 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
769 | Just off <- makeImmediate W64 True i
770 = do
771 (reg, code) <- getSomeReg x
772 return (Amode (AddrRegImm reg off) code)
773
774 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
775 | Just off <- makeImmediate W64 True (-i)
776 = do
777 (reg, code) <- getSomeReg x
778 (reg', off', code') <-
779 if i `mod` 4 == 0
780 then do return (reg, off, code)
781 else do
782 tmp <- getNewRegNat II64
783 return (tmp, ImmInt 0,
784 code `snocOL` ADD tmp reg (RIImm off))
785 return (Amode (AddrRegImm reg' off') code')
786
787 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
788 | Just off <- makeImmediate W64 True i
789 = do
790 (reg, code) <- getSomeReg x
791 (reg', off', code') <-
792 if i `mod` 4 == 0
793 then do return (reg, off, code)
794 else do
795 tmp <- getNewRegNat II64
796 return (tmp, ImmInt 0,
797 code `snocOL` ADD tmp reg (RIImm off))
798 return (Amode (AddrRegImm reg' off') code')
799
800 -- optimize addition with 32-bit immediate
801 -- (needed for PIC)
802 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
803 = do
804 dflags <- getDynFlags
805 (src, srcCode) <- getSomeReg x
806 let imm = litToImm lit
807 case () of
808 _ | OSAIX <- platformOS (targetPlatform dflags)
809 , isCmmLabelType lit ->
810 -- HA16/LO16 relocations on labels not supported on AIX
811 return (Amode (AddrRegImm src imm) srcCode)
812 | otherwise -> do
813 tmp <- getNewRegNat II32
814 let code = srcCode `snocOL` ADDIS tmp src (HA imm)
815 return (Amode (AddrRegImm tmp (LO imm)) code)
816 where
817 isCmmLabelType (CmmLabel {}) = True
818 isCmmLabelType (CmmLabelOff {}) = True
819 isCmmLabelType (CmmLabelDiffOff {}) = True
820 isCmmLabelType _ = False
821
822 getAmode _ (CmmLit lit)
823 = do
824 dflags <- getDynFlags
825 case platformArch $ targetPlatform dflags of
826 ArchPPC -> do
827 tmp <- getNewRegNat II32
828 let imm = litToImm lit
829 code = unitOL (LIS tmp (HA imm))
830 return (Amode (AddrRegImm tmp (LO imm)) code)
831 _ -> do -- TODO: Load from TOC,
832 -- see getRegister' _ (CmmLit lit)
833 tmp <- getNewRegNat II64
834 let imm = litToImm lit
835 code = toOL [
836 LIS tmp (HIGHESTA imm),
837 OR tmp tmp (RIImm (HIGHERA imm)),
838 SL II64 tmp tmp (RIImm (ImmInt 32)),
839 ORIS tmp tmp (HA imm)
840 ]
841 return (Amode (AddrRegImm tmp (LO imm)) code)
842
843 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
844 = do
845 (regX, codeX) <- getSomeReg x
846 (regY, codeY) <- getSomeReg y
847 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
848
849 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
850 = do
851 (regX, codeX) <- getSomeReg x
852 (regY, codeY) <- getSomeReg y
853 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
854
855 getAmode _ other
856 = do
857 (reg, code) <- getSomeReg other
858 let
859 off = ImmInt 0
860 return (Amode (AddrRegImm reg off) code)
861
862
863 -- The 'CondCode' type: Condition codes passed up the tree.
864 data CondCode
865 = CondCode Bool Cond InstrBlock
866
867 -- Set up a condition code for a conditional branch.
868
869 getCondCode :: CmmExpr -> NatM CondCode
870
871 -- almost the same as everywhere else - but we need to
872 -- extend small integers to 32 bit or 64 bit first
873
874 getCondCode (CmmMachOp mop [x, y])
875 = do
876 case mop of
877 MO_F_Eq W32 -> condFltCode EQQ x y
878 MO_F_Ne W32 -> condFltCode NE x y
879 MO_F_Gt W32 -> condFltCode GTT x y
880 MO_F_Ge W32 -> condFltCode GE x y
881 MO_F_Lt W32 -> condFltCode LTT x y
882 MO_F_Le W32 -> condFltCode LE x y
883
884 MO_F_Eq W64 -> condFltCode EQQ x y
885 MO_F_Ne W64 -> condFltCode NE x y
886 MO_F_Gt W64 -> condFltCode GTT x y
887 MO_F_Ge W64 -> condFltCode GE x y
888 MO_F_Lt W64 -> condFltCode LTT x y
889 MO_F_Le W64 -> condFltCode LE x y
890
891 MO_Eq rep -> condIntCode EQQ rep x y
892 MO_Ne rep -> condIntCode NE rep x y
893
894 MO_S_Gt rep -> condIntCode GTT rep x y
895 MO_S_Ge rep -> condIntCode GE rep x y
896 MO_S_Lt rep -> condIntCode LTT rep x y
897 MO_S_Le rep -> condIntCode LE rep x y
898
899 MO_U_Gt rep -> condIntCode GU rep x y
900 MO_U_Ge rep -> condIntCode GEU rep x y
901 MO_U_Lt rep -> condIntCode LU rep x y
902 MO_U_Le rep -> condIntCode LEU rep x y
903
904 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
905
906 getCondCode _ = panic "getCondCode(2)(powerpc)"
907
908
909 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
910 -- passed back up the tree.
911
912 condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
913 condIntCode cond width x y = do
914 dflags <- getDynFlags
915 condIntCode' (target32Bit (targetPlatform dflags)) cond width x y
916
917 condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
918
919 -- simple code for 64-bit on 32-bit platforms
920 condIntCode' True cond W64 x y
921 | condUnsigned cond
922 = do
923 ChildCode64 code_x x_lo <- iselExpr64 x
924 ChildCode64 code_y y_lo <- iselExpr64 y
925 let x_hi = getHiVRegFromLo x_lo
926 y_hi = getHiVRegFromLo y_lo
927 end_lbl <- getBlockIdNat
928 let code = code_x `appOL` code_y `appOL` toOL
929 [ CMPL II32 x_hi (RIReg y_hi)
930 , BCC NE end_lbl Nothing
931 , CMPL II32 x_lo (RIReg y_lo)
932 , BCC ALWAYS end_lbl Nothing
933
934 , NEWBLOCK end_lbl
935 ]
936 return (CondCode False cond code)
937 | otherwise
938 = do
939 ChildCode64 code_x x_lo <- iselExpr64 x
940 ChildCode64 code_y y_lo <- iselExpr64 y
941 let x_hi = getHiVRegFromLo x_lo
942 y_hi = getHiVRegFromLo y_lo
943 end_lbl <- getBlockIdNat
944 cmp_lo <- getBlockIdNat
945 let code = code_x `appOL` code_y `appOL` toOL
946 [ CMP II32 x_hi (RIReg y_hi)
947 , BCC NE end_lbl Nothing
948 , CMP II32 x_hi (RIImm (ImmInt 0))
949 , BCC LE cmp_lo Nothing
950 , CMPL II32 x_lo (RIReg y_lo)
951 , BCC ALWAYS end_lbl Nothing
952 , CMPL II32 y_lo (RIReg x_lo)
953 , BCC ALWAYS end_lbl Nothing
954
955 , NEWBLOCK end_lbl
956 ]
957 return (CondCode False cond code)
958
959 -- optimize pointer tag checks. Operation andi. sets condition register
960 -- so cmpi ..., 0 is redundant.
961 condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
962 (CmmLit (CmmInt 0 _))
963 | not $ condUnsigned cond,
964 Just src2 <- makeImmediate rep False imm
965 = do
966 (src1, code) <- getSomeReg x
967 let code' = code `snocOL` AND r0 src1 (RIImm src2)
968 return (CondCode False cond code')
969
970 condIntCode' _ cond width x (CmmLit (CmmInt y rep))
971 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
972 = do
973 let op_len = max W32 width
974 let extend = extendSExpr width op_len
975 (src1, code) <- getSomeReg (extend x)
976 let format = intFormat op_len
977 code' = code `snocOL`
978 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
979 return (CondCode False cond code')
980
981 condIntCode' _ cond width x y = do
982 let op_len = max W32 width
983 let extend = if condUnsigned cond then extendUExpr width op_len
984 else extendSExpr width op_len
985 (src1, code1) <- getSomeReg (extend x)
986 (src2, code2) <- getSomeReg (extend y)
987 let format = intFormat op_len
988 code' = code1 `appOL` code2 `snocOL`
989 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
990 return (CondCode False cond code')
991
992 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
993 condFltCode cond x y = do
994 (src1, code1) <- getSomeReg x
995 (src2, code2) <- getSomeReg y
996 let
997 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
998 code'' = case cond of -- twiddle CR to handle unordered case
999 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
1000 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
1001 _ -> code'
1002 where
1003 ltbit = 0 ; eqbit = 2 ; gtbit = 1
1004 return (CondCode True cond code'')
1005
1006
1007
1008 -- -----------------------------------------------------------------------------
1009 -- Generating assignments
1010
1011 -- Assignments are really at the heart of the whole code generation
1012 -- business. Almost all top-level nodes of any real importance are
1013 -- assignments, which correspond to loads, stores, or register
1014 -- transfers. If we're really lucky, some of the register transfers
1015 -- will go away, because we can use the destination register to
1016 -- complete the code generation for the right hand side. This only
1017 -- fails when the right hand side is forced into a fixed register
1018 -- (e.g. the result of a call).
1019
1020 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1021 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1022
1023 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1024 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1025
1026 assignMem_IntCode pk addr src = do
1027 (srcReg, code) <- getSomeReg src
1028 Amode dstAddr addr_code <- case pk of
1029 II64 -> getAmode DS addr
1030 _ -> getAmode D addr
1031 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
1032
1033 -- dst is a reg, but src could be anything
1034 assignReg_IntCode _ reg src
1035 = do
1036 dflags <- getDynFlags
1037 let dst = getRegisterReg (targetPlatform dflags) reg
1038 r <- getRegister src
1039 return $ case r of
1040 Any _ code -> code dst
1041 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
1042
1043
1044
1045 -- Easy, isn't it?
1046 assignMem_FltCode = assignMem_IntCode
1047 assignReg_FltCode = assignReg_IntCode
1048
1049
1050
1051 genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
1052
1053 genJump (CmmLit (CmmLabel lbl)) regs
1054 = return (unitOL $ JMP lbl regs)
1055
1056 genJump tree gregs
1057 = do
1058 dflags <- getDynFlags
1059 genJump' tree (platformToGCP (targetPlatform dflags)) gregs
1060
1061 genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock
1062
1063 genJump' tree (GCP64ELF 1) regs
1064 = do
1065 (target,code) <- getSomeReg tree
1066 return (code
1067 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1068 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1069 `snocOL` MTCTR r11
1070 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1071 `snocOL` BCTR [] Nothing regs)
1072
1073 genJump' tree (GCP64ELF 2) regs
1074 = do
1075 (target,code) <- getSomeReg tree
1076 return (code
1077 `snocOL` MR r12 target
1078 `snocOL` MTCTR r12
1079 `snocOL` BCTR [] Nothing regs)
1080
1081 genJump' tree _ regs
1082 = do
1083 (target,code) <- getSomeReg tree
1084 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs)
1085
1086 -- -----------------------------------------------------------------------------
1087 -- Unconditional branches
1088 genBranch :: BlockId -> NatM InstrBlock
1089 genBranch = return . toOL . mkJumpInstr
1090
1091
1092 -- -----------------------------------------------------------------------------
1093 -- Conditional jumps
1094
1095 {-
1096 Conditional jumps are always to local labels, so we can use branch
1097 instructions. We peek at the arguments to decide what kind of
1098 comparison to do.
1099 -}
1100
1101
1102 genCondJump
1103 :: BlockId -- the branch target
1104 -> CmmExpr -- the condition on which to branch
1105 -> Maybe Bool
1106 -> NatM InstrBlock
1107
1108 genCondJump id bool prediction = do
1109 CondCode _ cond code <- getCondCode bool
1110 return (code `snocOL` BCC cond id prediction)
1111
1112
1113
1114 -- -----------------------------------------------------------------------------
1115 -- Generating C calls
1116
1117 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1118 -- @get_arg@, which moves the arguments to the correct registers/stack
1119 -- locations. Apart from that, the code is easy.
1120
1121 genCCall :: ForeignTarget -- function to call
1122 -> [CmmFormal] -- where to put the result
1123 -> [CmmActual] -- arguments (of mixed type)
1124 -> NatM InstrBlock
1125 genCCall (PrimTarget MO_WriteBarrier) _ _
1126 = return $ unitOL LWSYNC
1127
1128 genCCall (PrimTarget MO_Touch) _ _
1129 = return $ nilOL
1130
1131 genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
1132 = return $ nilOL
1133
1134 genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
1135 = do dflags <- getDynFlags
1136 let platform = targetPlatform dflags
1137 fmt = intFormat width
1138 reg_dst = getRegisterReg platform (CmmLocal dst)
1139 (instr, n_code) <- case amop of
1140 AMO_Add -> getSomeRegOrImm ADD True reg_dst
1141 AMO_Sub -> case n of
1142 CmmLit (CmmInt i _)
1143 | Just imm <- makeImmediate width True (-i)
1144 -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
1145 _
1146 -> do
1147 (n_reg, n_code) <- getSomeReg n
1148 return (SUBF reg_dst n_reg reg_dst, n_code)
1149 AMO_And -> getSomeRegOrImm AND False reg_dst
1150 AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
1151 return (NAND reg_dst reg_dst n_reg, n_code)
1152 AMO_Or -> getSomeRegOrImm OR False reg_dst
1153 AMO_Xor -> getSomeRegOrImm XOR False reg_dst
1154 Amode addr_reg addr_code <- getAmodeIndex addr
1155 lbl_retry <- getBlockIdNat
1156 return $ n_code `appOL` addr_code
1157 `appOL` toOL [ HWSYNC
1158 , BCC ALWAYS lbl_retry Nothing
1159
1160 , NEWBLOCK lbl_retry
1161 , LDR fmt reg_dst addr_reg
1162 , instr
1163 , STC fmt reg_dst addr_reg
1164 , BCC NE lbl_retry (Just False)
1165 , ISYNC
1166 ]
1167 where
1168 getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
1169 = do
1170 (regX, codeX) <- getSomeReg x
1171 (regY, codeY) <- getSomeReg y
1172 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1173 getAmodeIndex other
1174 = do
1175 (reg, code) <- getSomeReg other
1176 return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
1177 getSomeRegOrImm op sign dst
1178 = case n of
1179 CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
1180 -> return (op dst dst (RIImm imm), nilOL)
1181 _
1182 -> do
1183 (n_reg, n_code) <- getSomeReg n
1184 return (op dst dst (RIReg n_reg), n_code)
1185
1186 genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
1187 = do dflags <- getDynFlags
1188 let platform = targetPlatform dflags
1189 fmt = intFormat width
1190 reg_dst = getRegisterReg platform (CmmLocal dst)
1191 form = if widthInBits width == 64 then DS else D
1192 Amode addr_reg addr_code <- getAmode form addr
1193 lbl_end <- getBlockIdNat
1194 return $ addr_code `appOL` toOL [ HWSYNC
1195 , LD fmt reg_dst addr_reg
1196 , CMP fmt reg_dst (RIReg reg_dst)
1197 , BCC NE lbl_end (Just False)
1198 , BCC ALWAYS lbl_end Nothing
1199 -- See Note [Seemingly useless cmp and bne]
1200 , NEWBLOCK lbl_end
1201 , ISYNC
1202 ]
1203
1204 -- Note [Seemingly useless cmp and bne]
1205 -- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
1206 -- the second paragraph says that isync may complete before storage accesses
1207 -- "associated" with a preceding instruction have been performed. The cmp
1208 -- operation and the following bne introduce a data and control dependency
1209 -- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
1210 -- Fetch).
1211 -- This is also what gcc does.
1212
1213
1214 genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1215 code <- assignMem_IntCode (intFormat width) addr val
1216 return $ unitOL(HWSYNC) `appOL` code
1217
1218 genCCall (PrimTarget (MO_Clz width)) [dst] [src]
1219 = do dflags <- getDynFlags
1220 let platform = targetPlatform dflags
1221 reg_dst = getRegisterReg platform (CmmLocal dst)
1222 if target32Bit platform && width == W64
1223 then do
1224 ChildCode64 code vr_lo <- iselExpr64 src
1225 lbl1 <- getBlockIdNat
1226 lbl2 <- getBlockIdNat
1227 lbl3 <- getBlockIdNat
1228 let vr_hi = getHiVRegFromLo vr_lo
1229 cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
1230 , BCC NE lbl2 Nothing
1231 , BCC ALWAYS lbl1 Nothing
1232
1233 , NEWBLOCK lbl1
1234 , CNTLZ II32 reg_dst vr_lo
1235 , ADD reg_dst reg_dst (RIImm (ImmInt 32))
1236 , BCC ALWAYS lbl3 Nothing
1237
1238 , NEWBLOCK lbl2
1239 , CNTLZ II32 reg_dst vr_hi
1240 , BCC ALWAYS lbl3 Nothing
1241
1242 , NEWBLOCK lbl3
1243 ]
1244 return $ code `appOL` cntlz
1245 else do
1246 let format = if width == W64 then II64 else II32
1247 (s_reg, s_code) <- getSomeReg src
1248 (pre, reg , post) <-
1249 case width of
1250 W64 -> return (nilOL, s_reg, nilOL)
1251 W32 -> return (nilOL, s_reg, nilOL)
1252 W16 -> do
1253 reg_tmp <- getNewRegNat format
1254 return
1255 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
1256 , reg_tmp
1257 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
1258 )
1259 W8 -> do
1260 reg_tmp <- getNewRegNat format
1261 return
1262 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
1263 , reg_tmp
1264 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
1265 )
1266 _ -> panic "genCall: Clz wrong format"
1267 let cntlz = unitOL (CNTLZ format reg_dst reg)
1268 return $ s_code `appOL` pre `appOL` cntlz `appOL` post
1269
1270 genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
1271 = do dflags <- getDynFlags
1272 let platform = targetPlatform dflags
1273 reg_dst = getRegisterReg platform (CmmLocal dst)
1274 if target32Bit platform && width == W64
1275 then do
1276 let format = II32
1277 ChildCode64 code vr_lo <- iselExpr64 src
1278 lbl1 <- getBlockIdNat
1279 lbl2 <- getBlockIdNat
1280 lbl3 <- getBlockIdNat
1281 x' <- getNewRegNat format
1282 x'' <- getNewRegNat format
1283 r' <- getNewRegNat format
1284 cnttzlo <- cnttz format reg_dst vr_lo
1285 let vr_hi = getHiVRegFromLo vr_lo
1286 cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
1287 , BCC NE lbl2 Nothing
1288 , BCC ALWAYS lbl1 Nothing
1289
1290 , NEWBLOCK lbl1
1291 , ADD x' vr_hi (RIImm (ImmInt (-1)))
1292 , ANDC x'' x' vr_hi
1293 , CNTLZ format r' x''
1294 -- 32 + (32 - clz(x''))
1295 , SUBFC reg_dst r' (RIImm (ImmInt 64))
1296 , BCC ALWAYS lbl3 Nothing
1297
1298 , NEWBLOCK lbl2
1299 ]
1300 `appOL` cnttzlo `appOL`
1301 toOL [ BCC ALWAYS lbl3 Nothing
1302
1303 , NEWBLOCK lbl3
1304 ]
1305 return $ code `appOL` cnttz64
1306 else do
1307 let format = if width == W64 then II64 else II32
1308 (s_reg, s_code) <- getSomeReg src
1309 (reg_ctz, pre_code) <-
1310 case width of
1311 W64 -> return (s_reg, nilOL)
1312 W32 -> return (s_reg, nilOL)
1313 W16 -> do
1314 reg_tmp <- getNewRegNat format
1315 return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
1316 W8 -> do
1317 reg_tmp <- getNewRegNat format
1318 return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
1319 _ -> panic "genCall: Ctz wrong format"
1320 ctz_code <- cnttz format reg_dst reg_ctz
1321 return $ s_code `appOL` pre_code `appOL` ctz_code
1322 where
1323 -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
1324 -- see Henry S. Warren, Hacker's Delight, p 107
1325 cnttz format dst src = do
1326 let format_bits = 8 * formatInBytes format
1327 x' <- getNewRegNat format
1328 x'' <- getNewRegNat format
1329 r' <- getNewRegNat format
1330 return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
1331 , ANDC x'' x' src
1332 , CNTLZ format r' x''
1333 , SUBFC dst r' (RIImm (ImmInt (format_bits)))
1334 ]
1335
1336 genCCall target dest_regs argsAndHints
1337 = do dflags <- getDynFlags
1338 let platform = targetPlatform dflags
1339 case target of
1340 PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width
1341 dest_regs argsAndHints
1342 PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width
1343 dest_regs argsAndHints
1344 PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
1345 argsAndHints
1346 PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
1347 argsAndHints
1348 PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
1349 PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
1350 PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
1351 PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
1352 dest_regs argsAndHints
1353 PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
1354 dest_regs argsAndHints
1355 PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
1356 PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
1357 _ -> genCCall' dflags (platformToGCP platform)
1358 target dest_regs argsAndHints
1359 where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
1360 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1361 reg_r = getRegisterReg platform (CmmLocal res_r)
1362 remainderCode width signed reg_q arg_x arg_y
1363 <*> pure reg_r
1364
1365 divOp1 _ _ _ _ _
1366 = panic "genCCall: Wrong number of arguments for divOp1"
1367 divOp2 platform width [res_q, res_r]
1368 [arg_x_high, arg_x_low, arg_y]
1369 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1370 reg_r = getRegisterReg platform (CmmLocal res_r)
1371 fmt = intFormat width
1372 half = 4 * (formatInBytes fmt)
1373 (xh_reg, xh_code) <- getSomeReg arg_x_high
1374 (xl_reg, xl_code) <- getSomeReg arg_x_low
1375 (y_reg, y_code) <- getSomeReg arg_y
1376 s <- getNewRegNat fmt
1377 b <- getNewRegNat fmt
1378 v <- getNewRegNat fmt
1379 vn1 <- getNewRegNat fmt
1380 vn0 <- getNewRegNat fmt
1381 un32 <- getNewRegNat fmt
1382 tmp <- getNewRegNat fmt
1383 un10 <- getNewRegNat fmt
1384 un1 <- getNewRegNat fmt
1385 un0 <- getNewRegNat fmt
1386 q1 <- getNewRegNat fmt
1387 rhat <- getNewRegNat fmt
1388 tmp1 <- getNewRegNat fmt
1389 q0 <- getNewRegNat fmt
1390 un21 <- getNewRegNat fmt
1391 again1 <- getBlockIdNat
1392 no1 <- getBlockIdNat
1393 then1 <- getBlockIdNat
1394 endif1 <- getBlockIdNat
1395 again2 <- getBlockIdNat
1396 no2 <- getBlockIdNat
1397 then2 <- getBlockIdNat
1398 endif2 <- getBlockIdNat
1399 return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
1400 -- see Hacker's Delight p 196 Figure 9-3
1401 toOL [ -- b = 2 ^ (bits_in_word / 2)
1402 LI b (ImmInt 1)
1403 , SL fmt b b (RIImm (ImmInt half))
1404 -- s = clz(y)
1405 , CNTLZ fmt s y_reg
1406 -- v = y << s
1407 , SL fmt v y_reg (RIReg s)
1408 -- vn1 = upper half of v
1409 , SR fmt vn1 v (RIImm (ImmInt half))
1410 -- vn0 = lower half of v
1411 , CLRLI fmt vn0 v half
1412 -- un32 = (u1 << s)
1413 -- | (u0 >> (bits_in_word - s))
1414 , SL fmt un32 xh_reg (RIReg s)
1415 , SUBFC tmp s
1416 (RIImm (ImmInt (8 * formatInBytes fmt)))
1417 , SR fmt tmp xl_reg (RIReg tmp)
1418 , OR un32 un32 (RIReg tmp)
1419 -- un10 = u0 << s
1420 , SL fmt un10 xl_reg (RIReg s)
1421 -- un1 = upper half of un10
1422 , SR fmt un1 un10 (RIImm (ImmInt half))
1423 -- un0 = lower half of un10
1424 , CLRLI fmt un0 un10 half
1425 -- q1 = un32/vn1
1426 , DIV fmt False q1 un32 vn1
1427 -- rhat = un32 - q1*vn1
1428 , MULL fmt tmp q1 (RIReg vn1)
1429 , SUBF rhat tmp un32
1430 , BCC ALWAYS again1 Nothing
1431
1432 , NEWBLOCK again1
1433 -- if (q1 >= b || q1*vn0 > b*rhat + un1)
1434 , CMPL fmt q1 (RIReg b)
1435 , BCC GEU then1 Nothing
1436 , BCC ALWAYS no1 Nothing
1437
1438 , NEWBLOCK no1
1439 , MULL fmt tmp q1 (RIReg vn0)
1440 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1441 , ADD tmp1 tmp1 (RIReg un1)
1442 , CMPL fmt tmp (RIReg tmp1)
1443 , BCC LEU endif1 Nothing
1444 , BCC ALWAYS then1 Nothing
1445
1446 , NEWBLOCK then1
1447 -- q1 = q1 - 1
1448 , ADD q1 q1 (RIImm (ImmInt (-1)))
1449 -- rhat = rhat + vn1
1450 , ADD rhat rhat (RIReg vn1)
1451 -- if (rhat < b) goto again1
1452 , CMPL fmt rhat (RIReg b)
1453 , BCC LTT again1 Nothing
1454 , BCC ALWAYS endif1 Nothing
1455
1456 , NEWBLOCK endif1
1457 -- un21 = un32*b + un1 - q1*v
1458 , SL fmt un21 un32 (RIImm (ImmInt half))
1459 , ADD un21 un21 (RIReg un1)
1460 , MULL fmt tmp q1 (RIReg v)
1461 , SUBF un21 tmp un21
1462 -- compute second quotient digit
1463 -- q0 = un21/vn1
1464 , DIV fmt False q0 un21 vn1
1465 -- rhat = un21- q0*vn1
1466 , MULL fmt tmp q0 (RIReg vn1)
1467 , SUBF rhat tmp un21
1468 , BCC ALWAYS again2 Nothing
1469
1470 , NEWBLOCK again2
1471 -- if (q0>b || q0*vn0 > b*rhat + un0)
1472 , CMPL fmt q0 (RIReg b)
1473 , BCC GEU then2 Nothing
1474 , BCC ALWAYS no2 Nothing
1475
1476 , NEWBLOCK no2
1477 , MULL fmt tmp q0 (RIReg vn0)
1478 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1479 , ADD tmp1 tmp1 (RIReg un0)
1480 , CMPL fmt tmp (RIReg tmp1)
1481 , BCC LEU endif2 Nothing
1482 , BCC ALWAYS then2 Nothing
1483
1484 , NEWBLOCK then2
1485 -- q0 = q0 - 1
1486 , ADD q0 q0 (RIImm (ImmInt (-1)))
1487 -- rhat = rhat + vn1
1488 , ADD rhat rhat (RIReg vn1)
1489 -- if (rhat<b) goto again2
1490 , CMPL fmt rhat (RIReg b)
1491 , BCC LTT again2 Nothing
1492 , BCC ALWAYS endif2 Nothing
1493
1494 , NEWBLOCK endif2
1495 -- compute remainder
1496 -- r = (un21*b + un0 - q0*v) >> s
1497 , SL fmt reg_r un21 (RIImm (ImmInt half))
1498 , ADD reg_r reg_r (RIReg un0)
1499 , MULL fmt tmp q0 (RIReg v)
1500 , SUBF reg_r tmp reg_r
1501 , SR fmt reg_r reg_r (RIReg s)
1502 -- compute quotient
1503 -- q = q1*b + q0
1504 , SL fmt reg_q q1 (RIImm (ImmInt half))
1505 , ADD reg_q reg_q (RIReg q0)
1506 ]
1507 divOp2 _ _ _ _
1508 = panic "genCCall: Wrong number of arguments for divOp2"
1509 multOp2 platform width [res_h, res_l] [arg_x, arg_y]
1510 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1511 reg_l = getRegisterReg platform (CmmLocal res_l)
1512 fmt = intFormat width
1513 (x_reg, x_code) <- getSomeReg arg_x
1514 (y_reg, y_code) <- getSomeReg arg_y
1515 return $ y_code `appOL` x_code
1516 `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
1517 , MULHU fmt reg_h x_reg y_reg
1518 ]
1519 multOp2 _ _ _ _
1520 = panic "genCall: Wrong number of arguments for multOp2"
1521 add2Op platform [res_h, res_l] [arg_x, arg_y]
1522 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1523 reg_l = getRegisterReg platform (CmmLocal res_l)
1524 (x_reg, x_code) <- getSomeReg arg_x
1525 (y_reg, y_code) <- getSomeReg arg_y
1526 return $ y_code `appOL` x_code
1527 `appOL` toOL [ LI reg_h (ImmInt 0)
1528 , ADDC reg_l x_reg y_reg
1529 , ADDZE reg_h reg_h
1530 ]
1531 add2Op _ _ _
1532 = panic "genCCall: Wrong number of arguments/results for add2"
1533
1534 addcOp platform [res_r, res_c] [arg_x, arg_y]
1535 = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
1536 addcOp _ _ _
1537 = panic "genCCall: Wrong number of arguments/results for addc"
1538
1539 -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
1540 -- which is 0 for borrow and 1 otherwise. We need 1 and 0
1541 -- so xor with 1.
1542 subcOp platform [res_r, res_c] [arg_x, arg_y]
1543 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1544 reg_c = getRegisterReg platform (CmmLocal res_c)
1545 (x_reg, x_code) <- getSomeReg arg_x
1546 (y_reg, y_code) <- getSomeReg arg_y
1547 return $ y_code `appOL` x_code
1548 `appOL` toOL [ LI reg_c (ImmInt 0)
1549 , SUBFC reg_r y_reg (RIReg x_reg)
1550 , ADDZE reg_c reg_c
1551 , XOR reg_c reg_c (RIImm (ImmInt 1))
1552 ]
1553 subcOp _ _ _
1554 = panic "genCCall: Wrong number of arguments/results for subc"
1555 addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
1556 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1557 reg_c = getRegisterReg platform (CmmLocal res_c)
1558 (x_reg, x_code) <- getSomeReg arg_x
1559 (y_reg, y_code) <- getSomeReg arg_y
1560 return $ y_code `appOL` x_code
1561 `appOL` toOL [ instr reg_r y_reg x_reg,
1562 -- SUBFO argument order reversed!
1563 MFOV (intFormat width) reg_c
1564 ]
1565 addSubCOp _ _ _ _ _
1566 = panic "genCall: Wrong number of arguments/results for addC"
1567 fabs platform [res] [arg]
1568 = do let res_r = getRegisterReg platform (CmmLocal res)
1569 (arg_reg, arg_code) <- getSomeReg arg
1570 return $ arg_code `snocOL` FABS res_r arg_reg
1571 fabs _ _ _
1572 = panic "genCall: Wrong number of arguments/results for fabs"
1573
1574 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
1575 data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX
1576
1577 platformToGCP :: Platform -> GenCCallPlatform
1578 platformToGCP platform
1579 = case platformOS platform of
1580 OSAIX -> GCPAIX
1581 _ -> case platformArch platform of
1582 ArchPPC -> GCP32ELF
1583 ArchPPC_64 ELF_V1 -> GCP64ELF 1
1584 ArchPPC_64 ELF_V2 -> GCP64ELF 2
1585 _ -> panic "platformToGCP: Not PowerPC"
1586
1587
1588 genCCall'
1589 :: DynFlags
1590 -> GenCCallPlatform
1591 -> ForeignTarget -- function to call
1592 -> [CmmFormal] -- where to put the result
1593 -> [CmmActual] -- arguments (of mixed type)
1594 -> NatM InstrBlock
1595
1596 {-
1597 PowerPC Linux uses the System V Release 4 Calling Convention
1598 for PowerPC. It is described in the
1599 "System V Application Binary Interface PowerPC Processor Supplement".
1600
1601 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1602 64-bit PowerPC. It is specified in
1603 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9"
1604 (PPC64 ELF v1.9).
1605
1606 PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit
1607 ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement"
1608 (PPC64 ELF v2).
1609
1610 AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian
1611 32-Bit Hardware Implementation"
1612
1613 All four conventions are similar:
1614 Parameters may be passed in general-purpose registers starting at r3, in
1615 floating point registers starting at f1, or on the stack.
1616
1617 But there are substantial differences:
1618 * The number of registers used for parameter passing and the exact set of
1619 nonvolatile registers differs (see MachRegs.hs).
1620 * On AIX and 64-bit ELF, stack space is always reserved for parameters,
1621 even if they are passed in registers. The called routine may choose to
1622 save parameters from registers to the corresponding space on the stack.
1623 * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when
1624 a floating point parameter is passed in an FPR.
1625 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1626 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1627 AIX just treats an I64 likt two separate I32s (high word first).
1628 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1629 4-byte aligned like everything else on AIX.
1630 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1631 PowerPC Linux does not agree, so neither do we.
1632
1633 According to all conventions, the parameter area should be part of the
1634 caller's stack frame, allocated in the caller's prologue code (large enough
1635 to hold the parameter lists for all called routines). The NCG already
1636 uses the stack for register spilling, leaving 64 bytes free at the top.
1637 If we need a larger parameter area than that, we increase the size
1638 of the stack frame just before ccalling.
1639 -}
1640
1641
1642 genCCall' dflags gcp target dest_regs args
1643 = do
1644 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1645 (zip3 args argReps argHints)
1646 allArgRegs
1647 (allFPArgRegs platform)
1648 initialStackOffset
1649 nilOL []
1650
1651 (labelOrExpr, reduceToFF32) <- case target of
1652 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1653 uses_pic_base_implicitly
1654 return (Left lbl, False)
1655 ForeignTarget expr _ -> do
1656 uses_pic_base_implicitly
1657 return (Right expr, False)
1658 PrimTarget mop -> outOfLineMachOp mop
1659
1660 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1661 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
1662
1663 case labelOrExpr of
1664 Left lbl -> do -- the linker does all the work for us
1665 return ( codeBefore
1666 `snocOL` BL lbl usedRegs
1667 `appOL` maybeNOP -- some ABI require a NOP after BL
1668 `appOL` codeAfter)
1669 Right dyn -> do -- implement call through function pointer
1670 (dynReg, dynCode) <- getSomeReg dyn
1671 case gcp of
1672 GCP64ELF 1 -> return ( dynCode
1673 `appOL` codeBefore
1674 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40))
1675 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1676 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1677 `snocOL` MTCTR r11
1678 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1679 `snocOL` BCTRL usedRegs
1680 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40))
1681 `appOL` codeAfter)
1682 GCP64ELF 2 -> return ( dynCode
1683 `appOL` codeBefore
1684 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24))
1685 `snocOL` MR r12 dynReg
1686 `snocOL` MTCTR r12
1687 `snocOL` BCTRL usedRegs
1688 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24))
1689 `appOL` codeAfter)
1690 GCPAIX -> return ( dynCode
1691 -- AIX/XCOFF follows the PowerOPEN ABI
1692 -- which is quite similiar to LinuxPPC64/ELFv1
1693 `appOL` codeBefore
1694 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20))
1695 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1696 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1697 `snocOL` MTCTR r11
1698 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1699 `snocOL` BCTRL usedRegs
1700 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20))
1701 `appOL` codeAfter)
1702 _ -> return ( dynCode
1703 `snocOL` MTCTR dynReg
1704 `appOL` codeBefore
1705 `snocOL` BCTRL usedRegs
1706 `appOL` codeAfter)
1707 where
1708 platform = targetPlatform dflags
1709
1710 uses_pic_base_implicitly = do
1711 -- See Note [implicit register in PPC PIC code]
1712 -- on why we claim to use PIC register here
1713 when (positionIndependent dflags && target32Bit platform) $ do
1714 _ <- getPicBaseNat $ archWordFormat True
1715 return ()
1716
1717 initialStackOffset = case gcp of
1718 GCPAIX -> 24
1719 GCP32ELF -> 8
1720 GCP64ELF 1 -> 48
1721 GCP64ELF 2 -> 32
1722 _ -> panic "genCall': unknown calling convention"
1723 -- size of linkage area + size of arguments, in bytes
1724 stackDelta finalStack = case gcp of
1725 GCPAIX ->
1726 roundTo 16 $ (24 +) $ max 32 $ sum $
1727 map (widthInBytes . typeWidth) argReps
1728 GCP32ELF -> roundTo 16 finalStack
1729 GCP64ELF 1 ->
1730 roundTo 16 $ (48 +) $ max 64 $ sum $
1731 map (roundTo 8 . widthInBytes . typeWidth)
1732 argReps
1733 GCP64ELF 2 ->
1734 roundTo 16 $ (32 +) $ max 64 $ sum $
1735 map (roundTo 8 . widthInBytes . typeWidth)
1736 argReps
1737 _ -> panic "genCall': unknown calling conv."
1738
1739 argReps = map (cmmExprType dflags) args
1740 (argHints, _) = foreignTargetHints target
1741
1742 roundTo a x | x `mod` a == 0 = x
1743 | otherwise = x + a - (x `mod` a)
1744
1745 spFormat = if target32Bit platform then II32 else II64
1746
1747 -- TODO: Do not create a new stack frame if delta is too large.
1748 move_sp_down finalStack
1749 | delta > stackFrameHeaderSize dflags =
1750 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1751 DELTA (-delta)]
1752 | otherwise = nilOL
1753 where delta = stackDelta finalStack
1754 move_sp_up finalStack
1755 | delta > stackFrameHeaderSize dflags =
1756 toOL [ADD sp sp (RIImm (ImmInt delta)),
1757 DELTA 0]
1758 | otherwise = nilOL
1759 where delta = stackDelta finalStack
1760
1761 -- A NOP instruction is required after a call (bl instruction)
1762 -- on AIX and 64-Bit Linux.
1763 -- If the call is to a function with a different TOC (r2) the
1764 -- link editor replaces the NOP instruction with a load of the TOC
1765 -- from the stack to restore the TOC.
1766 maybeNOP = case gcp of
1767 GCP32ELF -> nilOL
1768 -- See Section 3.9.4 of OpenPower ABI
1769 GCPAIX -> unitOL NOP
1770 -- See Section 3.5.11 of PPC64 ELF v1.9
1771 GCP64ELF 1 -> unitOL NOP
1772 -- See Section 2.3.6 of PPC64 ELF v2
1773 GCP64ELF 2 -> unitOL NOP
1774 _ -> panic "maybeNOP: Unknown PowerPC 64-bit ABI"
1775
1776 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1777 passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset
1778 accumCode accumUsed | isWord64 arg_ty
1779 && target32Bit (targetPlatform dflags) =
1780 do
1781 ChildCode64 code vr_lo <- iselExpr64 arg
1782 let vr_hi = getHiVRegFromLo vr_lo
1783
1784 case gcp of
1785 GCPAIX ->
1786 do let storeWord vr (gpr:_) _ = MR gpr vr
1787 storeWord vr [] offset
1788 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1789 passArguments args
1790 (drop 2 gprs)
1791 fprs
1792 (stackOffset+8)
1793 (accumCode `appOL` code
1794 `snocOL` storeWord vr_hi gprs stackOffset
1795 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1796 ((take 2 gprs) ++ accumUsed)
1797 GCP32ELF ->
1798 do let stackOffset' = roundTo 8 stackOffset
1799 stackCode = accumCode `appOL` code
1800 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1801 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1802 regCode hireg loreg =
1803 accumCode `appOL` code
1804 `snocOL` MR hireg vr_hi
1805 `snocOL` MR loreg vr_lo
1806
1807 case gprs of
1808 hireg : loreg : regs | even (length gprs) ->
1809 passArguments args regs fprs stackOffset
1810 (regCode hireg loreg) (hireg : loreg : accumUsed)
1811 _skipped : hireg : loreg : regs ->
1812 passArguments args regs fprs stackOffset
1813 (regCode hireg loreg) (hireg : loreg : accumUsed)
1814 _ -> -- only one or no regs left
1815 passArguments args [] fprs (stackOffset'+8)
1816 stackCode accumUsed
1817 GCP64ELF _ -> panic "passArguments: 32 bit code"
1818
1819 passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed
1820 | reg : _ <- regs = do
1821 register <- getRegister arg_pro
1822 let code = case register of
1823 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1824 Any _ acode -> acode reg
1825 stackOffsetRes = case gcp of
1826 -- The PowerOpen ABI requires that we
1827 -- reserve stack slots for register
1828 -- parameters
1829 GCPAIX -> stackOffset + stackBytes
1830 -- ... the SysV ABI 32-bit doesn't.
1831 GCP32ELF -> stackOffset
1832 -- ... but SysV ABI 64-bit does.
1833 GCP64ELF _ -> stackOffset + stackBytes
1834 passArguments args
1835 (drop nGprs gprs)
1836 (drop nFprs fprs)
1837 stackOffsetRes
1838 (accumCode `appOL` code)
1839 (reg : accumUsed)
1840 | otherwise = do
1841 (vr, code) <- getSomeReg arg_pro
1842 passArguments args
1843 (drop nGprs gprs)
1844 (drop nFprs fprs)
1845 (stackOffset' + stackBytes)
1846 (accumCode `appOL` code
1847 `snocOL` ST format_pro vr stackSlot)
1848 accumUsed
1849 where
1850 arg_pro
1851 | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth dflags)) [arg]
1852 | otherwise = arg
1853 format_pro
1854 | isBitsType rep = intFormat (wordWidth dflags)
1855 | otherwise = cmmTypeFormat rep
1856 conv_op = case hint of
1857 SignedHint -> MO_SS_Conv
1858 _ -> MO_UU_Conv
1859
1860 stackOffset' = case gcp of
1861 GCPAIX ->
1862 -- The 32bit PowerOPEN ABI is happy with
1863 -- 32bit-alignment ...
1864 stackOffset
1865 GCP32ELF
1866 -- ... the SysV ABI requires 8-byte
1867 -- alignment for doubles.
1868 | isFloatType rep && typeWidth rep == W64 ->
1869 roundTo 8 stackOffset
1870 | otherwise ->
1871 stackOffset
1872 GCP64ELF _ ->
1873 -- Everything on the stack is mapped to
1874 -- 8-byte aligned doublewords
1875 stackOffset
1876 stackOffset''
1877 | isFloatType rep && typeWidth rep == W32 =
1878 case gcp of
1879 -- The ELF v1 ABI Section 3.2.3 requires:
1880 -- "Single precision floating point values
1881 -- are mapped to the second word in a single
1882 -- doubleword"
1883 GCP64ELF 1 -> stackOffset' + 4
1884 _ -> stackOffset'
1885 | otherwise = stackOffset'
1886
1887 stackSlot = AddrRegImm sp (ImmInt stackOffset'')
1888 (nGprs, nFprs, stackBytes, regs)
1889 = case gcp of
1890 GCPAIX ->
1891 case cmmTypeFormat rep of
1892 II8 -> (1, 0, 4, gprs)
1893 II16 -> (1, 0, 4, gprs)
1894 II32 -> (1, 0, 4, gprs)
1895 -- The PowerOpen ABI requires that we skip a
1896 -- corresponding number of GPRs when we use
1897 -- the FPRs.
1898 --
1899 -- E.g. for a `double` two GPRs are skipped,
1900 -- whereas for a `float` one GPR is skipped
1901 -- when parameters are assigned to
1902 -- registers.
1903 --
1904 -- The PowerOpen ABI specification can be found at
1905 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1906 FF32 -> (1, 1, 4, fprs)
1907 FF64 -> (2, 1, 8, fprs)
1908 II64 -> panic "genCCall' passArguments II64"
1909 FF80 -> panic "genCCall' passArguments FF80"
1910 GCP32ELF ->
1911 case cmmTypeFormat rep of
1912 II8 -> (1, 0, 4, gprs)
1913 II16 -> (1, 0, 4, gprs)
1914 II32 -> (1, 0, 4, gprs)
1915 -- ... the SysV ABI doesn't.
1916 FF32 -> (0, 1, 4, fprs)
1917 FF64 -> (0, 1, 8, fprs)
1918 II64 -> panic "genCCall' passArguments II64"
1919 FF80 -> panic "genCCall' passArguments FF80"
1920 GCP64ELF _ ->
1921 case cmmTypeFormat rep of
1922 II8 -> (1, 0, 8, gprs)
1923 II16 -> (1, 0, 8, gprs)
1924 II32 -> (1, 0, 8, gprs)
1925 II64 -> (1, 0, 8, gprs)
1926 -- The ELFv1 ABI requires that we skip a
1927 -- corresponding number of GPRs when we use
1928 -- the FPRs.
1929 FF32 -> (1, 1, 8, fprs)
1930 FF64 -> (1, 1, 8, fprs)
1931 FF80 -> panic "genCCall' passArguments FF80"
1932
1933 moveResult reduceToFF32 =
1934 case dest_regs of
1935 [] -> nilOL
1936 [dest]
1937 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1938 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1939 | isWord64 rep && target32Bit (targetPlatform dflags)
1940 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1941 MR r_dest r4]
1942 | otherwise -> unitOL (MR r_dest r3)
1943 where rep = cmmRegType dflags (CmmLocal dest)
1944 r_dest = getRegisterReg platform (CmmLocal dest)
1945 _ -> panic "genCCall' moveResult: Bad dest_regs"
1946
1947 outOfLineMachOp mop =
1948 do
1949 dflags <- getDynFlags
1950 mopExpr <- cmmMakeDynamicReference dflags CallReference $
1951 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1952 let mopLabelOrExpr = case mopExpr of
1953 CmmLit (CmmLabel lbl) -> Left lbl
1954 _ -> Right mopExpr
1955 return (mopLabelOrExpr, reduce)
1956 where
1957 (functionName, reduce) = case mop of
1958 MO_F32_Exp -> (fsLit "exp", True)
1959 MO_F32_Log -> (fsLit "log", True)
1960 MO_F32_Sqrt -> (fsLit "sqrt", True)
1961 MO_F32_Fabs -> unsupported
1962
1963 MO_F32_Sin -> (fsLit "sin", True)
1964 MO_F32_Cos -> (fsLit "cos", True)
1965 MO_F32_Tan -> (fsLit "tan", True)
1966
1967 MO_F32_Asin -> (fsLit "asin", True)
1968 MO_F32_Acos -> (fsLit "acos", True)
1969 MO_F32_Atan -> (fsLit "atan", True)
1970
1971 MO_F32_Sinh -> (fsLit "sinh", True)
1972 MO_F32_Cosh -> (fsLit "cosh", True)
1973 MO_F32_Tanh -> (fsLit "tanh", True)
1974 MO_F32_Pwr -> (fsLit "pow", True)
1975
1976 MO_F32_Asinh -> (fsLit "asinh", True)
1977 MO_F32_Acosh -> (fsLit "acosh", True)
1978 MO_F32_Atanh -> (fsLit "atanh", True)
1979
1980 MO_F64_Exp -> (fsLit "exp", False)
1981 MO_F64_Log -> (fsLit "log", False)
1982 MO_F64_Sqrt -> (fsLit "sqrt", False)
1983 MO_F64_Fabs -> unsupported
1984
1985 MO_F64_Sin -> (fsLit "sin", False)
1986 MO_F64_Cos -> (fsLit "cos", False)
1987 MO_F64_Tan -> (fsLit "tan", False)
1988
1989 MO_F64_Asin -> (fsLit "asin", False)
1990 MO_F64_Acos -> (fsLit "acos", False)
1991 MO_F64_Atan -> (fsLit "atan", False)
1992
1993 MO_F64_Sinh -> (fsLit "sinh", False)
1994 MO_F64_Cosh -> (fsLit "cosh", False)
1995 MO_F64_Tanh -> (fsLit "tanh", False)
1996 MO_F64_Pwr -> (fsLit "pow", False)
1997
1998 MO_F64_Asinh -> (fsLit "asinh", False)
1999 MO_F64_Acosh -> (fsLit "acosh", False)
2000 MO_F64_Atanh -> (fsLit "atanh", False)
2001
2002 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
2003
2004 MO_Memcpy _ -> (fsLit "memcpy", False)
2005 MO_Memset _ -> (fsLit "memset", False)
2006 MO_Memmove _ -> (fsLit "memmove", False)
2007 MO_Memcmp _ -> (fsLit "memcmp", False)
2008
2009 MO_BSwap w -> (fsLit $ bSwapLabel w, False)
2010 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
2011 MO_Pdep w -> (fsLit $ pdepLabel w, False)
2012 MO_Pext w -> (fsLit $ pextLabel w, False)
2013 MO_Clz _ -> unsupported
2014 MO_Ctz _ -> unsupported
2015 MO_AtomicRMW {} -> unsupported
2016 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
2017 MO_AtomicRead _ -> unsupported
2018 MO_AtomicWrite _ -> unsupported
2019
2020 MO_S_QuotRem {} -> unsupported
2021 MO_U_QuotRem {} -> unsupported
2022 MO_U_QuotRem2 {} -> unsupported
2023 MO_Add2 {} -> unsupported
2024 MO_AddWordC {} -> unsupported
2025 MO_SubWordC {} -> unsupported
2026 MO_AddIntC {} -> unsupported
2027 MO_SubIntC {} -> unsupported
2028 MO_U_Mul2 {} -> unsupported
2029 MO_WriteBarrier -> unsupported
2030 MO_Touch -> unsupported
2031 MO_Prefetch_Data _ -> unsupported
2032 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2033 ++ " not supported")
2034
2035 -- -----------------------------------------------------------------------------
2036 -- Generating a table-branch
2037
2038 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2039 genSwitch dflags expr targets
2040 | OSAIX <- platformOS (targetPlatform dflags)
2041 = do
2042 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2043 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2044 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2045 tmp <- getNewRegNat fmt
2046 lbl <- getNewLabelNat
2047 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2048 (tableReg,t_code) <- getSomeReg $ dynRef
2049 let code = e_code `appOL` t_code `appOL` toOL [
2050 SL fmt tmp reg (RIImm (ImmInt sha)),
2051 LD fmt tmp (AddrRegReg tableReg tmp),
2052 MTCTR tmp,
2053 BCTR ids (Just lbl) []
2054 ]
2055 return code
2056
2057 | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags)
2058 = do
2059 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2060 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2061 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2062 tmp <- getNewRegNat fmt
2063 lbl <- getNewLabelNat
2064 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2065 (tableReg,t_code) <- getSomeReg $ dynRef
2066 let code = e_code `appOL` t_code `appOL` toOL [
2067 SL fmt tmp reg (RIImm (ImmInt sha)),
2068 LD fmt tmp (AddrRegReg tableReg tmp),
2069 ADD tmp tmp (RIReg tableReg),
2070 MTCTR tmp,
2071 BCTR ids (Just lbl) []
2072 ]
2073 return code
2074 | otherwise
2075 = do
2076 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2077 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2078 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2079 tmp <- getNewRegNat fmt
2080 lbl <- getNewLabelNat
2081 let code = e_code `appOL` toOL [
2082 SL fmt tmp reg (RIImm (ImmInt sha)),
2083 ADDIS tmp tmp (HA (ImmCLbl lbl)),
2084 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
2085 MTCTR tmp,
2086 BCTR ids (Just lbl) []
2087 ]
2088 return code
2089 where (offset, ids) = switchTargetsToTable targets
2090
2091 generateJumpTableForInstr :: DynFlags -> Instr
2092 -> Maybe (NatCmmDecl CmmStatics Instr)
2093 generateJumpTableForInstr dflags (BCTR ids (Just lbl) _) =
2094 let jumpTable
2095 | (positionIndependent dflags)
2096 || (not $ target32Bit $ targetPlatform dflags)
2097 = map jumpTableEntryRel ids
2098 | otherwise = map (jumpTableEntry dflags) ids
2099 where jumpTableEntryRel Nothing
2100 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2101 jumpTableEntryRel (Just blockid)
2102 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
2103 (wordWidth dflags))
2104 where blockLabel = blockLbl blockid
2105 in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
2106 generateJumpTableForInstr _ _ = Nothing
2107
2108 -- -----------------------------------------------------------------------------
2109 -- 'condIntReg' and 'condFltReg': condition codes into registers
2110
2111 -- Turn those condition codes into integers now (when they appear on
2112 -- the right hand side of an assignment).
2113
2114
2115
2116 condReg :: NatM CondCode -> NatM Register
2117 condReg getCond = do
2118 CondCode _ cond cond_code <- getCond
2119 dflags <- getDynFlags
2120 let
2121 code dst = cond_code
2122 `appOL` negate_code
2123 `appOL` toOL [
2124 MFCR dst,
2125 RLWINM dst dst (bit + 1) 31 31
2126 ]
2127
2128 negate_code | do_negate = unitOL (CRNOR bit bit bit)
2129 | otherwise = nilOL
2130
2131 (bit, do_negate) = case cond of
2132 LTT -> (0, False)
2133 LE -> (1, True)
2134 EQQ -> (2, False)
2135 GE -> (0, True)
2136 GTT -> (1, False)
2137
2138 NE -> (2, True)
2139
2140 LU -> (0, False)
2141 LEU -> (1, True)
2142 GEU -> (0, True)
2143 GU -> (1, False)
2144 _ -> panic "PPC.CodeGen.codeReg: no match"
2145
2146 format = archWordFormat $ target32Bit $ targetPlatform dflags
2147 return (Any format code)
2148
2149 condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register
2150 condIntReg cond width x y = condReg (condIntCode cond width x y)
2151 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2152 condFltReg cond x y = condReg (condFltCode cond x y)
2153
2154
2155
2156 -- -----------------------------------------------------------------------------
2157 -- 'trivial*Code': deal with trivial instructions
2158
2159 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2160 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2161 -- Only look for constants on the right hand side, because that's
2162 -- where the generic optimizer will have put them.
2163
2164 -- Similarly, for unary instructions, we don't have to worry about
2165 -- matching an StInt as the argument, because genericOpt will already
2166 -- have handled the constant-folding.
2167
2168
2169
2170 {-
2171 Wolfgang's PowerPC version of The Rules:
2172
2173 A slightly modified version of The Rules to take advantage of the fact
2174 that PowerPC instructions work on all registers and don't implicitly
2175 clobber any fixed registers.
2176
2177 * The only expression for which getRegister returns Fixed is (CmmReg reg).
2178
2179 * If getRegister returns Any, then the code it generates may modify only:
2180 (a) fresh temporaries
2181 (b) the destination register
2182 It may *not* modify global registers, unless the global
2183 register happens to be the destination register.
2184 It may not clobber any other registers. In fact, only ccalls clobber any
2185 fixed registers.
2186 Also, it may not modify the counter register (used by genCCall).
2187
2188 Corollary: If a getRegister for a subexpression returns Fixed, you need
2189 not move it to a fresh temporary before evaluating the next subexpression.
2190 The Fixed register won't be modified.
2191 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
2192
2193 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
2194 the value of the destination register.
2195 -}
2196
2197 trivialCode
2198 :: Width
2199 -> Bool
2200 -> (Reg -> Reg -> RI -> Instr)
2201 -> CmmExpr
2202 -> CmmExpr
2203 -> NatM Register
2204
2205 trivialCode rep signed instr x (CmmLit (CmmInt y _))
2206 | Just imm <- makeImmediate rep signed y
2207 = do
2208 (src1, code1) <- getSomeReg x
2209 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
2210 return (Any (intFormat rep) code)
2211
2212 trivialCode rep _ instr x y = do
2213 (src1, code1) <- getSomeReg x
2214 (src2, code2) <- getSomeReg y
2215 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
2216 return (Any (intFormat rep) code)
2217
2218 shiftMulCode
2219 :: Width
2220 -> Bool
2221 -> (Format-> Reg -> Reg -> RI -> Instr)
2222 -> CmmExpr
2223 -> CmmExpr
2224 -> NatM Register
2225 shiftMulCode width sign instr x (CmmLit (CmmInt y _))
2226 | Just imm <- makeImmediate width sign y
2227 = do
2228 (src1, code1) <- getSomeReg x
2229 let format = intFormat width
2230 let ins_fmt = intFormat (max W32 width)
2231 let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm)
2232 return (Any format code)
2233
2234 shiftMulCode width _ instr x y = do
2235 (src1, code1) <- getSomeReg x
2236 (src2, code2) <- getSomeReg y
2237 let format = intFormat width
2238 let ins_fmt = intFormat (max W32 width)
2239 let code dst = code1 `appOL` code2
2240 `snocOL` instr ins_fmt dst src1 (RIReg src2)
2241 return (Any format code)
2242
2243 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
2244 -> CmmExpr -> CmmExpr -> NatM Register
2245 trivialCodeNoImm' format instr x y = do
2246 (src1, code1) <- getSomeReg x
2247 (src2, code2) <- getSomeReg y
2248 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
2249 return (Any format code)
2250
2251 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
2252 -> CmmExpr -> CmmExpr -> NatM Register
2253 trivialCodeNoImm format instr x y
2254 = trivialCodeNoImm' format (instr format) x y
2255
2256 srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr)
2257 -> CmmExpr -> CmmExpr -> NatM Register
2258 srCode width sgn instr x (CmmLit (CmmInt y _))
2259 | Just imm <- makeImmediate width sgn y
2260 = do
2261 let op_len = max W32 width
2262 extend = if sgn then extendSExpr else extendUExpr
2263 (src1, code1) <- getSomeReg (extend width op_len x)
2264 let code dst = code1 `snocOL`
2265 instr (intFormat op_len) dst src1 (RIImm imm)
2266 return (Any (intFormat width) code)
2267
2268 srCode width sgn instr x y = do
2269 let op_len = max W32 width
2270 extend = if sgn then extendSExpr else extendUExpr
2271 (src1, code1) <- getSomeReg (extend width op_len x)
2272 (src2, code2) <- getSomeReg (extendUExpr width op_len y)
2273 -- Note: Shift amount `y` is unsigned
2274 let code dst = code1 `appOL` code2 `snocOL`
2275 instr (intFormat op_len) dst src1 (RIReg src2)
2276 return (Any (intFormat width) code)
2277
2278 divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
2279 divCode width sgn x y = do
2280 let op_len = max W32 width
2281 extend = if sgn then extendSExpr else extendUExpr
2282 (src1, code1) <- getSomeReg (extend width op_len x)
2283 (src2, code2) <- getSomeReg (extend width op_len y)
2284 let code dst = code1 `appOL` code2 `snocOL`
2285 DIV (intFormat op_len) sgn dst src1 src2
2286 return (Any (intFormat width) code)
2287
2288
2289 trivialUCode :: Format
2290 -> (Reg -> Reg -> Instr)
2291 -> CmmExpr
2292 -> NatM Register
2293 trivialUCode rep instr x = do
2294 (src, code) <- getSomeReg x
2295 let code' dst = code `snocOL` instr dst src
2296 return (Any rep code')
2297
2298 -- There is no "remainder" instruction on the PPC, so we have to do
2299 -- it the hard way.
2300 -- The "sgn" parameter is the signedness for the division instruction
2301
2302 remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr
2303 -> NatM (Reg -> InstrBlock)
2304 remainderCode rep sgn reg_q arg_x arg_y = do
2305 let op_len = max W32 rep
2306 fmt = intFormat op_len
2307 extend = if sgn then extendSExpr else extendUExpr
2308 (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x)
2309 (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y)
2310 return $ \reg_r -> y_code `appOL` x_code
2311 `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg
2312 , MULL fmt reg_r reg_q (RIReg y_reg)
2313 , SUBF reg_r reg_r x_reg
2314 ]
2315
2316
2317 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2318 coerceInt2FP fromRep toRep x = do
2319 dflags <- getDynFlags
2320 let arch = platformArch $ targetPlatform dflags
2321 coerceInt2FP' arch fromRep toRep x
2322
2323 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2324 coerceInt2FP' ArchPPC fromRep toRep x = do
2325 (src, code) <- getSomeReg x
2326 lbl <- getNewLabelNat
2327 itmp <- getNewRegNat II32
2328 ftmp <- getNewRegNat FF64
2329 dflags <- getDynFlags
2330 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2331 Amode addr addr_code <- getAmode D dynRef
2332 let
2333 code' dst = code `appOL` maybe_exts `appOL` toOL [
2334 LDATA (Section ReadOnlyData lbl) $ Statics lbl
2335 [CmmStaticLit (CmmInt 0x43300000 W32),
2336 CmmStaticLit (CmmInt 0x80000000 W32)],
2337 XORIS itmp src (ImmInt 0x8000),
2338 ST II32 itmp (spRel dflags 3),
2339 LIS itmp (ImmInt 0x4330),
2340 ST II32 itmp (spRel dflags 2),
2341 LD FF64 ftmp (spRel dflags 2)
2342 ] `appOL` addr_code `appOL` toOL [
2343 LD FF64 dst addr,
2344 FSUB FF64 dst ftmp dst
2345 ] `appOL` maybe_frsp dst
2346
2347 maybe_exts = case fromRep of
2348 W8 -> unitOL $ EXTS II8 src src
2349 W16 -> unitOL $ EXTS II16 src src
2350 W32 -> nilOL
2351 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2352
2353 maybe_frsp dst
2354 = case toRep of
2355 W32 -> unitOL $ FRSP dst dst
2356 W64 -> nilOL
2357 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2358
2359 return (Any (floatFormat toRep) code')
2360
2361 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
2362 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
2363 -- set right before a call and restored right after return from the call.
2364 -- So it is fine.
2365 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
2366 (src, code) <- getSomeReg x
2367 dflags <- getDynFlags
2368 let
2369 code' dst = code `appOL` maybe_exts `appOL` toOL [
2370 ST II64 src (spRel dflags 3),
2371 LD FF64 dst (spRel dflags 3),
2372 FCFID dst dst
2373 ] `appOL` maybe_frsp dst
2374
2375 maybe_exts = case fromRep of
2376 W8 -> unitOL $ EXTS II8 src src
2377 W16 -> unitOL $ EXTS II16 src src
2378 W32 -> unitOL $ EXTS II32 src src
2379 W64 -> nilOL
2380 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2381
2382 maybe_frsp dst
2383 = case toRep of
2384 W32 -> unitOL $ FRSP dst dst
2385 W64 -> nilOL
2386 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2387
2388 return (Any (floatFormat toRep) code')
2389
2390 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
2391
2392
2393 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2394 coerceFP2Int fromRep toRep x = do
2395 dflags <- getDynFlags
2396 let arch = platformArch $ targetPlatform dflags
2397 coerceFP2Int' arch fromRep toRep x
2398
2399 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2400 coerceFP2Int' ArchPPC _ toRep x = do
2401 dflags <- getDynFlags
2402 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
2403 (src, code) <- getSomeReg x
2404 tmp <- getNewRegNat FF64
2405 let
2406 code' dst = code `appOL` toOL [
2407 -- convert to int in FP reg
2408 FCTIWZ tmp src,
2409 -- store value (64bit) from FP to stack
2410 ST FF64 tmp (spRel dflags 2),
2411 -- read low word of value (high word is undefined)
2412 LD II32 dst (spRel dflags 3)]
2413 return (Any (intFormat toRep) code')
2414
2415 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
2416 dflags <- getDynFlags
2417 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
2418 (src, code) <- getSomeReg x
2419 tmp <- getNewRegNat FF64
2420 let
2421 code' dst = code `appOL` toOL [
2422 -- convert to int in FP reg
2423 FCTIDZ tmp src,
2424 -- store value (64bit) from FP to compiler word on stack
2425 ST FF64 tmp (spRel dflags 3),
2426 LD II64 dst (spRel dflags 3)]
2427 return (Any (intFormat toRep) code')
2428
2429 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
2430
2431 -- Note [.LCTOC1 in PPC PIC code]
2432 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
2433 -- to make the most of the PPC's 16-bit displacements.
2434 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
2435 -- first element will have '-32768' offset against .LCTOC1.
2436
2437 -- Note [implicit register in PPC PIC code]
2438 -- PPC generates calls by labels in assembly
2439 -- in form of:
2440 -- bl puts+32768@plt
2441 -- in this form it's not seen directly (by GHC NCG)
2442 -- that r30 (PicBaseReg) is used,
2443 -- but r30 is a required part of PLT code setup:
2444 -- puts+32768@plt:
2445 -- lwz r11,-30484(r30) ; offset in .LCTOC1
2446 -- mtctr r11
2447 -- bctr