a716765eabbe1ae07096079a2de2787ee9e578a1
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, and (b) the type signatures,
13 -- the structure should not be too overwhelming.
14
15 module PPC.CodeGen (
16 cmmTopCodeGen,
17 generateJumpTableForInstr,
18 InstrBlock
19 )
20
21 where
22
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
25 #include "../includes/MachDeps.h"
26
27 -- NCG stuff:
28 import GhcPrelude
29
30 import CodeGen.Platform
31 import PPC.Instr
32 import PPC.Cond
33 import PPC.Regs
34 import CPrim
35 import NCGMonad ( NatM, getNewRegNat, getNewLabelNat
36 , getBlockIdNat, getPicBaseNat, getNewRegPairNat
37 , getPicBaseMaybeNat )
38 import Instruction
39 import PIC
40 import Format
41 import RegClass
42 import Reg
43 import TargetReg
44 import Platform
45
46 -- Our intermediate code:
47 import BlockId
48 import PprCmm ( pprExpr )
49 import Cmm
50 import CmmUtils
51 import CmmSwitch
52 import CLabel
53 import Hoopl.Block
54 import Hoopl.Graph
55
56 -- The rest:
57 import OrdList
58 import Outputable
59 import DynFlags
60
61 import Control.Monad ( mapAndUnzipM, when )
62 import Data.Bits
63 import Data.Word
64
65 import BasicTypes
66 import FastString
67 import Util
68
69 -- -----------------------------------------------------------------------------
70 -- Top-level of the instruction selector
71
72 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
73 -- They are really trees of insns to facilitate fast appending, where a
74 -- left-to-right traversal (pre-order?) yields the insns in the correct
75 -- order.
76
77 cmmTopCodeGen
78 :: RawCmmDecl
79 -> NatM [NatCmmDecl CmmStatics Instr]
80
81 cmmTopCodeGen (CmmProc info lab live graph) = do
82 let blocks = toBlockListEntryFirst graph
83 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
84 dflags <- getDynFlags
85 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
86 tops = proc : concat statics
87 os = platformOS $ targetPlatform dflags
88 arch = platformArch $ targetPlatform dflags
89 case arch of
90 ArchPPC | os == OSAIX -> return tops
91 | otherwise -> do
92 picBaseMb <- getPicBaseMaybeNat
93 case picBaseMb of
94 Just picBase -> initializePicBase_ppc arch os picBase tops
95 Nothing -> return tops
96 ArchPPC_64 ELF_V1 -> fixup_entry tops
97 -- generating function descriptor is handled in
98 -- pretty printer
99 ArchPPC_64 ELF_V2 -> fixup_entry tops
100 -- generating function prologue is handled in
101 -- pretty printer
102 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
103 where
104 fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
105 = do
106 let BasicBlock bID insns = entry
107 bID' <- if lab == (blockLbl bID)
108 then newBlockId
109 else return bID
110 let b' = BasicBlock bID' insns
111 return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
112 fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
113
114 cmmTopCodeGen (CmmData sec dat) = do
115 return [CmmData sec dat] -- no translation, we just use CmmStatic
116
117 basicBlockCodeGen
118 :: Block CmmNode C C
119 -> NatM ( [NatBasicBlock Instr]
120 , [NatCmmDecl CmmStatics Instr])
121
122 basicBlockCodeGen block = do
123 let (_, nodes, tail) = blockSplit block
124 id = entryLabel block
125 stmts = blockToList nodes
126 mid_instrs <- stmtsToInstrs stmts
127 tail_instrs <- stmtToInstrs tail
128 let instrs = mid_instrs `appOL` tail_instrs
129 -- code generation may introduce new basic block boundaries, which
130 -- are indicated by the NEWBLOCK instruction. We must split up the
131 -- instruction stream into basic blocks again. Also, we extract
132 -- LDATAs here too.
133 let
134 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
135
136 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
137 = ([], BasicBlock id instrs : blocks, statics)
138 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
139 = (instrs, blocks, CmmData sec dat:statics)
140 mkBlocks instr (instrs,blocks,statics)
141 = (instr:instrs, blocks, statics)
142 return (BasicBlock id top : other_blocks, statics)
143
144 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
145 stmtsToInstrs stmts
146 = do instrss <- mapM stmtToInstrs stmts
147 return (concatOL instrss)
148
149 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
150 stmtToInstrs stmt = do
151 dflags <- getDynFlags
152 case stmt of
153 CmmComment s -> return (unitOL (COMMENT s))
154 CmmTick {} -> return nilOL
155 CmmUnwind {} -> return nilOL
156
157 CmmAssign reg src
158 | isFloatType ty -> assignReg_FltCode format reg src
159 | target32Bit (targetPlatform dflags) &&
160 isWord64 ty -> assignReg_I64Code reg src
161 | otherwise -> assignReg_IntCode format reg src
162 where ty = cmmRegType dflags reg
163 format = cmmTypeFormat ty
164
165 CmmStore addr src
166 | isFloatType ty -> assignMem_FltCode format addr src
167 | target32Bit (targetPlatform dflags) &&
168 isWord64 ty -> assignMem_I64Code addr src
169 | otherwise -> assignMem_IntCode format addr src
170 where ty = cmmExprType dflags src
171 format = cmmTypeFormat ty
172
173 CmmUnsafeForeignCall target result_regs args
174 -> genCCall target result_regs args
175
176 CmmBranch id -> genBranch id
177 CmmCondBranch arg true false prediction -> do
178 b1 <- genCondJump true arg prediction
179 b2 <- genBranch false
180 return (b1 `appOL` b2)
181 CmmSwitch arg ids -> do dflags <- getDynFlags
182 genSwitch dflags arg ids
183 CmmCall { cml_target = arg } -> genJump arg
184 _ ->
185 panic "stmtToInstrs: statement should have been cps'd away"
186
187
188 --------------------------------------------------------------------------------
189 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
190 -- They are really trees of insns to facilitate fast appending, where a
191 -- left-to-right traversal yields the insns in the correct order.
192 --
193 type InstrBlock
194 = OrdList Instr
195
196
197 -- | Register's passed up the tree. If the stix code forces the register
198 -- to live in a pre-decided machine register, it comes out as @Fixed@;
199 -- otherwise, it comes out as @Any@, and the parent can decide which
200 -- register to put it in.
201 --
202 data Register
203 = Fixed Format Reg InstrBlock
204 | Any Format (Reg -> InstrBlock)
205
206
207 swizzleRegisterRep :: Register -> Format -> Register
208 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
209 swizzleRegisterRep (Any _ codefn) format = Any format codefn
210
211
212 -- | Grab the Reg for a CmmReg
213 getRegisterReg :: Platform -> CmmReg -> Reg
214
215 getRegisterReg _ (CmmLocal (LocalReg u pk))
216 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
217
218 getRegisterReg platform (CmmGlobal mid)
219 = case globalRegMaybe platform mid of
220 Just reg -> RegReal reg
221 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
222 -- By this stage, the only MagicIds remaining should be the
223 -- ones which map to a real machine register on this
224 -- platform. Hence ...
225
226 -- | Convert a BlockId to some CmmStatic data
227 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
228 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
229 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
230 where blockLabel = blockLbl blockid
231
232
233
234 -- -----------------------------------------------------------------------------
235 -- General things for putting together code sequences
236
237 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
238 -- CmmExprs into CmmRegOff?
239 mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
240 mangleIndexTree dflags (CmmRegOff reg off)
241 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
242 where width = typeWidth (cmmRegType dflags reg)
243
244 mangleIndexTree _ _
245 = panic "PPC.CodeGen.mangleIndexTree: no match"
246
247 -- -----------------------------------------------------------------------------
248 -- Code gen for 64-bit arithmetic on 32-bit platforms
249
250 {-
251 Simple support for generating 64-bit code (ie, 64 bit values and 64
252 bit assignments) on 32-bit platforms. Unlike the main code generator
253 we merely shoot for generating working code as simply as possible, and
254 pay little attention to code quality. Specifically, there is no
255 attempt to deal cleverly with the fixed-vs-floating register
256 distinction; all values are generated into (pairs of) floating
257 registers, even if this would mean some redundant reg-reg moves as a
258 result. Only one of the VRegUniques is returned, since it will be
259 of the VRegUniqueLo form, and the upper-half VReg can be determined
260 by applying getHiVRegFromLo to it.
261 -}
262
263 data ChildCode64 -- a.k.a "Register64"
264 = ChildCode64
265 InstrBlock -- code
266 Reg -- the lower 32-bit temporary which contains the
267 -- result; use getHiVRegFromLo to find the other
268 -- VRegUnique. Rules of this simplified insn
269 -- selection game are therefore that the returned
270 -- Reg may be modified
271
272
273 -- | Compute an expression into a register, but
274 -- we don't mind which one it is.
275 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
276 getSomeReg expr = do
277 r <- getRegister expr
278 case r of
279 Any rep code -> do
280 tmp <- getNewRegNat rep
281 return (tmp, code tmp)
282 Fixed _ reg code ->
283 return (reg, code)
284
285 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
286 getI64Amodes addrTree = do
287 Amode hi_addr addr_code <- getAmode D addrTree
288 case addrOffset hi_addr 4 of
289 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
290 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
291 return (AddrRegImm hi_ptr (ImmInt 0),
292 AddrRegImm hi_ptr (ImmInt 4),
293 code)
294
295
296 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
297 assignMem_I64Code addrTree valueTree = do
298 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
299 ChildCode64 vcode rlo <- iselExpr64 valueTree
300 let
301 rhi = getHiVRegFromLo rlo
302
303 -- Big-endian store
304 mov_hi = ST II32 rhi hi_addr
305 mov_lo = ST II32 rlo lo_addr
306 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
307
308
309 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
310 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
311 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
312 let
313 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
314 r_dst_hi = getHiVRegFromLo r_dst_lo
315 r_src_hi = getHiVRegFromLo r_src_lo
316 mov_lo = MR r_dst_lo r_src_lo
317 mov_hi = MR r_dst_hi r_src_hi
318 return (
319 vcode `snocOL` mov_lo `snocOL` mov_hi
320 )
321
322 assignReg_I64Code _ _
323 = panic "assignReg_I64Code(powerpc): invalid lvalue"
324
325
326 iselExpr64 :: CmmExpr -> NatM ChildCode64
327 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
328 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
329 (rlo, rhi) <- getNewRegPairNat II32
330 let mov_hi = LD II32 rhi hi_addr
331 mov_lo = LD II32 rlo lo_addr
332 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
333 rlo
334
335 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
336 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
337
338 iselExpr64 (CmmLit (CmmInt i _)) = do
339 (rlo,rhi) <- getNewRegPairNat II32
340 let
341 half0 = fromIntegral (fromIntegral i :: Word16)
342 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
343 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
344 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
345
346 code = toOL [
347 LIS rlo (ImmInt half1),
348 OR rlo rlo (RIImm $ ImmInt half0),
349 LIS rhi (ImmInt half3),
350 OR rhi rhi (RIImm $ ImmInt half2)
351 ]
352 return (ChildCode64 code rlo)
353
354 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
355 ChildCode64 code1 r1lo <- iselExpr64 e1
356 ChildCode64 code2 r2lo <- iselExpr64 e2
357 (rlo,rhi) <- getNewRegPairNat II32
358 let
359 r1hi = getHiVRegFromLo r1lo
360 r2hi = getHiVRegFromLo r2lo
361 code = code1 `appOL`
362 code2 `appOL`
363 toOL [ ADDC rlo r1lo r2lo,
364 ADDE rhi r1hi r2hi ]
365 return (ChildCode64 code rlo)
366
367 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
368 ChildCode64 code1 r1lo <- iselExpr64 e1
369 ChildCode64 code2 r2lo <- iselExpr64 e2
370 (rlo,rhi) <- getNewRegPairNat II32
371 let
372 r1hi = getHiVRegFromLo r1lo
373 r2hi = getHiVRegFromLo r2lo
374 code = code1 `appOL`
375 code2 `appOL`
376 toOL [ SUBFC rlo r2lo (RIReg r1lo),
377 SUBFE rhi r2hi r1hi ]
378 return (ChildCode64 code rlo)
379
380 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
381 (expr_reg,expr_code) <- getSomeReg expr
382 (rlo, rhi) <- getNewRegPairNat II32
383 let mov_hi = LI rhi (ImmInt 0)
384 mov_lo = MR rlo expr_reg
385 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
386 rlo
387
388 iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
389 (expr_reg,expr_code) <- getSomeReg expr
390 (rlo, rhi) <- getNewRegPairNat II32
391 let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
392 mov_lo = MR rlo expr_reg
393 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
394 rlo
395 iselExpr64 expr
396 = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
397
398
399
400 getRegister :: CmmExpr -> NatM Register
401 getRegister e = do dflags <- getDynFlags
402 getRegister' dflags e
403
404 getRegister' :: DynFlags -> CmmExpr -> NatM Register
405
406 getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
407 | OSAIX <- platformOS (targetPlatform dflags) = do
408 let code dst = toOL [ LD II32 dst tocAddr ]
409 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
410 return (Any II32 code)
411 | target32Bit (targetPlatform dflags) = do
412 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
413 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
414 reg nilOL)
415 | otherwise = return (Fixed II64 toc nilOL)
416
417 getRegister' dflags (CmmReg reg)
418 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
419 (getRegisterReg (targetPlatform dflags) reg) nilOL)
420
421 getRegister' dflags tree@(CmmRegOff _ _)
422 = getRegister' dflags (mangleIndexTree dflags tree)
423
424 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
425 -- TO_W_(x), TO_W_(x >> 32)
426
427 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
428 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
429 | target32Bit (targetPlatform dflags) = do
430 ChildCode64 code rlo <- iselExpr64 x
431 return $ Fixed II32 (getHiVRegFromLo rlo) code
432
433 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
434 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
435 | target32Bit (targetPlatform dflags) = do
436 ChildCode64 code rlo <- iselExpr64 x
437 return $ Fixed II32 (getHiVRegFromLo rlo) code
438
439 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
440 | target32Bit (targetPlatform dflags) = do
441 ChildCode64 code rlo <- iselExpr64 x
442 return $ Fixed II32 rlo code
443
444 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
445 | target32Bit (targetPlatform dflags) = do
446 ChildCode64 code rlo <- iselExpr64 x
447 return $ Fixed II32 rlo code
448
449 getRegister' dflags (CmmLoad mem pk)
450 | not (isWord64 pk) = do
451 let platform = targetPlatform dflags
452 Amode addr addr_code <- getAmode D mem
453 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
454 addr_code `snocOL` LD format dst addr
455 return (Any format code)
456 | not (target32Bit (targetPlatform dflags)) = do
457 Amode addr addr_code <- getAmode DS mem
458 let code dst = addr_code `snocOL` LD II64 dst addr
459 return (Any II64 code)
460
461 where format = cmmTypeFormat pk
462
463 -- catch simple cases of zero- or sign-extended load
464 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
465 Amode addr addr_code <- getAmode D mem
466 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
467
468 getRegister' _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do
469 Amode addr addr_code <- getAmode D mem
470 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
471
472 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
473 Amode addr addr_code <- getAmode D mem
474 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
475
476 getRegister' _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do
477 Amode addr addr_code <- getAmode D mem
478 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
479
480 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
481
482 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
483 Amode addr addr_code <- getAmode D mem
484 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
485
486 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
487 Amode addr addr_code <- getAmode D mem
488 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
489
490 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
491 Amode addr addr_code <- getAmode D mem
492 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
493
494 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
495 Amode addr addr_code <- getAmode D mem
496 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
497
498 getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
499 Amode addr addr_code <- getAmode D mem
500 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
501
502 getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
503 -- lwa is DS-form. See Note [Power instruction format]
504 Amode addr addr_code <- getAmode DS mem
505 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
506
507 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
508 = case mop of
509 MO_Not rep -> triv_ucode_int rep NOT
510
511 MO_F_Neg w -> triv_ucode_float w FNEG
512 MO_S_Neg w -> triv_ucode_int w NEG
513
514 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
515 MO_FF_Conv W32 W64 -> conversionNop FF64 x
516
517 MO_FS_Conv from to -> coerceFP2Int from to x
518 MO_SF_Conv from to -> coerceInt2FP from to x
519
520 MO_SS_Conv from to
521 | from >= to -> conversionNop (intFormat to) x
522 | otherwise -> triv_ucode_int to (EXTS (intFormat from))
523
524 MO_UU_Conv from to
525 | from >= to -> conversionNop (intFormat to) x
526 | otherwise -> clearLeft from to
527
528 MO_XX_Conv _ to -> conversionNop (intFormat to) x
529
530 _ -> panic "PPC.CodeGen.getRegister: no match"
531
532 where
533 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
534 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
535
536 conversionNop new_format expr
537 = do e_code <- getRegister' dflags expr
538 return (swizzleRegisterRep e_code new_format)
539
540 clearLeft from to
541 = do (src1, code1) <- getSomeReg x
542 let arch_fmt = intFormat (wordWidth dflags)
543 arch_bits = widthInBits (wordWidth dflags)
544 size = widthInBits from
545 code dst = code1 `snocOL`
546 CLRLI arch_fmt dst src1 (arch_bits - size)
547 return (Any (intFormat to) code)
548
549 getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
550 = case mop of
551 MO_F_Eq _ -> condFltReg EQQ x y
552 MO_F_Ne _ -> condFltReg NE x y
553 MO_F_Gt _ -> condFltReg GTT x y
554 MO_F_Ge _ -> condFltReg GE x y
555 MO_F_Lt _ -> condFltReg LTT x y
556 MO_F_Le _ -> condFltReg LE x y
557
558 MO_Eq rep -> condIntReg EQQ rep x y
559 MO_Ne rep -> condIntReg NE rep x y
560
561 MO_S_Gt rep -> condIntReg GTT rep x y
562 MO_S_Ge rep -> condIntReg GE rep x y
563 MO_S_Lt rep -> condIntReg LTT rep x y
564 MO_S_Le rep -> condIntReg LE rep x y
565
566 MO_U_Gt rep -> condIntReg GU rep x y
567 MO_U_Ge rep -> condIntReg GEU rep x y
568 MO_U_Lt rep -> condIntReg LU rep x y
569 MO_U_Le rep -> condIntReg LEU rep x y
570
571 MO_F_Add w -> triv_float w FADD
572 MO_F_Sub w -> triv_float w FSUB
573 MO_F_Mul w -> triv_float w FMUL
574 MO_F_Quot w -> triv_float w FDIV
575
576 -- optimize addition with 32-bit immediate
577 -- (needed for PIC)
578 MO_Add W32 ->
579 case y of
580 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
581 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
582 CmmLit lit
583 -> do
584 (src, srcCode) <- getSomeReg x
585 let imm = litToImm lit
586 code dst = srcCode `appOL` toOL [
587 ADDIS dst src (HA imm),
588 ADD dst dst (RIImm (LO imm))
589 ]
590 return (Any II32 code)
591 _ -> trivialCode W32 True ADD x y
592
593 MO_Add rep -> trivialCode rep True ADD x y
594 MO_Sub rep ->
595 case y of
596 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
597 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
598 _ -> case x of
599 CmmLit (CmmInt imm _)
600 | Just _ <- makeImmediate rep True imm
601 -- subfi ('substract from' with immediate) doesn't exist
602 -> trivialCode rep True SUBFC y x
603 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
604
605 MO_Mul rep -> shiftMulCode rep True MULL x y
606 MO_S_MulMayOflo rep -> do
607 (src1, code1) <- getSomeReg x
608 (src2, code2) <- getSomeReg y
609 let
610 format = intFormat rep
611 code dst = code1 `appOL` code2
612 `appOL` toOL [ MULLO format dst src1 src2
613 , MFOV format dst
614 ]
615 return (Any format code)
616
617 MO_S_Quot rep -> divCode rep True x y
618 MO_U_Quot rep -> divCode rep False x y
619
620 MO_S_Rem rep -> remainder rep True x y
621 MO_U_Rem rep -> remainder rep False x y
622
623 MO_And rep -> case y of
624 (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
625 -> do
626 (src, srcCode) <- getSomeReg x
627 let clear_mask = if imm == -4 then 2 else 3
628 fmt = intFormat rep
629 code dst = srcCode
630 `appOL` unitOL (CLRRI fmt dst src clear_mask)
631 return (Any fmt code)
632 _ -> trivialCode rep False AND x y
633 MO_Or rep -> trivialCode rep False OR x y
634 MO_Xor rep -> trivialCode rep False XOR x y
635
636 MO_Shl rep -> shiftMulCode rep False SL x y
637 MO_S_Shr rep -> srCode rep True SRA x y
638 MO_U_Shr rep -> srCode rep False SR x y
639 _ -> panic "PPC.CodeGen.getRegister: no match"
640
641 where
642 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
643 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
644
645 remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
646 remainder rep sgn x y = do
647 let fmt = intFormat rep
648 tmp <- getNewRegNat fmt
649 code <- remainderCode rep sgn tmp x y
650 return (Any fmt code)
651
652
653 getRegister' _ (CmmLit (CmmInt i rep))
654 | Just imm <- makeImmediate rep True i
655 = let
656 code dst = unitOL (LI dst imm)
657 in
658 return (Any (intFormat rep) code)
659
660 getRegister' _ (CmmLit (CmmFloat f frep)) = do
661 lbl <- getNewLabelNat
662 dflags <- getDynFlags
663 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
664 Amode addr addr_code <- getAmode D dynRef
665 let format = floatFormat frep
666 code dst =
667 LDATA (Section ReadOnlyData lbl)
668 (Statics lbl [CmmStaticLit (CmmFloat f frep)])
669 `consOL` (addr_code `snocOL` LD format dst addr)
670 return (Any format code)
671
672 getRegister' dflags (CmmLit lit)
673 | target32Bit (targetPlatform dflags)
674 = let rep = cmmLitType dflags lit
675 imm = litToImm lit
676 code dst = toOL [
677 LIS dst (HA imm),
678 ADD dst dst (RIImm (LO imm))
679 ]
680 in return (Any (cmmTypeFormat rep) code)
681 | otherwise
682 = do lbl <- getNewLabelNat
683 dflags <- getDynFlags
684 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
685 Amode addr addr_code <- getAmode D dynRef
686 let rep = cmmLitType dflags lit
687 format = cmmTypeFormat rep
688 code dst =
689 LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
690 `consOL` (addr_code `snocOL` LD format dst addr)
691 return (Any format code)
692
693 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
694
695 -- extend?Rep: wrap integer expression of type `from`
696 -- in a conversion to `to`
697 extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr
698 extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x]
699
700 extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr
701 extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x]
702
703 -- -----------------------------------------------------------------------------
704 -- The 'Amode' type: Memory addressing modes passed up the tree.
705
706 data Amode
707 = Amode AddrMode InstrBlock
708
709 {-
710 Now, given a tree (the argument to a CmmLoad) that references memory,
711 produce a suitable addressing mode.
712
713 A Rule of the Game (tm) for Amodes: use of the addr bit must
714 immediately follow use of the code part, since the code part puts
715 values in registers which the addr then refers to. So you can't put
716 anything in between, lest it overwrite some of those registers. If
717 you need to do some other computation between the code part and use of
718 the addr bit, first store the effective address from the amode in a
719 temporary, then do the other computation, and then use the temporary:
720
721 code
722 LEA amode, tmp
723 ... other computation ...
724 ... (tmp) ...
725 -}
726
727 {- Note [Power instruction format]
728 In some instructions the 16 bit offset must be a multiple of 4, i.e.
729 the two least significant bits must be zero. The "Power ISA" specification
730 calls these instruction formats "DS-FORM" and the instructions with
731 arbitrary 16 bit offsets are "D-FORM".
732
733 The Power ISA specification document can be obtained from www.power.org.
734 -}
735 data InstrForm = D | DS
736
737 getAmode :: InstrForm -> CmmExpr -> NatM Amode
738 getAmode inf tree@(CmmRegOff _ _)
739 = do dflags <- getDynFlags
740 getAmode inf (mangleIndexTree dflags tree)
741
742 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
743 | Just off <- makeImmediate W32 True (-i)
744 = do
745 (reg, code) <- getSomeReg x
746 return (Amode (AddrRegImm reg off) code)
747
748
749 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
750 | Just off <- makeImmediate W32 True i
751 = do
752 (reg, code) <- getSomeReg x
753 return (Amode (AddrRegImm reg off) code)
754
755 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
756 | Just off <- makeImmediate W64 True (-i)
757 = do
758 (reg, code) <- getSomeReg x
759 return (Amode (AddrRegImm reg off) code)
760
761
762 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
763 | Just off <- makeImmediate W64 True i
764 = do
765 (reg, code) <- getSomeReg x
766 return (Amode (AddrRegImm reg off) code)
767
768 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
769 | Just off <- makeImmediate W64 True (-i)
770 = do
771 (reg, code) <- getSomeReg x
772 (reg', off', code') <-
773 if i `mod` 4 == 0
774 then do return (reg, off, code)
775 else do
776 tmp <- getNewRegNat II64
777 return (tmp, ImmInt 0,
778 code `snocOL` ADD tmp reg (RIImm off))
779 return (Amode (AddrRegImm reg' off') code')
780
781 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
782 | Just off <- makeImmediate W64 True i
783 = do
784 (reg, code) <- getSomeReg x
785 (reg', off', code') <-
786 if i `mod` 4 == 0
787 then do return (reg, off, code)
788 else do
789 tmp <- getNewRegNat II64
790 return (tmp, ImmInt 0,
791 code `snocOL` ADD tmp reg (RIImm off))
792 return (Amode (AddrRegImm reg' off') code')
793
794 -- optimize addition with 32-bit immediate
795 -- (needed for PIC)
796 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
797 = do
798 dflags <- getDynFlags
799 (src, srcCode) <- getSomeReg x
800 let imm = litToImm lit
801 case () of
802 _ | OSAIX <- platformOS (targetPlatform dflags)
803 , isCmmLabelType lit ->
804 -- HA16/LO16 relocations on labels not supported on AIX
805 return (Amode (AddrRegImm src imm) srcCode)
806 | otherwise -> do
807 tmp <- getNewRegNat II32
808 let code = srcCode `snocOL` ADDIS tmp src (HA imm)
809 return (Amode (AddrRegImm tmp (LO imm)) code)
810 where
811 isCmmLabelType (CmmLabel {}) = True
812 isCmmLabelType (CmmLabelOff {}) = True
813 isCmmLabelType (CmmLabelDiffOff {}) = True
814 isCmmLabelType _ = False
815
816 getAmode _ (CmmLit lit)
817 = do
818 dflags <- getDynFlags
819 case platformArch $ targetPlatform dflags of
820 ArchPPC -> do
821 tmp <- getNewRegNat II32
822 let imm = litToImm lit
823 code = unitOL (LIS tmp (HA imm))
824 return (Amode (AddrRegImm tmp (LO imm)) code)
825 _ -> do -- TODO: Load from TOC,
826 -- see getRegister' _ (CmmLit lit)
827 tmp <- getNewRegNat II64
828 let imm = litToImm lit
829 code = toOL [
830 LIS tmp (HIGHESTA imm),
831 OR tmp tmp (RIImm (HIGHERA imm)),
832 SL II64 tmp tmp (RIImm (ImmInt 32)),
833 ORIS tmp tmp (HA imm)
834 ]
835 return (Amode (AddrRegImm tmp (LO imm)) code)
836
837 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
838 = do
839 (regX, codeX) <- getSomeReg x
840 (regY, codeY) <- getSomeReg y
841 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
842
843 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
844 = do
845 (regX, codeX) <- getSomeReg x
846 (regY, codeY) <- getSomeReg y
847 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
848
849 getAmode _ other
850 = do
851 (reg, code) <- getSomeReg other
852 let
853 off = ImmInt 0
854 return (Amode (AddrRegImm reg off) code)
855
856
857 -- The 'CondCode' type: Condition codes passed up the tree.
858 data CondCode
859 = CondCode Bool Cond InstrBlock
860
861 -- Set up a condition code for a conditional branch.
862
863 getCondCode :: CmmExpr -> NatM CondCode
864
865 -- almost the same as everywhere else - but we need to
866 -- extend small integers to 32 bit or 64 bit first
867
868 getCondCode (CmmMachOp mop [x, y])
869 = do
870 case mop of
871 MO_F_Eq W32 -> condFltCode EQQ x y
872 MO_F_Ne W32 -> condFltCode NE x y
873 MO_F_Gt W32 -> condFltCode GTT x y
874 MO_F_Ge W32 -> condFltCode GE x y
875 MO_F_Lt W32 -> condFltCode LTT x y
876 MO_F_Le W32 -> condFltCode LE x y
877
878 MO_F_Eq W64 -> condFltCode EQQ x y
879 MO_F_Ne W64 -> condFltCode NE x y
880 MO_F_Gt W64 -> condFltCode GTT x y
881 MO_F_Ge W64 -> condFltCode GE x y
882 MO_F_Lt W64 -> condFltCode LTT x y
883 MO_F_Le W64 -> condFltCode LE x y
884
885 MO_Eq rep -> condIntCode EQQ rep x y
886 MO_Ne rep -> condIntCode NE rep x y
887
888 MO_S_Gt rep -> condIntCode GTT rep x y
889 MO_S_Ge rep -> condIntCode GE rep x y
890 MO_S_Lt rep -> condIntCode LTT rep x y
891 MO_S_Le rep -> condIntCode LE rep x y
892
893 MO_U_Gt rep -> condIntCode GU rep x y
894 MO_U_Ge rep -> condIntCode GEU rep x y
895 MO_U_Lt rep -> condIntCode LU rep x y
896 MO_U_Le rep -> condIntCode LEU rep x y
897
898 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
899
900 getCondCode _ = panic "getCondCode(2)(powerpc)"
901
902
903 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
904 -- passed back up the tree.
905
906 condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
907
908 -- optimize pointer tag checks. Operation andi. sets condition register
909 -- so cmpi ..., 0 is redundant.
910 condIntCode cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
911 (CmmLit (CmmInt 0 _))
912 | not $ condUnsigned cond,
913 Just src2 <- makeImmediate rep False imm
914 = do
915 (src1, code) <- getSomeReg x
916 let code' = code `snocOL` AND r0 src1 (RIImm src2)
917 return (CondCode False cond code')
918
919 condIntCode cond width x (CmmLit (CmmInt y rep))
920 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
921 = do
922 let op_len = max W32 width
923 let extend = extendSExpr width op_len
924 (src1, code) <- getSomeReg (extend x)
925 let format = intFormat op_len
926 code' = code `snocOL`
927 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
928 return (CondCode False cond code')
929
930 condIntCode cond width x y = do
931 let op_len = max W32 width
932 let extend = if condUnsigned cond then extendUExpr width op_len
933 else extendSExpr width op_len
934 (src1, code1) <- getSomeReg (extend x)
935 (src2, code2) <- getSomeReg (extend y)
936 let format = intFormat op_len
937 code' = code1 `appOL` code2 `snocOL`
938 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
939 return (CondCode False cond code')
940
941 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
942 condFltCode cond x y = do
943 (src1, code1) <- getSomeReg x
944 (src2, code2) <- getSomeReg y
945 let
946 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
947 code'' = case cond of -- twiddle CR to handle unordered case
948 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
949 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
950 _ -> code'
951 where
952 ltbit = 0 ; eqbit = 2 ; gtbit = 1
953 return (CondCode True cond code'')
954
955
956
957 -- -----------------------------------------------------------------------------
958 -- Generating assignments
959
960 -- Assignments are really at the heart of the whole code generation
961 -- business. Almost all top-level nodes of any real importance are
962 -- assignments, which correspond to loads, stores, or register
963 -- transfers. If we're really lucky, some of the register transfers
964 -- will go away, because we can use the destination register to
965 -- complete the code generation for the right hand side. This only
966 -- fails when the right hand side is forced into a fixed register
967 -- (e.g. the result of a call).
968
969 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
970 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
971
972 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
973 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
974
975 assignMem_IntCode pk addr src = do
976 (srcReg, code) <- getSomeReg src
977 Amode dstAddr addr_code <- case pk of
978 II64 -> getAmode DS addr
979 _ -> getAmode D addr
980 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
981
982 -- dst is a reg, but src could be anything
983 assignReg_IntCode _ reg src
984 = do
985 dflags <- getDynFlags
986 let dst = getRegisterReg (targetPlatform dflags) reg
987 r <- getRegister src
988 return $ case r of
989 Any _ code -> code dst
990 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
991
992
993
994 -- Easy, isn't it?
995 assignMem_FltCode = assignMem_IntCode
996 assignReg_FltCode = assignReg_IntCode
997
998
999
1000 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1001
1002 genJump (CmmLit (CmmLabel lbl))
1003 = return (unitOL $ JMP lbl)
1004
1005 genJump tree
1006 = do
1007 dflags <- getDynFlags
1008 genJump' tree (platformToGCP (targetPlatform dflags))
1009
1010 genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
1011
1012 genJump' tree (GCPLinux64ELF 1)
1013 = do
1014 (target,code) <- getSomeReg tree
1015 return (code
1016 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1017 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1018 `snocOL` MTCTR r11
1019 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1020 `snocOL` BCTR [] Nothing)
1021
1022 genJump' tree (GCPLinux64ELF 2)
1023 = do
1024 (target,code) <- getSomeReg tree
1025 return (code
1026 `snocOL` MR r12 target
1027 `snocOL` MTCTR r12
1028 `snocOL` BCTR [] Nothing)
1029
1030 genJump' tree _
1031 = do
1032 (target,code) <- getSomeReg tree
1033 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
1034
1035 -- -----------------------------------------------------------------------------
1036 -- Unconditional branches
1037 genBranch :: BlockId -> NatM InstrBlock
1038 genBranch = return . toOL . mkJumpInstr
1039
1040
1041 -- -----------------------------------------------------------------------------
1042 -- Conditional jumps
1043
1044 {-
1045 Conditional jumps are always to local labels, so we can use branch
1046 instructions. We peek at the arguments to decide what kind of
1047 comparison to do.
1048 -}
1049
1050
1051 genCondJump
1052 :: BlockId -- the branch target
1053 -> CmmExpr -- the condition on which to branch
1054 -> Maybe Bool
1055 -> NatM InstrBlock
1056
1057 genCondJump id bool prediction = do
1058 CondCode _ cond code <- getCondCode bool
1059 return (code `snocOL` BCC cond id prediction)
1060
1061
1062
1063 -- -----------------------------------------------------------------------------
1064 -- Generating C calls
1065
1066 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1067 -- @get_arg@, which moves the arguments to the correct registers/stack
1068 -- locations. Apart from that, the code is easy.
1069
1070 genCCall :: ForeignTarget -- function to call
1071 -> [CmmFormal] -- where to put the result
1072 -> [CmmActual] -- arguments (of mixed type)
1073 -> NatM InstrBlock
1074 genCCall (PrimTarget MO_WriteBarrier) _ _
1075 = return $ unitOL LWSYNC
1076
1077 genCCall (PrimTarget MO_Touch) _ _
1078 = return $ nilOL
1079
1080 genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
1081 = return $ nilOL
1082
1083 genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
1084 = do dflags <- getDynFlags
1085 let platform = targetPlatform dflags
1086 fmt = intFormat width
1087 reg_dst = getRegisterReg platform (CmmLocal dst)
1088 (instr, n_code) <- case amop of
1089 AMO_Add -> getSomeRegOrImm ADD True reg_dst
1090 AMO_Sub -> case n of
1091 CmmLit (CmmInt i _)
1092 | Just imm <- makeImmediate width True (-i)
1093 -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
1094 _
1095 -> do
1096 (n_reg, n_code) <- getSomeReg n
1097 return (SUBF reg_dst n_reg reg_dst, n_code)
1098 AMO_And -> getSomeRegOrImm AND False reg_dst
1099 AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
1100 return (NAND reg_dst reg_dst n_reg, n_code)
1101 AMO_Or -> getSomeRegOrImm OR False reg_dst
1102 AMO_Xor -> getSomeRegOrImm XOR False reg_dst
1103 Amode addr_reg addr_code <- getAmodeIndex addr
1104 lbl_retry <- getBlockIdNat
1105 return $ n_code `appOL` addr_code
1106 `appOL` toOL [ HWSYNC
1107 , BCC ALWAYS lbl_retry Nothing
1108
1109 , NEWBLOCK lbl_retry
1110 , LDR fmt reg_dst addr_reg
1111 , instr
1112 , STC fmt reg_dst addr_reg
1113 , BCC NE lbl_retry (Just False)
1114 , ISYNC
1115 ]
1116 where
1117 getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
1118 = do
1119 (regX, codeX) <- getSomeReg x
1120 (regY, codeY) <- getSomeReg y
1121 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1122 getAmodeIndex other
1123 = do
1124 (reg, code) <- getSomeReg other
1125 return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
1126 getSomeRegOrImm op sign dst
1127 = case n of
1128 CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
1129 -> return (op dst dst (RIImm imm), nilOL)
1130 _
1131 -> do
1132 (n_reg, n_code) <- getSomeReg n
1133 return (op dst dst (RIReg n_reg), n_code)
1134
1135 genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
1136 = do dflags <- getDynFlags
1137 let platform = targetPlatform dflags
1138 fmt = intFormat width
1139 reg_dst = getRegisterReg platform (CmmLocal dst)
1140 form = if widthInBits width == 64 then DS else D
1141 Amode addr_reg addr_code <- getAmode form addr
1142 lbl_end <- getBlockIdNat
1143 return $ addr_code `appOL` toOL [ HWSYNC
1144 , LD fmt reg_dst addr_reg
1145 , CMP fmt reg_dst (RIReg reg_dst)
1146 , BCC NE lbl_end (Just False)
1147 , BCC ALWAYS lbl_end Nothing
1148 -- See Note [Seemingly useless cmp and bne]
1149 , NEWBLOCK lbl_end
1150 , ISYNC
1151 ]
1152
1153 -- Note [Seemingly useless cmp and bne]
1154 -- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
1155 -- the second paragraph says that isync may complete before storage accesses
1156 -- "associated" with a preceding instruction have been performed. The cmp
1157 -- operation and the following bne introduce a data and control dependency
1158 -- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
1159 -- Fetch).
1160 -- This is also what gcc does.
1161
1162
1163 genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1164 code <- assignMem_IntCode (intFormat width) addr val
1165 return $ unitOL(HWSYNC) `appOL` code
1166
1167 genCCall (PrimTarget (MO_Clz width)) [dst] [src]
1168 = do dflags <- getDynFlags
1169 let platform = targetPlatform dflags
1170 reg_dst = getRegisterReg platform (CmmLocal dst)
1171 if target32Bit platform && width == W64
1172 then do
1173 ChildCode64 code vr_lo <- iselExpr64 src
1174 lbl1 <- getBlockIdNat
1175 lbl2 <- getBlockIdNat
1176 lbl3 <- getBlockIdNat
1177 let vr_hi = getHiVRegFromLo vr_lo
1178 cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
1179 , BCC NE lbl2 Nothing
1180 , BCC ALWAYS lbl1 Nothing
1181
1182 , NEWBLOCK lbl1
1183 , CNTLZ II32 reg_dst vr_lo
1184 , ADD reg_dst reg_dst (RIImm (ImmInt 32))
1185 , BCC ALWAYS lbl3 Nothing
1186
1187 , NEWBLOCK lbl2
1188 , CNTLZ II32 reg_dst vr_hi
1189 , BCC ALWAYS lbl3 Nothing
1190
1191 , NEWBLOCK lbl3
1192 ]
1193 return $ code `appOL` cntlz
1194 else do
1195 let format = if width == W64 then II64 else II32
1196 (s_reg, s_code) <- getSomeReg src
1197 (pre, reg , post) <-
1198 case width of
1199 W64 -> return (nilOL, s_reg, nilOL)
1200 W32 -> return (nilOL, s_reg, nilOL)
1201 W16 -> do
1202 reg_tmp <- getNewRegNat format
1203 return
1204 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
1205 , reg_tmp
1206 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
1207 )
1208 W8 -> do
1209 reg_tmp <- getNewRegNat format
1210 return
1211 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
1212 , reg_tmp
1213 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
1214 )
1215 _ -> panic "genCall: Clz wrong format"
1216 let cntlz = unitOL (CNTLZ format reg_dst reg)
1217 return $ s_code `appOL` pre `appOL` cntlz `appOL` post
1218
1219 genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
1220 = do dflags <- getDynFlags
1221 let platform = targetPlatform dflags
1222 reg_dst = getRegisterReg platform (CmmLocal dst)
1223 if target32Bit platform && width == W64
1224 then do
1225 let format = II32
1226 ChildCode64 code vr_lo <- iselExpr64 src
1227 lbl1 <- getBlockIdNat
1228 lbl2 <- getBlockIdNat
1229 lbl3 <- getBlockIdNat
1230 x' <- getNewRegNat format
1231 x'' <- getNewRegNat format
1232 r' <- getNewRegNat format
1233 cnttzlo <- cnttz format reg_dst vr_lo
1234 let vr_hi = getHiVRegFromLo vr_lo
1235 cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
1236 , BCC NE lbl2 Nothing
1237 , BCC ALWAYS lbl1 Nothing
1238
1239 , NEWBLOCK lbl1
1240 , ADD x' vr_hi (RIImm (ImmInt (-1)))
1241 , ANDC x'' x' vr_hi
1242 , CNTLZ format r' x''
1243 -- 32 + (32 - clz(x''))
1244 , SUBFC reg_dst r' (RIImm (ImmInt 64))
1245 , BCC ALWAYS lbl3 Nothing
1246
1247 , NEWBLOCK lbl2
1248 ]
1249 `appOL` cnttzlo `appOL`
1250 toOL [ BCC ALWAYS lbl3 Nothing
1251
1252 , NEWBLOCK lbl3
1253 ]
1254 return $ code `appOL` cnttz64
1255 else do
1256 let format = if width == W64 then II64 else II32
1257 (s_reg, s_code) <- getSomeReg src
1258 (reg_ctz, pre_code) <-
1259 case width of
1260 W64 -> return (s_reg, nilOL)
1261 W32 -> return (s_reg, nilOL)
1262 W16 -> do
1263 reg_tmp <- getNewRegNat format
1264 return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
1265 W8 -> do
1266 reg_tmp <- getNewRegNat format
1267 return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
1268 _ -> panic "genCall: Ctz wrong format"
1269 ctz_code <- cnttz format reg_dst reg_ctz
1270 return $ s_code `appOL` pre_code `appOL` ctz_code
1271 where
1272 -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
1273 -- see Henry S. Warren, Hacker's Delight, p 107
1274 cnttz format dst src = do
1275 let format_bits = 8 * formatInBytes format
1276 x' <- getNewRegNat format
1277 x'' <- getNewRegNat format
1278 r' <- getNewRegNat format
1279 return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
1280 , ANDC x'' x' src
1281 , CNTLZ format r' x''
1282 , SUBFC dst r' (RIImm (ImmInt (format_bits)))
1283 ]
1284
1285 genCCall target dest_regs argsAndHints
1286 = do dflags <- getDynFlags
1287 let platform = targetPlatform dflags
1288 case target of
1289 PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width
1290 dest_regs argsAndHints
1291 PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width
1292 dest_regs argsAndHints
1293 PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
1294 argsAndHints
1295 PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
1296 argsAndHints
1297 PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
1298 PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
1299 PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
1300 PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
1301 dest_regs argsAndHints
1302 PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
1303 dest_regs argsAndHints
1304 PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
1305 PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
1306 _ -> genCCall' dflags (platformToGCP platform)
1307 target dest_regs argsAndHints
1308 where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
1309 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1310 reg_r = getRegisterReg platform (CmmLocal res_r)
1311 remainderCode width signed reg_q arg_x arg_y
1312 <*> pure reg_r
1313
1314 divOp1 _ _ _ _ _
1315 = panic "genCCall: Wrong number of arguments for divOp1"
1316 divOp2 platform width [res_q, res_r]
1317 [arg_x_high, arg_x_low, arg_y]
1318 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1319 reg_r = getRegisterReg platform (CmmLocal res_r)
1320 fmt = intFormat width
1321 half = 4 * (formatInBytes fmt)
1322 (xh_reg, xh_code) <- getSomeReg arg_x_high
1323 (xl_reg, xl_code) <- getSomeReg arg_x_low
1324 (y_reg, y_code) <- getSomeReg arg_y
1325 s <- getNewRegNat fmt
1326 b <- getNewRegNat fmt
1327 v <- getNewRegNat fmt
1328 vn1 <- getNewRegNat fmt
1329 vn0 <- getNewRegNat fmt
1330 un32 <- getNewRegNat fmt
1331 tmp <- getNewRegNat fmt
1332 un10 <- getNewRegNat fmt
1333 un1 <- getNewRegNat fmt
1334 un0 <- getNewRegNat fmt
1335 q1 <- getNewRegNat fmt
1336 rhat <- getNewRegNat fmt
1337 tmp1 <- getNewRegNat fmt
1338 q0 <- getNewRegNat fmt
1339 un21 <- getNewRegNat fmt
1340 again1 <- getBlockIdNat
1341 no1 <- getBlockIdNat
1342 then1 <- getBlockIdNat
1343 endif1 <- getBlockIdNat
1344 again2 <- getBlockIdNat
1345 no2 <- getBlockIdNat
1346 then2 <- getBlockIdNat
1347 endif2 <- getBlockIdNat
1348 return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
1349 -- see Hacker's Delight p 196 Figure 9-3
1350 toOL [ -- b = 2 ^ (bits_in_word / 2)
1351 LI b (ImmInt 1)
1352 , SL fmt b b (RIImm (ImmInt half))
1353 -- s = clz(y)
1354 , CNTLZ fmt s y_reg
1355 -- v = y << s
1356 , SL fmt v y_reg (RIReg s)
1357 -- vn1 = upper half of v
1358 , SR fmt vn1 v (RIImm (ImmInt half))
1359 -- vn0 = lower half of v
1360 , CLRLI fmt vn0 v half
1361 -- un32 = (u1 << s)
1362 -- | (u0 >> (bits_in_word - s))
1363 , SL fmt un32 xh_reg (RIReg s)
1364 , SUBFC tmp s
1365 (RIImm (ImmInt (8 * formatInBytes fmt)))
1366 , SR fmt tmp xl_reg (RIReg tmp)
1367 , OR un32 un32 (RIReg tmp)
1368 -- un10 = u0 << s
1369 , SL fmt un10 xl_reg (RIReg s)
1370 -- un1 = upper half of un10
1371 , SR fmt un1 un10 (RIImm (ImmInt half))
1372 -- un0 = lower half of un10
1373 , CLRLI fmt un0 un10 half
1374 -- q1 = un32/vn1
1375 , DIV fmt False q1 un32 vn1
1376 -- rhat = un32 - q1*vn1
1377 , MULL fmt tmp q1 (RIReg vn1)
1378 , SUBF rhat tmp un32
1379 , BCC ALWAYS again1 Nothing
1380
1381 , NEWBLOCK again1
1382 -- if (q1 >= b || q1*vn0 > b*rhat + un1)
1383 , CMPL fmt q1 (RIReg b)
1384 , BCC GEU then1 Nothing
1385 , BCC ALWAYS no1 Nothing
1386
1387 , NEWBLOCK no1
1388 , MULL fmt tmp q1 (RIReg vn0)
1389 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1390 , ADD tmp1 tmp1 (RIReg un1)
1391 , CMPL fmt tmp (RIReg tmp1)
1392 , BCC LEU endif1 Nothing
1393 , BCC ALWAYS then1 Nothing
1394
1395 , NEWBLOCK then1
1396 -- q1 = q1 - 1
1397 , ADD q1 q1 (RIImm (ImmInt (-1)))
1398 -- rhat = rhat + vn1
1399 , ADD rhat rhat (RIReg vn1)
1400 -- if (rhat < b) goto again1
1401 , CMPL fmt rhat (RIReg b)
1402 , BCC LTT again1 Nothing
1403 , BCC ALWAYS endif1 Nothing
1404
1405 , NEWBLOCK endif1
1406 -- un21 = un32*b + un1 - q1*v
1407 , SL fmt un21 un32 (RIImm (ImmInt half))
1408 , ADD un21 un21 (RIReg un1)
1409 , MULL fmt tmp q1 (RIReg v)
1410 , SUBF un21 tmp un21
1411 -- compute second quotient digit
1412 -- q0 = un21/vn1
1413 , DIV fmt False q0 un21 vn1
1414 -- rhat = un21- q0*vn1
1415 , MULL fmt tmp q0 (RIReg vn1)
1416 , SUBF rhat tmp un21
1417 , BCC ALWAYS again2 Nothing
1418
1419 , NEWBLOCK again2
1420 -- if (q0>b || q0*vn0 > b*rhat + un0)
1421 , CMPL fmt q0 (RIReg b)
1422 , BCC GEU then2 Nothing
1423 , BCC ALWAYS no2 Nothing
1424
1425 , NEWBLOCK no2
1426 , MULL fmt tmp q0 (RIReg vn0)
1427 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1428 , ADD tmp1 tmp1 (RIReg un0)
1429 , CMPL fmt tmp (RIReg tmp1)
1430 , BCC LEU endif2 Nothing
1431 , BCC ALWAYS then2 Nothing
1432
1433 , NEWBLOCK then2
1434 -- q0 = q0 - 1
1435 , ADD q0 q0 (RIImm (ImmInt (-1)))
1436 -- rhat = rhat + vn1
1437 , ADD rhat rhat (RIReg vn1)
1438 -- if (rhat<b) goto again2
1439 , CMPL fmt rhat (RIReg b)
1440 , BCC LTT again2 Nothing
1441 , BCC ALWAYS endif2 Nothing
1442
1443 , NEWBLOCK endif2
1444 -- compute remainder
1445 -- r = (un21*b + un0 - q0*v) >> s
1446 , SL fmt reg_r un21 (RIImm (ImmInt half))
1447 , ADD reg_r reg_r (RIReg un0)
1448 , MULL fmt tmp q0 (RIReg v)
1449 , SUBF reg_r tmp reg_r
1450 , SR fmt reg_r reg_r (RIReg s)
1451 -- compute quotient
1452 -- q = q1*b + q0
1453 , SL fmt reg_q q1 (RIImm (ImmInt half))
1454 , ADD reg_q reg_q (RIReg q0)
1455 ]
1456 divOp2 _ _ _ _
1457 = panic "genCCall: Wrong number of arguments for divOp2"
1458 multOp2 platform width [res_h, res_l] [arg_x, arg_y]
1459 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1460 reg_l = getRegisterReg platform (CmmLocal res_l)
1461 fmt = intFormat width
1462 (x_reg, x_code) <- getSomeReg arg_x
1463 (y_reg, y_code) <- getSomeReg arg_y
1464 return $ y_code `appOL` x_code
1465 `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
1466 , MULHU fmt reg_h x_reg y_reg
1467 ]
1468 multOp2 _ _ _ _
1469 = panic "genCall: Wrong number of arguments for multOp2"
1470 add2Op platform [res_h, res_l] [arg_x, arg_y]
1471 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1472 reg_l = getRegisterReg platform (CmmLocal res_l)
1473 (x_reg, x_code) <- getSomeReg arg_x
1474 (y_reg, y_code) <- getSomeReg arg_y
1475 return $ y_code `appOL` x_code
1476 `appOL` toOL [ LI reg_h (ImmInt 0)
1477 , ADDC reg_l x_reg y_reg
1478 , ADDZE reg_h reg_h
1479 ]
1480 add2Op _ _ _
1481 = panic "genCCall: Wrong number of arguments/results for add2"
1482
1483 addcOp platform [res_r, res_c] [arg_x, arg_y]
1484 = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
1485 addcOp _ _ _
1486 = panic "genCCall: Wrong number of arguments/results for addc"
1487
1488 -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
1489 -- which is 0 for borrow and 1 otherwise. We need 1 and 0
1490 -- so xor with 1.
1491 subcOp platform [res_r, res_c] [arg_x, arg_y]
1492 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1493 reg_c = getRegisterReg platform (CmmLocal res_c)
1494 (x_reg, x_code) <- getSomeReg arg_x
1495 (y_reg, y_code) <- getSomeReg arg_y
1496 return $ y_code `appOL` x_code
1497 `appOL` toOL [ LI reg_c (ImmInt 0)
1498 , SUBFC reg_r y_reg (RIReg x_reg)
1499 , ADDZE reg_c reg_c
1500 , XOR reg_c reg_c (RIImm (ImmInt 1))
1501 ]
1502 subcOp _ _ _
1503 = panic "genCCall: Wrong number of arguments/results for subc"
1504 addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
1505 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1506 reg_c = getRegisterReg platform (CmmLocal res_c)
1507 (x_reg, x_code) <- getSomeReg arg_x
1508 (y_reg, y_code) <- getSomeReg arg_y
1509 return $ y_code `appOL` x_code
1510 `appOL` toOL [ instr reg_r y_reg x_reg,
1511 -- SUBFO argument order reversed!
1512 MFOV (intFormat width) reg_c
1513 ]
1514 addSubCOp _ _ _ _ _
1515 = panic "genCall: Wrong number of arguments/results for addC"
1516 fabs platform [res] [arg]
1517 = do let res_r = getRegisterReg platform (CmmLocal res)
1518 (arg_reg, arg_code) <- getSomeReg arg
1519 return $ arg_code `snocOL` FABS res_r arg_reg
1520 fabs _ _ _
1521 = panic "genCall: Wrong number of arguments/results for fabs"
1522
1523 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
1524 data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF !Int | GCPAIX
1525
1526 platformToGCP :: Platform -> GenCCallPlatform
1527 platformToGCP platform = case platformOS platform of
1528 OSLinux -> case platformArch platform of
1529 ArchPPC -> GCPLinux
1530 ArchPPC_64 ELF_V1 -> GCPLinux64ELF 1
1531 ArchPPC_64 ELF_V2 -> GCPLinux64ELF 2
1532 _ -> panic "PPC.CodeGen.platformToGCP: Unknown Linux"
1533 OSAIX -> GCPAIX
1534 OSDarwin -> GCPDarwin
1535 _ -> panic "PPC.CodeGen.platformToGCP: not defined for this OS"
1536
1537
1538 genCCall'
1539 :: DynFlags
1540 -> GenCCallPlatform
1541 -> ForeignTarget -- function to call
1542 -> [CmmFormal] -- where to put the result
1543 -> [CmmActual] -- arguments (of mixed type)
1544 -> NatM InstrBlock
1545
1546 {-
1547 The PowerPC calling convention for Darwin/Mac OS X
1548 is described in Apple's document
1549 "Inside Mac OS X - Mach-O Runtime Architecture".
1550
1551 PowerPC Linux uses the System V Release 4 Calling Convention
1552 for PowerPC. It is described in the
1553 "System V Application Binary Interface PowerPC Processor Supplement".
1554
1555 Both conventions are similar:
1556 Parameters may be passed in general-purpose registers starting at r3, in
1557 floating point registers starting at f1, or on the stack.
1558
1559 But there are substantial differences:
1560 * The number of registers used for parameter passing and the exact set of
1561 nonvolatile registers differs (see MachRegs.hs).
1562 * On Darwin, stack space is always reserved for parameters, even if they are
1563 passed in registers. The called routine may choose to save parameters from
1564 registers to the corresponding space on the stack.
1565 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
1566 parameter is passed in an FPR.
1567 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1568 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1569 Darwin just treats an I64 like two separate II32s (high word first).
1570 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1571 4-byte aligned like everything else on Darwin.
1572 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1573 PowerPC Linux does not agree, so neither do we.
1574
1575 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1576 64-bit PowerPC. It is specified in
1577 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9"
1578 (PPC64 ELF v1.9).
1579 PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit
1580 ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement"
1581 (PPC64 ELF v2).
1582 AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian
1583 32-Bit Hardware Implementation"
1584
1585 According to all conventions, the parameter area should be part of the
1586 caller's stack frame, allocated in the caller's prologue code (large enough
1587 to hold the parameter lists for all called routines). The NCG already
1588 uses the stack for register spilling, leaving 64 bytes free at the top.
1589 If we need a larger parameter area than that, we just allocate a new stack
1590 frame just before ccalling.
1591 -}
1592
1593
1594 genCCall' dflags gcp target dest_regs args
1595 = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
1596 -- we rely on argument promotion in the codeGen
1597 do
1598 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1599 (zip args argReps)
1600 allArgRegs
1601 (allFPArgRegs platform)
1602 initialStackOffset
1603 (toOL []) []
1604
1605 (labelOrExpr, reduceToFF32) <- case target of
1606 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1607 uses_pic_base_implicitly
1608 return (Left lbl, False)
1609 ForeignTarget expr _ -> do
1610 uses_pic_base_implicitly
1611 return (Right expr, False)
1612 PrimTarget mop -> outOfLineMachOp mop
1613
1614 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1615 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
1616
1617 case labelOrExpr of
1618 Left lbl -> do -- the linker does all the work for us
1619 return ( codeBefore
1620 `snocOL` BL lbl usedRegs
1621 `appOL` maybeNOP -- some ABI require a NOP after BL
1622 `appOL` codeAfter)
1623 Right dyn -> do -- implement call through function pointer
1624 (dynReg, dynCode) <- getSomeReg dyn
1625 case gcp of
1626 GCPLinux64ELF 1 -> return ( dynCode
1627 `appOL` codeBefore
1628 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40))
1629 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1630 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1631 `snocOL` MTCTR r11
1632 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1633 `snocOL` BCTRL usedRegs
1634 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40))
1635 `appOL` codeAfter)
1636 GCPLinux64ELF 2 -> return ( dynCode
1637 `appOL` codeBefore
1638 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24))
1639 `snocOL` MR r12 dynReg
1640 `snocOL` MTCTR r12
1641 `snocOL` BCTRL usedRegs
1642 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24))
1643 `appOL` codeAfter)
1644 GCPAIX -> return ( dynCode
1645 -- AIX/XCOFF follows the PowerOPEN ABI
1646 -- which is quite similiar to LinuxPPC64/ELFv1
1647 `appOL` codeBefore
1648 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20))
1649 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1650 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1651 `snocOL` MTCTR r11
1652 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1653 `snocOL` BCTRL usedRegs
1654 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20))
1655 `appOL` codeAfter)
1656 _ -> return ( dynCode
1657 `snocOL` MTCTR dynReg
1658 `appOL` codeBefore
1659 `snocOL` BCTRL usedRegs
1660 `appOL` codeAfter)
1661 where
1662 platform = targetPlatform dflags
1663
1664 uses_pic_base_implicitly = do
1665 -- See Note [implicit register in PPC PIC code]
1666 -- on why we claim to use PIC register here
1667 when (positionIndependent dflags && target32Bit platform) $ do
1668 _ <- getPicBaseNat $ archWordFormat True
1669 return ()
1670
1671 initialStackOffset = case gcp of
1672 GCPAIX -> 24
1673 GCPDarwin -> 24
1674 GCPLinux -> 8
1675 GCPLinux64ELF 1 -> 48
1676 GCPLinux64ELF 2 -> 32
1677 _ -> panic "genCall': unknown calling convention"
1678 -- size of linkage area + size of arguments, in bytes
1679 stackDelta finalStack = case gcp of
1680 GCPAIX ->
1681 roundTo 16 $ (24 +) $ max 32 $ sum $
1682 map (widthInBytes . typeWidth) argReps
1683 GCPDarwin ->
1684 roundTo 16 $ (24 +) $ max 32 $ sum $
1685 map (widthInBytes . typeWidth) argReps
1686 GCPLinux -> roundTo 16 finalStack
1687 GCPLinux64ELF 1 ->
1688 roundTo 16 $ (48 +) $ max 64 $ sum $
1689 map (roundTo 8 . widthInBytes . typeWidth)
1690 argReps
1691 GCPLinux64ELF 2 ->
1692 roundTo 16 $ (32 +) $ max 64 $ sum $
1693 map (roundTo 8 . widthInBytes . typeWidth)
1694 argReps
1695 _ -> panic "genCall': unknown calling conv."
1696
1697 argReps = map (cmmExprType dflags) args
1698
1699 roundTo a x | x `mod` a == 0 = x
1700 | otherwise = x + a - (x `mod` a)
1701
1702 spFormat = if target32Bit platform then II32 else II64
1703
1704 -- TODO: Do not create a new stack frame if delta is too large.
1705 move_sp_down finalStack
1706 | delta > stackFrameHeaderSize dflags =
1707 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1708 DELTA (-delta)]
1709 | otherwise = nilOL
1710 where delta = stackDelta finalStack
1711 move_sp_up finalStack
1712 | delta > stackFrameHeaderSize dflags =
1713 toOL [ADD sp sp (RIImm (ImmInt delta)),
1714 DELTA 0]
1715 | otherwise = nilOL
1716 where delta = stackDelta finalStack
1717
1718 -- A NOP instruction is required after a call (bl instruction)
1719 -- on AIX and 64-Bit Linux.
1720 -- If the call is to a function with a different TOC (r2) the
1721 -- link editor replaces the NOP instruction with a load of the TOC
1722 -- from the stack to restore the TOC.
1723 maybeNOP = case gcp of
1724 -- See Section 3.9.4 of OpenPower ABI
1725 GCPAIX -> unitOL NOP
1726 -- See Section 3.5.11 of PPC64 ELF v1.9
1727 GCPLinux64ELF 1 -> unitOL NOP
1728 -- See Section 2.3.6 of PPC64 ELF v2
1729 GCPLinux64ELF 2 -> unitOL NOP
1730 _ -> nilOL
1731
1732 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1733 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
1734 accumCode accumUsed | isWord64 arg_ty
1735 && target32Bit (targetPlatform dflags) =
1736 do
1737 ChildCode64 code vr_lo <- iselExpr64 arg
1738 let vr_hi = getHiVRegFromLo vr_lo
1739
1740 case gcp of
1741 GCPAIX -> -- same as for Darwin
1742 do let storeWord vr (gpr:_) _ = MR gpr vr
1743 storeWord vr [] offset
1744 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1745 passArguments args
1746 (drop 2 gprs)
1747 fprs
1748 (stackOffset+8)
1749 (accumCode `appOL` code
1750 `snocOL` storeWord vr_hi gprs stackOffset
1751 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1752 ((take 2 gprs) ++ accumUsed)
1753 GCPDarwin ->
1754 do let storeWord vr (gpr:_) _ = MR gpr vr
1755 storeWord vr [] offset
1756 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1757 passArguments args
1758 (drop 2 gprs)
1759 fprs
1760 (stackOffset+8)
1761 (accumCode `appOL` code
1762 `snocOL` storeWord vr_hi gprs stackOffset
1763 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1764 ((take 2 gprs) ++ accumUsed)
1765 GCPLinux ->
1766 do let stackOffset' = roundTo 8 stackOffset
1767 stackCode = accumCode `appOL` code
1768 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1769 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1770 regCode hireg loreg =
1771 accumCode `appOL` code
1772 `snocOL` MR hireg vr_hi
1773 `snocOL` MR loreg vr_lo
1774
1775 case gprs of
1776 hireg : loreg : regs | even (length gprs) ->
1777 passArguments args regs fprs stackOffset
1778 (regCode hireg loreg) (hireg : loreg : accumUsed)
1779 _skipped : hireg : loreg : regs ->
1780 passArguments args regs fprs stackOffset
1781 (regCode hireg loreg) (hireg : loreg : accumUsed)
1782 _ -> -- only one or no regs left
1783 passArguments args [] fprs (stackOffset'+8)
1784 stackCode accumUsed
1785 GCPLinux64ELF _ -> panic "passArguments: 32 bit code"
1786
1787 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1788 | reg : _ <- regs = do
1789 register <- getRegister arg
1790 let code = case register of
1791 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1792 Any _ acode -> acode reg
1793 stackOffsetRes = case gcp of
1794 -- The Darwin ABI requires that we reserve
1795 -- stack slots for register parameters
1796 GCPDarwin -> stackOffset + stackBytes
1797 -- ... so does the PowerOpen ABI.
1798 GCPAIX -> stackOffset + stackBytes
1799 -- ... the SysV ABI 32-bit doesn't.
1800 GCPLinux -> stackOffset
1801 -- ... but SysV ABI 64-bit does.
1802 GCPLinux64ELF _ -> stackOffset + stackBytes
1803 passArguments args
1804 (drop nGprs gprs)
1805 (drop nFprs fprs)
1806 stackOffsetRes
1807 (accumCode `appOL` code)
1808 (reg : accumUsed)
1809 | otherwise = do
1810 (vr, code) <- getSomeReg arg
1811 passArguments args
1812 (drop nGprs gprs)
1813 (drop nFprs fprs)
1814 (stackOffset' + stackBytes)
1815 (accumCode `appOL` code `snocOL` ST (cmmTypeFormat rep) vr stackSlot)
1816 accumUsed
1817 where
1818 stackOffset' = case gcp of
1819 GCPDarwin ->
1820 -- stackOffset is at least 4-byte aligned
1821 -- The Darwin ABI is happy with that.
1822 stackOffset
1823 GCPAIX ->
1824 -- The 32bit PowerOPEN ABI is happy with
1825 -- 32bit-alignment as well...
1826 stackOffset
1827 GCPLinux
1828 -- ... the SysV ABI requires 8-byte
1829 -- alignment for doubles.
1830 | isFloatType rep && typeWidth rep == W64 ->
1831 roundTo 8 stackOffset
1832 | otherwise ->
1833 stackOffset
1834 GCPLinux64ELF _ ->
1835 -- Everything on the stack is mapped to
1836 -- 8-byte aligned doublewords
1837 stackOffset
1838 stackOffset''
1839 | isFloatType rep && typeWidth rep == W32 =
1840 case gcp of
1841 -- The ELF v1 ABI Section 3.2.3 requires:
1842 -- "Single precision floating point values
1843 -- are mapped to the second word in a single
1844 -- doubleword"
1845 GCPLinux64ELF 1 -> stackOffset' + 4
1846 _ -> stackOffset'
1847 | otherwise = stackOffset'
1848
1849 stackSlot = AddrRegImm sp (ImmInt stackOffset'')
1850 (nGprs, nFprs, stackBytes, regs)
1851 = case gcp of
1852 GCPAIX ->
1853 case cmmTypeFormat rep of
1854 II8 -> (1, 0, 4, gprs)
1855 II16 -> (1, 0, 4, gprs)
1856 II32 -> (1, 0, 4, gprs)
1857 -- The PowerOpen ABI requires that we skip a
1858 -- corresponding number of GPRs when we use
1859 -- the FPRs.
1860 --
1861 -- E.g. for a `double` two GPRs are skipped,
1862 -- whereas for a `float` one GPR is skipped
1863 -- when parameters are assigned to
1864 -- registers.
1865 --
1866 -- The PowerOpen ABI specification can be found at
1867 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1868 FF32 -> (1, 1, 4, fprs)
1869 FF64 -> (2, 1, 8, fprs)
1870 II64 -> panic "genCCall' passArguments II64"
1871 FF80 -> panic "genCCall' passArguments FF80"
1872 GCPDarwin ->
1873 case cmmTypeFormat rep of
1874 II8 -> (1, 0, 4, gprs)
1875 II16 -> (1, 0, 4, gprs)
1876 II32 -> (1, 0, 4, gprs)
1877 -- The Darwin ABI requires that we skip a
1878 -- corresponding number of GPRs when we use
1879 -- the FPRs.
1880 FF32 -> (1, 1, 4, fprs)
1881 FF64 -> (2, 1, 8, fprs)
1882 II64 -> panic "genCCall' passArguments II64"
1883 FF80 -> panic "genCCall' passArguments FF80"
1884 GCPLinux ->
1885 case cmmTypeFormat rep of
1886 II8 -> (1, 0, 4, gprs)
1887 II16 -> (1, 0, 4, gprs)
1888 II32 -> (1, 0, 4, gprs)
1889 -- ... the SysV ABI doesn't.
1890 FF32 -> (0, 1, 4, fprs)
1891 FF64 -> (0, 1, 8, fprs)
1892 II64 -> panic "genCCall' passArguments II64"
1893 FF80 -> panic "genCCall' passArguments FF80"
1894 GCPLinux64ELF _ ->
1895 case cmmTypeFormat rep of
1896 II8 -> (1, 0, 8, gprs)
1897 II16 -> (1, 0, 8, gprs)
1898 II32 -> (1, 0, 8, gprs)
1899 II64 -> (1, 0, 8, gprs)
1900 -- The ELFv1 ABI requires that we skip a
1901 -- corresponding number of GPRs when we use
1902 -- the FPRs.
1903 FF32 -> (1, 1, 8, fprs)
1904 FF64 -> (1, 1, 8, fprs)
1905 FF80 -> panic "genCCall' passArguments FF80"
1906
1907 moveResult reduceToFF32 =
1908 case dest_regs of
1909 [] -> nilOL
1910 [dest]
1911 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1912 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1913 | isWord64 rep && target32Bit (targetPlatform dflags)
1914 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1915 MR r_dest r4]
1916 | otherwise -> unitOL (MR r_dest r3)
1917 where rep = cmmRegType dflags (CmmLocal dest)
1918 r_dest = getRegisterReg platform (CmmLocal dest)
1919 _ -> panic "genCCall' moveResult: Bad dest_regs"
1920
1921 outOfLineMachOp mop =
1922 do
1923 dflags <- getDynFlags
1924 mopExpr <- cmmMakeDynamicReference dflags CallReference $
1925 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1926 let mopLabelOrExpr = case mopExpr of
1927 CmmLit (CmmLabel lbl) -> Left lbl
1928 _ -> Right mopExpr
1929 return (mopLabelOrExpr, reduce)
1930 where
1931 (functionName, reduce) = case mop of
1932 MO_F32_Exp -> (fsLit "exp", True)
1933 MO_F32_Log -> (fsLit "log", True)
1934 MO_F32_Sqrt -> (fsLit "sqrt", True)
1935 MO_F32_Fabs -> unsupported
1936
1937 MO_F32_Sin -> (fsLit "sin", True)
1938 MO_F32_Cos -> (fsLit "cos", True)
1939 MO_F32_Tan -> (fsLit "tan", True)
1940
1941 MO_F32_Asin -> (fsLit "asin", True)
1942 MO_F32_Acos -> (fsLit "acos", True)
1943 MO_F32_Atan -> (fsLit "atan", True)
1944
1945 MO_F32_Sinh -> (fsLit "sinh", True)
1946 MO_F32_Cosh -> (fsLit "cosh", True)
1947 MO_F32_Tanh -> (fsLit "tanh", True)
1948 MO_F32_Pwr -> (fsLit "pow", True)
1949
1950 MO_F32_Asinh -> (fsLit "asinh", True)
1951 MO_F32_Acosh -> (fsLit "acosh", True)
1952 MO_F32_Atanh -> (fsLit "atanh", True)
1953
1954 MO_F64_Exp -> (fsLit "exp", False)
1955 MO_F64_Log -> (fsLit "log", False)
1956 MO_F64_Sqrt -> (fsLit "sqrt", False)
1957 MO_F64_Fabs -> unsupported
1958
1959 MO_F64_Sin -> (fsLit "sin", False)
1960 MO_F64_Cos -> (fsLit "cos", False)
1961 MO_F64_Tan -> (fsLit "tan", False)
1962
1963 MO_F64_Asin -> (fsLit "asin", False)
1964 MO_F64_Acos -> (fsLit "acos", False)
1965 MO_F64_Atan -> (fsLit "atan", False)
1966
1967 MO_F64_Sinh -> (fsLit "sinh", False)
1968 MO_F64_Cosh -> (fsLit "cosh", False)
1969 MO_F64_Tanh -> (fsLit "tanh", False)
1970 MO_F64_Pwr -> (fsLit "pow", False)
1971
1972 MO_F64_Asinh -> (fsLit "asinh", False)
1973 MO_F64_Acosh -> (fsLit "acosh", False)
1974 MO_F64_Atanh -> (fsLit "atanh", False)
1975
1976 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
1977
1978 MO_Memcpy _ -> (fsLit "memcpy", False)
1979 MO_Memset _ -> (fsLit "memset", False)
1980 MO_Memmove _ -> (fsLit "memmove", False)
1981 MO_Memcmp _ -> (fsLit "memcmp", False)
1982
1983 MO_BSwap w -> (fsLit $ bSwapLabel w, False)
1984 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
1985 MO_Pdep w -> (fsLit $ pdepLabel w, False)
1986 MO_Pext w -> (fsLit $ pextLabel w, False)
1987 MO_Clz _ -> unsupported
1988 MO_Ctz _ -> unsupported
1989 MO_AtomicRMW {} -> unsupported
1990 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
1991 MO_AtomicRead _ -> unsupported
1992 MO_AtomicWrite _ -> unsupported
1993
1994 MO_S_QuotRem {} -> unsupported
1995 MO_U_QuotRem {} -> unsupported
1996 MO_U_QuotRem2 {} -> unsupported
1997 MO_Add2 {} -> unsupported
1998 MO_AddWordC {} -> unsupported
1999 MO_SubWordC {} -> unsupported
2000 MO_AddIntC {} -> unsupported
2001 MO_SubIntC {} -> unsupported
2002 MO_U_Mul2 {} -> unsupported
2003 MO_WriteBarrier -> unsupported
2004 MO_Touch -> unsupported
2005 MO_Prefetch_Data _ -> unsupported
2006 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2007 ++ " not supported")
2008
2009 -- -----------------------------------------------------------------------------
2010 -- Generating a table-branch
2011
2012 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2013 genSwitch dflags expr targets
2014 | OSAIX <- platformOS (targetPlatform dflags)
2015 = do
2016 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2017 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2018 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2019 tmp <- getNewRegNat fmt
2020 lbl <- getNewLabelNat
2021 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2022 (tableReg,t_code) <- getSomeReg $ dynRef
2023 let code = e_code `appOL` t_code `appOL` toOL [
2024 SL fmt tmp reg (RIImm (ImmInt sha)),
2025 LD fmt tmp (AddrRegReg tableReg tmp),
2026 MTCTR tmp,
2027 BCTR ids (Just lbl)
2028 ]
2029 return code
2030
2031 | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags)
2032 = do
2033 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2034 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2035 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2036 tmp <- getNewRegNat fmt
2037 lbl <- getNewLabelNat
2038 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2039 (tableReg,t_code) <- getSomeReg $ dynRef
2040 let code = e_code `appOL` t_code `appOL` toOL [
2041 SL fmt tmp reg (RIImm (ImmInt sha)),
2042 LD fmt tmp (AddrRegReg tableReg tmp),
2043 ADD tmp tmp (RIReg tableReg),
2044 MTCTR tmp,
2045 BCTR ids (Just lbl)
2046 ]
2047 return code
2048 | otherwise
2049 = do
2050 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2051 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2052 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2053 tmp <- getNewRegNat fmt
2054 lbl <- getNewLabelNat
2055 let code = e_code `appOL` toOL [
2056 SL fmt tmp reg (RIImm (ImmInt sha)),
2057 ADDIS tmp tmp (HA (ImmCLbl lbl)),
2058 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
2059 MTCTR tmp,
2060 BCTR ids (Just lbl)
2061 ]
2062 return code
2063 where (offset, ids) = switchTargetsToTable targets
2064
2065 generateJumpTableForInstr :: DynFlags -> Instr
2066 -> Maybe (NatCmmDecl CmmStatics Instr)
2067 generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
2068 let jumpTable
2069 | (positionIndependent dflags)
2070 || (not $ target32Bit $ targetPlatform dflags)
2071 = map jumpTableEntryRel ids
2072 | otherwise = map (jumpTableEntry dflags) ids
2073 where jumpTableEntryRel Nothing
2074 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2075 jumpTableEntryRel (Just blockid)
2076 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
2077 (wordWidth dflags))
2078 where blockLabel = blockLbl blockid
2079 in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
2080 generateJumpTableForInstr _ _ = Nothing
2081
2082 -- -----------------------------------------------------------------------------
2083 -- 'condIntReg' and 'condFltReg': condition codes into registers
2084
2085 -- Turn those condition codes into integers now (when they appear on
2086 -- the right hand side of an assignment).
2087
2088
2089
2090 condReg :: NatM CondCode -> NatM Register
2091 condReg getCond = do
2092 CondCode _ cond cond_code <- getCond
2093 dflags <- getDynFlags
2094 let
2095 code dst = cond_code
2096 `appOL` negate_code
2097 `appOL` toOL [
2098 MFCR dst,
2099 RLWINM dst dst (bit + 1) 31 31
2100 ]
2101
2102 negate_code | do_negate = unitOL (CRNOR bit bit bit)
2103 | otherwise = nilOL
2104
2105 (bit, do_negate) = case cond of
2106 LTT -> (0, False)
2107 LE -> (1, True)
2108 EQQ -> (2, False)
2109 GE -> (0, True)
2110 GTT -> (1, False)
2111
2112 NE -> (2, True)
2113
2114 LU -> (0, False)
2115 LEU -> (1, True)
2116 GEU -> (0, True)
2117 GU -> (1, False)
2118 _ -> panic "PPC.CodeGen.codeReg: no match"
2119
2120 format = archWordFormat $ target32Bit $ targetPlatform dflags
2121 return (Any format code)
2122
2123 condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register
2124 condIntReg cond width x y = condReg (condIntCode cond width x y)
2125 condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2126 condFltReg cond x y = condReg (condFltCode cond x y)
2127
2128
2129
2130 -- -----------------------------------------------------------------------------
2131 -- 'trivial*Code': deal with trivial instructions
2132
2133 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2134 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2135 -- Only look for constants on the right hand side, because that's
2136 -- where the generic optimizer will have put them.
2137
2138 -- Similarly, for unary instructions, we don't have to worry about
2139 -- matching an StInt as the argument, because genericOpt will already
2140 -- have handled the constant-folding.
2141
2142
2143
2144 {-
2145 Wolfgang's PowerPC version of The Rules:
2146
2147 A slightly modified version of The Rules to take advantage of the fact
2148 that PowerPC instructions work on all registers and don't implicitly
2149 clobber any fixed registers.
2150
2151 * The only expression for which getRegister returns Fixed is (CmmReg reg).
2152
2153 * If getRegister returns Any, then the code it generates may modify only:
2154 (a) fresh temporaries
2155 (b) the destination register
2156 It may *not* modify global registers, unless the global
2157 register happens to be the destination register.
2158 It may not clobber any other registers. In fact, only ccalls clobber any
2159 fixed registers.
2160 Also, it may not modify the counter register (used by genCCall).
2161
2162 Corollary: If a getRegister for a subexpression returns Fixed, you need
2163 not move it to a fresh temporary before evaluating the next subexpression.
2164 The Fixed register won't be modified.
2165 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
2166
2167 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
2168 the value of the destination register.
2169 -}
2170
2171 trivialCode
2172 :: Width
2173 -> Bool
2174 -> (Reg -> Reg -> RI -> Instr)
2175 -> CmmExpr
2176 -> CmmExpr
2177 -> NatM Register
2178
2179 trivialCode rep signed instr x (CmmLit (CmmInt y _))
2180 | Just imm <- makeImmediate rep signed y
2181 = do
2182 (src1, code1) <- getSomeReg x
2183 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
2184 return (Any (intFormat rep) code)
2185
2186 trivialCode rep _ instr x y = do
2187 (src1, code1) <- getSomeReg x
2188 (src2, code2) <- getSomeReg y
2189 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
2190 return (Any (intFormat rep) code)
2191
2192 shiftMulCode
2193 :: Width
2194 -> Bool
2195 -> (Format-> Reg -> Reg -> RI -> Instr)
2196 -> CmmExpr
2197 -> CmmExpr
2198 -> NatM Register
2199 shiftMulCode width sign instr x (CmmLit (CmmInt y _))
2200 | Just imm <- makeImmediate width sign y
2201 = do
2202 (src1, code1) <- getSomeReg x
2203 let format = intFormat width
2204 let ins_fmt = intFormat (max W32 width)
2205 let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm)
2206 return (Any format code)
2207
2208 shiftMulCode width _ instr x y = do
2209 (src1, code1) <- getSomeReg x
2210 (src2, code2) <- getSomeReg y
2211 let format = intFormat width
2212 let ins_fmt = intFormat (max W32 width)
2213 let code dst = code1 `appOL` code2
2214 `snocOL` instr ins_fmt dst src1 (RIReg src2)
2215 return (Any format code)
2216
2217 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
2218 -> CmmExpr -> CmmExpr -> NatM Register
2219 trivialCodeNoImm' format instr x y = do
2220 (src1, code1) <- getSomeReg x
2221 (src2, code2) <- getSomeReg y
2222 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
2223 return (Any format code)
2224
2225 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
2226 -> CmmExpr -> CmmExpr -> NatM Register
2227 trivialCodeNoImm format instr x y
2228 = trivialCodeNoImm' format (instr format) x y
2229
2230 srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr)
2231 -> CmmExpr -> CmmExpr -> NatM Register
2232 srCode width sgn instr x (CmmLit (CmmInt y _))
2233 | Just imm <- makeImmediate width sgn y
2234 = do
2235 let op_len = max W32 width
2236 extend = if sgn then extendSExpr else extendUExpr
2237 (src1, code1) <- getSomeReg (extend width op_len x)
2238 let code dst = code1 `snocOL`
2239 instr (intFormat op_len) dst src1 (RIImm imm)
2240 return (Any (intFormat width) code)
2241
2242 srCode width sgn instr x y = do
2243 let op_len = max W32 width
2244 extend = if sgn then extendSExpr else extendUExpr
2245 (src1, code1) <- getSomeReg (extend width op_len x)
2246 (src2, code2) <- getSomeReg (extendUExpr width op_len y)
2247 -- Note: Shift amount `y` is unsigned
2248 let code dst = code1 `appOL` code2 `snocOL`
2249 instr (intFormat op_len) dst src1 (RIReg src2)
2250 return (Any (intFormat width) code)
2251
2252 divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
2253 divCode width sgn x y = do
2254 let op_len = max W32 width
2255 extend = if sgn then extendSExpr else extendUExpr
2256 (src1, code1) <- getSomeReg (extend width op_len x)
2257 (src2, code2) <- getSomeReg (extend width op_len y)
2258 let code dst = code1 `appOL` code2 `snocOL`
2259 DIV (intFormat op_len) sgn dst src1 src2
2260 return (Any (intFormat width) code)
2261
2262
2263 trivialUCode :: Format
2264 -> (Reg -> Reg -> Instr)
2265 -> CmmExpr
2266 -> NatM Register
2267 trivialUCode rep instr x = do
2268 (src, code) <- getSomeReg x
2269 let code' dst = code `snocOL` instr dst src
2270 return (Any rep code')
2271
2272 -- There is no "remainder" instruction on the PPC, so we have to do
2273 -- it the hard way.
2274 -- The "sgn" parameter is the signedness for the division instruction
2275
2276 remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr
2277 -> NatM (Reg -> InstrBlock)
2278 remainderCode rep sgn reg_q arg_x arg_y = do
2279 let op_len = max W32 rep
2280 fmt = intFormat op_len
2281 extend = if sgn then extendSExpr else extendUExpr
2282 (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x)
2283 (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y)
2284 return $ \reg_r -> y_code `appOL` x_code
2285 `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg
2286 , MULL fmt reg_r reg_q (RIReg y_reg)
2287 , SUBF reg_r reg_r x_reg
2288 ]
2289
2290
2291 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2292 coerceInt2FP fromRep toRep x = do
2293 dflags <- getDynFlags
2294 let arch = platformArch $ targetPlatform dflags
2295 coerceInt2FP' arch fromRep toRep x
2296
2297 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2298 coerceInt2FP' ArchPPC fromRep toRep x = do
2299 (src, code) <- getSomeReg x
2300 lbl <- getNewLabelNat
2301 itmp <- getNewRegNat II32
2302 ftmp <- getNewRegNat FF64
2303 dflags <- getDynFlags
2304 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2305 Amode addr addr_code <- getAmode D dynRef
2306 let
2307 code' dst = code `appOL` maybe_exts `appOL` toOL [
2308 LDATA (Section ReadOnlyData lbl) $ Statics lbl
2309 [CmmStaticLit (CmmInt 0x43300000 W32),
2310 CmmStaticLit (CmmInt 0x80000000 W32)],
2311 XORIS itmp src (ImmInt 0x8000),
2312 ST II32 itmp (spRel dflags 3),
2313 LIS itmp (ImmInt 0x4330),
2314 ST II32 itmp (spRel dflags 2),
2315 LD FF64 ftmp (spRel dflags 2)
2316 ] `appOL` addr_code `appOL` toOL [
2317 LD FF64 dst addr,
2318 FSUB FF64 dst ftmp dst
2319 ] `appOL` maybe_frsp dst
2320
2321 maybe_exts = case fromRep of
2322 W8 -> unitOL $ EXTS II8 src src
2323 W16 -> unitOL $ EXTS II16 src src
2324 W32 -> nilOL
2325 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2326
2327 maybe_frsp dst
2328 = case toRep of
2329 W32 -> unitOL $ FRSP dst dst
2330 W64 -> nilOL
2331 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2332
2333 return (Any (floatFormat toRep) code')
2334
2335 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
2336 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
2337 -- set right before a call and restored right after return from the call.
2338 -- So it is fine.
2339 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
2340 (src, code) <- getSomeReg x
2341 dflags <- getDynFlags
2342 let
2343 code' dst = code `appOL` maybe_exts `appOL` toOL [
2344 ST II64 src (spRel dflags 3),
2345 LD FF64 dst (spRel dflags 3),
2346 FCFID dst dst
2347 ] `appOL` maybe_frsp dst
2348
2349 maybe_exts = case fromRep of
2350 W8 -> unitOL $ EXTS II8 src src
2351 W16 -> unitOL $ EXTS II16 src src
2352 W32 -> unitOL $ EXTS II32 src src
2353 W64 -> nilOL
2354 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2355
2356 maybe_frsp dst
2357 = case toRep of
2358 W32 -> unitOL $ FRSP dst dst
2359 W64 -> nilOL
2360 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2361
2362 return (Any (floatFormat toRep) code')
2363
2364 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
2365
2366
2367 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2368 coerceFP2Int fromRep toRep x = do
2369 dflags <- getDynFlags
2370 let arch = platformArch $ targetPlatform dflags
2371 coerceFP2Int' arch fromRep toRep x
2372
2373 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2374 coerceFP2Int' ArchPPC _ toRep x = do
2375 dflags <- getDynFlags
2376 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
2377 (src, code) <- getSomeReg x
2378 tmp <- getNewRegNat FF64
2379 let
2380 code' dst = code `appOL` toOL [
2381 -- convert to int in FP reg
2382 FCTIWZ tmp src,
2383 -- store value (64bit) from FP to stack
2384 ST FF64 tmp (spRel dflags 2),
2385 -- read low word of value (high word is undefined)
2386 LD II32 dst (spRel dflags 3)]
2387 return (Any (intFormat toRep) code')
2388
2389 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
2390 dflags <- getDynFlags
2391 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
2392 (src, code) <- getSomeReg x
2393 tmp <- getNewRegNat FF64
2394 let
2395 code' dst = code `appOL` toOL [
2396 -- convert to int in FP reg
2397 FCTIDZ tmp src,
2398 -- store value (64bit) from FP to compiler word on stack
2399 ST FF64 tmp (spRel dflags 3),
2400 LD II64 dst (spRel dflags 3)]
2401 return (Any (intFormat toRep) code')
2402
2403 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
2404
2405 -- Note [.LCTOC1 in PPC PIC code]
2406 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
2407 -- to make the most of the PPC's 16-bit displacements.
2408 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
2409 -- first element will have '-32768' offset against .LCTOC1.
2410
2411 -- Note [implicit register in PPC PIC code]
2412 -- PPC generates calls by labels in assembly
2413 -- in form of:
2414 -- bl puts+32768@plt
2415 -- in this form it's not seen directly (by GHC NCG)
2416 -- that r30 (PicBaseReg) is used,
2417 -- but r30 is a required part of PLT code setup:
2418 -- puts+32768@plt:
2419 -- lwz r11,-30484(r30) ; offset in .LCTOC1
2420 -- mtctr r11
2421 -- bctr