7c345f2328b3dc4d9cc13d5177ce9cfc63fc4330
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, and (b) the type signatures,
13 -- the structure should not be too overwhelming.
14
15 module PPC.CodeGen (
16 cmmTopCodeGen,
17 generateJumpTableForInstr,
18 InstrBlock
19 )
20
21 where
22
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
25 #include "../includes/MachDeps.h"
26
27 -- NCG stuff:
28 import GhcPrelude
29
30 import CodeGen.Platform
31 import PPC.Instr
32 import PPC.Cond
33 import PPC.Regs
34 import CPrim
35 import NCGMonad
36 import Instruction
37 import PIC
38 import Format
39 import RegClass
40 import Reg
41 import TargetReg
42 import Platform
43
44 -- Our intermediate code:
45 import BlockId
46 import PprCmm ( pprExpr )
47 import Cmm
48 import CmmUtils
49 import CmmSwitch
50 import CLabel
51 import Hoopl.Block
52 import Hoopl.Graph
53
54 -- The rest:
55 import OrdList
56 import Outputable
57 import DynFlags
58
59 import Control.Monad ( mapAndUnzipM, when )
60 import Data.Bits
61 import Data.Word
62
63 import BasicTypes
64 import FastString
65 import Util
66
67 -- -----------------------------------------------------------------------------
68 -- Top-level of the instruction selector
69
70 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
71 -- They are really trees of insns to facilitate fast appending, where a
72 -- left-to-right traversal (pre-order?) yields the insns in the correct
73 -- order.
74
75 cmmTopCodeGen
76 :: RawCmmDecl
77 -> NatM [NatCmmDecl CmmStatics Instr]
78
79 cmmTopCodeGen (CmmProc info lab live graph) = do
80 let blocks = toBlockListEntryFirst graph
81 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
82 dflags <- getDynFlags
83 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
84 tops = proc : concat statics
85 os = platformOS $ targetPlatform dflags
86 arch = platformArch $ targetPlatform dflags
87 case arch of
88 ArchPPC | os == OSAIX -> return tops
89 | otherwise -> do
90 picBaseMb <- getPicBaseMaybeNat
91 case picBaseMb of
92 Just picBase -> initializePicBase_ppc arch os picBase tops
93 Nothing -> return tops
94 ArchPPC_64 ELF_V1 -> fixup_entry tops
95 -- generating function descriptor is handled in
96 -- pretty printer
97 ArchPPC_64 ELF_V2 -> fixup_entry tops
98 -- generating function prologue is handled in
99 -- pretty printer
100 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
101 where
102 fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
103 = do
104 let BasicBlock bID insns = entry
105 bID' <- if lab == (blockLbl bID)
106 then newBlockId
107 else return bID
108 let b' = BasicBlock bID' insns
109 return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
110 fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
111
112 cmmTopCodeGen (CmmData sec dat) = do
113 return [CmmData sec dat] -- no translation, we just use CmmStatic
114
115 basicBlockCodeGen
116 :: Block CmmNode C C
117 -> NatM ( [NatBasicBlock Instr]
118 , [NatCmmDecl CmmStatics Instr])
119
120 basicBlockCodeGen block = do
121 let (_, nodes, tail) = blockSplit block
122 id = entryLabel block
123 stmts = blockToList nodes
124 mid_instrs <- stmtsToInstrs stmts
125 tail_instrs <- stmtToInstrs tail
126 let instrs = mid_instrs `appOL` tail_instrs
127 -- code generation may introduce new basic block boundaries, which
128 -- are indicated by the NEWBLOCK instruction. We must split up the
129 -- instruction stream into basic blocks again. Also, we extract
130 -- LDATAs here too.
131 let
132 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
133
134 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
135 = ([], BasicBlock id instrs : blocks, statics)
136 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
137 = (instrs, blocks, CmmData sec dat:statics)
138 mkBlocks instr (instrs,blocks,statics)
139 = (instr:instrs, blocks, statics)
140 return (BasicBlock id top : other_blocks, statics)
141
142 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
143 stmtsToInstrs stmts
144 = do instrss <- mapM stmtToInstrs stmts
145 return (concatOL instrss)
146
147 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
148 stmtToInstrs stmt = do
149 dflags <- getDynFlags
150 case stmt of
151 CmmComment s -> return (unitOL (COMMENT s))
152 CmmTick {} -> return nilOL
153 CmmUnwind {} -> return nilOL
154
155 CmmAssign reg src
156 | isFloatType ty -> assignReg_FltCode format reg src
157 | target32Bit (targetPlatform dflags) &&
158 isWord64 ty -> assignReg_I64Code reg src
159 | otherwise -> assignReg_IntCode format reg src
160 where ty = cmmRegType dflags reg
161 format = cmmTypeFormat ty
162
163 CmmStore addr src
164 | isFloatType ty -> assignMem_FltCode format addr src
165 | target32Bit (targetPlatform dflags) &&
166 isWord64 ty -> assignMem_I64Code addr src
167 | otherwise -> assignMem_IntCode format addr src
168 where ty = cmmExprType dflags src
169 format = cmmTypeFormat ty
170
171 CmmUnsafeForeignCall target result_regs args
172 -> genCCall target result_regs args
173
174 CmmBranch id -> genBranch id
175 CmmCondBranch arg true false prediction -> do
176 b1 <- genCondJump true arg prediction
177 b2 <- genBranch false
178 return (b1 `appOL` b2)
179 CmmSwitch arg ids -> do dflags <- getDynFlags
180 genSwitch dflags arg ids
181 CmmCall { cml_target = arg } -> genJump arg
182 _ ->
183 panic "stmtToInstrs: statement should have been cps'd away"
184
185
186 --------------------------------------------------------------------------------
187 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
188 -- They are really trees of insns to facilitate fast appending, where a
189 -- left-to-right traversal yields the insns in the correct order.
190 --
191 type InstrBlock
192 = OrdList Instr
193
194
195 -- | Register's passed up the tree. If the stix code forces the register
196 -- to live in a pre-decided machine register, it comes out as @Fixed@;
197 -- otherwise, it comes out as @Any@, and the parent can decide which
198 -- register to put it in.
199 --
200 data Register
201 = Fixed Format Reg InstrBlock
202 | Any Format (Reg -> InstrBlock)
203
204
205 swizzleRegisterRep :: Register -> Format -> Register
206 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
207 swizzleRegisterRep (Any _ codefn) format = Any format codefn
208
209
210 -- | Grab the Reg for a CmmReg
211 getRegisterReg :: Platform -> CmmReg -> Reg
212
213 getRegisterReg _ (CmmLocal (LocalReg u pk))
214 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
215
216 getRegisterReg platform (CmmGlobal mid)
217 = case globalRegMaybe platform mid of
218 Just reg -> RegReal reg
219 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
220 -- By this stage, the only MagicIds remaining should be the
221 -- ones which map to a real machine register on this
222 -- platform. Hence ...
223
224 -- | Convert a BlockId to some CmmStatic data
225 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
226 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
227 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
228 where blockLabel = blockLbl blockid
229
230
231
232 -- -----------------------------------------------------------------------------
233 -- General things for putting together code sequences
234
235 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
236 -- CmmExprs into CmmRegOff?
237 mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
238 mangleIndexTree dflags (CmmRegOff reg off)
239 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
240 where width = typeWidth (cmmRegType dflags reg)
241
242 mangleIndexTree _ _
243 = panic "PPC.CodeGen.mangleIndexTree: no match"
244
245 -- -----------------------------------------------------------------------------
246 -- Code gen for 64-bit arithmetic on 32-bit platforms
247
248 {-
249 Simple support for generating 64-bit code (ie, 64 bit values and 64
250 bit assignments) on 32-bit platforms. Unlike the main code generator
251 we merely shoot for generating working code as simply as possible, and
252 pay little attention to code quality. Specifically, there is no
253 attempt to deal cleverly with the fixed-vs-floating register
254 distinction; all values are generated into (pairs of) floating
255 registers, even if this would mean some redundant reg-reg moves as a
256 result. Only one of the VRegUniques is returned, since it will be
257 of the VRegUniqueLo form, and the upper-half VReg can be determined
258 by applying getHiVRegFromLo to it.
259 -}
260
261 data ChildCode64 -- a.k.a "Register64"
262 = ChildCode64
263 InstrBlock -- code
264 Reg -- the lower 32-bit temporary which contains the
265 -- result; use getHiVRegFromLo to find the other
266 -- VRegUnique. Rules of this simplified insn
267 -- selection game are therefore that the returned
268 -- Reg may be modified
269
270
271 -- | Compute an expression into a register, but
272 -- we don't mind which one it is.
273 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
274 getSomeReg expr = do
275 r <- getRegister expr
276 case r of
277 Any rep code -> do
278 tmp <- getNewRegNat rep
279 return (tmp, code tmp)
280 Fixed _ reg code ->
281 return (reg, code)
282
283 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
284 getI64Amodes addrTree = do
285 Amode hi_addr addr_code <- getAmode D addrTree
286 case addrOffset hi_addr 4 of
287 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
288 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
289 return (AddrRegImm hi_ptr (ImmInt 0),
290 AddrRegImm hi_ptr (ImmInt 4),
291 code)
292
293
294 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
295 assignMem_I64Code addrTree valueTree = do
296 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
297 ChildCode64 vcode rlo <- iselExpr64 valueTree
298 let
299 rhi = getHiVRegFromLo rlo
300
301 -- Big-endian store
302 mov_hi = ST II32 rhi hi_addr
303 mov_lo = ST II32 rlo lo_addr
304 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
305
306
307 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
308 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
309 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
310 let
311 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
312 r_dst_hi = getHiVRegFromLo r_dst_lo
313 r_src_hi = getHiVRegFromLo r_src_lo
314 mov_lo = MR r_dst_lo r_src_lo
315 mov_hi = MR r_dst_hi r_src_hi
316 return (
317 vcode `snocOL` mov_lo `snocOL` mov_hi
318 )
319
320 assignReg_I64Code _ _
321 = panic "assignReg_I64Code(powerpc): invalid lvalue"
322
323
324 iselExpr64 :: CmmExpr -> NatM ChildCode64
325 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
326 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
327 (rlo, rhi) <- getNewRegPairNat II32
328 let mov_hi = LD II32 rhi hi_addr
329 mov_lo = LD II32 rlo lo_addr
330 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
331 rlo
332
333 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
334 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
335
336 iselExpr64 (CmmLit (CmmInt i _)) = do
337 (rlo,rhi) <- getNewRegPairNat II32
338 let
339 half0 = fromIntegral (fromIntegral i :: Word16)
340 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
341 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
342 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
343
344 code = toOL [
345 LIS rlo (ImmInt half1),
346 OR rlo rlo (RIImm $ ImmInt half0),
347 LIS rhi (ImmInt half3),
348 OR rhi rhi (RIImm $ ImmInt half2)
349 ]
350 return (ChildCode64 code rlo)
351
352 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
353 ChildCode64 code1 r1lo <- iselExpr64 e1
354 ChildCode64 code2 r2lo <- iselExpr64 e2
355 (rlo,rhi) <- getNewRegPairNat II32
356 let
357 r1hi = getHiVRegFromLo r1lo
358 r2hi = getHiVRegFromLo r2lo
359 code = code1 `appOL`
360 code2 `appOL`
361 toOL [ ADDC rlo r1lo r2lo,
362 ADDE rhi r1hi r2hi ]
363 return (ChildCode64 code rlo)
364
365 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
366 ChildCode64 code1 r1lo <- iselExpr64 e1
367 ChildCode64 code2 r2lo <- iselExpr64 e2
368 (rlo,rhi) <- getNewRegPairNat II32
369 let
370 r1hi = getHiVRegFromLo r1lo
371 r2hi = getHiVRegFromLo r2lo
372 code = code1 `appOL`
373 code2 `appOL`
374 toOL [ SUBFC rlo r2lo (RIReg r1lo),
375 SUBFE rhi r2hi r1hi ]
376 return (ChildCode64 code rlo)
377
378 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
379 (expr_reg,expr_code) <- getSomeReg expr
380 (rlo, rhi) <- getNewRegPairNat II32
381 let mov_hi = LI rhi (ImmInt 0)
382 mov_lo = MR rlo expr_reg
383 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
384 rlo
385
386 iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
387 (expr_reg,expr_code) <- getSomeReg expr
388 (rlo, rhi) <- getNewRegPairNat II32
389 let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
390 mov_lo = MR rlo expr_reg
391 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
392 rlo
393 iselExpr64 expr
394 = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
395
396
397
398 getRegister :: CmmExpr -> NatM Register
399 getRegister e = do dflags <- getDynFlags
400 getRegister' dflags e
401
402 getRegister' :: DynFlags -> CmmExpr -> NatM Register
403
404 getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
405 | OSAIX <- platformOS (targetPlatform dflags) = do
406 let code dst = toOL [ LD II32 dst tocAddr ]
407 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
408 return (Any II32 code)
409 | target32Bit (targetPlatform dflags) = do
410 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
411 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
412 reg nilOL)
413 | otherwise = return (Fixed II64 toc nilOL)
414
415 getRegister' dflags (CmmReg reg)
416 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
417 (getRegisterReg (targetPlatform dflags) reg) nilOL)
418
419 getRegister' dflags tree@(CmmRegOff _ _)
420 = getRegister' dflags (mangleIndexTree dflags tree)
421
422 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
423 -- TO_W_(x), TO_W_(x >> 32)
424
425 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
426 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
427 | target32Bit (targetPlatform dflags) = do
428 ChildCode64 code rlo <- iselExpr64 x
429 return $ Fixed II32 (getHiVRegFromLo rlo) code
430
431 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
432 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
433 | target32Bit (targetPlatform dflags) = do
434 ChildCode64 code rlo <- iselExpr64 x
435 return $ Fixed II32 (getHiVRegFromLo rlo) code
436
437 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
438 | target32Bit (targetPlatform dflags) = do
439 ChildCode64 code rlo <- iselExpr64 x
440 return $ Fixed II32 rlo code
441
442 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
443 | target32Bit (targetPlatform dflags) = do
444 ChildCode64 code rlo <- iselExpr64 x
445 return $ Fixed II32 rlo code
446
447 getRegister' dflags (CmmLoad mem pk)
448 | not (isWord64 pk) = do
449 let platform = targetPlatform dflags
450 Amode addr addr_code <- getAmode D mem
451 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
452 addr_code `snocOL` LD format dst addr
453 return (Any format code)
454 | not (target32Bit (targetPlatform dflags)) = do
455 Amode addr addr_code <- getAmode DS mem
456 let code dst = addr_code `snocOL` LD II64 dst addr
457 return (Any II64 code)
458
459 where format = cmmTypeFormat pk
460
461 -- catch simple cases of zero- or sign-extended load
462 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
463 Amode addr addr_code <- getAmode D mem
464 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
465
466 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
467 Amode addr addr_code <- getAmode D mem
468 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
469
470 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
471
472 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
473 Amode addr addr_code <- getAmode D mem
474 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
475
476 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
477 Amode addr addr_code <- getAmode D mem
478 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
479
480 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
481 Amode addr addr_code <- getAmode D mem
482 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
483
484 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
485 Amode addr addr_code <- getAmode D mem
486 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
487
488 getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
489 Amode addr addr_code <- getAmode D mem
490 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
491
492 getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
493 -- lwa is DS-form. See Note [Power instruction format]
494 Amode addr addr_code <- getAmode DS mem
495 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
496
497 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
498 = case mop of
499 MO_Not rep -> triv_ucode_int rep NOT
500
501 MO_F_Neg w -> triv_ucode_float w FNEG
502 MO_S_Neg w -> triv_ucode_int w NEG
503
504 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
505 MO_FF_Conv W32 W64 -> conversionNop FF64 x
506
507 MO_FS_Conv from to -> coerceFP2Int from to x
508 MO_SF_Conv from to -> coerceInt2FP from to x
509
510 MO_SS_Conv from to
511 | from == to -> conversionNop (intFormat to) x
512
513 -- narrowing is a nop: we treat the high bits as undefined
514 MO_SS_Conv W64 to
515 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register"
516 | otherwise -> conversionNop (intFormat to) x
517 MO_SS_Conv W32 to
518 | arch32 -> conversionNop (intFormat to) x
519 | otherwise -> case to of
520 W64 -> triv_ucode_int to (EXTS II32)
521 W16 -> conversionNop II16 x
522 W8 -> conversionNop II8 x
523 _ -> panic "PPC.CodeGen.getRegister: no match"
524 MO_SS_Conv W16 W8 -> conversionNop II8 x
525 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
526 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
527
528 MO_UU_Conv from to
529 | from == to -> conversionNop (intFormat to) x
530 -- narrowing is a nop: we treat the high bits as undefined
531 MO_UU_Conv W64 to
532 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target"
533 | otherwise -> conversionNop (intFormat to) x
534 MO_UU_Conv W32 to
535 | arch32 -> conversionNop (intFormat to) x
536 | otherwise ->
537 case to of
538 W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
539 W16 -> conversionNop II16 x
540 W8 -> conversionNop II8 x
541 _ -> panic "PPC.CodeGen.getRegister: no match"
542 MO_UU_Conv W16 W8 -> conversionNop II8 x
543 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
544 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
545 _ -> panic "PPC.CodeGen.getRegister: no match"
546
547 where
548 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
549 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
550
551 conversionNop new_format expr
552 = do e_code <- getRegister' dflags expr
553 return (swizzleRegisterRep e_code new_format)
554 arch32 = target32Bit $ targetPlatform dflags
555
556 getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
557 = case mop of
558 MO_F_Eq _ -> condFltReg EQQ x y
559 MO_F_Ne _ -> condFltReg NE x y
560 MO_F_Gt _ -> condFltReg GTT x y
561 MO_F_Ge _ -> condFltReg GE x y
562 MO_F_Lt _ -> condFltReg LTT x y
563 MO_F_Le _ -> condFltReg LE x y
564
565 MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
566 (extendUExpr dflags rep y)
567 MO_Ne rep -> condIntReg NE (extendUExpr dflags rep x)
568 (extendUExpr dflags rep y)
569
570 MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
571 (extendSExpr dflags rep y)
572 MO_S_Ge rep -> condIntReg GE (extendSExpr dflags rep x)
573 (extendSExpr dflags rep y)
574 MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
575 (extendSExpr dflags rep y)
576 MO_S_Le rep -> condIntReg LE (extendSExpr dflags rep x)
577 (extendSExpr dflags rep y)
578
579 MO_U_Gt rep -> condIntReg GU (extendUExpr dflags rep x)
580 (extendUExpr dflags rep y)
581 MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
582 (extendUExpr dflags rep y)
583 MO_U_Lt rep -> condIntReg LU (extendUExpr dflags rep x)
584 (extendUExpr dflags rep y)
585 MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
586 (extendUExpr dflags rep y)
587
588 MO_F_Add w -> triv_float w FADD
589 MO_F_Sub w -> triv_float w FSUB
590 MO_F_Mul w -> triv_float w FMUL
591 MO_F_Quot w -> triv_float w FDIV
592
593 -- optimize addition with 32-bit immediate
594 -- (needed for PIC)
595 MO_Add W32 ->
596 case y of
597 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
598 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
599 CmmLit lit
600 -> do
601 (src, srcCode) <- getSomeReg x
602 let imm = litToImm lit
603 code dst = srcCode `appOL` toOL [
604 ADDIS dst src (HA imm),
605 ADD dst dst (RIImm (LO imm))
606 ]
607 return (Any II32 code)
608 _ -> trivialCode W32 True ADD x y
609
610 MO_Add rep -> trivialCode rep True ADD x y
611 MO_Sub rep ->
612 case y of
613 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
614 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
615 _ -> case x of
616 CmmLit (CmmInt imm _)
617 | Just _ <- makeImmediate rep True imm
618 -- subfi ('substract from' with immediate) doesn't exist
619 -> trivialCode rep True SUBFC y x
620 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
621
622 MO_Mul rep -> shiftMulCode rep True MULL x y
623 MO_S_MulMayOflo rep -> do
624 (src1, code1) <- getSomeReg x
625 (src2, code2) <- getSomeReg y
626 let
627 format = intFormat rep
628 code dst = code1 `appOL` code2
629 `appOL` toOL [ MULLO format dst src1 src2
630 , MFOV format dst
631 ]
632 return (Any format code)
633
634 MO_S_Quot rep -> trivialCodeNoImmSign (intFormat rep) True DIV
635 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
636 MO_U_Quot rep -> trivialCodeNoImmSign (intFormat rep) False DIV
637 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
638
639 MO_S_Rem rep -> remainderCode rep True (extendSExpr dflags rep x)
640 (extendSExpr dflags rep y)
641 MO_U_Rem rep -> remainderCode rep False (extendUExpr dflags rep x)
642 (extendUExpr dflags rep y)
643
644 MO_And rep -> case y of
645 (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
646 -> do
647 (src, srcCode) <- getSomeReg x
648 let clear_mask = if imm == -4 then 2 else 3
649 fmt = intFormat rep
650 code dst = srcCode
651 `appOL` unitOL (CLRRI fmt dst src clear_mask)
652 return (Any fmt code)
653 _ -> trivialCode rep False AND x y
654 MO_Or rep -> trivialCode rep False OR x y
655 MO_Xor rep -> trivialCode rep False XOR x y
656
657 MO_Shl rep -> shiftMulCode rep False SL x y
658 MO_S_Shr rep -> shiftMulCode rep False SRA (extendSExpr dflags rep x) y
659 MO_U_Shr rep -> shiftMulCode rep False SR (extendUExpr dflags rep x) y
660 _ -> panic "PPC.CodeGen.getRegister: no match"
661
662 where
663 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
664 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
665
666 getRegister' _ (CmmLit (CmmInt i rep))
667 | Just imm <- makeImmediate rep True i
668 = let
669 code dst = unitOL (LI dst imm)
670 in
671 return (Any (intFormat rep) code)
672
673 getRegister' _ (CmmLit (CmmFloat f frep)) = do
674 lbl <- getNewLabelNat
675 dflags <- getDynFlags
676 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
677 Amode addr addr_code <- getAmode D dynRef
678 let format = floatFormat frep
679 code dst =
680 LDATA (Section ReadOnlyData lbl)
681 (Statics lbl [CmmStaticLit (CmmFloat f frep)])
682 `consOL` (addr_code `snocOL` LD format dst addr)
683 return (Any format code)
684
685 getRegister' dflags (CmmLit lit)
686 | target32Bit (targetPlatform dflags)
687 = let rep = cmmLitType dflags lit
688 imm = litToImm lit
689 code dst = toOL [
690 LIS dst (HA imm),
691 ADD dst dst (RIImm (LO imm))
692 ]
693 in return (Any (cmmTypeFormat rep) code)
694 | otherwise
695 = do lbl <- getNewLabelNat
696 dflags <- getDynFlags
697 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
698 Amode addr addr_code <- getAmode D dynRef
699 let rep = cmmLitType dflags lit
700 format = cmmTypeFormat rep
701 code dst =
702 LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
703 `consOL` (addr_code `snocOL` LD format dst addr)
704 return (Any format code)
705
706 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
707
708 -- extend?Rep: wrap integer expression of type rep
709 -- in a conversion to II32 or II64 resp.
710 extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
711 extendSExpr dflags W32 x
712 | target32Bit (targetPlatform dflags) = x
713
714 extendSExpr dflags W64 x
715 | not (target32Bit (targetPlatform dflags)) = x
716
717 extendSExpr dflags rep x =
718 let size = if target32Bit $ targetPlatform dflags
719 then W32
720 else W64
721 in CmmMachOp (MO_SS_Conv rep size) [x]
722
723 extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
724 extendUExpr dflags W32 x
725 | target32Bit (targetPlatform dflags) = x
726 extendUExpr dflags W64 x
727 | not (target32Bit (targetPlatform dflags)) = x
728 extendUExpr dflags rep x =
729 let size = if target32Bit $ targetPlatform dflags
730 then W32
731 else W64
732 in CmmMachOp (MO_UU_Conv rep size) [x]
733
734 -- -----------------------------------------------------------------------------
735 -- The 'Amode' type: Memory addressing modes passed up the tree.
736
737 data Amode
738 = Amode AddrMode InstrBlock
739
740 {-
741 Now, given a tree (the argument to a CmmLoad) that references memory,
742 produce a suitable addressing mode.
743
744 A Rule of the Game (tm) for Amodes: use of the addr bit must
745 immediately follow use of the code part, since the code part puts
746 values in registers which the addr then refers to. So you can't put
747 anything in between, lest it overwrite some of those registers. If
748 you need to do some other computation between the code part and use of
749 the addr bit, first store the effective address from the amode in a
750 temporary, then do the other computation, and then use the temporary:
751
752 code
753 LEA amode, tmp
754 ... other computation ...
755 ... (tmp) ...
756 -}
757
758 {- Note [Power instruction format]
759 In some instructions the 16 bit offset must be a multiple of 4, i.e.
760 the two least significant bits must be zero. The "Power ISA" specification
761 calls these instruction formats "DS-FORM" and the instructions with
762 arbitrary 16 bit offsets are "D-FORM".
763
764 The Power ISA specification document can be obtained from www.power.org.
765 -}
766 data InstrForm = D | DS
767
768 getAmode :: InstrForm -> CmmExpr -> NatM Amode
769 getAmode inf tree@(CmmRegOff _ _)
770 = do dflags <- getDynFlags
771 getAmode inf (mangleIndexTree dflags tree)
772
773 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
774 | Just off <- makeImmediate W32 True (-i)
775 = do
776 (reg, code) <- getSomeReg x
777 return (Amode (AddrRegImm reg off) code)
778
779
780 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
781 | Just off <- makeImmediate W32 True i
782 = do
783 (reg, code) <- getSomeReg x
784 return (Amode (AddrRegImm reg off) code)
785
786 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
787 | Just off <- makeImmediate W64 True (-i)
788 = do
789 (reg, code) <- getSomeReg x
790 return (Amode (AddrRegImm reg off) code)
791
792
793 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
794 | Just off <- makeImmediate W64 True i
795 = do
796 (reg, code) <- getSomeReg x
797 return (Amode (AddrRegImm reg off) code)
798
799 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
800 | Just off <- makeImmediate W64 True (-i)
801 = do
802 (reg, code) <- getSomeReg x
803 (reg', off', code') <-
804 if i `mod` 4 == 0
805 then do return (reg, off, code)
806 else do
807 tmp <- getNewRegNat II64
808 return (tmp, ImmInt 0,
809 code `snocOL` ADD tmp reg (RIImm off))
810 return (Amode (AddrRegImm reg' off') code')
811
812 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
813 | Just off <- makeImmediate W64 True i
814 = do
815 (reg, code) <- getSomeReg x
816 (reg', off', code') <-
817 if i `mod` 4 == 0
818 then do return (reg, off, code)
819 else do
820 tmp <- getNewRegNat II64
821 return (tmp, ImmInt 0,
822 code `snocOL` ADD tmp reg (RIImm off))
823 return (Amode (AddrRegImm reg' off') code')
824
825 -- optimize addition with 32-bit immediate
826 -- (needed for PIC)
827 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
828 = do
829 dflags <- getDynFlags
830 (src, srcCode) <- getSomeReg x
831 let imm = litToImm lit
832 case () of
833 _ | OSAIX <- platformOS (targetPlatform dflags)
834 , isCmmLabelType lit ->
835 -- HA16/LO16 relocations on labels not supported on AIX
836 return (Amode (AddrRegImm src imm) srcCode)
837 | otherwise -> do
838 tmp <- getNewRegNat II32
839 let code = srcCode `snocOL` ADDIS tmp src (HA imm)
840 return (Amode (AddrRegImm tmp (LO imm)) code)
841 where
842 isCmmLabelType (CmmLabel {}) = True
843 isCmmLabelType (CmmLabelOff {}) = True
844 isCmmLabelType (CmmLabelDiffOff {}) = True
845 isCmmLabelType _ = False
846
847 getAmode _ (CmmLit lit)
848 = do
849 dflags <- getDynFlags
850 case platformArch $ targetPlatform dflags of
851 ArchPPC -> do
852 tmp <- getNewRegNat II32
853 let imm = litToImm lit
854 code = unitOL (LIS tmp (HA imm))
855 return (Amode (AddrRegImm tmp (LO imm)) code)
856 _ -> do -- TODO: Load from TOC,
857 -- see getRegister' _ (CmmLit lit)
858 tmp <- getNewRegNat II64
859 let imm = litToImm lit
860 code = toOL [
861 LIS tmp (HIGHESTA imm),
862 OR tmp tmp (RIImm (HIGHERA imm)),
863 SL II64 tmp tmp (RIImm (ImmInt 32)),
864 ORIS tmp tmp (HA imm)
865 ]
866 return (Amode (AddrRegImm tmp (LO imm)) code)
867
868 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
869 = do
870 (regX, codeX) <- getSomeReg x
871 (regY, codeY) <- getSomeReg y
872 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
873
874 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
875 = do
876 (regX, codeX) <- getSomeReg x
877 (regY, codeY) <- getSomeReg y
878 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
879
880 getAmode _ other
881 = do
882 (reg, code) <- getSomeReg other
883 let
884 off = ImmInt 0
885 return (Amode (AddrRegImm reg off) code)
886
887
888 -- The 'CondCode' type: Condition codes passed up the tree.
889 data CondCode
890 = CondCode Bool Cond InstrBlock
891
892 -- Set up a condition code for a conditional branch.
893
894 getCondCode :: CmmExpr -> NatM CondCode
895
896 -- almost the same as everywhere else - but we need to
897 -- extend small integers to 32 bit or 64 bit first
898
899 getCondCode (CmmMachOp mop [x, y])
900 = do
901 dflags <- getDynFlags
902 case mop of
903 MO_F_Eq W32 -> condFltCode EQQ x y
904 MO_F_Ne W32 -> condFltCode NE x y
905 MO_F_Gt W32 -> condFltCode GTT x y
906 MO_F_Ge W32 -> condFltCode GE x y
907 MO_F_Lt W32 -> condFltCode LTT x y
908 MO_F_Le W32 -> condFltCode LE x y
909
910 MO_F_Eq W64 -> condFltCode EQQ x y
911 MO_F_Ne W64 -> condFltCode NE x y
912 MO_F_Gt W64 -> condFltCode GTT x y
913 MO_F_Ge W64 -> condFltCode GE x y
914 MO_F_Lt W64 -> condFltCode LTT x y
915 MO_F_Le W64 -> condFltCode LE x y
916
917 MO_Eq rep -> condIntCode EQQ (extendUExpr dflags rep x)
918 (extendUExpr dflags rep y)
919 MO_Ne rep -> condIntCode NE (extendUExpr dflags rep x)
920 (extendUExpr dflags rep y)
921
922 MO_S_Gt rep -> condIntCode GTT (extendSExpr dflags rep x)
923 (extendSExpr dflags rep y)
924 MO_S_Ge rep -> condIntCode GE (extendSExpr dflags rep x)
925 (extendSExpr dflags rep y)
926 MO_S_Lt rep -> condIntCode LTT (extendSExpr dflags rep x)
927 (extendSExpr dflags rep y)
928 MO_S_Le rep -> condIntCode LE (extendSExpr dflags rep x)
929 (extendSExpr dflags rep y)
930
931 MO_U_Gt rep -> condIntCode GU (extendSExpr dflags rep x)
932 (extendSExpr dflags rep y)
933 MO_U_Ge rep -> condIntCode GEU (extendSExpr dflags rep x)
934 (extendSExpr dflags rep y)
935 MO_U_Lt rep -> condIntCode LU (extendSExpr dflags rep x)
936 (extendSExpr dflags rep y)
937 MO_U_Le rep -> condIntCode LEU (extendSExpr dflags rep x)
938 (extendSExpr dflags rep y)
939
940 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
941
942 getCondCode _ = panic "getCondCode(2)(powerpc)"
943
944
945 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
946 -- passed back up the tree.
947
948 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
949
950 -- optimize pointer tag checks. Operation andi. sets condition register
951 -- so cmpi ..., 0 is redundant.
952 condIntCode cond (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
953 (CmmLit (CmmInt 0 _))
954 | not $ condUnsigned cond,
955 Just src2 <- makeImmediate rep False imm
956 = do
957 (src1, code) <- getSomeReg x
958 let code' = code `snocOL` AND r0 src1 (RIImm src2)
959 return (CondCode False cond code')
960
961 condIntCode cond x (CmmLit (CmmInt y rep))
962 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
963 = do
964 (src1, code) <- getSomeReg x
965 dflags <- getDynFlags
966 let format = archWordFormat $ target32Bit $ targetPlatform dflags
967 code' = code `snocOL`
968 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
969 return (CondCode False cond code')
970
971 condIntCode cond x y = do
972 (src1, code1) <- getSomeReg x
973 (src2, code2) <- getSomeReg y
974 dflags <- getDynFlags
975 let format = archWordFormat $ target32Bit $ targetPlatform dflags
976 code' = code1 `appOL` code2 `snocOL`
977 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
978 return (CondCode False cond code')
979
980 condFltCode cond x y = do
981 (src1, code1) <- getSomeReg x
982 (src2, code2) <- getSomeReg y
983 let
984 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
985 code'' = case cond of -- twiddle CR to handle unordered case
986 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
987 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
988 _ -> code'
989 where
990 ltbit = 0 ; eqbit = 2 ; gtbit = 1
991 return (CondCode True cond code'')
992
993
994
995 -- -----------------------------------------------------------------------------
996 -- Generating assignments
997
998 -- Assignments are really at the heart of the whole code generation
999 -- business. Almost all top-level nodes of any real importance are
1000 -- assignments, which correspond to loads, stores, or register
1001 -- transfers. If we're really lucky, some of the register transfers
1002 -- will go away, because we can use the destination register to
1003 -- complete the code generation for the right hand side. This only
1004 -- fails when the right hand side is forced into a fixed register
1005 -- (e.g. the result of a call).
1006
1007 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1008 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1009
1010 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1011 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1012
1013 assignMem_IntCode pk addr src = do
1014 (srcReg, code) <- getSomeReg src
1015 Amode dstAddr addr_code <- case pk of
1016 II64 -> getAmode DS addr
1017 _ -> getAmode D addr
1018 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
1019
1020 -- dst is a reg, but src could be anything
1021 assignReg_IntCode _ reg src
1022 = do
1023 dflags <- getDynFlags
1024 let dst = getRegisterReg (targetPlatform dflags) reg
1025 r <- getRegister src
1026 return $ case r of
1027 Any _ code -> code dst
1028 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
1029
1030
1031
1032 -- Easy, isn't it?
1033 assignMem_FltCode = assignMem_IntCode
1034 assignReg_FltCode = assignReg_IntCode
1035
1036
1037
1038 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1039
1040 genJump (CmmLit (CmmLabel lbl))
1041 = return (unitOL $ JMP lbl)
1042
1043 genJump tree
1044 = do
1045 dflags <- getDynFlags
1046 genJump' tree (platformToGCP (targetPlatform dflags))
1047
1048 genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
1049
1050 genJump' tree (GCPLinux64ELF 1)
1051 = do
1052 (target,code) <- getSomeReg tree
1053 return (code
1054 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1055 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1056 `snocOL` MTCTR r11
1057 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1058 `snocOL` BCTR [] Nothing)
1059
1060 genJump' tree (GCPLinux64ELF 2)
1061 = do
1062 (target,code) <- getSomeReg tree
1063 return (code
1064 `snocOL` MR r12 target
1065 `snocOL` MTCTR r12
1066 `snocOL` BCTR [] Nothing)
1067
1068 genJump' tree _
1069 = do
1070 (target,code) <- getSomeReg tree
1071 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
1072
1073 -- -----------------------------------------------------------------------------
1074 -- Unconditional branches
1075 genBranch :: BlockId -> NatM InstrBlock
1076 genBranch = return . toOL . mkJumpInstr
1077
1078
1079 -- -----------------------------------------------------------------------------
1080 -- Conditional jumps
1081
1082 {-
1083 Conditional jumps are always to local labels, so we can use branch
1084 instructions. We peek at the arguments to decide what kind of
1085 comparison to do.
1086 -}
1087
1088
1089 genCondJump
1090 :: BlockId -- the branch target
1091 -> CmmExpr -- the condition on which to branch
1092 -> Maybe Bool
1093 -> NatM InstrBlock
1094
1095 genCondJump id bool prediction = do
1096 CondCode _ cond code <- getCondCode bool
1097 return (code `snocOL` BCC cond id prediction)
1098
1099
1100
1101 -- -----------------------------------------------------------------------------
1102 -- Generating C calls
1103
1104 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1105 -- @get_arg@, which moves the arguments to the correct registers/stack
1106 -- locations. Apart from that, the code is easy.
1107
1108 genCCall :: ForeignTarget -- function to call
1109 -> [CmmFormal] -- where to put the result
1110 -> [CmmActual] -- arguments (of mixed type)
1111 -> NatM InstrBlock
1112 genCCall (PrimTarget MO_WriteBarrier) _ _
1113 = return $ unitOL LWSYNC
1114
1115 genCCall (PrimTarget MO_Touch) _ _
1116 = return $ nilOL
1117
1118 genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
1119 = return $ nilOL
1120
1121 genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
1122 = do dflags <- getDynFlags
1123 let platform = targetPlatform dflags
1124 fmt = intFormat width
1125 reg_dst = getRegisterReg platform (CmmLocal dst)
1126 (instr, n_code) <- case amop of
1127 AMO_Add -> getSomeRegOrImm ADD True reg_dst
1128 AMO_Sub -> case n of
1129 CmmLit (CmmInt i _)
1130 | Just imm <- makeImmediate width True (-i)
1131 -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
1132 _
1133 -> do
1134 (n_reg, n_code) <- getSomeReg n
1135 return (SUBF reg_dst n_reg reg_dst, n_code)
1136 AMO_And -> getSomeRegOrImm AND False reg_dst
1137 AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
1138 return (NAND reg_dst reg_dst n_reg, n_code)
1139 AMO_Or -> getSomeRegOrImm OR False reg_dst
1140 AMO_Xor -> getSomeRegOrImm XOR False reg_dst
1141 Amode addr_reg addr_code <- getAmodeIndex addr
1142 lbl_retry <- getBlockIdNat
1143 return $ n_code `appOL` addr_code
1144 `appOL` toOL [ HWSYNC
1145 , BCC ALWAYS lbl_retry Nothing
1146
1147 , NEWBLOCK lbl_retry
1148 , LDR fmt reg_dst addr_reg
1149 , instr
1150 , STC fmt reg_dst addr_reg
1151 , BCC NE lbl_retry (Just False)
1152 , ISYNC
1153 ]
1154 where
1155 getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
1156 = do
1157 (regX, codeX) <- getSomeReg x
1158 (regY, codeY) <- getSomeReg y
1159 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1160 getAmodeIndex other
1161 = do
1162 (reg, code) <- getSomeReg other
1163 return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
1164 getSomeRegOrImm op sign dst
1165 = case n of
1166 CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
1167 -> return (op dst dst (RIImm imm), nilOL)
1168 _
1169 -> do
1170 (n_reg, n_code) <- getSomeReg n
1171 return (op dst dst (RIReg n_reg), n_code)
1172
1173 genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
1174 = do dflags <- getDynFlags
1175 let platform = targetPlatform dflags
1176 fmt = intFormat width
1177 reg_dst = getRegisterReg platform (CmmLocal dst)
1178 form = if widthInBits width == 64 then DS else D
1179 Amode addr_reg addr_code <- getAmode form addr
1180 lbl_end <- getBlockIdNat
1181 return $ addr_code `appOL` toOL [ HWSYNC
1182 , LD fmt reg_dst addr_reg
1183 , CMP fmt reg_dst (RIReg reg_dst)
1184 , BCC NE lbl_end (Just False)
1185 , BCC ALWAYS lbl_end Nothing
1186 -- See Note [Seemingly useless cmp and bne]
1187 , NEWBLOCK lbl_end
1188 , ISYNC
1189 ]
1190
1191 -- Note [Seemingly useless cmp and bne]
1192 -- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
1193 -- the second paragraph says that isync may complete before storage accesses
1194 -- "associated" with a preceding instruction have been performed. The cmp
1195 -- operation and the following bne introduce a data and control dependency
1196 -- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
1197 -- Fetch).
1198 -- This is also what gcc does.
1199
1200
1201 genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1202 code <- assignMem_IntCode (intFormat width) addr val
1203 return $ unitOL(HWSYNC) `appOL` code
1204
1205 genCCall (PrimTarget (MO_Clz width)) [dst] [src]
1206 = do dflags <- getDynFlags
1207 let platform = targetPlatform dflags
1208 reg_dst = getRegisterReg platform (CmmLocal dst)
1209 if target32Bit platform && width == W64
1210 then do
1211 ChildCode64 code vr_lo <- iselExpr64 src
1212 lbl1 <- getBlockIdNat
1213 lbl2 <- getBlockIdNat
1214 lbl3 <- getBlockIdNat
1215 let vr_hi = getHiVRegFromLo vr_lo
1216 cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
1217 , BCC NE lbl2 Nothing
1218 , BCC ALWAYS lbl1 Nothing
1219
1220 , NEWBLOCK lbl1
1221 , CNTLZ II32 reg_dst vr_lo
1222 , ADD reg_dst reg_dst (RIImm (ImmInt 32))
1223 , BCC ALWAYS lbl3 Nothing
1224
1225 , NEWBLOCK lbl2
1226 , CNTLZ II32 reg_dst vr_hi
1227 , BCC ALWAYS lbl3 Nothing
1228
1229 , NEWBLOCK lbl3
1230 ]
1231 return $ code `appOL` cntlz
1232 else do
1233 let format = if width == W64 then II64 else II32
1234 (s_reg, s_code) <- getSomeReg src
1235 (pre, reg , post) <-
1236 case width of
1237 W64 -> return (nilOL, s_reg, nilOL)
1238 W32 -> return (nilOL, s_reg, nilOL)
1239 W16 -> do
1240 reg_tmp <- getNewRegNat format
1241 return
1242 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
1243 , reg_tmp
1244 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
1245 )
1246 W8 -> do
1247 reg_tmp <- getNewRegNat format
1248 return
1249 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
1250 , reg_tmp
1251 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
1252 )
1253 _ -> panic "genCall: Clz wrong format"
1254 let cntlz = unitOL (CNTLZ format reg_dst reg)
1255 return $ s_code `appOL` pre `appOL` cntlz `appOL` post
1256
1257 genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
1258 = do dflags <- getDynFlags
1259 let platform = targetPlatform dflags
1260 reg_dst = getRegisterReg platform (CmmLocal dst)
1261 if target32Bit platform && width == W64
1262 then do
1263 let format = II32
1264 ChildCode64 code vr_lo <- iselExpr64 src
1265 lbl1 <- getBlockIdNat
1266 lbl2 <- getBlockIdNat
1267 lbl3 <- getBlockIdNat
1268 x' <- getNewRegNat format
1269 x'' <- getNewRegNat format
1270 r' <- getNewRegNat format
1271 cnttzlo <- cnttz format reg_dst vr_lo
1272 let vr_hi = getHiVRegFromLo vr_lo
1273 cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
1274 , BCC NE lbl2 Nothing
1275 , BCC ALWAYS lbl1 Nothing
1276
1277 , NEWBLOCK lbl1
1278 , ADD x' vr_hi (RIImm (ImmInt (-1)))
1279 , ANDC x'' x' vr_hi
1280 , CNTLZ format r' x''
1281 -- 32 + (32 - clz(x''))
1282 , SUBFC reg_dst r' (RIImm (ImmInt 64))
1283 , BCC ALWAYS lbl3 Nothing
1284
1285 , NEWBLOCK lbl2
1286 ]
1287 `appOL` cnttzlo `appOL`
1288 toOL [ BCC ALWAYS lbl3 Nothing
1289
1290 , NEWBLOCK lbl3
1291 ]
1292 return $ code `appOL` cnttz64
1293 else do
1294 let format = if width == W64 then II64 else II32
1295 (s_reg, s_code) <- getSomeReg src
1296 (reg_ctz, pre_code) <-
1297 case width of
1298 W64 -> return (s_reg, nilOL)
1299 W32 -> return (s_reg, nilOL)
1300 W16 -> do
1301 reg_tmp <- getNewRegNat format
1302 return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
1303 W8 -> do
1304 reg_tmp <- getNewRegNat format
1305 return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
1306 _ -> panic "genCall: Ctz wrong format"
1307 ctz_code <- cnttz format reg_dst reg_ctz
1308 return $ s_code `appOL` pre_code `appOL` ctz_code
1309 where
1310 -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
1311 -- see Henry S. Warren, Hacker's Delight, p 107
1312 cnttz format dst src = do
1313 let format_bits = 8 * formatInBytes format
1314 x' <- getNewRegNat format
1315 x'' <- getNewRegNat format
1316 r' <- getNewRegNat format
1317 return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
1318 , ANDC x'' x' src
1319 , CNTLZ format r' x''
1320 , SUBFC dst r' (RIImm (ImmInt (format_bits)))
1321 ]
1322
1323 genCCall target dest_regs argsAndHints
1324 = do dflags <- getDynFlags
1325 let platform = targetPlatform dflags
1326 case target of
1327 PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width
1328 dest_regs argsAndHints
1329 PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width
1330 dest_regs argsAndHints
1331 PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
1332 argsAndHints
1333 PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
1334 argsAndHints
1335 PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
1336 PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
1337 PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
1338 dest_regs argsAndHints
1339 PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
1340 dest_regs argsAndHints
1341 PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
1342 PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
1343 _ -> genCCall' dflags (platformToGCP platform)
1344 target dest_regs argsAndHints
1345 where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
1346 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1347 reg_r = getRegisterReg platform (CmmLocal res_r)
1348 fmt = intFormat width
1349 (x_reg, x_code) <- getSomeReg arg_x
1350 (y_reg, y_code) <- getSomeReg arg_y
1351 return $ y_code `appOL` x_code
1352 `appOL` toOL [ DIV fmt signed reg_q x_reg y_reg
1353 , MULL fmt reg_r reg_q (RIReg y_reg)
1354 , SUBF reg_r reg_r x_reg
1355 ]
1356
1357 divOp1 _ _ _ _ _
1358 = panic "genCCall: Wrong number of arguments for divOp1"
1359 divOp2 platform width [res_q, res_r]
1360 [arg_x_high, arg_x_low, arg_y]
1361 = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1362 reg_r = getRegisterReg platform (CmmLocal res_r)
1363 fmt = intFormat width
1364 half = 4 * (formatInBytes fmt)
1365 (xh_reg, xh_code) <- getSomeReg arg_x_high
1366 (xl_reg, xl_code) <- getSomeReg arg_x_low
1367 (y_reg, y_code) <- getSomeReg arg_y
1368 s <- getNewRegNat fmt
1369 b <- getNewRegNat fmt
1370 v <- getNewRegNat fmt
1371 vn1 <- getNewRegNat fmt
1372 vn0 <- getNewRegNat fmt
1373 un32 <- getNewRegNat fmt
1374 tmp <- getNewRegNat fmt
1375 un10 <- getNewRegNat fmt
1376 un1 <- getNewRegNat fmt
1377 un0 <- getNewRegNat fmt
1378 q1 <- getNewRegNat fmt
1379 rhat <- getNewRegNat fmt
1380 tmp1 <- getNewRegNat fmt
1381 q0 <- getNewRegNat fmt
1382 un21 <- getNewRegNat fmt
1383 again1 <- getBlockIdNat
1384 no1 <- getBlockIdNat
1385 then1 <- getBlockIdNat
1386 endif1 <- getBlockIdNat
1387 again2 <- getBlockIdNat
1388 no2 <- getBlockIdNat
1389 then2 <- getBlockIdNat
1390 endif2 <- getBlockIdNat
1391 return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
1392 -- see Hacker's Delight p 196 Figure 9-3
1393 toOL [ -- b = 2 ^ (bits_in_word / 2)
1394 LI b (ImmInt 1)
1395 , SL fmt b b (RIImm (ImmInt half))
1396 -- s = clz(y)
1397 , CNTLZ fmt s y_reg
1398 -- v = y << s
1399 , SL fmt v y_reg (RIReg s)
1400 -- vn1 = upper half of v
1401 , SR fmt vn1 v (RIImm (ImmInt half))
1402 -- vn0 = lower half of v
1403 , CLRLI fmt vn0 v half
1404 -- un32 = (u1 << s)
1405 -- | (u0 >> (bits_in_word - s))
1406 , SL fmt un32 xh_reg (RIReg s)
1407 , SUBFC tmp s
1408 (RIImm (ImmInt (8 * formatInBytes fmt)))
1409 , SR fmt tmp xl_reg (RIReg tmp)
1410 , OR un32 un32 (RIReg tmp)
1411 -- un10 = u0 << s
1412 , SL fmt un10 xl_reg (RIReg s)
1413 -- un1 = upper half of un10
1414 , SR fmt un1 un10 (RIImm (ImmInt half))
1415 -- un0 = lower half of un10
1416 , CLRLI fmt un0 un10 half
1417 -- q1 = un32/vn1
1418 , DIV fmt False q1 un32 vn1
1419 -- rhat = un32 - q1*vn1
1420 , MULL fmt tmp q1 (RIReg vn1)
1421 , SUBF rhat tmp un32
1422 , BCC ALWAYS again1 Nothing
1423
1424 , NEWBLOCK again1
1425 -- if (q1 >= b || q1*vn0 > b*rhat + un1)
1426 , CMPL fmt q1 (RIReg b)
1427 , BCC GEU then1 Nothing
1428 , BCC ALWAYS no1 Nothing
1429
1430 , NEWBLOCK no1
1431 , MULL fmt tmp q1 (RIReg vn0)
1432 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1433 , ADD tmp1 tmp1 (RIReg un1)
1434 , CMPL fmt tmp (RIReg tmp1)
1435 , BCC LEU endif1 Nothing
1436 , BCC ALWAYS then1 Nothing
1437
1438 , NEWBLOCK then1
1439 -- q1 = q1 - 1
1440 , ADD q1 q1 (RIImm (ImmInt (-1)))
1441 -- rhat = rhat + vn1
1442 , ADD rhat rhat (RIReg vn1)
1443 -- if (rhat < b) goto again1
1444 , CMPL fmt rhat (RIReg b)
1445 , BCC LTT again1 Nothing
1446 , BCC ALWAYS endif1 Nothing
1447
1448 , NEWBLOCK endif1
1449 -- un21 = un32*b + un1 - q1*v
1450 , SL fmt un21 un32 (RIImm (ImmInt half))
1451 , ADD un21 un21 (RIReg un1)
1452 , MULL fmt tmp q1 (RIReg v)
1453 , SUBF un21 tmp un21
1454 -- compute second quotient digit
1455 -- q0 = un21/vn1
1456 , DIV fmt False q0 un21 vn1
1457 -- rhat = un21- q0*vn1
1458 , MULL fmt tmp q0 (RIReg vn1)
1459 , SUBF rhat tmp un21
1460 , BCC ALWAYS again2 Nothing
1461
1462 , NEWBLOCK again2
1463 -- if (q0>b || q0*vn0 > b*rhat + un0)
1464 , CMPL fmt q0 (RIReg b)
1465 , BCC GEU then2 Nothing
1466 , BCC ALWAYS no2 Nothing
1467
1468 , NEWBLOCK no2
1469 , MULL fmt tmp q0 (RIReg vn0)
1470 , SL fmt tmp1 rhat (RIImm (ImmInt half))
1471 , ADD tmp1 tmp1 (RIReg un0)
1472 , CMPL fmt tmp (RIReg tmp1)
1473 , BCC LEU endif2 Nothing
1474 , BCC ALWAYS then2 Nothing
1475
1476 , NEWBLOCK then2
1477 -- q0 = q0 - 1
1478 , ADD q0 q0 (RIImm (ImmInt (-1)))
1479 -- rhat = rhat + vn1
1480 , ADD rhat rhat (RIReg vn1)
1481 -- if (rhat<b) goto again2
1482 , CMPL fmt rhat (RIReg b)
1483 , BCC LTT again2 Nothing
1484 , BCC ALWAYS endif2 Nothing
1485
1486 , NEWBLOCK endif2
1487 -- compute remainder
1488 -- r = (un21*b + un0 - q0*v) >> s
1489 , SL fmt reg_r un21 (RIImm (ImmInt half))
1490 , ADD reg_r reg_r (RIReg un0)
1491 , MULL fmt tmp q0 (RIReg v)
1492 , SUBF reg_r tmp reg_r
1493 , SR fmt reg_r reg_r (RIReg s)
1494 -- compute quotient
1495 -- q = q1*b + q0
1496 , SL fmt reg_q q1 (RIImm (ImmInt half))
1497 , ADD reg_q reg_q (RIReg q0)
1498 ]
1499 divOp2 _ _ _ _
1500 = panic "genCCall: Wrong number of arguments for divOp2"
1501 multOp2 platform width [res_h, res_l] [arg_x, arg_y]
1502 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1503 reg_l = getRegisterReg platform (CmmLocal res_l)
1504 fmt = intFormat width
1505 (x_reg, x_code) <- getSomeReg arg_x
1506 (y_reg, y_code) <- getSomeReg arg_y
1507 return $ y_code `appOL` x_code
1508 `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
1509 , MULHU fmt reg_h x_reg y_reg
1510 ]
1511 multOp2 _ _ _ _
1512 = panic "genCall: Wrong number of arguments for multOp2"
1513 add2Op platform [res_h, res_l] [arg_x, arg_y]
1514 = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1515 reg_l = getRegisterReg platform (CmmLocal res_l)
1516 (x_reg, x_code) <- getSomeReg arg_x
1517 (y_reg, y_code) <- getSomeReg arg_y
1518 return $ y_code `appOL` x_code
1519 `appOL` toOL [ LI reg_h (ImmInt 0)
1520 , ADDC reg_l x_reg y_reg
1521 , ADDZE reg_h reg_h
1522 ]
1523 add2Op _ _ _
1524 = panic "genCCall: Wrong number of arguments/results for add2"
1525
1526 -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
1527 -- which is 0 for borrow and 1 otherwise. We need 1 and 0
1528 -- so xor with 1.
1529 subcOp platform [res_r, res_c] [arg_x, arg_y]
1530 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1531 reg_c = getRegisterReg platform (CmmLocal res_c)
1532 (x_reg, x_code) <- getSomeReg arg_x
1533 (y_reg, y_code) <- getSomeReg arg_y
1534 return $ y_code `appOL` x_code
1535 `appOL` toOL [ LI reg_c (ImmInt 0)
1536 , SUBFC reg_r y_reg (RIReg x_reg)
1537 , ADDZE reg_c reg_c
1538 , XOR reg_c reg_c (RIImm (ImmInt 1))
1539 ]
1540 subcOp _ _ _
1541 = panic "genCCall: Wrong number of arguments/results for subc"
1542 addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
1543 = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1544 reg_c = getRegisterReg platform (CmmLocal res_c)
1545 (x_reg, x_code) <- getSomeReg arg_x
1546 (y_reg, y_code) <- getSomeReg arg_y
1547 return $ y_code `appOL` x_code
1548 `appOL` toOL [ instr reg_r y_reg x_reg,
1549 -- SUBFO argument order reversed!
1550 MFOV (intFormat width) reg_c
1551 ]
1552 addSubCOp _ _ _ _ _
1553 = panic "genCall: Wrong number of arguments/results for addC"
1554 fabs platform [res] [arg]
1555 = do let res_r = getRegisterReg platform (CmmLocal res)
1556 (arg_reg, arg_code) <- getSomeReg arg
1557 return $ arg_code `snocOL` FABS res_r arg_reg
1558 fabs _ _ _
1559 = panic "genCall: Wrong number of arguments/results for fabs"
1560
1561 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
1562 data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF !Int | GCPAIX
1563
1564 platformToGCP :: Platform -> GenCCallPlatform
1565 platformToGCP platform = case platformOS platform of
1566 OSLinux -> case platformArch platform of
1567 ArchPPC -> GCPLinux
1568 ArchPPC_64 ELF_V1 -> GCPLinux64ELF 1
1569 ArchPPC_64 ELF_V2 -> GCPLinux64ELF 2
1570 _ -> panic "PPC.CodeGen.platformToGCP: Unknown Linux"
1571 OSAIX -> GCPAIX
1572 OSDarwin -> GCPDarwin
1573 _ -> panic "PPC.CodeGen.platformToGCP: not defined for this OS"
1574
1575
1576 genCCall'
1577 :: DynFlags
1578 -> GenCCallPlatform
1579 -> ForeignTarget -- function to call
1580 -> [CmmFormal] -- where to put the result
1581 -> [CmmActual] -- arguments (of mixed type)
1582 -> NatM InstrBlock
1583
1584 {-
1585 The PowerPC calling convention for Darwin/Mac OS X
1586 is described in Apple's document
1587 "Inside Mac OS X - Mach-O Runtime Architecture".
1588
1589 PowerPC Linux uses the System V Release 4 Calling Convention
1590 for PowerPC. It is described in the
1591 "System V Application Binary Interface PowerPC Processor Supplement".
1592
1593 Both conventions are similar:
1594 Parameters may be passed in general-purpose registers starting at r3, in
1595 floating point registers starting at f1, or on the stack.
1596
1597 But there are substantial differences:
1598 * The number of registers used for parameter passing and the exact set of
1599 nonvolatile registers differs (see MachRegs.hs).
1600 * On Darwin, stack space is always reserved for parameters, even if they are
1601 passed in registers. The called routine may choose to save parameters from
1602 registers to the corresponding space on the stack.
1603 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
1604 parameter is passed in an FPR.
1605 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1606 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1607 Darwin just treats an I64 like two separate II32s (high word first).
1608 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1609 4-byte aligned like everything else on Darwin.
1610 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1611 PowerPC Linux does not agree, so neither do we.
1612
1613 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1614 64-bit PowerPC. It is specified in
1615 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9"
1616 (PPC64 ELF v1.9).
1617 PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit
1618 ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement"
1619 (PPC64 ELF v2).
1620 AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian
1621 32-Bit Hardware Implementation"
1622
1623 According to all conventions, the parameter area should be part of the
1624 caller's stack frame, allocated in the caller's prologue code (large enough
1625 to hold the parameter lists for all called routines). The NCG already
1626 uses the stack for register spilling, leaving 64 bytes free at the top.
1627 If we need a larger parameter area than that, we just allocate a new stack
1628 frame just before ccalling.
1629 -}
1630
1631
1632 genCCall' dflags gcp target dest_regs args
1633 = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
1634 -- we rely on argument promotion in the codeGen
1635 do
1636 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1637 (zip args argReps)
1638 allArgRegs
1639 (allFPArgRegs platform)
1640 initialStackOffset
1641 (toOL []) []
1642
1643 (labelOrExpr, reduceToFF32) <- case target of
1644 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1645 uses_pic_base_implicitly
1646 return (Left lbl, False)
1647 ForeignTarget expr _ -> do
1648 uses_pic_base_implicitly
1649 return (Right expr, False)
1650 PrimTarget mop -> outOfLineMachOp mop
1651
1652 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1653 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
1654
1655 case labelOrExpr of
1656 Left lbl -> do -- the linker does all the work for us
1657 return ( codeBefore
1658 `snocOL` BL lbl usedRegs
1659 `appOL` maybeNOP -- some ABI require a NOP after BL
1660 `appOL` codeAfter)
1661 Right dyn -> do -- implement call through function pointer
1662 (dynReg, dynCode) <- getSomeReg dyn
1663 case gcp of
1664 GCPLinux64ELF 1 -> return ( dynCode
1665 `appOL` codeBefore
1666 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40))
1667 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1668 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1669 `snocOL` MTCTR r11
1670 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1671 `snocOL` BCTRL usedRegs
1672 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40))
1673 `appOL` codeAfter)
1674 GCPLinux64ELF 2 -> return ( dynCode
1675 `appOL` codeBefore
1676 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24))
1677 `snocOL` MR r12 dynReg
1678 `snocOL` MTCTR r12
1679 `snocOL` BCTRL usedRegs
1680 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24))
1681 `appOL` codeAfter)
1682 GCPAIX -> return ( dynCode
1683 -- AIX/XCOFF follows the PowerOPEN ABI
1684 -- which is quite similiar to LinuxPPC64/ELFv1
1685 `appOL` codeBefore
1686 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20))
1687 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1688 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1689 `snocOL` MTCTR r11
1690 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1691 `snocOL` BCTRL usedRegs
1692 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20))
1693 `appOL` codeAfter)
1694 _ -> return ( dynCode
1695 `snocOL` MTCTR dynReg
1696 `appOL` codeBefore
1697 `snocOL` BCTRL usedRegs
1698 `appOL` codeAfter)
1699 where
1700 platform = targetPlatform dflags
1701
1702 uses_pic_base_implicitly = do
1703 -- See Note [implicit register in PPC PIC code]
1704 -- on why we claim to use PIC register here
1705 when (positionIndependent dflags && target32Bit platform) $ do
1706 _ <- getPicBaseNat $ archWordFormat True
1707 return ()
1708
1709 initialStackOffset = case gcp of
1710 GCPAIX -> 24
1711 GCPDarwin -> 24
1712 GCPLinux -> 8
1713 GCPLinux64ELF 1 -> 48
1714 GCPLinux64ELF 2 -> 32
1715 _ -> panic "genCall': unknown calling convention"
1716 -- size of linkage area + size of arguments, in bytes
1717 stackDelta finalStack = case gcp of
1718 GCPAIX ->
1719 roundTo 16 $ (24 +) $ max 32 $ sum $
1720 map (widthInBytes . typeWidth) argReps
1721 GCPDarwin ->
1722 roundTo 16 $ (24 +) $ max 32 $ sum $
1723 map (widthInBytes . typeWidth) argReps
1724 GCPLinux -> roundTo 16 finalStack
1725 GCPLinux64ELF 1 ->
1726 roundTo 16 $ (48 +) $ max 64 $ sum $
1727 map (roundTo 8 . widthInBytes . typeWidth)
1728 argReps
1729 GCPLinux64ELF 2 ->
1730 roundTo 16 $ (32 +) $ max 64 $ sum $
1731 map (roundTo 8 . widthInBytes . typeWidth)
1732 argReps
1733 _ -> panic "genCall': unknown calling conv."
1734
1735 argReps = map (cmmExprType dflags) args
1736
1737 roundTo a x | x `mod` a == 0 = x
1738 | otherwise = x + a - (x `mod` a)
1739
1740 spFormat = if target32Bit platform then II32 else II64
1741
1742 -- TODO: Do not create a new stack frame if delta is too large.
1743 move_sp_down finalStack
1744 | delta > stackFrameHeaderSize dflags =
1745 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1746 DELTA (-delta)]
1747 | otherwise = nilOL
1748 where delta = stackDelta finalStack
1749 move_sp_up finalStack
1750 | delta > stackFrameHeaderSize dflags =
1751 toOL [ADD sp sp (RIImm (ImmInt delta)),
1752 DELTA 0]
1753 | otherwise = nilOL
1754 where delta = stackDelta finalStack
1755
1756 -- A NOP instruction is required after a call (bl instruction)
1757 -- on AIX and 64-Bit Linux.
1758 -- If the call is to a function with a different TOC (r2) the
1759 -- link editor replaces the NOP instruction with a load of the TOC
1760 -- from the stack to restore the TOC.
1761 maybeNOP = case gcp of
1762 -- See Section 3.9.4 of OpenPower ABI
1763 GCPAIX -> unitOL NOP
1764 -- See Section 3.5.11 of PPC64 ELF v1.9
1765 GCPLinux64ELF 1 -> unitOL NOP
1766 -- See Section 2.3.6 of PPC64 ELF v2
1767 GCPLinux64ELF 2 -> unitOL NOP
1768 _ -> nilOL
1769
1770 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1771 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
1772 accumCode accumUsed | isWord64 arg_ty
1773 && target32Bit (targetPlatform dflags) =
1774 do
1775 ChildCode64 code vr_lo <- iselExpr64 arg
1776 let vr_hi = getHiVRegFromLo vr_lo
1777
1778 case gcp of
1779 GCPAIX -> -- same as for Darwin
1780 do let storeWord vr (gpr:_) _ = MR gpr vr
1781 storeWord vr [] offset
1782 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1783 passArguments args
1784 (drop 2 gprs)
1785 fprs
1786 (stackOffset+8)
1787 (accumCode `appOL` code
1788 `snocOL` storeWord vr_hi gprs stackOffset
1789 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1790 ((take 2 gprs) ++ accumUsed)
1791 GCPDarwin ->
1792 do let storeWord vr (gpr:_) _ = MR gpr vr
1793 storeWord vr [] offset
1794 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1795 passArguments args
1796 (drop 2 gprs)
1797 fprs
1798 (stackOffset+8)
1799 (accumCode `appOL` code
1800 `snocOL` storeWord vr_hi gprs stackOffset
1801 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1802 ((take 2 gprs) ++ accumUsed)
1803 GCPLinux ->
1804 do let stackOffset' = roundTo 8 stackOffset
1805 stackCode = accumCode `appOL` code
1806 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1807 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1808 regCode hireg loreg =
1809 accumCode `appOL` code
1810 `snocOL` MR hireg vr_hi
1811 `snocOL` MR loreg vr_lo
1812
1813 case gprs of
1814 hireg : loreg : regs | even (length gprs) ->
1815 passArguments args regs fprs stackOffset
1816 (regCode hireg loreg) (hireg : loreg : accumUsed)
1817 _skipped : hireg : loreg : regs ->
1818 passArguments args regs fprs stackOffset
1819 (regCode hireg loreg) (hireg : loreg : accumUsed)
1820 _ -> -- only one or no regs left
1821 passArguments args [] fprs (stackOffset'+8)
1822 stackCode accumUsed
1823 GCPLinux64ELF _ -> panic "passArguments: 32 bit code"
1824
1825 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1826 | reg : _ <- regs = do
1827 register <- getRegister arg
1828 let code = case register of
1829 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1830 Any _ acode -> acode reg
1831 stackOffsetRes = case gcp of
1832 -- The Darwin ABI requires that we reserve
1833 -- stack slots for register parameters
1834 GCPDarwin -> stackOffset + stackBytes
1835 -- ... so does the PowerOpen ABI.
1836 GCPAIX -> stackOffset + stackBytes
1837 -- ... the SysV ABI 32-bit doesn't.
1838 GCPLinux -> stackOffset
1839 -- ... but SysV ABI 64-bit does.
1840 GCPLinux64ELF _ -> stackOffset + stackBytes
1841 passArguments args
1842 (drop nGprs gprs)
1843 (drop nFprs fprs)
1844 stackOffsetRes
1845 (accumCode `appOL` code)
1846 (reg : accumUsed)
1847 | otherwise = do
1848 (vr, code) <- getSomeReg arg
1849 passArguments args
1850 (drop nGprs gprs)
1851 (drop nFprs fprs)
1852 (stackOffset' + stackBytes)
1853 (accumCode `appOL` code `snocOL` ST (cmmTypeFormat rep) vr stackSlot)
1854 accumUsed
1855 where
1856 stackOffset' = case gcp of
1857 GCPDarwin ->
1858 -- stackOffset is at least 4-byte aligned
1859 -- The Darwin ABI is happy with that.
1860 stackOffset
1861 GCPAIX ->
1862 -- The 32bit PowerOPEN ABI is happy with
1863 -- 32bit-alignment as well...
1864 stackOffset
1865 GCPLinux
1866 -- ... the SysV ABI requires 8-byte
1867 -- alignment for doubles.
1868 | isFloatType rep && typeWidth rep == W64 ->
1869 roundTo 8 stackOffset
1870 | otherwise ->
1871 stackOffset
1872 GCPLinux64ELF _ ->
1873 -- Everything on the stack is mapped to
1874 -- 8-byte aligned doublewords
1875 stackOffset
1876 stackOffset''
1877 | isFloatType rep && typeWidth rep == W32 =
1878 case gcp of
1879 -- The ELF v1 ABI Section 3.2.3 requires:
1880 -- "Single precision floating point values
1881 -- are mapped to the second word in a single
1882 -- doubleword"
1883 GCPLinux64ELF 1 -> stackOffset' + 4
1884 _ -> stackOffset'
1885 | otherwise = stackOffset'
1886
1887 stackSlot = AddrRegImm sp (ImmInt stackOffset'')
1888 (nGprs, nFprs, stackBytes, regs)
1889 = case gcp of
1890 GCPAIX ->
1891 case cmmTypeFormat rep of
1892 II8 -> (1, 0, 4, gprs)
1893 II16 -> (1, 0, 4, gprs)
1894 II32 -> (1, 0, 4, gprs)
1895 -- The PowerOpen ABI requires that we skip a
1896 -- corresponding number of GPRs when we use
1897 -- the FPRs.
1898 --
1899 -- E.g. for a `double` two GPRs are skipped,
1900 -- whereas for a `float` one GPR is skipped
1901 -- when parameters are assigned to
1902 -- registers.
1903 --
1904 -- The PowerOpen ABI specification can be found at
1905 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1906 FF32 -> (1, 1, 4, fprs)
1907 FF64 -> (2, 1, 8, fprs)
1908 II64 -> panic "genCCall' passArguments II64"
1909 FF80 -> panic "genCCall' passArguments FF80"
1910 GCPDarwin ->
1911 case cmmTypeFormat rep of
1912 II8 -> (1, 0, 4, gprs)
1913 II16 -> (1, 0, 4, gprs)
1914 II32 -> (1, 0, 4, gprs)
1915 -- The Darwin ABI requires that we skip a
1916 -- corresponding number of GPRs when we use
1917 -- the FPRs.
1918 FF32 -> (1, 1, 4, fprs)
1919 FF64 -> (2, 1, 8, fprs)
1920 II64 -> panic "genCCall' passArguments II64"
1921 FF80 -> panic "genCCall' passArguments FF80"
1922 GCPLinux ->
1923 case cmmTypeFormat rep of
1924 II8 -> (1, 0, 4, gprs)
1925 II16 -> (1, 0, 4, gprs)
1926 II32 -> (1, 0, 4, gprs)
1927 -- ... the SysV ABI doesn't.
1928 FF32 -> (0, 1, 4, fprs)
1929 FF64 -> (0, 1, 8, fprs)
1930 II64 -> panic "genCCall' passArguments II64"
1931 FF80 -> panic "genCCall' passArguments FF80"
1932 GCPLinux64ELF _ ->
1933 case cmmTypeFormat rep of
1934 II8 -> (1, 0, 8, gprs)
1935 II16 -> (1, 0, 8, gprs)
1936 II32 -> (1, 0, 8, gprs)
1937 II64 -> (1, 0, 8, gprs)
1938 -- The ELFv1 ABI requires that we skip a
1939 -- corresponding number of GPRs when we use
1940 -- the FPRs.
1941 FF32 -> (1, 1, 8, fprs)
1942 FF64 -> (1, 1, 8, fprs)
1943 FF80 -> panic "genCCall' passArguments FF80"
1944
1945 moveResult reduceToFF32 =
1946 case dest_regs of
1947 [] -> nilOL
1948 [dest]
1949 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1950 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1951 | isWord64 rep && target32Bit (targetPlatform dflags)
1952 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1953 MR r_dest r4]
1954 | otherwise -> unitOL (MR r_dest r3)
1955 where rep = cmmRegType dflags (CmmLocal dest)
1956 r_dest = getRegisterReg platform (CmmLocal dest)
1957 _ -> panic "genCCall' moveResult: Bad dest_regs"
1958
1959 outOfLineMachOp mop =
1960 do
1961 dflags <- getDynFlags
1962 mopExpr <- cmmMakeDynamicReference dflags CallReference $
1963 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1964 let mopLabelOrExpr = case mopExpr of
1965 CmmLit (CmmLabel lbl) -> Left lbl
1966 _ -> Right mopExpr
1967 return (mopLabelOrExpr, reduce)
1968 where
1969 (functionName, reduce) = case mop of
1970 MO_F32_Exp -> (fsLit "exp", True)
1971 MO_F32_Log -> (fsLit "log", True)
1972 MO_F32_Sqrt -> (fsLit "sqrt", True)
1973 MO_F32_Fabs -> unsupported
1974
1975 MO_F32_Sin -> (fsLit "sin", True)
1976 MO_F32_Cos -> (fsLit "cos", True)
1977 MO_F32_Tan -> (fsLit "tan", True)
1978
1979 MO_F32_Asin -> (fsLit "asin", True)
1980 MO_F32_Acos -> (fsLit "acos", True)
1981 MO_F32_Atan -> (fsLit "atan", True)
1982
1983 MO_F32_Sinh -> (fsLit "sinh", True)
1984 MO_F32_Cosh -> (fsLit "cosh", True)
1985 MO_F32_Tanh -> (fsLit "tanh", True)
1986 MO_F32_Pwr -> (fsLit "pow", True)
1987
1988 MO_F64_Exp -> (fsLit "exp", False)
1989 MO_F64_Log -> (fsLit "log", False)
1990 MO_F64_Sqrt -> (fsLit "sqrt", False)
1991 MO_F64_Fabs -> unsupported
1992
1993 MO_F64_Sin -> (fsLit "sin", False)
1994 MO_F64_Cos -> (fsLit "cos", False)
1995 MO_F64_Tan -> (fsLit "tan", False)
1996
1997 MO_F64_Asin -> (fsLit "asin", False)
1998 MO_F64_Acos -> (fsLit "acos", False)
1999 MO_F64_Atan -> (fsLit "atan", False)
2000
2001 MO_F64_Sinh -> (fsLit "sinh", False)
2002 MO_F64_Cosh -> (fsLit "cosh", False)
2003 MO_F64_Tanh -> (fsLit "tanh", False)
2004 MO_F64_Pwr -> (fsLit "pow", False)
2005
2006 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
2007
2008 MO_Memcpy _ -> (fsLit "memcpy", False)
2009 MO_Memset _ -> (fsLit "memset", False)
2010 MO_Memmove _ -> (fsLit "memmove", False)
2011 MO_Memcmp _ -> (fsLit "memcmp", False)
2012
2013 MO_BSwap w -> (fsLit $ bSwapLabel w, False)
2014 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
2015 MO_Pdep w -> (fsLit $ pdepLabel w, False)
2016 MO_Pext w -> (fsLit $ pextLabel w, False)
2017 MO_Clz _ -> unsupported
2018 MO_Ctz _ -> unsupported
2019 MO_AtomicRMW {} -> unsupported
2020 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
2021 MO_AtomicRead _ -> unsupported
2022 MO_AtomicWrite _ -> unsupported
2023
2024 MO_S_QuotRem {} -> unsupported
2025 MO_U_QuotRem {} -> unsupported
2026 MO_U_QuotRem2 {} -> unsupported
2027 MO_Add2 {} -> unsupported
2028 MO_SubWordC {} -> unsupported
2029 MO_AddIntC {} -> unsupported
2030 MO_SubIntC {} -> unsupported
2031 MO_U_Mul2 {} -> unsupported
2032 MO_WriteBarrier -> unsupported
2033 MO_Touch -> unsupported
2034 MO_Prefetch_Data _ -> unsupported
2035 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2036 ++ " not supported")
2037
2038 -- -----------------------------------------------------------------------------
2039 -- Generating a table-branch
2040
2041 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2042 genSwitch dflags expr targets
2043 | OSAIX <- platformOS (targetPlatform dflags)
2044 = do
2045 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2046 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2047 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2048 tmp <- getNewRegNat fmt
2049 lbl <- getNewLabelNat
2050 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2051 (tableReg,t_code) <- getSomeReg $ dynRef
2052 let code = e_code `appOL` t_code `appOL` toOL [
2053 SL fmt tmp reg (RIImm (ImmInt sha)),
2054 LD fmt tmp (AddrRegReg tableReg tmp),
2055 MTCTR tmp,
2056 BCTR ids (Just lbl)
2057 ]
2058 return code
2059
2060 | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags)
2061 = do
2062 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2063 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2064 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2065 tmp <- getNewRegNat fmt
2066 lbl <- getNewLabelNat
2067 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2068 (tableReg,t_code) <- getSomeReg $ dynRef
2069 let code = e_code `appOL` t_code `appOL` toOL [
2070 SL fmt tmp reg (RIImm (ImmInt sha)),
2071 LD fmt tmp (AddrRegReg tableReg tmp),
2072 ADD tmp tmp (RIReg tableReg),
2073 MTCTR tmp,
2074 BCTR ids (Just lbl)
2075 ]
2076 return code
2077 | otherwise
2078 = do
2079 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2080 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2081 sha = if target32Bit $ targetPlatform dflags then 2 else 3
2082 tmp <- getNewRegNat fmt
2083 lbl <- getNewLabelNat
2084 let code = e_code `appOL` toOL [
2085 SL fmt tmp reg (RIImm (ImmInt sha)),
2086 ADDIS tmp tmp (HA (ImmCLbl lbl)),
2087 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
2088 MTCTR tmp,
2089 BCTR ids (Just lbl)
2090 ]
2091 return code
2092 where (offset, ids) = switchTargetsToTable targets
2093
2094 generateJumpTableForInstr :: DynFlags -> Instr
2095 -> Maybe (NatCmmDecl CmmStatics Instr)
2096 generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
2097 let jumpTable
2098 | (positionIndependent dflags)
2099 || (not $ target32Bit $ targetPlatform dflags)
2100 = map jumpTableEntryRel ids
2101 | otherwise = map (jumpTableEntry dflags) ids
2102 where jumpTableEntryRel Nothing
2103 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2104 jumpTableEntryRel (Just blockid)
2105 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2106 where blockLabel = blockLbl blockid
2107 in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
2108 generateJumpTableForInstr _ _ = Nothing
2109
2110 -- -----------------------------------------------------------------------------
2111 -- 'condIntReg' and 'condFltReg': condition codes into registers
2112
2113 -- Turn those condition codes into integers now (when they appear on
2114 -- the right hand side of an assignment).
2115
2116 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2117
2118 condReg :: NatM CondCode -> NatM Register
2119 condReg getCond = do
2120 CondCode _ cond cond_code <- getCond
2121 dflags <- getDynFlags
2122 let
2123 code dst = cond_code
2124 `appOL` negate_code
2125 `appOL` toOL [
2126 MFCR dst,
2127 RLWINM dst dst (bit + 1) 31 31
2128 ]
2129
2130 negate_code | do_negate = unitOL (CRNOR bit bit bit)
2131 | otherwise = nilOL
2132
2133 (bit, do_negate) = case cond of
2134 LTT -> (0, False)
2135 LE -> (1, True)
2136 EQQ -> (2, False)
2137 GE -> (0, True)
2138 GTT -> (1, False)
2139
2140 NE -> (2, True)
2141
2142 LU -> (0, False)
2143 LEU -> (1, True)
2144 GEU -> (0, True)
2145 GU -> (1, False)
2146 _ -> panic "PPC.CodeGen.codeReg: no match"
2147
2148 format = archWordFormat $ target32Bit $ targetPlatform dflags
2149 return (Any format code)
2150
2151 condIntReg cond x y = condReg (condIntCode cond x y)
2152 condFltReg cond x y = condReg (condFltCode cond x y)
2153
2154
2155
2156 -- -----------------------------------------------------------------------------
2157 -- 'trivial*Code': deal with trivial instructions
2158
2159 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2160 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2161 -- Only look for constants on the right hand side, because that's
2162 -- where the generic optimizer will have put them.
2163
2164 -- Similarly, for unary instructions, we don't have to worry about
2165 -- matching an StInt as the argument, because genericOpt will already
2166 -- have handled the constant-folding.
2167
2168
2169
2170 {-
2171 Wolfgang's PowerPC version of The Rules:
2172
2173 A slightly modified version of The Rules to take advantage of the fact
2174 that PowerPC instructions work on all registers and don't implicitly
2175 clobber any fixed registers.
2176
2177 * The only expression for which getRegister returns Fixed is (CmmReg reg).
2178
2179 * If getRegister returns Any, then the code it generates may modify only:
2180 (a) fresh temporaries
2181 (b) the destination register
2182 It may *not* modify global registers, unless the global
2183 register happens to be the destination register.
2184 It may not clobber any other registers. In fact, only ccalls clobber any
2185 fixed registers.
2186 Also, it may not modify the counter register (used by genCCall).
2187
2188 Corollary: If a getRegister for a subexpression returns Fixed, you need
2189 not move it to a fresh temporary before evaluating the next subexpression.
2190 The Fixed register won't be modified.
2191 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
2192
2193 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
2194 the value of the destination register.
2195 -}
2196
2197 trivialCode
2198 :: Width
2199 -> Bool
2200 -> (Reg -> Reg -> RI -> Instr)
2201 -> CmmExpr
2202 -> CmmExpr
2203 -> NatM Register
2204
2205 trivialCode rep signed instr x (CmmLit (CmmInt y _))
2206 | Just imm <- makeImmediate rep signed y
2207 = do
2208 (src1, code1) <- getSomeReg x
2209 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
2210 return (Any (intFormat rep) code)
2211
2212 trivialCode rep _ instr x y = do
2213 (src1, code1) <- getSomeReg x
2214 (src2, code2) <- getSomeReg y
2215 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
2216 return (Any (intFormat rep) code)
2217
2218 shiftMulCode
2219 :: Width
2220 -> Bool
2221 -> (Format-> Reg -> Reg -> RI -> Instr)
2222 -> CmmExpr
2223 -> CmmExpr
2224 -> NatM Register
2225 shiftMulCode width sign instr x (CmmLit (CmmInt y _))
2226 | Just imm <- makeImmediate width sign y
2227 = do
2228 (src1, code1) <- getSomeReg x
2229 let format = intFormat width
2230 let code dst = code1 `snocOL` instr format dst src1 (RIImm imm)
2231 return (Any format code)
2232
2233 shiftMulCode width _ instr x y = do
2234 (src1, code1) <- getSomeReg x
2235 (src2, code2) <- getSomeReg y
2236 let format = intFormat width
2237 let code dst = code1 `appOL` code2 `snocOL` instr format dst src1 (RIReg src2)
2238 return (Any format code)
2239
2240 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
2241 -> CmmExpr -> CmmExpr -> NatM Register
2242 trivialCodeNoImm' format instr x y = do
2243 (src1, code1) <- getSomeReg x
2244 (src2, code2) <- getSomeReg y
2245 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
2246 return (Any format code)
2247
2248 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
2249 -> CmmExpr -> CmmExpr -> NatM Register
2250 trivialCodeNoImm format instr x y = trivialCodeNoImm' format (instr format) x y
2251
2252 trivialCodeNoImmSign :: Format -> Bool
2253 -> (Format -> Bool -> Reg -> Reg -> Reg -> Instr)
2254 -> CmmExpr -> CmmExpr -> NatM Register
2255 trivialCodeNoImmSign format sgn instr x y
2256 = trivialCodeNoImm' format (instr format sgn) x y
2257
2258
2259 trivialUCode
2260 :: Format
2261 -> (Reg -> Reg -> Instr)
2262 -> CmmExpr
2263 -> NatM Register
2264 trivialUCode rep instr x = do
2265 (src, code) <- getSomeReg x
2266 let code' dst = code `snocOL` instr dst src
2267 return (Any rep code')
2268
2269 -- There is no "remainder" instruction on the PPC, so we have to do
2270 -- it the hard way.
2271 -- The "sgn" parameter is the signedness for the division instruction
2272
2273 remainderCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
2274 remainderCode rep sgn x y = do
2275 let fmt = intFormat rep
2276 (src1, code1) <- getSomeReg x
2277 (src2, code2) <- getSomeReg y
2278 let code dst = code1 `appOL` code2 `appOL` toOL [
2279 DIV fmt sgn dst src1 src2,
2280 MULL fmt dst dst (RIReg src2),
2281 SUBF dst dst src1
2282 ]
2283 return (Any (intFormat rep) code)
2284
2285 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2286 coerceInt2FP fromRep toRep x = do
2287 dflags <- getDynFlags
2288 let arch = platformArch $ targetPlatform dflags
2289 coerceInt2FP' arch fromRep toRep x
2290
2291 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2292 coerceInt2FP' ArchPPC fromRep toRep x = do
2293 (src, code) <- getSomeReg x
2294 lbl <- getNewLabelNat
2295 itmp <- getNewRegNat II32
2296 ftmp <- getNewRegNat FF64
2297 dflags <- getDynFlags
2298 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2299 Amode addr addr_code <- getAmode D dynRef
2300 let
2301 code' dst = code `appOL` maybe_exts `appOL` toOL [
2302 LDATA (Section ReadOnlyData lbl) $ Statics lbl
2303 [CmmStaticLit (CmmInt 0x43300000 W32),
2304 CmmStaticLit (CmmInt 0x80000000 W32)],
2305 XORIS itmp src (ImmInt 0x8000),
2306 ST II32 itmp (spRel dflags 3),
2307 LIS itmp (ImmInt 0x4330),
2308 ST II32 itmp (spRel dflags 2),
2309 LD FF64 ftmp (spRel dflags 2)
2310 ] `appOL` addr_code `appOL` toOL [
2311 LD FF64 dst addr,
2312 FSUB FF64 dst ftmp dst
2313 ] `appOL` maybe_frsp dst
2314
2315 maybe_exts = case fromRep of
2316 W8 -> unitOL $ EXTS II8 src src
2317 W16 -> unitOL $ EXTS II16 src src
2318 W32 -> nilOL
2319 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2320
2321 maybe_frsp dst
2322 = case toRep of
2323 W32 -> unitOL $ FRSP dst dst
2324 W64 -> nilOL
2325 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2326
2327 return (Any (floatFormat toRep) code')
2328
2329 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
2330 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
2331 -- set right before a call and restored right after return from the call.
2332 -- So it is fine.
2333 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
2334 (src, code) <- getSomeReg x
2335 dflags <- getDynFlags
2336 let
2337 code' dst = code `appOL` maybe_exts `appOL` toOL [
2338 ST II64 src (spRel dflags 3),
2339 LD FF64 dst (spRel dflags 3),
2340 FCFID dst dst
2341 ] `appOL` maybe_frsp dst
2342
2343 maybe_exts = case fromRep of
2344 W8 -> unitOL $ EXTS II8 src src
2345 W16 -> unitOL $ EXTS II16 src src
2346 W32 -> unitOL $ EXTS II32 src src
2347 W64 -> nilOL
2348 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2349
2350 maybe_frsp dst
2351 = case toRep of
2352 W32 -> unitOL $ FRSP dst dst
2353 W64 -> nilOL
2354 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
2355
2356 return (Any (floatFormat toRep) code')
2357
2358 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
2359
2360
2361 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2362 coerceFP2Int fromRep toRep x = do
2363 dflags <- getDynFlags
2364 let arch = platformArch $ targetPlatform dflags
2365 coerceFP2Int' arch fromRep toRep x
2366
2367 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2368 coerceFP2Int' ArchPPC _ toRep x = do
2369 dflags <- getDynFlags
2370 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
2371 (src, code) <- getSomeReg x
2372 tmp <- getNewRegNat FF64
2373 let
2374 code' dst = code `appOL` toOL [
2375 -- convert to int in FP reg
2376 FCTIWZ tmp src,
2377 -- store value (64bit) from FP to stack
2378 ST FF64 tmp (spRel dflags 2),
2379 -- read low word of value (high word is undefined)
2380 LD II32 dst (spRel dflags 3)]
2381 return (Any (intFormat toRep) code')
2382
2383 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
2384 dflags <- getDynFlags
2385 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
2386 (src, code) <- getSomeReg x
2387 tmp <- getNewRegNat FF64
2388 let
2389 code' dst = code `appOL` toOL [
2390 -- convert to int in FP reg
2391 FCTIDZ tmp src,
2392 -- store value (64bit) from FP to compiler word on stack
2393 ST FF64 tmp (spRel dflags 3),
2394 LD II64 dst (spRel dflags 3)]
2395 return (Any (intFormat toRep) code')
2396
2397 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
2398
2399 -- Note [.LCTOC1 in PPC PIC code]
2400 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
2401 -- to make the most of the PPC's 16-bit displacements.
2402 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
2403 -- first element will have '-32768' offset against .LCTOC1.
2404
2405 -- Note [implicit register in PPC PIC code]
2406 -- PPC generates calls by labels in assembly
2407 -- in form of:
2408 -- bl puts+32768@plt
2409 -- in this form it's not seen directly (by GHC NCG)
2410 -- that r30 (PicBaseReg) is used,
2411 -- but r30 is a required part of PLT code setup:
2412 -- puts+32768@plt:
2413 -- lwz r11,-30484(r30) ; offset in .LCTOC1
2414 -- mtctr r11
2415 -- bctr