Add NCG support for AIX/ppc32
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, (b) the type signatures, and
13 -- (c) the #if blah_TARGET_ARCH} things, the
14 -- structure should not be too overwhelming.
15
16 module PPC.CodeGen (
17 cmmTopCodeGen,
18 generateJumpTableForInstr,
19 InstrBlock
20 )
21
22 where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26 #include "../includes/MachDeps.h"
27
28 -- NCG stuff:
29 import CodeGen.Platform
30 import PPC.Instr
31 import PPC.Cond
32 import PPC.Regs
33 import CPrim
34 import NCGMonad
35 import Instruction
36 import PIC
37 import Format
38 import RegClass
39 import Reg
40 import TargetReg
41 import Platform
42
43 -- Our intermediate code:
44 import BlockId
45 import PprCmm ( pprExpr )
46 import Cmm
47 import CmmUtils
48 import CmmSwitch
49 import CLabel
50 import Hoopl
51
52 -- The rest:
53 import OrdList
54 import Outputable
55 import Unique
56 import DynFlags
57
58 import Control.Monad ( mapAndUnzipM, when )
59 import Data.Bits
60 import Data.Word
61
62 import BasicTypes
63 import FastString
64 import Util
65
66 -- -----------------------------------------------------------------------------
67 -- Top-level of the instruction selector
68
69 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
70 -- They are really trees of insns to facilitate fast appending, where a
71 -- left-to-right traversal (pre-order?) yields the insns in the correct
72 -- order.
73
74 cmmTopCodeGen
75 :: RawCmmDecl
76 -> NatM [NatCmmDecl CmmStatics Instr]
77
78 cmmTopCodeGen (CmmProc info lab live graph) = do
79 let blocks = toBlockListEntryFirst graph
80 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
81 dflags <- getDynFlags
82 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
83 tops = proc : concat statics
84 os = platformOS $ targetPlatform dflags
85 arch = platformArch $ targetPlatform dflags
86 case arch of
87 ArchPPC | os == OSAIX -> return tops
88 | otherwise -> do
89 picBaseMb <- getPicBaseMaybeNat
90 case picBaseMb of
91 Just picBase -> initializePicBase_ppc arch os picBase tops
92 Nothing -> return tops
93 ArchPPC_64 ELF_V1 -> return tops
94 -- generating function descriptor is handled in
95 -- pretty printer
96 ArchPPC_64 ELF_V2 -> return tops
97 -- generating function prologue is handled in
98 -- pretty printer
99 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
100
101 cmmTopCodeGen (CmmData sec dat) = do
102 return [CmmData sec dat] -- no translation, we just use CmmStatic
103
104 basicBlockCodeGen
105 :: Block CmmNode C C
106 -> NatM ( [NatBasicBlock Instr]
107 , [NatCmmDecl CmmStatics Instr])
108
109 basicBlockCodeGen block = do
110 let (_, nodes, tail) = blockSplit block
111 id = entryLabel block
112 stmts = blockToList nodes
113 mid_instrs <- stmtsToInstrs stmts
114 tail_instrs <- stmtToInstrs tail
115 let instrs = mid_instrs `appOL` tail_instrs
116 -- code generation may introduce new basic block boundaries, which
117 -- are indicated by the NEWBLOCK instruction. We must split up the
118 -- instruction stream into basic blocks again. Also, we extract
119 -- LDATAs here too.
120 let
121 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
122
123 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
124 = ([], BasicBlock id instrs : blocks, statics)
125 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
126 = (instrs, blocks, CmmData sec dat:statics)
127 mkBlocks instr (instrs,blocks,statics)
128 = (instr:instrs, blocks, statics)
129 return (BasicBlock id top : other_blocks, statics)
130
131 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
132 stmtsToInstrs stmts
133 = do instrss <- mapM stmtToInstrs stmts
134 return (concatOL instrss)
135
136 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
137 stmtToInstrs stmt = do
138 dflags <- getDynFlags
139 case stmt of
140 CmmComment s -> return (unitOL (COMMENT s))
141 CmmTick {} -> return nilOL
142 CmmUnwind {} -> return nilOL
143
144 CmmAssign reg src
145 | isFloatType ty -> assignReg_FltCode format reg src
146 | target32Bit (targetPlatform dflags) &&
147 isWord64 ty -> assignReg_I64Code reg src
148 | otherwise -> assignReg_IntCode format reg src
149 where ty = cmmRegType dflags reg
150 format = cmmTypeFormat ty
151
152 CmmStore addr src
153 | isFloatType ty -> assignMem_FltCode format addr src
154 | target32Bit (targetPlatform dflags) &&
155 isWord64 ty -> assignMem_I64Code addr src
156 | otherwise -> assignMem_IntCode format addr src
157 where ty = cmmExprType dflags src
158 format = cmmTypeFormat ty
159
160 CmmUnsafeForeignCall target result_regs args
161 -> genCCall target result_regs args
162
163 CmmBranch id -> genBranch id
164 CmmCondBranch arg true false _ -> do
165 b1 <- genCondJump true arg
166 b2 <- genBranch false
167 return (b1 `appOL` b2)
168 CmmSwitch arg ids -> do dflags <- getDynFlags
169 genSwitch dflags arg ids
170 CmmCall { cml_target = arg } -> genJump arg
171 _ ->
172 panic "stmtToInstrs: statement should have been cps'd away"
173
174
175 --------------------------------------------------------------------------------
176 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
177 -- They are really trees of insns to facilitate fast appending, where a
178 -- left-to-right traversal yields the insns in the correct order.
179 --
180 type InstrBlock
181 = OrdList Instr
182
183
184 -- | Register's passed up the tree. If the stix code forces the register
185 -- to live in a pre-decided machine register, it comes out as @Fixed@;
186 -- otherwise, it comes out as @Any@, and the parent can decide which
187 -- register to put it in.
188 --
189 data Register
190 = Fixed Format Reg InstrBlock
191 | Any Format (Reg -> InstrBlock)
192
193
194 swizzleRegisterRep :: Register -> Format -> Register
195 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
196 swizzleRegisterRep (Any _ codefn) format = Any format codefn
197
198
199 -- | Grab the Reg for a CmmReg
200 getRegisterReg :: Platform -> CmmReg -> Reg
201
202 getRegisterReg _ (CmmLocal (LocalReg u pk))
203 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
204
205 getRegisterReg platform (CmmGlobal mid)
206 = case globalRegMaybe platform mid of
207 Just reg -> RegReal reg
208 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
209 -- By this stage, the only MagicIds remaining should be the
210 -- ones which map to a real machine register on this
211 -- platform. Hence ...
212
213 -- | Convert a BlockId to some CmmStatic data
214 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
215 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
216 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
217 where blockLabel = mkAsmTempLabel (getUnique blockid)
218
219
220
221 -- -----------------------------------------------------------------------------
222 -- General things for putting together code sequences
223
224 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
225 -- CmmExprs into CmmRegOff?
226 mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
227 mangleIndexTree dflags (CmmRegOff reg off)
228 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
229 where width = typeWidth (cmmRegType dflags reg)
230
231 mangleIndexTree _ _
232 = panic "PPC.CodeGen.mangleIndexTree: no match"
233
234 -- -----------------------------------------------------------------------------
235 -- Code gen for 64-bit arithmetic on 32-bit platforms
236
237 {-
238 Simple support for generating 64-bit code (ie, 64 bit values and 64
239 bit assignments) on 32-bit platforms. Unlike the main code generator
240 we merely shoot for generating working code as simply as possible, and
241 pay little attention to code quality. Specifically, there is no
242 attempt to deal cleverly with the fixed-vs-floating register
243 distinction; all values are generated into (pairs of) floating
244 registers, even if this would mean some redundant reg-reg moves as a
245 result. Only one of the VRegUniques is returned, since it will be
246 of the VRegUniqueLo form, and the upper-half VReg can be determined
247 by applying getHiVRegFromLo to it.
248 -}
249
250 data ChildCode64 -- a.k.a "Register64"
251 = ChildCode64
252 InstrBlock -- code
253 Reg -- the lower 32-bit temporary which contains the
254 -- result; use getHiVRegFromLo to find the other
255 -- VRegUnique. Rules of this simplified insn
256 -- selection game are therefore that the returned
257 -- Reg may be modified
258
259
260 -- | Compute an expression into a register, but
261 -- we don't mind which one it is.
262 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
263 getSomeReg expr = do
264 r <- getRegister expr
265 case r of
266 Any rep code -> do
267 tmp <- getNewRegNat rep
268 return (tmp, code tmp)
269 Fixed _ reg code ->
270 return (reg, code)
271
272 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
273 getI64Amodes addrTree = do
274 Amode hi_addr addr_code <- getAmode D addrTree
275 case addrOffset hi_addr 4 of
276 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
277 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
278 return (AddrRegImm hi_ptr (ImmInt 0),
279 AddrRegImm hi_ptr (ImmInt 4),
280 code)
281
282
283 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
284 assignMem_I64Code addrTree valueTree = do
285 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
286 ChildCode64 vcode rlo <- iselExpr64 valueTree
287 let
288 rhi = getHiVRegFromLo rlo
289
290 -- Big-endian store
291 mov_hi = ST II32 rhi hi_addr
292 mov_lo = ST II32 rlo lo_addr
293 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
294
295
296 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
297 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
298 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
299 let
300 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
301 r_dst_hi = getHiVRegFromLo r_dst_lo
302 r_src_hi = getHiVRegFromLo r_src_lo
303 mov_lo = MR r_dst_lo r_src_lo
304 mov_hi = MR r_dst_hi r_src_hi
305 return (
306 vcode `snocOL` mov_lo `snocOL` mov_hi
307 )
308
309 assignReg_I64Code _ _
310 = panic "assignReg_I64Code(powerpc): invalid lvalue"
311
312
313 iselExpr64 :: CmmExpr -> NatM ChildCode64
314 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
315 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
316 (rlo, rhi) <- getNewRegPairNat II32
317 let mov_hi = LD II32 rhi hi_addr
318 mov_lo = LD II32 rlo lo_addr
319 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
320 rlo
321
322 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
323 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
324
325 iselExpr64 (CmmLit (CmmInt i _)) = do
326 (rlo,rhi) <- getNewRegPairNat II32
327 let
328 half0 = fromIntegral (fromIntegral i :: Word16)
329 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
330 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
331 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
332
333 code = toOL [
334 LIS rlo (ImmInt half1),
335 OR rlo rlo (RIImm $ ImmInt half0),
336 LIS rhi (ImmInt half3),
337 OR rhi rhi (RIImm $ ImmInt half2)
338 ]
339 return (ChildCode64 code rlo)
340
341 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
342 ChildCode64 code1 r1lo <- iselExpr64 e1
343 ChildCode64 code2 r2lo <- iselExpr64 e2
344 (rlo,rhi) <- getNewRegPairNat II32
345 let
346 r1hi = getHiVRegFromLo r1lo
347 r2hi = getHiVRegFromLo r2lo
348 code = code1 `appOL`
349 code2 `appOL`
350 toOL [ ADDC rlo r1lo r2lo,
351 ADDE rhi r1hi r2hi ]
352 return (ChildCode64 code rlo)
353
354 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
355 ChildCode64 code1 r1lo <- iselExpr64 e1
356 ChildCode64 code2 r2lo <- iselExpr64 e2
357 (rlo,rhi) <- getNewRegPairNat II32
358 let
359 r1hi = getHiVRegFromLo r1lo
360 r2hi = getHiVRegFromLo r2lo
361 code = code1 `appOL`
362 code2 `appOL`
363 toOL [ SUBFC rlo r2lo r1lo,
364 SUBFE rhi r2hi r1hi ]
365 return (ChildCode64 code rlo)
366
367 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
368 (expr_reg,expr_code) <- getSomeReg expr
369 (rlo, rhi) <- getNewRegPairNat II32
370 let mov_hi = LI rhi (ImmInt 0)
371 mov_lo = MR rlo expr_reg
372 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
373 rlo
374 iselExpr64 expr
375 = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
376
377
378
379 getRegister :: CmmExpr -> NatM Register
380 getRegister e = do dflags <- getDynFlags
381 getRegister' dflags e
382
383 getRegister' :: DynFlags -> CmmExpr -> NatM Register
384
385 getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
386 | OSAIX <- platformOS (targetPlatform dflags) = do
387 let code dst = toOL [ LD II32 dst tocAddr ]
388 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
389 return (Any II32 code)
390 | target32Bit (targetPlatform dflags) = do
391 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
392 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
393 reg nilOL)
394 | otherwise = return (Fixed II64 toc nilOL)
395
396 getRegister' dflags (CmmReg reg)
397 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
398 (getRegisterReg (targetPlatform dflags) reg) nilOL)
399
400 getRegister' dflags tree@(CmmRegOff _ _)
401 = getRegister' dflags (mangleIndexTree dflags tree)
402
403 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
404 -- TO_W_(x), TO_W_(x >> 32)
405
406 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
407 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
408 | target32Bit (targetPlatform dflags) = do
409 ChildCode64 code rlo <- iselExpr64 x
410 return $ Fixed II32 (getHiVRegFromLo rlo) code
411
412 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
413 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
414 | target32Bit (targetPlatform dflags) = do
415 ChildCode64 code rlo <- iselExpr64 x
416 return $ Fixed II32 (getHiVRegFromLo rlo) code
417
418 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
419 | target32Bit (targetPlatform dflags) = do
420 ChildCode64 code rlo <- iselExpr64 x
421 return $ Fixed II32 rlo code
422
423 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
424 | target32Bit (targetPlatform dflags) = do
425 ChildCode64 code rlo <- iselExpr64 x
426 return $ Fixed II32 rlo code
427
428 getRegister' dflags (CmmLoad mem pk)
429 | not (isWord64 pk) = do
430 let platform = targetPlatform dflags
431 Amode addr addr_code <- getAmode D mem
432 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
433 addr_code `snocOL` LD format dst addr
434 return (Any format code)
435 | not (target32Bit (targetPlatform dflags)) = do
436 Amode addr addr_code <- getAmode DS mem
437 let code dst = addr_code `snocOL` LD II64 dst addr
438 return (Any II64 code)
439
440 where format = cmmTypeFormat pk
441
442 -- catch simple cases of zero- or sign-extended load
443 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
444 Amode addr addr_code <- getAmode D mem
445 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
446
447 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
448 Amode addr addr_code <- getAmode D mem
449 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
450
451 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
452
453 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
454 Amode addr addr_code <- getAmode D mem
455 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
456
457 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
458 Amode addr addr_code <- getAmode D mem
459 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
460
461 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
462 Amode addr addr_code <- getAmode D mem
463 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
464
465 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
466 Amode addr addr_code <- getAmode D mem
467 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
468
469 getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
470 Amode addr addr_code <- getAmode D mem
471 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
472
473 getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
474 Amode addr addr_code <- getAmode D mem
475 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
476
477 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
478 = case mop of
479 MO_Not rep -> triv_ucode_int rep NOT
480
481 MO_F_Neg w -> triv_ucode_float w FNEG
482 MO_S_Neg w -> triv_ucode_int w NEG
483
484 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
485 MO_FF_Conv W32 W64 -> conversionNop FF64 x
486
487 MO_FS_Conv from to -> coerceFP2Int from to x
488 MO_SF_Conv from to -> coerceInt2FP from to x
489
490 MO_SS_Conv from to
491 | from == to -> conversionNop (intFormat to) x
492
493 -- narrowing is a nop: we treat the high bits as undefined
494 MO_SS_Conv W64 to
495 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register"
496 | otherwise -> conversionNop (intFormat to) x
497 MO_SS_Conv W32 to
498 | arch32 -> conversionNop (intFormat to) x
499 | otherwise -> case to of
500 W64 -> triv_ucode_int to (EXTS II32)
501 W16 -> conversionNop II16 x
502 W8 -> conversionNop II8 x
503 _ -> panic "PPC.CodeGen.getRegister: no match"
504 MO_SS_Conv W16 W8 -> conversionNop II8 x
505 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
506 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
507
508 MO_UU_Conv from to
509 | from == to -> conversionNop (intFormat to) x
510 -- narrowing is a nop: we treat the high bits as undefined
511 MO_UU_Conv W64 to
512 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target"
513 | otherwise -> conversionNop (intFormat to) x
514 MO_UU_Conv W32 to
515 | arch32 -> conversionNop (intFormat to) x
516 | otherwise ->
517 case to of
518 W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
519 W16 -> conversionNop II16 x
520 W8 -> conversionNop II8 x
521 _ -> panic "PPC.CodeGen.getRegister: no match"
522 MO_UU_Conv W16 W8 -> conversionNop II8 x
523 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
524 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
525 _ -> panic "PPC.CodeGen.getRegister: no match"
526
527 where
528 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
529 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
530
531 conversionNop new_format expr
532 = do e_code <- getRegister' dflags expr
533 return (swizzleRegisterRep e_code new_format)
534 arch32 = target32Bit $ targetPlatform dflags
535
536 getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
537 = case mop of
538 MO_F_Eq _ -> condFltReg EQQ x y
539 MO_F_Ne _ -> condFltReg NE x y
540 MO_F_Gt _ -> condFltReg GTT x y
541 MO_F_Ge _ -> condFltReg GE x y
542 MO_F_Lt _ -> condFltReg LTT x y
543 MO_F_Le _ -> condFltReg LE x y
544
545 MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
546 (extendUExpr dflags rep y)
547 MO_Ne rep -> condIntReg NE (extendUExpr dflags rep x)
548 (extendUExpr dflags rep y)
549
550 MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
551 (extendSExpr dflags rep y)
552 MO_S_Ge rep -> condIntReg GE (extendSExpr dflags rep x)
553 (extendSExpr dflags rep y)
554 MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
555 (extendSExpr dflags rep y)
556 MO_S_Le rep -> condIntReg LE (extendSExpr dflags rep x)
557 (extendSExpr dflags rep y)
558
559 MO_U_Gt rep -> condIntReg GU (extendUExpr dflags rep x)
560 (extendUExpr dflags rep y)
561 MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
562 (extendUExpr dflags rep y)
563 MO_U_Lt rep -> condIntReg LU (extendUExpr dflags rep x)
564 (extendUExpr dflags rep y)
565 MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
566 (extendUExpr dflags rep y)
567
568 MO_F_Add w -> triv_float w FADD
569 MO_F_Sub w -> triv_float w FSUB
570 MO_F_Mul w -> triv_float w FMUL
571 MO_F_Quot w -> triv_float w FDIV
572
573 -- optimize addition with 32-bit immediate
574 -- (needed for PIC)
575 MO_Add W32 ->
576 case y of
577 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
578 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
579 CmmLit lit
580 -> do
581 (src, srcCode) <- getSomeReg x
582 let imm = litToImm lit
583 code dst = srcCode `appOL` toOL [
584 ADDIS dst src (HA imm),
585 ADD dst dst (RIImm (LO imm))
586 ]
587 return (Any II32 code)
588 _ -> trivialCode W32 True ADD x y
589
590 MO_Add rep -> trivialCode rep True ADD x y
591 MO_Sub rep ->
592 case y of -- subfi ('substract from' with immediate) doesn't exist
593 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
594 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
595 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
596
597 MO_Mul rep
598 | arch32 -> trivialCode rep True MULLW x y
599 | otherwise -> trivialCode rep True MULLD x y
600
601 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
602 MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
603
604 MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
605 MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
606
607 MO_S_Quot rep
608 | arch32 -> trivialCodeNoImm' (intFormat rep) DIVW
609 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
610 | otherwise -> trivialCodeNoImm' (intFormat rep) DIVD
611 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
612 MO_U_Quot rep
613 | arch32 -> trivialCodeNoImm' (intFormat rep) DIVWU
614 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
615 | otherwise -> trivialCodeNoImm' (intFormat rep) DIVDU
616 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
617
618 MO_S_Rem rep
619 | arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x)
620 (extendSExpr dflags rep y)
621 | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
622 (extendSExpr dflags rep y)
623 MO_U_Rem rep
624 | arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x)
625 (extendSExpr dflags rep y)
626 | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
627 (extendSExpr dflags rep y)
628
629 MO_And rep -> trivialCode rep False AND x y
630 MO_Or rep -> trivialCode rep False OR x y
631 MO_Xor rep -> trivialCode rep False XOR x y
632
633 MO_Shl rep -> shiftCode rep SL x y
634 MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
635 MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
636 _ -> panic "PPC.CodeGen.getRegister: no match"
637
638 where
639 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
640 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
641
642 arch32 = target32Bit $ targetPlatform dflags
643
644 getRegister' _ (CmmLit (CmmInt i rep))
645 | Just imm <- makeImmediate rep True i
646 = let
647 code dst = unitOL (LI dst imm)
648 in
649 return (Any (intFormat rep) code)
650
651 getRegister' _ (CmmLit (CmmFloat f frep)) = do
652 lbl <- getNewLabelNat
653 dflags <- getDynFlags
654 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
655 Amode addr addr_code <- getAmode D dynRef
656 let format = floatFormat frep
657 code dst =
658 LDATA (Section ReadOnlyData lbl)
659 (Statics lbl [CmmStaticLit (CmmFloat f frep)])
660 `consOL` (addr_code `snocOL` LD format dst addr)
661 return (Any format code)
662
663 getRegister' dflags (CmmLit lit)
664 | target32Bit (targetPlatform dflags)
665 = let rep = cmmLitType dflags lit
666 imm = litToImm lit
667 code dst = toOL [
668 LIS dst (HA imm),
669 ADD dst dst (RIImm (LO imm))
670 ]
671 in return (Any (cmmTypeFormat rep) code)
672 | otherwise
673 = do lbl <- getNewLabelNat
674 dflags <- getDynFlags
675 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
676 Amode addr addr_code <- getAmode D dynRef
677 let rep = cmmLitType dflags lit
678 format = cmmTypeFormat rep
679 code dst =
680 LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
681 `consOL` (addr_code `snocOL` LD format dst addr)
682 return (Any format code)
683
684 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
685
686 -- extend?Rep: wrap integer expression of type rep
687 -- in a conversion to II32 or II64 resp.
688 extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
689 extendSExpr dflags W32 x
690 | target32Bit (targetPlatform dflags) = x
691
692 extendSExpr dflags W64 x
693 | not (target32Bit (targetPlatform dflags)) = x
694
695 extendSExpr dflags rep x =
696 let size = if target32Bit $ targetPlatform dflags
697 then W32
698 else W64
699 in CmmMachOp (MO_SS_Conv rep size) [x]
700
701 extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
702 extendUExpr dflags W32 x
703 | target32Bit (targetPlatform dflags) = x
704 extendUExpr dflags W64 x
705 | not (target32Bit (targetPlatform dflags)) = x
706 extendUExpr dflags rep x =
707 let size = if target32Bit $ targetPlatform dflags
708 then W32
709 else W64
710 in CmmMachOp (MO_UU_Conv rep size) [x]
711
712 -- -----------------------------------------------------------------------------
713 -- The 'Amode' type: Memory addressing modes passed up the tree.
714
715 data Amode
716 = Amode AddrMode InstrBlock
717
718 {-
719 Now, given a tree (the argument to an CmmLoad) that references memory,
720 produce a suitable addressing mode.
721
722 A Rule of the Game (tm) for Amodes: use of the addr bit must
723 immediately follow use of the code part, since the code part puts
724 values in registers which the addr then refers to. So you can't put
725 anything in between, lest it overwrite some of those registers. If
726 you need to do some other computation between the code part and use of
727 the addr bit, first store the effective address from the amode in a
728 temporary, then do the other computation, and then use the temporary:
729
730 code
731 LEA amode, tmp
732 ... other computation ...
733 ... (tmp) ...
734 -}
735
736 data InstrForm = D | DS
737
738 getAmode :: InstrForm -> CmmExpr -> NatM Amode
739 getAmode inf tree@(CmmRegOff _ _)
740 = do dflags <- getDynFlags
741 getAmode inf (mangleIndexTree dflags tree)
742
743 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
744 | Just off <- makeImmediate W32 True (-i)
745 = do
746 (reg, code) <- getSomeReg x
747 return (Amode (AddrRegImm reg off) code)
748
749
750 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
751 | Just off <- makeImmediate W32 True i
752 = do
753 (reg, code) <- getSomeReg x
754 return (Amode (AddrRegImm reg off) code)
755
756 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
757 | Just off <- makeImmediate W64 True (-i)
758 = do
759 (reg, code) <- getSomeReg x
760 return (Amode (AddrRegImm reg off) code)
761
762
763 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
764 | Just off <- makeImmediate W64 True i
765 = do
766 (reg, code) <- getSomeReg x
767 return (Amode (AddrRegImm reg off) code)
768
769 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
770 | Just off <- makeImmediate W64 True (-i)
771 = do
772 (reg, code) <- getSomeReg x
773 (reg', off', code') <-
774 if i `mod` 4 == 0
775 then do return (reg, off, code)
776 else do
777 tmp <- getNewRegNat II64
778 return (tmp, ImmInt 0,
779 code `snocOL` ADD tmp reg (RIImm off))
780 return (Amode (AddrRegImm reg' off') code')
781
782 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
783 | Just off <- makeImmediate W64 True i
784 = do
785 (reg, code) <- getSomeReg x
786 (reg', off', code') <-
787 if i `mod` 4 == 0
788 then do return (reg, off, code)
789 else do
790 tmp <- getNewRegNat II64
791 return (tmp, ImmInt 0,
792 code `snocOL` ADD tmp reg (RIImm off))
793 return (Amode (AddrRegImm reg' off') code')
794
795 -- optimize addition with 32-bit immediate
796 -- (needed for PIC)
797 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
798 = do
799 dflags <- getDynFlags
800 (src, srcCode) <- getSomeReg x
801 let imm = litToImm lit
802 case () of
803 _ | OSAIX <- platformOS (targetPlatform dflags)
804 , isCmmLabelType lit ->
805 -- HA16/LO16 relocations on labels not supported on AIX
806 return (Amode (AddrRegImm src imm) srcCode)
807 | otherwise -> do
808 tmp <- getNewRegNat II32
809 let code = srcCode `snocOL` ADDIS tmp src (HA imm)
810 return (Amode (AddrRegImm tmp (LO imm)) code)
811 where
812 isCmmLabelType (CmmLabel {}) = True
813 isCmmLabelType (CmmLabelOff {}) = True
814 isCmmLabelType (CmmLabelDiffOff {}) = True
815 isCmmLabelType _ = False
816
817 getAmode _ (CmmLit lit)
818 = do
819 dflags <- getDynFlags
820 case platformArch $ targetPlatform dflags of
821 ArchPPC -> do
822 tmp <- getNewRegNat II32
823 let imm = litToImm lit
824 code = unitOL (LIS tmp (HA imm))
825 return (Amode (AddrRegImm tmp (LO imm)) code)
826 _ -> do -- TODO: Load from TOC,
827 -- see getRegister' _ (CmmLit lit)
828 tmp <- getNewRegNat II64
829 let imm = litToImm lit
830 code = toOL [
831 LIS tmp (HIGHESTA imm),
832 OR tmp tmp (RIImm (HIGHERA imm)),
833 SL II64 tmp tmp (RIImm (ImmInt 32)),
834 ORIS tmp tmp (HA imm)
835 ]
836 return (Amode (AddrRegImm tmp (LO imm)) code)
837
838 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
839 = do
840 (regX, codeX) <- getSomeReg x
841 (regY, codeY) <- getSomeReg y
842 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
843
844 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
845 = do
846 (regX, codeX) <- getSomeReg x
847 (regY, codeY) <- getSomeReg y
848 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
849
850 getAmode _ other
851 = do
852 (reg, code) <- getSomeReg other
853 let
854 off = ImmInt 0
855 return (Amode (AddrRegImm reg off) code)
856
857
858 -- The 'CondCode' type: Condition codes passed up the tree.
859 data CondCode
860 = CondCode Bool Cond InstrBlock
861
862 -- Set up a condition code for a conditional branch.
863
864 getCondCode :: CmmExpr -> NatM CondCode
865
866 -- almost the same as everywhere else - but we need to
867 -- extend small integers to 32 bit or 64 bit first
868
869 getCondCode (CmmMachOp mop [x, y])
870 = do
871 dflags <- getDynFlags
872 case mop of
873 MO_F_Eq W32 -> condFltCode EQQ x y
874 MO_F_Ne W32 -> condFltCode NE x y
875 MO_F_Gt W32 -> condFltCode GTT x y
876 MO_F_Ge W32 -> condFltCode GE x y
877 MO_F_Lt W32 -> condFltCode LTT x y
878 MO_F_Le W32 -> condFltCode LE x y
879
880 MO_F_Eq W64 -> condFltCode EQQ x y
881 MO_F_Ne W64 -> condFltCode NE x y
882 MO_F_Gt W64 -> condFltCode GTT x y
883 MO_F_Ge W64 -> condFltCode GE x y
884 MO_F_Lt W64 -> condFltCode LTT x y
885 MO_F_Le W64 -> condFltCode LE x y
886
887 MO_Eq rep -> condIntCode EQQ (extendUExpr dflags rep x)
888 (extendUExpr dflags rep y)
889 MO_Ne rep -> condIntCode NE (extendUExpr dflags rep x)
890 (extendUExpr dflags rep y)
891
892 MO_S_Gt rep -> condIntCode GTT (extendSExpr dflags rep x)
893 (extendSExpr dflags rep y)
894 MO_S_Ge rep -> condIntCode GE (extendSExpr dflags rep x)
895 (extendSExpr dflags rep y)
896 MO_S_Lt rep -> condIntCode LTT (extendSExpr dflags rep x)
897 (extendSExpr dflags rep y)
898 MO_S_Le rep -> condIntCode LE (extendSExpr dflags rep x)
899 (extendSExpr dflags rep y)
900
901 MO_U_Gt rep -> condIntCode GU (extendSExpr dflags rep x)
902 (extendSExpr dflags rep y)
903 MO_U_Ge rep -> condIntCode GEU (extendSExpr dflags rep x)
904 (extendSExpr dflags rep y)
905 MO_U_Lt rep -> condIntCode LU (extendSExpr dflags rep x)
906 (extendSExpr dflags rep y)
907 MO_U_Le rep -> condIntCode LEU (extendSExpr dflags rep x)
908 (extendSExpr dflags rep y)
909
910 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
911
912 getCondCode _ = panic "getCondCode(2)(powerpc)"
913
914
915
916 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
917 -- passed back up the tree.
918
919 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
920
921 -- ###FIXME: I16 and I8!
922 -- TODO: Is this still an issue? All arguments are extend?Expr'd.
923 condIntCode cond x (CmmLit (CmmInt y rep))
924 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
925 = do
926 (src1, code) <- getSomeReg x
927 dflags <- getDynFlags
928 let format = archWordFormat $ target32Bit $ targetPlatform dflags
929 code' = code `snocOL`
930 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
931 return (CondCode False cond code')
932
933 condIntCode cond x y = do
934 (src1, code1) <- getSomeReg x
935 (src2, code2) <- getSomeReg y
936 dflags <- getDynFlags
937 let format = archWordFormat $ target32Bit $ targetPlatform dflags
938 code' = code1 `appOL` code2 `snocOL`
939 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
940 return (CondCode False cond code')
941
942 condFltCode cond x y = do
943 (src1, code1) <- getSomeReg x
944 (src2, code2) <- getSomeReg y
945 let
946 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
947 code'' = case cond of -- twiddle CR to handle unordered case
948 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
949 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
950 _ -> code'
951 where
952 ltbit = 0 ; eqbit = 2 ; gtbit = 1
953 return (CondCode True cond code'')
954
955
956
957 -- -----------------------------------------------------------------------------
958 -- Generating assignments
959
960 -- Assignments are really at the heart of the whole code generation
961 -- business. Almost all top-level nodes of any real importance are
962 -- assignments, which correspond to loads, stores, or register
963 -- transfers. If we're really lucky, some of the register transfers
964 -- will go away, because we can use the destination register to
965 -- complete the code generation for the right hand side. This only
966 -- fails when the right hand side is forced into a fixed register
967 -- (e.g. the result of a call).
968
969 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
970 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
971
972 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
973 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
974
975 assignMem_IntCode pk addr src = do
976 (srcReg, code) <- getSomeReg src
977 Amode dstAddr addr_code <- case pk of
978 II64 -> getAmode DS addr
979 _ -> getAmode D addr
980 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
981
982 -- dst is a reg, but src could be anything
983 assignReg_IntCode _ reg src
984 = do
985 dflags <- getDynFlags
986 let dst = getRegisterReg (targetPlatform dflags) reg
987 r <- getRegister src
988 return $ case r of
989 Any _ code -> code dst
990 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
991
992
993
994 -- Easy, isn't it?
995 assignMem_FltCode = assignMem_IntCode
996 assignReg_FltCode = assignReg_IntCode
997
998
999
1000 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1001
1002 genJump (CmmLit (CmmLabel lbl))
1003 = return (unitOL $ JMP lbl)
1004
1005 genJump tree
1006 = do
1007 dflags <- getDynFlags
1008 let platform = targetPlatform dflags
1009 case platformOS platform of
1010 OSLinux -> case platformArch platform of
1011 ArchPPC -> genJump' tree GCPLinux
1012 ArchPPC_64 ELF_V1 -> genJump' tree (GCPLinux64ELF 1)
1013 ArchPPC_64 ELF_V2 -> genJump' tree (GCPLinux64ELF 2)
1014 _ -> panic "PPC.CodeGen.genJump: Unknown Linux"
1015 OSAIX -> genJump' tree GCPAIX
1016 OSDarwin -> genJump' tree GCPDarwin
1017 _ -> panic "PPC.CodeGen.genJump: not defined for this os"
1018
1019
1020 genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
1021
1022 genJump' tree (GCPLinux64ELF 1)
1023 = do
1024 (target,code) <- getSomeReg tree
1025 return (code
1026 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1027 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1028 `snocOL` MTCTR r11
1029 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1030 `snocOL` BCTR [] Nothing)
1031
1032 genJump' tree (GCPLinux64ELF 2)
1033 = do
1034 (target,code) <- getSomeReg tree
1035 return (code
1036 `snocOL` MR r12 target
1037 `snocOL` MTCTR r12
1038 `snocOL` BCTR [] Nothing)
1039
1040 genJump' tree _
1041 = do
1042 (target,code) <- getSomeReg tree
1043 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
1044
1045 -- -----------------------------------------------------------------------------
1046 -- Unconditional branches
1047 genBranch :: BlockId -> NatM InstrBlock
1048 genBranch = return . toOL . mkJumpInstr
1049
1050
1051 -- -----------------------------------------------------------------------------
1052 -- Conditional jumps
1053
1054 {-
1055 Conditional jumps are always to local labels, so we can use branch
1056 instructions. We peek at the arguments to decide what kind of
1057 comparison to do.
1058 -}
1059
1060
1061 genCondJump
1062 :: BlockId -- the branch target
1063 -> CmmExpr -- the condition on which to branch
1064 -> NatM InstrBlock
1065
1066 genCondJump id bool = do
1067 CondCode _ cond code <- getCondCode bool
1068 return (code `snocOL` BCC cond id)
1069
1070
1071
1072 -- -----------------------------------------------------------------------------
1073 -- Generating C calls
1074
1075 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1076 -- @get_arg@, which moves the arguments to the correct registers/stack
1077 -- locations. Apart from that, the code is easy.
1078 --
1079 -- (If applicable) Do not fill the delay slots here; you will confuse the
1080 -- register allocator.
1081
1082 genCCall :: ForeignTarget -- function to call
1083 -> [CmmFormal] -- where to put the result
1084 -> [CmmActual] -- arguments (of mixed type)
1085 -> NatM InstrBlock
1086 genCCall target dest_regs argsAndHints
1087 = do dflags <- getDynFlags
1088 let platform = targetPlatform dflags
1089 case platformOS platform of
1090 OSLinux -> case platformArch platform of
1091 ArchPPC -> genCCall' dflags GCPLinux
1092 target dest_regs argsAndHints
1093 ArchPPC_64 ELF_V1 -> genCCall' dflags (GCPLinux64ELF 1)
1094 target dest_regs argsAndHints
1095 ArchPPC_64 ELF_V2 -> genCCall' dflags (GCPLinux64ELF 2)
1096 target dest_regs argsAndHints
1097 _ -> panic "PPC.CodeGen.genCCall: Unknown Linux"
1098 OSAIX -> genCCall' dflags GCPAIX target dest_regs argsAndHints
1099 OSDarwin -> genCCall' dflags GCPDarwin target dest_regs argsAndHints
1100 _ -> panic "PPC.CodeGen.genCCall: not defined for this os"
1101
1102 data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF Int | GCPAIX
1103
1104 genCCall'
1105 :: DynFlags
1106 -> GenCCallPlatform
1107 -> ForeignTarget -- function to call
1108 -> [CmmFormal] -- where to put the result
1109 -> [CmmActual] -- arguments (of mixed type)
1110 -> NatM InstrBlock
1111
1112 {-
1113 The PowerPC calling convention for Darwin/Mac OS X
1114 is described in Apple's document
1115 "Inside Mac OS X - Mach-O Runtime Architecture".
1116
1117 PowerPC Linux uses the System V Release 4 Calling Convention
1118 for PowerPC. It is described in the
1119 "System V Application Binary Interface PowerPC Processor Supplement".
1120
1121 Both conventions are similar:
1122 Parameters may be passed in general-purpose registers starting at r3, in
1123 floating point registers starting at f1, or on the stack.
1124
1125 But there are substantial differences:
1126 * The number of registers used for parameter passing and the exact set of
1127 nonvolatile registers differs (see MachRegs.hs).
1128 * On Darwin, stack space is always reserved for parameters, even if they are
1129 passed in registers. The called routine may choose to save parameters from
1130 registers to the corresponding space on the stack.
1131 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
1132 parameter is passed in an FPR.
1133 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1134 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1135 Darwin just treats an I64 like two separate II32s (high word first).
1136 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1137 4-byte aligned like everything else on Darwin.
1138 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1139 PowerPC Linux does not agree, so neither do we.
1140
1141 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1142 64-bit PowerPC. It is specified in
1143 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9".
1144
1145 According to all conventions, the parameter area should be part of the
1146 caller's stack frame, allocated in the caller's prologue code (large enough
1147 to hold the parameter lists for all called routines). The NCG already
1148 uses the stack for register spilling, leaving 64 bytes free at the top.
1149 If we need a larger parameter area than that, we just allocate a new stack
1150 frame just before ccalling.
1151 -}
1152
1153
1154 genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
1155 = return $ unitOL LWSYNC
1156
1157 genCCall' _ _ (PrimTarget MO_Touch) _ _
1158 = return $ nilOL
1159
1160 genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
1161 = return $ nilOL
1162
1163 genCCall' dflags gcp target dest_regs args
1164 = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
1165 -- we rely on argument promotion in the codeGen
1166 do
1167 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1168 (zip args argReps)
1169 allArgRegs
1170 (allFPArgRegs platform)
1171 initialStackOffset
1172 (toOL []) []
1173
1174 (labelOrExpr, reduceToFF32) <- case target of
1175 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1176 uses_pic_base_implicitly
1177 return (Left lbl, False)
1178 ForeignTarget expr _ -> do
1179 uses_pic_base_implicitly
1180 return (Right expr, False)
1181 PrimTarget mop -> outOfLineMachOp mop
1182
1183 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1184 `appOL` toc_before
1185 codeAfter = toc_after labelOrExpr `appOL` move_sp_up finalStack
1186 `appOL` moveResult reduceToFF32
1187
1188 case labelOrExpr of
1189 Left lbl -> do -- the linker does all the work for us
1190 return ( codeBefore
1191 `snocOL` BL lbl usedRegs
1192 `appOL` codeAfter)
1193 Right dyn -> do -- implement call through function pointer
1194 (dynReg, dynCode) <- getSomeReg dyn
1195 case gcp of
1196 GCPLinux64ELF 1 -> return ( dynCode
1197 `appOL` codeBefore
1198 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1199 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1200 `snocOL` MTCTR r11
1201 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1202 `snocOL` BCTRL usedRegs
1203 `appOL` codeAfter)
1204 GCPLinux64ELF 2 -> return ( dynCode
1205 `appOL` codeBefore
1206 `snocOL` MR r12 dynReg
1207 `snocOL` MTCTR r12
1208 `snocOL` BCTRL usedRegs
1209 `appOL` codeAfter)
1210 GCPAIX -> return ( dynCode
1211 -- AIX/XCOFF follows the PowerOPEN ABI
1212 -- which is quite similiar to LinuxPPC64/ELFv1
1213 `appOL` codeBefore
1214 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1215 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1216 `snocOL` MTCTR r11
1217 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1218 `snocOL` BCTRL usedRegs
1219 `appOL` codeAfter)
1220 _ -> return ( dynCode
1221 `snocOL` MTCTR dynReg
1222 `appOL` codeBefore
1223 `snocOL` BCTRL usedRegs
1224 `appOL` codeAfter)
1225 where
1226 platform = targetPlatform dflags
1227
1228 uses_pic_base_implicitly = do
1229 -- See Note [implicit register in PPC PIC code]
1230 -- on why we claim to use PIC register here
1231 when (gopt Opt_PIC dflags && target32Bit platform) $ do
1232 _ <- getPicBaseNat $ archWordFormat True
1233 return ()
1234
1235 initialStackOffset = case gcp of
1236 GCPAIX -> 24
1237 GCPDarwin -> 24
1238 GCPLinux -> 8
1239 GCPLinux64ELF 1 -> 48
1240 GCPLinux64ELF 2 -> 32
1241 _ -> panic "genCall': unknown calling convention"
1242 -- size of linkage area + size of arguments, in bytes
1243 stackDelta finalStack = case gcp of
1244 GCPAIX ->
1245 roundTo 16 $ (24 +) $ max 32 $ sum $
1246 map (widthInBytes . typeWidth) argReps
1247 GCPDarwin ->
1248 roundTo 16 $ (24 +) $ max 32 $ sum $
1249 map (widthInBytes . typeWidth) argReps
1250 GCPLinux -> roundTo 16 finalStack
1251 GCPLinux64ELF 1 ->
1252 roundTo 16 $ (48 +) $ max 64 $ sum $
1253 map (widthInBytes . typeWidth) argReps
1254 GCPLinux64ELF 2 ->
1255 roundTo 16 $ (32 +) $ max 64 $ sum $
1256 map (widthInBytes . typeWidth) argReps
1257 _ -> panic "genCall': unknown calling conv."
1258
1259 argReps = map (cmmExprType dflags) args
1260
1261 roundTo a x | x `mod` a == 0 = x
1262 | otherwise = x + a - (x `mod` a)
1263
1264 spFormat = if target32Bit platform then II32 else II64
1265
1266 move_sp_down finalStack
1267 | delta > 64 =
1268 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1269 DELTA (-delta)]
1270 | otherwise = nilOL
1271 where delta = stackDelta finalStack
1272 toc_before = case gcp of
1273 GCPLinux64ELF 1 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 40))
1274 GCPLinux64ELF 2 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 24))
1275 GCPAIX -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 20))
1276 _ -> nilOL
1277 toc_after labelOrExpr = case gcp of
1278 GCPLinux64ELF 1 -> case labelOrExpr of
1279 Left _ -> toOL [ NOP ]
1280 Right _ -> toOL [ LD spFormat toc
1281 (AddrRegImm sp
1282 (ImmInt 40))
1283 ]
1284 GCPLinux64ELF 2 -> case labelOrExpr of
1285 Left _ -> toOL [ NOP ]
1286 Right _ -> toOL [ LD spFormat toc
1287 (AddrRegImm sp
1288 (ImmInt 24))
1289 ]
1290 GCPAIX -> case labelOrExpr of
1291 Left _ -> unitOL NOP
1292 Right _ -> unitOL (LD spFormat toc
1293 (AddrRegImm sp
1294 (ImmInt 20)))
1295 _ -> nilOL
1296 move_sp_up finalStack
1297 | delta > 64 = -- TODO: fix-up stack back-chain
1298 toOL [ADD sp sp (RIImm (ImmInt delta)),
1299 DELTA 0]
1300 | otherwise = nilOL
1301 where delta = stackDelta finalStack
1302
1303
1304 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1305 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
1306 accumCode accumUsed | isWord64 arg_ty
1307 && target32Bit (targetPlatform dflags) =
1308 do
1309 ChildCode64 code vr_lo <- iselExpr64 arg
1310 let vr_hi = getHiVRegFromLo vr_lo
1311
1312 case gcp of
1313 GCPAIX -> -- same as for Darwin
1314 do let storeWord vr (gpr:_) _ = MR gpr vr
1315 storeWord vr [] offset
1316 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1317 passArguments args
1318 (drop 2 gprs)
1319 fprs
1320 (stackOffset+8)
1321 (accumCode `appOL` code
1322 `snocOL` storeWord vr_hi gprs stackOffset
1323 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1324 ((take 2 gprs) ++ accumUsed)
1325 GCPDarwin ->
1326 do let storeWord vr (gpr:_) _ = MR gpr vr
1327 storeWord vr [] offset
1328 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1329 passArguments args
1330 (drop 2 gprs)
1331 fprs
1332 (stackOffset+8)
1333 (accumCode `appOL` code
1334 `snocOL` storeWord vr_hi gprs stackOffset
1335 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1336 ((take 2 gprs) ++ accumUsed)
1337 GCPLinux ->
1338 do let stackOffset' = roundTo 8 stackOffset
1339 stackCode = accumCode `appOL` code
1340 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1341 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1342 regCode hireg loreg =
1343 accumCode `appOL` code
1344 `snocOL` MR hireg vr_hi
1345 `snocOL` MR loreg vr_lo
1346
1347 case gprs of
1348 hireg : loreg : regs | even (length gprs) ->
1349 passArguments args regs fprs stackOffset
1350 (regCode hireg loreg) (hireg : loreg : accumUsed)
1351 _skipped : hireg : loreg : regs ->
1352 passArguments args regs fprs stackOffset
1353 (regCode hireg loreg) (hireg : loreg : accumUsed)
1354 _ -> -- only one or no regs left
1355 passArguments args [] fprs (stackOffset'+8)
1356 stackCode accumUsed
1357 GCPLinux64ELF _ -> panic "passArguments: 32 bit code"
1358
1359 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1360 | reg : _ <- regs = do
1361 register <- getRegister arg
1362 let code = case register of
1363 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1364 Any _ acode -> acode reg
1365 stackOffsetRes = case gcp of
1366 -- The Darwin ABI requires that we reserve
1367 -- stack slots for register parameters
1368 GCPDarwin -> stackOffset + stackBytes
1369 -- ... so does the PowerOpen ABI.
1370 GCPAIX -> stackOffset + stackBytes
1371 -- ... the SysV ABI 32-bit doesn't.
1372 GCPLinux -> stackOffset
1373 -- ... but SysV ABI 64-bit does.
1374 GCPLinux64ELF _ -> stackOffset + stackBytes
1375 passArguments args
1376 (drop nGprs gprs)
1377 (drop nFprs fprs)
1378 stackOffsetRes
1379 (accumCode `appOL` code)
1380 (reg : accumUsed)
1381 | otherwise = do
1382 (vr, code) <- getSomeReg arg
1383 passArguments args
1384 (drop nGprs gprs)
1385 (drop nFprs fprs)
1386 (stackOffset' + stackBytes)
1387 (accumCode `appOL` code `snocOL` ST (cmmTypeFormat rep) vr stackSlot)
1388 accumUsed
1389 where
1390 stackOffset' = case gcp of
1391 GCPDarwin ->
1392 -- stackOffset is at least 4-byte aligned
1393 -- The Darwin ABI is happy with that.
1394 stackOffset
1395 GCPAIX ->
1396 -- The 32bit PowerOPEN ABI is happy with
1397 -- 32bit-alignment as well...
1398 stackOffset
1399 GCPLinux
1400 -- ... the SysV ABI requires 8-byte
1401 -- alignment for doubles.
1402 | isFloatType rep && typeWidth rep == W64 ->
1403 roundTo 8 stackOffset
1404 | otherwise ->
1405 stackOffset
1406 GCPLinux64ELF _ ->
1407 -- everything on the stack is 8-byte
1408 -- aligned on a 64 bit system
1409 -- (except vector status, not used now)
1410 stackOffset
1411 stackSlot = AddrRegImm sp (ImmInt stackOffset')
1412 (nGprs, nFprs, stackBytes, regs)
1413 = case gcp of
1414 GCPAIX ->
1415 case cmmTypeFormat rep of
1416 II8 -> (1, 0, 4, gprs)
1417 II16 -> (1, 0, 4, gprs)
1418 II32 -> (1, 0, 4, gprs)
1419 -- The PowerOpen ABI requires that we skip a
1420 -- corresponding number of GPRs when we use
1421 -- the FPRs.
1422 --
1423 -- E.g. for a `double` two GPRs are skipped,
1424 -- whereas for a `float` one GPR is skipped
1425 -- when parameters are assigned to
1426 -- registers.
1427 --
1428 -- The PowerOpen ABI specification can be found at
1429 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1430 FF32 -> (1, 1, 4, fprs)
1431 FF64 -> (2, 1, 8, fprs)
1432 II64 -> panic "genCCall' passArguments II64"
1433 FF80 -> panic "genCCall' passArguments FF80"
1434 GCPDarwin ->
1435 case cmmTypeFormat rep of
1436 II8 -> (1, 0, 4, gprs)
1437 II16 -> (1, 0, 4, gprs)
1438 II32 -> (1, 0, 4, gprs)
1439 -- The Darwin ABI requires that we skip a
1440 -- corresponding number of GPRs when we use
1441 -- the FPRs.
1442 FF32 -> (1, 1, 4, fprs)
1443 FF64 -> (2, 1, 8, fprs)
1444 II64 -> panic "genCCall' passArguments II64"
1445 FF80 -> panic "genCCall' passArguments FF80"
1446 GCPLinux ->
1447 case cmmTypeFormat rep of
1448 II8 -> (1, 0, 4, gprs)
1449 II16 -> (1, 0, 4, gprs)
1450 II32 -> (1, 0, 4, gprs)
1451 -- ... the SysV ABI doesn't.
1452 FF32 -> (0, 1, 4, fprs)
1453 FF64 -> (0, 1, 8, fprs)
1454 II64 -> panic "genCCall' passArguments II64"
1455 FF80 -> panic "genCCall' passArguments FF80"
1456 GCPLinux64ELF _ ->
1457 case cmmTypeFormat rep of
1458 II8 -> (1, 0, 8, gprs)
1459 II16 -> (1, 0, 8, gprs)
1460 II32 -> (1, 0, 8, gprs)
1461 II64 -> (1, 0, 8, gprs)
1462 -- The ELFv1 ABI requires that we skip a
1463 -- corresponding number of GPRs when we use
1464 -- the FPRs.
1465 FF32 -> (1, 1, 8, fprs)
1466 FF64 -> (1, 1, 8, fprs)
1467 FF80 -> panic "genCCall' passArguments FF80"
1468
1469 moveResult reduceToFF32 =
1470 case dest_regs of
1471 [] -> nilOL
1472 [dest]
1473 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1474 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1475 | isWord64 rep && target32Bit (targetPlatform dflags)
1476 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1477 MR r_dest r4]
1478 | otherwise -> unitOL (MR r_dest r3)
1479 where rep = cmmRegType dflags (CmmLocal dest)
1480 r_dest = getRegisterReg platform (CmmLocal dest)
1481 _ -> panic "genCCall' moveResult: Bad dest_regs"
1482
1483 outOfLineMachOp mop =
1484 do
1485 dflags <- getDynFlags
1486 mopExpr <- cmmMakeDynamicReference dflags CallReference $
1487 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1488 let mopLabelOrExpr = case mopExpr of
1489 CmmLit (CmmLabel lbl) -> Left lbl
1490 _ -> Right mopExpr
1491 return (mopLabelOrExpr, reduce)
1492 where
1493 (functionName, reduce) = case mop of
1494 MO_F32_Exp -> (fsLit "exp", True)
1495 MO_F32_Log -> (fsLit "log", True)
1496 MO_F32_Sqrt -> (fsLit "sqrt", True)
1497
1498 MO_F32_Sin -> (fsLit "sin", True)
1499 MO_F32_Cos -> (fsLit "cos", True)
1500 MO_F32_Tan -> (fsLit "tan", True)
1501
1502 MO_F32_Asin -> (fsLit "asin", True)
1503 MO_F32_Acos -> (fsLit "acos", True)
1504 MO_F32_Atan -> (fsLit "atan", True)
1505
1506 MO_F32_Sinh -> (fsLit "sinh", True)
1507 MO_F32_Cosh -> (fsLit "cosh", True)
1508 MO_F32_Tanh -> (fsLit "tanh", True)
1509 MO_F32_Pwr -> (fsLit "pow", True)
1510
1511 MO_F64_Exp -> (fsLit "exp", False)
1512 MO_F64_Log -> (fsLit "log", False)
1513 MO_F64_Sqrt -> (fsLit "sqrt", False)
1514
1515 MO_F64_Sin -> (fsLit "sin", False)
1516 MO_F64_Cos -> (fsLit "cos", False)
1517 MO_F64_Tan -> (fsLit "tan", False)
1518
1519 MO_F64_Asin -> (fsLit "asin", False)
1520 MO_F64_Acos -> (fsLit "acos", False)
1521 MO_F64_Atan -> (fsLit "atan", False)
1522
1523 MO_F64_Sinh -> (fsLit "sinh", False)
1524 MO_F64_Cosh -> (fsLit "cosh", False)
1525 MO_F64_Tanh -> (fsLit "tanh", False)
1526 MO_F64_Pwr -> (fsLit "pow", False)
1527
1528 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
1529
1530 MO_Memcpy _ -> (fsLit "memcpy", False)
1531 MO_Memset _ -> (fsLit "memset", False)
1532 MO_Memmove _ -> (fsLit "memmove", False)
1533
1534 MO_BSwap w -> (fsLit $ bSwapLabel w, False)
1535 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
1536 MO_Clz w -> (fsLit $ clzLabel w, False)
1537 MO_Ctz w -> (fsLit $ ctzLabel w, False)
1538 MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
1539 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
1540 MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
1541 MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
1542
1543 MO_S_QuotRem {} -> unsupported
1544 MO_U_QuotRem {} -> unsupported
1545 MO_U_QuotRem2 {} -> unsupported
1546 MO_Add2 {} -> unsupported
1547 MO_SubWordC {} -> unsupported
1548 MO_AddIntC {} -> unsupported
1549 MO_SubIntC {} -> unsupported
1550 MO_U_Mul2 {} -> unsupported
1551 MO_WriteBarrier -> unsupported
1552 MO_Touch -> unsupported
1553 (MO_Prefetch_Data _ ) -> unsupported
1554 unsupported = panic ("outOfLineCmmOp: " ++ show mop
1555 ++ " not supported")
1556
1557 -- -----------------------------------------------------------------------------
1558 -- Generating a table-branch
1559
1560 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
1561 genSwitch dflags expr targets
1562 | OSAIX <- platformOS (targetPlatform dflags)
1563 = do
1564 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1565 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1566 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1567 tmp <- getNewRegNat fmt
1568 lbl <- getNewLabelNat
1569 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1570 (tableReg,t_code) <- getSomeReg $ dynRef
1571 let code = e_code `appOL` t_code `appOL` toOL [
1572 SL fmt tmp reg (RIImm (ImmInt sha)),
1573 LD fmt tmp (AddrRegReg tableReg tmp),
1574 MTCTR tmp,
1575 BCTR ids (Just lbl)
1576 ]
1577 return code
1578
1579 | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags)
1580 = do
1581 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1582 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1583 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1584 tmp <- getNewRegNat fmt
1585 lbl <- getNewLabelNat
1586 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1587 (tableReg,t_code) <- getSomeReg $ dynRef
1588 let code = e_code `appOL` t_code `appOL` toOL [
1589 SL fmt tmp reg (RIImm (ImmInt sha)),
1590 LD fmt tmp (AddrRegReg tableReg tmp),
1591 ADD tmp tmp (RIReg tableReg),
1592 MTCTR tmp,
1593 BCTR ids (Just lbl)
1594 ]
1595 return code
1596 | otherwise
1597 = do
1598 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1599 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1600 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1601 tmp <- getNewRegNat fmt
1602 lbl <- getNewLabelNat
1603 let code = e_code `appOL` toOL [
1604 SL fmt tmp reg (RIImm (ImmInt sha)),
1605 ADDIS tmp tmp (HA (ImmCLbl lbl)),
1606 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1607 MTCTR tmp,
1608 BCTR ids (Just lbl)
1609 ]
1610 return code
1611 where (offset, ids) = switchTargetsToTable targets
1612
1613 generateJumpTableForInstr :: DynFlags -> Instr
1614 -> Maybe (NatCmmDecl CmmStatics Instr)
1615 generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
1616 let jumpTable
1617 | (gopt Opt_PIC dflags)
1618 || (not $ target32Bit $ targetPlatform dflags)
1619 = map jumpTableEntryRel ids
1620 | otherwise = map (jumpTableEntry dflags) ids
1621 where jumpTableEntryRel Nothing
1622 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
1623 jumpTableEntryRel (Just blockid)
1624 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1625 where blockLabel = mkAsmTempLabel (getUnique blockid)
1626 in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
1627 generateJumpTableForInstr _ _ = Nothing
1628
1629 -- -----------------------------------------------------------------------------
1630 -- 'condIntReg' and 'condFltReg': condition codes into registers
1631
1632 -- Turn those condition codes into integers now (when they appear on
1633 -- the right hand side of an assignment).
1634
1635 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1636
1637 condReg :: NatM CondCode -> NatM Register
1638 condReg getCond = do
1639 CondCode _ cond cond_code <- getCond
1640 dflags <- getDynFlags
1641 let
1642 code dst = cond_code
1643 `appOL` negate_code
1644 `appOL` toOL [
1645 MFCR dst,
1646 RLWINM dst dst (bit + 1) 31 31
1647 ]
1648
1649 negate_code | do_negate = unitOL (CRNOR bit bit bit)
1650 | otherwise = nilOL
1651
1652 (bit, do_negate) = case cond of
1653 LTT -> (0, False)
1654 LE -> (1, True)
1655 EQQ -> (2, False)
1656 GE -> (0, True)
1657 GTT -> (1, False)
1658
1659 NE -> (2, True)
1660
1661 LU -> (0, False)
1662 LEU -> (1, True)
1663 GEU -> (0, True)
1664 GU -> (1, False)
1665 _ -> panic "PPC.CodeGen.codeReg: no match"
1666
1667 format = archWordFormat $ target32Bit $ targetPlatform dflags
1668 return (Any format code)
1669
1670 condIntReg cond x y = condReg (condIntCode cond x y)
1671 condFltReg cond x y = condReg (condFltCode cond x y)
1672
1673
1674
1675 -- -----------------------------------------------------------------------------
1676 -- 'trivial*Code': deal with trivial instructions
1677
1678 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1679 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1680 -- Only look for constants on the right hand side, because that's
1681 -- where the generic optimizer will have put them.
1682
1683 -- Similarly, for unary instructions, we don't have to worry about
1684 -- matching an StInt as the argument, because genericOpt will already
1685 -- have handled the constant-folding.
1686
1687
1688
1689 {-
1690 Wolfgang's PowerPC version of The Rules:
1691
1692 A slightly modified version of The Rules to take advantage of the fact
1693 that PowerPC instructions work on all registers and don't implicitly
1694 clobber any fixed registers.
1695
1696 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1697
1698 * If getRegister returns Any, then the code it generates may modify only:
1699 (a) fresh temporaries
1700 (b) the destination register
1701 It may *not* modify global registers, unless the global
1702 register happens to be the destination register.
1703 It may not clobber any other registers. In fact, only ccalls clobber any
1704 fixed registers.
1705 Also, it may not modify the counter register (used by genCCall).
1706
1707 Corollary: If a getRegister for a subexpression returns Fixed, you need
1708 not move it to a fresh temporary before evaluating the next subexpression.
1709 The Fixed register won't be modified.
1710 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1711
1712 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1713 the value of the destination register.
1714 -}
1715
1716 trivialCode
1717 :: Width
1718 -> Bool
1719 -> (Reg -> Reg -> RI -> Instr)
1720 -> CmmExpr
1721 -> CmmExpr
1722 -> NatM Register
1723
1724 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1725 | Just imm <- makeImmediate rep signed y
1726 = do
1727 (src1, code1) <- getSomeReg x
1728 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1729 return (Any (intFormat rep) code)
1730
1731 trivialCode rep _ instr x y = do
1732 (src1, code1) <- getSomeReg x
1733 (src2, code2) <- getSomeReg y
1734 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1735 return (Any (intFormat rep) code)
1736
1737 shiftCode
1738 :: Width
1739 -> (Format-> Reg -> Reg -> RI -> Instr)
1740 -> CmmExpr
1741 -> CmmExpr
1742 -> NatM Register
1743 shiftCode width instr x (CmmLit (CmmInt y _))
1744 | Just imm <- makeImmediate width False y
1745 = do
1746 (src1, code1) <- getSomeReg x
1747 let format = intFormat width
1748 let code dst = code1 `snocOL` instr format dst src1 (RIImm imm)
1749 return (Any format code)
1750
1751 shiftCode width instr x y = do
1752 (src1, code1) <- getSomeReg x
1753 (src2, code2) <- getSomeReg y
1754 let format = intFormat width
1755 let code dst = code1 `appOL` code2 `snocOL` instr format dst src1 (RIReg src2)
1756 return (Any format code)
1757
1758 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
1759 -> CmmExpr -> CmmExpr -> NatM Register
1760 trivialCodeNoImm' format instr x y = do
1761 (src1, code1) <- getSomeReg x
1762 (src2, code2) <- getSomeReg y
1763 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1764 return (Any format code)
1765
1766 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
1767 -> CmmExpr -> CmmExpr -> NatM Register
1768 trivialCodeNoImm format instr x y = trivialCodeNoImm' format (instr format) x y
1769
1770
1771 trivialUCode
1772 :: Format
1773 -> (Reg -> Reg -> Instr)
1774 -> CmmExpr
1775 -> NatM Register
1776 trivialUCode rep instr x = do
1777 (src, code) <- getSomeReg x
1778 let code' dst = code `snocOL` instr dst src
1779 return (Any rep code')
1780
1781 -- There is no "remainder" instruction on the PPC, so we have to do
1782 -- it the hard way.
1783 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1784
1785 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1786 -> CmmExpr -> CmmExpr -> NatM Register
1787 remainderCode rep div x y = do
1788 dflags <- getDynFlags
1789 let mull_instr = if target32Bit $ targetPlatform dflags then MULLW
1790 else MULLD
1791 (src1, code1) <- getSomeReg x
1792 (src2, code2) <- getSomeReg y
1793 let code dst = code1 `appOL` code2 `appOL` toOL [
1794 div dst src1 src2,
1795 mull_instr dst dst (RIReg src2),
1796 SUBF dst dst src1
1797 ]
1798 return (Any (intFormat rep) code)
1799
1800 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1801 coerceInt2FP fromRep toRep x = do
1802 dflags <- getDynFlags
1803 let arch = platformArch $ targetPlatform dflags
1804 coerceInt2FP' arch fromRep toRep x
1805
1806 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
1807 coerceInt2FP' ArchPPC fromRep toRep x = do
1808 (src, code) <- getSomeReg x
1809 lbl <- getNewLabelNat
1810 itmp <- getNewRegNat II32
1811 ftmp <- getNewRegNat FF64
1812 dflags <- getDynFlags
1813 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1814 Amode addr addr_code <- getAmode D dynRef
1815 let
1816 code' dst = code `appOL` maybe_exts `appOL` toOL [
1817 LDATA (Section ReadOnlyData lbl) $ Statics lbl
1818 [CmmStaticLit (CmmInt 0x43300000 W32),
1819 CmmStaticLit (CmmInt 0x80000000 W32)],
1820 XORIS itmp src (ImmInt 0x8000),
1821 ST II32 itmp (spRel dflags 3),
1822 LIS itmp (ImmInt 0x4330),
1823 ST II32 itmp (spRel dflags 2),
1824 LD FF64 ftmp (spRel dflags 2)
1825 ] `appOL` addr_code `appOL` toOL [
1826 LD FF64 dst addr,
1827 FSUB FF64 dst ftmp dst
1828 ] `appOL` maybe_frsp dst
1829
1830 maybe_exts = case fromRep of
1831 W8 -> unitOL $ EXTS II8 src src
1832 W16 -> unitOL $ EXTS II16 src src
1833 W32 -> nilOL
1834 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1835
1836 maybe_frsp dst
1837 = case toRep of
1838 W32 -> unitOL $ FRSP dst dst
1839 W64 -> nilOL
1840 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1841
1842 return (Any (floatFormat toRep) code')
1843
1844 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
1845 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
1846 -- set right before a call and restored right after return from the call.
1847 -- So it is fine.
1848 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
1849 (src, code) <- getSomeReg x
1850 dflags <- getDynFlags
1851 let
1852 code' dst = code `appOL` maybe_exts `appOL` toOL [
1853 ST II64 src (spRel dflags 3),
1854 LD FF64 dst (spRel dflags 3),
1855 FCFID dst dst
1856 ] `appOL` maybe_frsp dst
1857
1858 maybe_exts = case fromRep of
1859 W8 -> unitOL $ EXTS II8 src src
1860 W16 -> unitOL $ EXTS II16 src src
1861 W32 -> unitOL $ EXTS II32 src src
1862 W64 -> nilOL
1863 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1864
1865 maybe_frsp dst
1866 = case toRep of
1867 W32 -> unitOL $ FRSP dst dst
1868 W64 -> nilOL
1869 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1870
1871 return (Any (floatFormat toRep) code')
1872
1873 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
1874
1875
1876 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1877 coerceFP2Int fromRep toRep x = do
1878 dflags <- getDynFlags
1879 let arch = platformArch $ targetPlatform dflags
1880 coerceFP2Int' arch fromRep toRep x
1881
1882 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
1883 coerceFP2Int' ArchPPC _ toRep x = do
1884 dflags <- getDynFlags
1885 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1886 (src, code) <- getSomeReg x
1887 tmp <- getNewRegNat FF64
1888 let
1889 code' dst = code `appOL` toOL [
1890 -- convert to int in FP reg
1891 FCTIWZ tmp src,
1892 -- store value (64bit) from FP to stack
1893 ST FF64 tmp (spRel dflags 2),
1894 -- read low word of value (high word is undefined)
1895 LD II32 dst (spRel dflags 3)]
1896 return (Any (intFormat toRep) code')
1897
1898 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
1899 dflags <- getDynFlags
1900 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
1901 (src, code) <- getSomeReg x
1902 tmp <- getNewRegNat FF64
1903 let
1904 code' dst = code `appOL` toOL [
1905 -- convert to int in FP reg
1906 FCTIDZ tmp src,
1907 -- store value (64bit) from FP to compiler word on stack
1908 ST FF64 tmp (spRel dflags 3),
1909 LD II64 dst (spRel dflags 3)]
1910 return (Any (intFormat toRep) code')
1911
1912 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
1913
1914 -- Note [.LCTOC1 in PPC PIC code]
1915 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
1916 -- to make the most of the PPC's 16-bit displacements.
1917 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
1918 -- first element will have '-32768' offset against .LCTOC1.
1919
1920 -- Note [implicit register in PPC PIC code]
1921 -- PPC generates calls by labels in assembly
1922 -- in form of:
1923 -- bl puts+32768@plt
1924 -- in this form it's not seen directly (by GHC NCG)
1925 -- that r30 (PicBaseReg) is used,
1926 -- but r30 is a required part of PLT code setup:
1927 -- puts+32768@plt:
1928 -- lwz r11,-30484(r30) ; offset in .LCTOC1
1929 -- mtctr r11
1930 -- bctr