12d5d88f53113fefac8bc764784d431e473b6515
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, (b) the type signatures, and
13 -- (c) the #if blah_TARGET_ARCH} things, the
14 -- structure should not be too overwhelming.
15
16 module PPC.CodeGen (
17 cmmTopCodeGen,
18 generateJumpTableForInstr,
19 InstrBlock
20 )
21
22 where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26 #include "../includes/MachDeps.h"
27
28 -- NCG stuff:
29 import CodeGen.Platform
30 import PPC.Instr
31 import PPC.Cond
32 import PPC.Regs
33 import CPrim
34 import NCGMonad
35 import Instruction
36 import PIC
37 import Format
38 import RegClass
39 import Reg
40 import TargetReg
41 import Platform
42
43 -- Our intermediate code:
44 import BlockId
45 import PprCmm ( pprExpr )
46 import Cmm
47 import CmmUtils
48 import CmmSwitch
49 import CLabel
50 import Hoopl
51
52 -- The rest:
53 import OrdList
54 import Outputable
55 import Unique
56 import DynFlags
57
58 import Control.Monad ( mapAndUnzipM, when )
59 import Data.Bits
60 import Data.Word
61
62 import BasicTypes
63 import FastString
64 import Util
65
66 -- -----------------------------------------------------------------------------
67 -- Top-level of the instruction selector
68
69 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
70 -- They are really trees of insns to facilitate fast appending, where a
71 -- left-to-right traversal (pre-order?) yields the insns in the correct
72 -- order.
73
74 cmmTopCodeGen
75 :: RawCmmDecl
76 -> NatM [NatCmmDecl CmmStatics Instr]
77
78 cmmTopCodeGen (CmmProc info lab live graph) = do
79 let blocks = toBlockListEntryFirst graph
80 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
81 dflags <- getDynFlags
82 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
83 tops = proc : concat statics
84 os = platformOS $ targetPlatform dflags
85 arch = platformArch $ targetPlatform dflags
86 case arch of
87 ArchPPC | os == OSAIX -> return tops
88 | otherwise -> do
89 picBaseMb <- getPicBaseMaybeNat
90 case picBaseMb of
91 Just picBase -> initializePicBase_ppc arch os picBase tops
92 Nothing -> return tops
93 ArchPPC_64 ELF_V1 -> return tops
94 -- generating function descriptor is handled in
95 -- pretty printer
96 ArchPPC_64 ELF_V2 -> return tops
97 -- generating function prologue is handled in
98 -- pretty printer
99 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
100
101 cmmTopCodeGen (CmmData sec dat) = do
102 return [CmmData sec dat] -- no translation, we just use CmmStatic
103
104 basicBlockCodeGen
105 :: Block CmmNode C C
106 -> NatM ( [NatBasicBlock Instr]
107 , [NatCmmDecl CmmStatics Instr])
108
109 basicBlockCodeGen block = do
110 let (_, nodes, tail) = blockSplit block
111 id = entryLabel block
112 stmts = blockToList nodes
113 mid_instrs <- stmtsToInstrs stmts
114 tail_instrs <- stmtToInstrs tail
115 let instrs = mid_instrs `appOL` tail_instrs
116 -- code generation may introduce new basic block boundaries, which
117 -- are indicated by the NEWBLOCK instruction. We must split up the
118 -- instruction stream into basic blocks again. Also, we extract
119 -- LDATAs here too.
120 let
121 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
122
123 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
124 = ([], BasicBlock id instrs : blocks, statics)
125 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
126 = (instrs, blocks, CmmData sec dat:statics)
127 mkBlocks instr (instrs,blocks,statics)
128 = (instr:instrs, blocks, statics)
129 return (BasicBlock id top : other_blocks, statics)
130
131 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
132 stmtsToInstrs stmts
133 = do instrss <- mapM stmtToInstrs stmts
134 return (concatOL instrss)
135
136 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
137 stmtToInstrs stmt = do
138 dflags <- getDynFlags
139 case stmt of
140 CmmComment s -> return (unitOL (COMMENT s))
141 CmmTick {} -> return nilOL
142 CmmUnwind {} -> return nilOL
143
144 CmmAssign reg src
145 | isFloatType ty -> assignReg_FltCode format reg src
146 | target32Bit (targetPlatform dflags) &&
147 isWord64 ty -> assignReg_I64Code reg src
148 | otherwise -> assignReg_IntCode format reg src
149 where ty = cmmRegType dflags reg
150 format = cmmTypeFormat ty
151
152 CmmStore addr src
153 | isFloatType ty -> assignMem_FltCode format addr src
154 | target32Bit (targetPlatform dflags) &&
155 isWord64 ty -> assignMem_I64Code addr src
156 | otherwise -> assignMem_IntCode format addr src
157 where ty = cmmExprType dflags src
158 format = cmmTypeFormat ty
159
160 CmmUnsafeForeignCall target result_regs args
161 -> genCCall target result_regs args
162
163 CmmBranch id -> genBranch id
164 CmmCondBranch arg true false _ -> do
165 b1 <- genCondJump true arg
166 b2 <- genBranch false
167 return (b1 `appOL` b2)
168 CmmSwitch arg ids -> do dflags <- getDynFlags
169 genSwitch dflags arg ids
170 CmmCall { cml_target = arg } -> genJump arg
171 _ ->
172 panic "stmtToInstrs: statement should have been cps'd away"
173
174
175 --------------------------------------------------------------------------------
176 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
177 -- They are really trees of insns to facilitate fast appending, where a
178 -- left-to-right traversal yields the insns in the correct order.
179 --
180 type InstrBlock
181 = OrdList Instr
182
183
184 -- | Register's passed up the tree. If the stix code forces the register
185 -- to live in a pre-decided machine register, it comes out as @Fixed@;
186 -- otherwise, it comes out as @Any@, and the parent can decide which
187 -- register to put it in.
188 --
189 data Register
190 = Fixed Format Reg InstrBlock
191 | Any Format (Reg -> InstrBlock)
192
193
194 swizzleRegisterRep :: Register -> Format -> Register
195 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
196 swizzleRegisterRep (Any _ codefn) format = Any format codefn
197
198
199 -- | Grab the Reg for a CmmReg
200 getRegisterReg :: Platform -> CmmReg -> Reg
201
202 getRegisterReg _ (CmmLocal (LocalReg u pk))
203 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
204
205 getRegisterReg platform (CmmGlobal mid)
206 = case globalRegMaybe platform mid of
207 Just reg -> RegReal reg
208 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
209 -- By this stage, the only MagicIds remaining should be the
210 -- ones which map to a real machine register on this
211 -- platform. Hence ...
212
213 -- | Convert a BlockId to some CmmStatic data
214 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
215 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
216 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
217 where blockLabel = mkAsmTempLabel (getUnique blockid)
218
219
220
221 -- -----------------------------------------------------------------------------
222 -- General things for putting together code sequences
223
224 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
225 -- CmmExprs into CmmRegOff?
226 mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
227 mangleIndexTree dflags (CmmRegOff reg off)
228 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
229 where width = typeWidth (cmmRegType dflags reg)
230
231 mangleIndexTree _ _
232 = panic "PPC.CodeGen.mangleIndexTree: no match"
233
234 -- -----------------------------------------------------------------------------
235 -- Code gen for 64-bit arithmetic on 32-bit platforms
236
237 {-
238 Simple support for generating 64-bit code (ie, 64 bit values and 64
239 bit assignments) on 32-bit platforms. Unlike the main code generator
240 we merely shoot for generating working code as simply as possible, and
241 pay little attention to code quality. Specifically, there is no
242 attempt to deal cleverly with the fixed-vs-floating register
243 distinction; all values are generated into (pairs of) floating
244 registers, even if this would mean some redundant reg-reg moves as a
245 result. Only one of the VRegUniques is returned, since it will be
246 of the VRegUniqueLo form, and the upper-half VReg can be determined
247 by applying getHiVRegFromLo to it.
248 -}
249
250 data ChildCode64 -- a.k.a "Register64"
251 = ChildCode64
252 InstrBlock -- code
253 Reg -- the lower 32-bit temporary which contains the
254 -- result; use getHiVRegFromLo to find the other
255 -- VRegUnique. Rules of this simplified insn
256 -- selection game are therefore that the returned
257 -- Reg may be modified
258
259
260 -- | Compute an expression into a register, but
261 -- we don't mind which one it is.
262 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
263 getSomeReg expr = do
264 r <- getRegister expr
265 case r of
266 Any rep code -> do
267 tmp <- getNewRegNat rep
268 return (tmp, code tmp)
269 Fixed _ reg code ->
270 return (reg, code)
271
272 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
273 getI64Amodes addrTree = do
274 Amode hi_addr addr_code <- getAmode D addrTree
275 case addrOffset hi_addr 4 of
276 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
277 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
278 return (AddrRegImm hi_ptr (ImmInt 0),
279 AddrRegImm hi_ptr (ImmInt 4),
280 code)
281
282
283 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
284 assignMem_I64Code addrTree valueTree = do
285 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
286 ChildCode64 vcode rlo <- iselExpr64 valueTree
287 let
288 rhi = getHiVRegFromLo rlo
289
290 -- Big-endian store
291 mov_hi = ST II32 rhi hi_addr
292 mov_lo = ST II32 rlo lo_addr
293 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
294
295
296 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
297 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
298 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
299 let
300 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
301 r_dst_hi = getHiVRegFromLo r_dst_lo
302 r_src_hi = getHiVRegFromLo r_src_lo
303 mov_lo = MR r_dst_lo r_src_lo
304 mov_hi = MR r_dst_hi r_src_hi
305 return (
306 vcode `snocOL` mov_lo `snocOL` mov_hi
307 )
308
309 assignReg_I64Code _ _
310 = panic "assignReg_I64Code(powerpc): invalid lvalue"
311
312
313 iselExpr64 :: CmmExpr -> NatM ChildCode64
314 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
315 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
316 (rlo, rhi) <- getNewRegPairNat II32
317 let mov_hi = LD II32 rhi hi_addr
318 mov_lo = LD II32 rlo lo_addr
319 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
320 rlo
321
322 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
323 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
324
325 iselExpr64 (CmmLit (CmmInt i _)) = do
326 (rlo,rhi) <- getNewRegPairNat II32
327 let
328 half0 = fromIntegral (fromIntegral i :: Word16)
329 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
330 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
331 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
332
333 code = toOL [
334 LIS rlo (ImmInt half1),
335 OR rlo rlo (RIImm $ ImmInt half0),
336 LIS rhi (ImmInt half3),
337 OR rhi rhi (RIImm $ ImmInt half2)
338 ]
339 return (ChildCode64 code rlo)
340
341 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
342 ChildCode64 code1 r1lo <- iselExpr64 e1
343 ChildCode64 code2 r2lo <- iselExpr64 e2
344 (rlo,rhi) <- getNewRegPairNat II32
345 let
346 r1hi = getHiVRegFromLo r1lo
347 r2hi = getHiVRegFromLo r2lo
348 code = code1 `appOL`
349 code2 `appOL`
350 toOL [ ADDC rlo r1lo r2lo,
351 ADDE rhi r1hi r2hi ]
352 return (ChildCode64 code rlo)
353
354 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
355 ChildCode64 code1 r1lo <- iselExpr64 e1
356 ChildCode64 code2 r2lo <- iselExpr64 e2
357 (rlo,rhi) <- getNewRegPairNat II32
358 let
359 r1hi = getHiVRegFromLo r1lo
360 r2hi = getHiVRegFromLo r2lo
361 code = code1 `appOL`
362 code2 `appOL`
363 toOL [ SUBFC rlo r2lo r1lo,
364 SUBFE rhi r2hi r1hi ]
365 return (ChildCode64 code rlo)
366
367 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
368 (expr_reg,expr_code) <- getSomeReg expr
369 (rlo, rhi) <- getNewRegPairNat II32
370 let mov_hi = LI rhi (ImmInt 0)
371 mov_lo = MR rlo expr_reg
372 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
373 rlo
374 iselExpr64 expr
375 = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
376
377
378
379 getRegister :: CmmExpr -> NatM Register
380 getRegister e = do dflags <- getDynFlags
381 getRegister' dflags e
382
383 getRegister' :: DynFlags -> CmmExpr -> NatM Register
384
385 getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
386 | OSAIX <- platformOS (targetPlatform dflags) = do
387 let code dst = toOL [ LD II32 dst tocAddr ]
388 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
389 return (Any II32 code)
390 | target32Bit (targetPlatform dflags) = do
391 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
392 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
393 reg nilOL)
394 | otherwise = return (Fixed II64 toc nilOL)
395
396 getRegister' dflags (CmmReg reg)
397 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
398 (getRegisterReg (targetPlatform dflags) reg) nilOL)
399
400 getRegister' dflags tree@(CmmRegOff _ _)
401 = getRegister' dflags (mangleIndexTree dflags tree)
402
403 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
404 -- TO_W_(x), TO_W_(x >> 32)
405
406 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
407 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
408 | target32Bit (targetPlatform dflags) = do
409 ChildCode64 code rlo <- iselExpr64 x
410 return $ Fixed II32 (getHiVRegFromLo rlo) code
411
412 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
413 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
414 | target32Bit (targetPlatform dflags) = do
415 ChildCode64 code rlo <- iselExpr64 x
416 return $ Fixed II32 (getHiVRegFromLo rlo) code
417
418 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
419 | target32Bit (targetPlatform dflags) = do
420 ChildCode64 code rlo <- iselExpr64 x
421 return $ Fixed II32 rlo code
422
423 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
424 | target32Bit (targetPlatform dflags) = do
425 ChildCode64 code rlo <- iselExpr64 x
426 return $ Fixed II32 rlo code
427
428 getRegister' dflags (CmmLoad mem pk)
429 | not (isWord64 pk) = do
430 let platform = targetPlatform dflags
431 Amode addr addr_code <- getAmode D mem
432 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
433 addr_code `snocOL` LD format dst addr
434 return (Any format code)
435 | not (target32Bit (targetPlatform dflags)) = do
436 Amode addr addr_code <- getAmode DS mem
437 let code dst = addr_code `snocOL` LD II64 dst addr
438 return (Any II64 code)
439
440 where format = cmmTypeFormat pk
441
442 -- catch simple cases of zero- or sign-extended load
443 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
444 Amode addr addr_code <- getAmode D mem
445 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
446
447 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
448 Amode addr addr_code <- getAmode D mem
449 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
450
451 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
452
453 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
454 Amode addr addr_code <- getAmode D mem
455 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
456
457 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
458 Amode addr addr_code <- getAmode D mem
459 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
460
461 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
462 Amode addr addr_code <- getAmode D mem
463 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
464
465 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
466 Amode addr addr_code <- getAmode D mem
467 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
468
469 getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
470 Amode addr addr_code <- getAmode D mem
471 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
472
473 getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
474 Amode addr addr_code <- getAmode D mem
475 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
476
477 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
478 = case mop of
479 MO_Not rep -> triv_ucode_int rep NOT
480
481 MO_F_Neg w -> triv_ucode_float w FNEG
482 MO_S_Neg w -> triv_ucode_int w NEG
483
484 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
485 MO_FF_Conv W32 W64 -> conversionNop FF64 x
486
487 MO_FS_Conv from to -> coerceFP2Int from to x
488 MO_SF_Conv from to -> coerceInt2FP from to x
489
490 MO_SS_Conv from to
491 | from == to -> conversionNop (intFormat to) x
492
493 -- narrowing is a nop: we treat the high bits as undefined
494 MO_SS_Conv W64 to
495 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register"
496 | otherwise -> conversionNop (intFormat to) x
497 MO_SS_Conv W32 to
498 | arch32 -> conversionNop (intFormat to) x
499 | otherwise -> case to of
500 W64 -> triv_ucode_int to (EXTS II32)
501 W16 -> conversionNop II16 x
502 W8 -> conversionNop II8 x
503 _ -> panic "PPC.CodeGen.getRegister: no match"
504 MO_SS_Conv W16 W8 -> conversionNop II8 x
505 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
506 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
507
508 MO_UU_Conv from to
509 | from == to -> conversionNop (intFormat to) x
510 -- narrowing is a nop: we treat the high bits as undefined
511 MO_UU_Conv W64 to
512 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target"
513 | otherwise -> conversionNop (intFormat to) x
514 MO_UU_Conv W32 to
515 | arch32 -> conversionNop (intFormat to) x
516 | otherwise ->
517 case to of
518 W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
519 W16 -> conversionNop II16 x
520 W8 -> conversionNop II8 x
521 _ -> panic "PPC.CodeGen.getRegister: no match"
522 MO_UU_Conv W16 W8 -> conversionNop II8 x
523 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
524 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
525 _ -> panic "PPC.CodeGen.getRegister: no match"
526
527 where
528 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
529 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
530
531 conversionNop new_format expr
532 = do e_code <- getRegister' dflags expr
533 return (swizzleRegisterRep e_code new_format)
534 arch32 = target32Bit $ targetPlatform dflags
535
536 getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
537 = case mop of
538 MO_F_Eq _ -> condFltReg EQQ x y
539 MO_F_Ne _ -> condFltReg NE x y
540 MO_F_Gt _ -> condFltReg GTT x y
541 MO_F_Ge _ -> condFltReg GE x y
542 MO_F_Lt _ -> condFltReg LTT x y
543 MO_F_Le _ -> condFltReg LE x y
544
545 MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
546 (extendUExpr dflags rep y)
547 MO_Ne rep -> condIntReg NE (extendUExpr dflags rep x)
548 (extendUExpr dflags rep y)
549
550 MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
551 (extendSExpr dflags rep y)
552 MO_S_Ge rep -> condIntReg GE (extendSExpr dflags rep x)
553 (extendSExpr dflags rep y)
554 MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
555 (extendSExpr dflags rep y)
556 MO_S_Le rep -> condIntReg LE (extendSExpr dflags rep x)
557 (extendSExpr dflags rep y)
558
559 MO_U_Gt rep -> condIntReg GU (extendUExpr dflags rep x)
560 (extendUExpr dflags rep y)
561 MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
562 (extendUExpr dflags rep y)
563 MO_U_Lt rep -> condIntReg LU (extendUExpr dflags rep x)
564 (extendUExpr dflags rep y)
565 MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
566 (extendUExpr dflags rep y)
567
568 MO_F_Add w -> triv_float w FADD
569 MO_F_Sub w -> triv_float w FSUB
570 MO_F_Mul w -> triv_float w FMUL
571 MO_F_Quot w -> triv_float w FDIV
572
573 -- optimize addition with 32-bit immediate
574 -- (needed for PIC)
575 MO_Add W32 ->
576 case y of
577 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
578 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
579 CmmLit lit
580 -> do
581 (src, srcCode) <- getSomeReg x
582 let imm = litToImm lit
583 code dst = srcCode `appOL` toOL [
584 ADDIS dst src (HA imm),
585 ADD dst dst (RIImm (LO imm))
586 ]
587 return (Any II32 code)
588 _ -> trivialCode W32 True ADD x y
589
590 MO_Add rep -> trivialCode rep True ADD x y
591 MO_Sub rep ->
592 case y of -- subfi ('substract from' with immediate) doesn't exist
593 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
594 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
595 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
596
597 MO_Mul rep
598 | arch32 -> trivialCode rep True MULLW x y
599 | otherwise -> trivialCode rep True MULLD x y
600
601 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
602 MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
603
604 MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
605 MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
606
607 MO_S_Quot rep
608 | arch32 -> trivialCodeNoImm' (intFormat rep) DIVW
609 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
610 | otherwise -> trivialCodeNoImm' (intFormat rep) DIVD
611 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
612 MO_U_Quot rep
613 | arch32 -> trivialCodeNoImm' (intFormat rep) DIVWU
614 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
615 | otherwise -> trivialCodeNoImm' (intFormat rep) DIVDU
616 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
617
618 MO_S_Rem rep
619 | arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x)
620 (extendSExpr dflags rep y)
621 | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
622 (extendSExpr dflags rep y)
623 MO_U_Rem rep
624 | arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x)
625 (extendSExpr dflags rep y)
626 | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
627 (extendSExpr dflags rep y)
628
629 MO_And rep -> case y of
630 (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
631 -> do
632 (src, srcCode) <- getSomeReg x
633 let clear_mask = if imm == -4 then 2 else 3
634 fmt = intFormat rep
635 code dst = srcCode
636 `appOL` unitOL (CLRRI fmt dst src clear_mask)
637 return (Any fmt code)
638 _ -> trivialCode rep False AND x y
639 MO_Or rep -> trivialCode rep False OR x y
640 MO_Xor rep -> trivialCode rep False XOR x y
641
642 MO_Shl rep -> shiftCode rep SL x y
643 MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
644 MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
645 _ -> panic "PPC.CodeGen.getRegister: no match"
646
647 where
648 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
649 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
650
651 arch32 = target32Bit $ targetPlatform dflags
652
653 getRegister' _ (CmmLit (CmmInt i rep))
654 | Just imm <- makeImmediate rep True i
655 = let
656 code dst = unitOL (LI dst imm)
657 in
658 return (Any (intFormat rep) code)
659
660 getRegister' _ (CmmLit (CmmFloat f frep)) = do
661 lbl <- getNewLabelNat
662 dflags <- getDynFlags
663 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
664 Amode addr addr_code <- getAmode D dynRef
665 let format = floatFormat frep
666 code dst =
667 LDATA (Section ReadOnlyData lbl)
668 (Statics lbl [CmmStaticLit (CmmFloat f frep)])
669 `consOL` (addr_code `snocOL` LD format dst addr)
670 return (Any format code)
671
672 getRegister' dflags (CmmLit lit)
673 | target32Bit (targetPlatform dflags)
674 = let rep = cmmLitType dflags lit
675 imm = litToImm lit
676 code dst = toOL [
677 LIS dst (HA imm),
678 ADD dst dst (RIImm (LO imm))
679 ]
680 in return (Any (cmmTypeFormat rep) code)
681 | otherwise
682 = do lbl <- getNewLabelNat
683 dflags <- getDynFlags
684 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
685 Amode addr addr_code <- getAmode D dynRef
686 let rep = cmmLitType dflags lit
687 format = cmmTypeFormat rep
688 code dst =
689 LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
690 `consOL` (addr_code `snocOL` LD format dst addr)
691 return (Any format code)
692
693 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
694
695 -- extend?Rep: wrap integer expression of type rep
696 -- in a conversion to II32 or II64 resp.
697 extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
698 extendSExpr dflags W32 x
699 | target32Bit (targetPlatform dflags) = x
700
701 extendSExpr dflags W64 x
702 | not (target32Bit (targetPlatform dflags)) = x
703
704 extendSExpr dflags rep x =
705 let size = if target32Bit $ targetPlatform dflags
706 then W32
707 else W64
708 in CmmMachOp (MO_SS_Conv rep size) [x]
709
710 extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
711 extendUExpr dflags W32 x
712 | target32Bit (targetPlatform dflags) = x
713 extendUExpr dflags W64 x
714 | not (target32Bit (targetPlatform dflags)) = x
715 extendUExpr dflags rep x =
716 let size = if target32Bit $ targetPlatform dflags
717 then W32
718 else W64
719 in CmmMachOp (MO_UU_Conv rep size) [x]
720
721 -- -----------------------------------------------------------------------------
722 -- The 'Amode' type: Memory addressing modes passed up the tree.
723
724 data Amode
725 = Amode AddrMode InstrBlock
726
727 {-
728 Now, given a tree (the argument to an CmmLoad) that references memory,
729 produce a suitable addressing mode.
730
731 A Rule of the Game (tm) for Amodes: use of the addr bit must
732 immediately follow use of the code part, since the code part puts
733 values in registers which the addr then refers to. So you can't put
734 anything in between, lest it overwrite some of those registers. If
735 you need to do some other computation between the code part and use of
736 the addr bit, first store the effective address from the amode in a
737 temporary, then do the other computation, and then use the temporary:
738
739 code
740 LEA amode, tmp
741 ... other computation ...
742 ... (tmp) ...
743 -}
744
745 data InstrForm = D | DS
746
747 getAmode :: InstrForm -> CmmExpr -> NatM Amode
748 getAmode inf tree@(CmmRegOff _ _)
749 = do dflags <- getDynFlags
750 getAmode inf (mangleIndexTree dflags tree)
751
752 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
753 | Just off <- makeImmediate W32 True (-i)
754 = do
755 (reg, code) <- getSomeReg x
756 return (Amode (AddrRegImm reg off) code)
757
758
759 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
760 | Just off <- makeImmediate W32 True i
761 = do
762 (reg, code) <- getSomeReg x
763 return (Amode (AddrRegImm reg off) code)
764
765 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
766 | Just off <- makeImmediate W64 True (-i)
767 = do
768 (reg, code) <- getSomeReg x
769 return (Amode (AddrRegImm reg off) code)
770
771
772 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
773 | Just off <- makeImmediate W64 True i
774 = do
775 (reg, code) <- getSomeReg x
776 return (Amode (AddrRegImm reg off) code)
777
778 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
779 | Just off <- makeImmediate W64 True (-i)
780 = do
781 (reg, code) <- getSomeReg x
782 (reg', off', code') <-
783 if i `mod` 4 == 0
784 then do return (reg, off, code)
785 else do
786 tmp <- getNewRegNat II64
787 return (tmp, ImmInt 0,
788 code `snocOL` ADD tmp reg (RIImm off))
789 return (Amode (AddrRegImm reg' off') code')
790
791 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
792 | Just off <- makeImmediate W64 True i
793 = do
794 (reg, code) <- getSomeReg x
795 (reg', off', code') <-
796 if i `mod` 4 == 0
797 then do return (reg, off, code)
798 else do
799 tmp <- getNewRegNat II64
800 return (tmp, ImmInt 0,
801 code `snocOL` ADD tmp reg (RIImm off))
802 return (Amode (AddrRegImm reg' off') code')
803
804 -- optimize addition with 32-bit immediate
805 -- (needed for PIC)
806 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
807 = do
808 dflags <- getDynFlags
809 (src, srcCode) <- getSomeReg x
810 let imm = litToImm lit
811 case () of
812 _ | OSAIX <- platformOS (targetPlatform dflags)
813 , isCmmLabelType lit ->
814 -- HA16/LO16 relocations on labels not supported on AIX
815 return (Amode (AddrRegImm src imm) srcCode)
816 | otherwise -> do
817 tmp <- getNewRegNat II32
818 let code = srcCode `snocOL` ADDIS tmp src (HA imm)
819 return (Amode (AddrRegImm tmp (LO imm)) code)
820 where
821 isCmmLabelType (CmmLabel {}) = True
822 isCmmLabelType (CmmLabelOff {}) = True
823 isCmmLabelType (CmmLabelDiffOff {}) = True
824 isCmmLabelType _ = False
825
826 getAmode _ (CmmLit lit)
827 = do
828 dflags <- getDynFlags
829 case platformArch $ targetPlatform dflags of
830 ArchPPC -> do
831 tmp <- getNewRegNat II32
832 let imm = litToImm lit
833 code = unitOL (LIS tmp (HA imm))
834 return (Amode (AddrRegImm tmp (LO imm)) code)
835 _ -> do -- TODO: Load from TOC,
836 -- see getRegister' _ (CmmLit lit)
837 tmp <- getNewRegNat II64
838 let imm = litToImm lit
839 code = toOL [
840 LIS tmp (HIGHESTA imm),
841 OR tmp tmp (RIImm (HIGHERA imm)),
842 SL II64 tmp tmp (RIImm (ImmInt 32)),
843 ORIS tmp tmp (HA imm)
844 ]
845 return (Amode (AddrRegImm tmp (LO imm)) code)
846
847 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
848 = do
849 (regX, codeX) <- getSomeReg x
850 (regY, codeY) <- getSomeReg y
851 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
852
853 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
854 = do
855 (regX, codeX) <- getSomeReg x
856 (regY, codeY) <- getSomeReg y
857 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
858
859 getAmode _ other
860 = do
861 (reg, code) <- getSomeReg other
862 let
863 off = ImmInt 0
864 return (Amode (AddrRegImm reg off) code)
865
866
867 -- The 'CondCode' type: Condition codes passed up the tree.
868 data CondCode
869 = CondCode Bool Cond InstrBlock
870
871 -- Set up a condition code for a conditional branch.
872
873 getCondCode :: CmmExpr -> NatM CondCode
874
875 -- almost the same as everywhere else - but we need to
876 -- extend small integers to 32 bit or 64 bit first
877
878 getCondCode (CmmMachOp mop [x, y])
879 = do
880 dflags <- getDynFlags
881 case mop of
882 MO_F_Eq W32 -> condFltCode EQQ x y
883 MO_F_Ne W32 -> condFltCode NE x y
884 MO_F_Gt W32 -> condFltCode GTT x y
885 MO_F_Ge W32 -> condFltCode GE x y
886 MO_F_Lt W32 -> condFltCode LTT x y
887 MO_F_Le W32 -> condFltCode LE x y
888
889 MO_F_Eq W64 -> condFltCode EQQ x y
890 MO_F_Ne W64 -> condFltCode NE x y
891 MO_F_Gt W64 -> condFltCode GTT x y
892 MO_F_Ge W64 -> condFltCode GE x y
893 MO_F_Lt W64 -> condFltCode LTT x y
894 MO_F_Le W64 -> condFltCode LE x y
895
896 MO_Eq rep -> condIntCode EQQ (extendUExpr dflags rep x)
897 (extendUExpr dflags rep y)
898 MO_Ne rep -> condIntCode NE (extendUExpr dflags rep x)
899 (extendUExpr dflags rep y)
900
901 MO_S_Gt rep -> condIntCode GTT (extendSExpr dflags rep x)
902 (extendSExpr dflags rep y)
903 MO_S_Ge rep -> condIntCode GE (extendSExpr dflags rep x)
904 (extendSExpr dflags rep y)
905 MO_S_Lt rep -> condIntCode LTT (extendSExpr dflags rep x)
906 (extendSExpr dflags rep y)
907 MO_S_Le rep -> condIntCode LE (extendSExpr dflags rep x)
908 (extendSExpr dflags rep y)
909
910 MO_U_Gt rep -> condIntCode GU (extendSExpr dflags rep x)
911 (extendSExpr dflags rep y)
912 MO_U_Ge rep -> condIntCode GEU (extendSExpr dflags rep x)
913 (extendSExpr dflags rep y)
914 MO_U_Lt rep -> condIntCode LU (extendSExpr dflags rep x)
915 (extendSExpr dflags rep y)
916 MO_U_Le rep -> condIntCode LEU (extendSExpr dflags rep x)
917 (extendSExpr dflags rep y)
918
919 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
920
921 getCondCode _ = panic "getCondCode(2)(powerpc)"
922
923
924 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
925 -- passed back up the tree.
926
927 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
928
929 -- optimize pointer tag checks. Operation andi. sets condition register
930 -- so cmpi ..., 0 is redundant.
931 condIntCode cond (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
932 (CmmLit (CmmInt 0 _))
933 | not $ condUnsigned cond,
934 Just src2 <- makeImmediate rep False imm
935 = do
936 (src1, code) <- getSomeReg x
937 let code' = code `snocOL` AND r0 src1 (RIImm src2)
938 return (CondCode False cond code')
939
940 condIntCode cond x (CmmLit (CmmInt y rep))
941 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
942 = do
943 (src1, code) <- getSomeReg x
944 dflags <- getDynFlags
945 let format = archWordFormat $ target32Bit $ targetPlatform dflags
946 code' = code `snocOL`
947 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
948 return (CondCode False cond code')
949
950 condIntCode cond x y = do
951 (src1, code1) <- getSomeReg x
952 (src2, code2) <- getSomeReg y
953 dflags <- getDynFlags
954 let format = archWordFormat $ target32Bit $ targetPlatform dflags
955 code' = code1 `appOL` code2 `snocOL`
956 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
957 return (CondCode False cond code')
958
959 condFltCode cond x y = do
960 (src1, code1) <- getSomeReg x
961 (src2, code2) <- getSomeReg y
962 let
963 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
964 code'' = case cond of -- twiddle CR to handle unordered case
965 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
966 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
967 _ -> code'
968 where
969 ltbit = 0 ; eqbit = 2 ; gtbit = 1
970 return (CondCode True cond code'')
971
972
973
974 -- -----------------------------------------------------------------------------
975 -- Generating assignments
976
977 -- Assignments are really at the heart of the whole code generation
978 -- business. Almost all top-level nodes of any real importance are
979 -- assignments, which correspond to loads, stores, or register
980 -- transfers. If we're really lucky, some of the register transfers
981 -- will go away, because we can use the destination register to
982 -- complete the code generation for the right hand side. This only
983 -- fails when the right hand side is forced into a fixed register
984 -- (e.g. the result of a call).
985
986 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
987 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
988
989 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
990 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
991
992 assignMem_IntCode pk addr src = do
993 (srcReg, code) <- getSomeReg src
994 Amode dstAddr addr_code <- case pk of
995 II64 -> getAmode DS addr
996 _ -> getAmode D addr
997 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
998
999 -- dst is a reg, but src could be anything
1000 assignReg_IntCode _ reg src
1001 = do
1002 dflags <- getDynFlags
1003 let dst = getRegisterReg (targetPlatform dflags) reg
1004 r <- getRegister src
1005 return $ case r of
1006 Any _ code -> code dst
1007 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
1008
1009
1010
1011 -- Easy, isn't it?
1012 assignMem_FltCode = assignMem_IntCode
1013 assignReg_FltCode = assignReg_IntCode
1014
1015
1016
1017 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1018
1019 genJump (CmmLit (CmmLabel lbl))
1020 = return (unitOL $ JMP lbl)
1021
1022 genJump tree
1023 = do
1024 dflags <- getDynFlags
1025 genJump' tree (platformToGCP (targetPlatform dflags))
1026
1027 genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
1028
1029 genJump' tree (GCPLinux64ELF 1)
1030 = do
1031 (target,code) <- getSomeReg tree
1032 return (code
1033 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1034 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1035 `snocOL` MTCTR r11
1036 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1037 `snocOL` BCTR [] Nothing)
1038
1039 genJump' tree (GCPLinux64ELF 2)
1040 = do
1041 (target,code) <- getSomeReg tree
1042 return (code
1043 `snocOL` MR r12 target
1044 `snocOL` MTCTR r12
1045 `snocOL` BCTR [] Nothing)
1046
1047 genJump' tree _
1048 = do
1049 (target,code) <- getSomeReg tree
1050 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
1051
1052 -- -----------------------------------------------------------------------------
1053 -- Unconditional branches
1054 genBranch :: BlockId -> NatM InstrBlock
1055 genBranch = return . toOL . mkJumpInstr
1056
1057
1058 -- -----------------------------------------------------------------------------
1059 -- Conditional jumps
1060
1061 {-
1062 Conditional jumps are always to local labels, so we can use branch
1063 instructions. We peek at the arguments to decide what kind of
1064 comparison to do.
1065 -}
1066
1067
1068 genCondJump
1069 :: BlockId -- the branch target
1070 -> CmmExpr -- the condition on which to branch
1071 -> NatM InstrBlock
1072
1073 genCondJump id bool = do
1074 CondCode _ cond code <- getCondCode bool
1075 return (code `snocOL` BCC cond id)
1076
1077
1078
1079 -- -----------------------------------------------------------------------------
1080 -- Generating C calls
1081
1082 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1083 -- @get_arg@, which moves the arguments to the correct registers/stack
1084 -- locations. Apart from that, the code is easy.
1085 --
1086 -- (If applicable) Do not fill the delay slots here; you will confuse the
1087 -- register allocator.
1088
1089 genCCall :: ForeignTarget -- function to call
1090 -> [CmmFormal] -- where to put the result
1091 -> [CmmActual] -- arguments (of mixed type)
1092 -> NatM InstrBlock
1093 genCCall target dest_regs argsAndHints
1094 = do dflags <- getDynFlags
1095 genCCall' dflags (platformToGCP (targetPlatform dflags))
1096 target dest_regs argsAndHints
1097
1098 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
1099 data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF !Int | GCPAIX
1100 deriving Eq
1101
1102 platformToGCP :: Platform -> GenCCallPlatform
1103 platformToGCP platform = case platformOS platform of
1104 OSLinux -> case platformArch platform of
1105 ArchPPC -> GCPLinux
1106 ArchPPC_64 ELF_V1 -> GCPLinux64ELF 1
1107 ArchPPC_64 ELF_V2 -> GCPLinux64ELF 2
1108 _ -> panic "PPC.CodeGen.platformToGCP: Unknown Linux"
1109 OSAIX -> GCPAIX
1110 OSDarwin -> GCPDarwin
1111 _ -> panic "PPC.CodeGen.platformToGCP: not defined for this OS"
1112
1113
1114 genCCall'
1115 :: DynFlags
1116 -> GenCCallPlatform
1117 -> ForeignTarget -- function to call
1118 -> [CmmFormal] -- where to put the result
1119 -> [CmmActual] -- arguments (of mixed type)
1120 -> NatM InstrBlock
1121
1122 {-
1123 The PowerPC calling convention for Darwin/Mac OS X
1124 is described in Apple's document
1125 "Inside Mac OS X - Mach-O Runtime Architecture".
1126
1127 PowerPC Linux uses the System V Release 4 Calling Convention
1128 for PowerPC. It is described in the
1129 "System V Application Binary Interface PowerPC Processor Supplement".
1130
1131 Both conventions are similar:
1132 Parameters may be passed in general-purpose registers starting at r3, in
1133 floating point registers starting at f1, or on the stack.
1134
1135 But there are substantial differences:
1136 * The number of registers used for parameter passing and the exact set of
1137 nonvolatile registers differs (see MachRegs.hs).
1138 * On Darwin, stack space is always reserved for parameters, even if they are
1139 passed in registers. The called routine may choose to save parameters from
1140 registers to the corresponding space on the stack.
1141 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
1142 parameter is passed in an FPR.
1143 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1144 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1145 Darwin just treats an I64 like two separate II32s (high word first).
1146 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1147 4-byte aligned like everything else on Darwin.
1148 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1149 PowerPC Linux does not agree, so neither do we.
1150
1151 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1152 64-bit PowerPC. It is specified in
1153 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9".
1154
1155 According to all conventions, the parameter area should be part of the
1156 caller's stack frame, allocated in the caller's prologue code (large enough
1157 to hold the parameter lists for all called routines). The NCG already
1158 uses the stack for register spilling, leaving 64 bytes free at the top.
1159 If we need a larger parameter area than that, we just allocate a new stack
1160 frame just before ccalling.
1161 -}
1162
1163
1164 genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
1165 = return $ unitOL LWSYNC
1166
1167 genCCall' _ _ (PrimTarget MO_Touch) _ _
1168 = return $ nilOL
1169
1170 genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
1171 = return $ nilOL
1172
1173 genCCall' dflags gcp target dest_regs args
1174 = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
1175 -- we rely on argument promotion in the codeGen
1176 do
1177 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1178 (zip args argReps)
1179 allArgRegs
1180 (allFPArgRegs platform)
1181 initialStackOffset
1182 (toOL []) []
1183
1184 (labelOrExpr, reduceToFF32) <- case target of
1185 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1186 uses_pic_base_implicitly
1187 return (Left lbl, False)
1188 ForeignTarget expr _ -> do
1189 uses_pic_base_implicitly
1190 return (Right expr, False)
1191 PrimTarget mop -> outOfLineMachOp mop
1192
1193 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1194 `appOL` toc_before
1195 codeAfter = toc_after labelOrExpr `appOL` move_sp_up finalStack
1196 `appOL` moveResult reduceToFF32
1197
1198 case labelOrExpr of
1199 Left lbl -> do -- the linker does all the work for us
1200 return ( codeBefore
1201 `snocOL` BL lbl usedRegs
1202 `appOL` codeAfter)
1203 Right dyn -> do -- implement call through function pointer
1204 (dynReg, dynCode) <- getSomeReg dyn
1205 case gcp of
1206 GCPLinux64ELF 1 -> return ( dynCode
1207 `appOL` codeBefore
1208 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1209 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1210 `snocOL` MTCTR r11
1211 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1212 `snocOL` BCTRL usedRegs
1213 `appOL` codeAfter)
1214 GCPLinux64ELF 2 -> return ( dynCode
1215 `appOL` codeBefore
1216 `snocOL` MR r12 dynReg
1217 `snocOL` MTCTR r12
1218 `snocOL` BCTRL usedRegs
1219 `appOL` codeAfter)
1220 GCPAIX -> return ( dynCode
1221 -- AIX/XCOFF follows the PowerOPEN ABI
1222 -- which is quite similiar to LinuxPPC64/ELFv1
1223 `appOL` codeBefore
1224 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1225 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1226 `snocOL` MTCTR r11
1227 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1228 `snocOL` BCTRL usedRegs
1229 `appOL` codeAfter)
1230 _ -> return ( dynCode
1231 `snocOL` MTCTR dynReg
1232 `appOL` codeBefore
1233 `snocOL` BCTRL usedRegs
1234 `appOL` codeAfter)
1235 where
1236 platform = targetPlatform dflags
1237
1238 uses_pic_base_implicitly = do
1239 -- See Note [implicit register in PPC PIC code]
1240 -- on why we claim to use PIC register here
1241 when (gopt Opt_PIC dflags && target32Bit platform) $ do
1242 _ <- getPicBaseNat $ archWordFormat True
1243 return ()
1244
1245 initialStackOffset = case gcp of
1246 GCPAIX -> 24
1247 GCPDarwin -> 24
1248 GCPLinux -> 8
1249 GCPLinux64ELF 1 -> 48
1250 GCPLinux64ELF 2 -> 32
1251 _ -> panic "genCall': unknown calling convention"
1252 -- size of linkage area + size of arguments, in bytes
1253 stackDelta finalStack = case gcp of
1254 GCPAIX ->
1255 roundTo 16 $ (24 +) $ max 32 $ sum $
1256 map (widthInBytes . typeWidth) argReps
1257 GCPDarwin ->
1258 roundTo 16 $ (24 +) $ max 32 $ sum $
1259 map (widthInBytes . typeWidth) argReps
1260 GCPLinux -> roundTo 16 finalStack
1261 GCPLinux64ELF 1 ->
1262 roundTo 16 $ (48 +) $ max 64 $ sum $
1263 map (widthInBytes . typeWidth) argReps
1264 GCPLinux64ELF 2 ->
1265 roundTo 16 $ (32 +) $ max 64 $ sum $
1266 map (widthInBytes . typeWidth) argReps
1267 _ -> panic "genCall': unknown calling conv."
1268
1269 argReps = map (cmmExprType dflags) args
1270
1271 roundTo a x | x `mod` a == 0 = x
1272 | otherwise = x + a - (x `mod` a)
1273
1274 spFormat = if target32Bit platform then II32 else II64
1275
1276 move_sp_down finalStack
1277 | delta > 64 =
1278 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1279 DELTA (-delta)]
1280 | otherwise = nilOL
1281 where delta = stackDelta finalStack
1282 toc_before = case gcp of
1283 GCPLinux64ELF 1 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 40))
1284 GCPLinux64ELF 2 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 24))
1285 GCPAIX -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 20))
1286 _ -> nilOL
1287 toc_after labelOrExpr = case gcp of
1288 GCPLinux64ELF 1 -> case labelOrExpr of
1289 Left _ -> toOL [ NOP ]
1290 Right _ -> toOL [ LD spFormat toc
1291 (AddrRegImm sp
1292 (ImmInt 40))
1293 ]
1294 GCPLinux64ELF 2 -> case labelOrExpr of
1295 Left _ -> toOL [ NOP ]
1296 Right _ -> toOL [ LD spFormat toc
1297 (AddrRegImm sp
1298 (ImmInt 24))
1299 ]
1300 GCPAIX -> case labelOrExpr of
1301 Left _ -> unitOL NOP
1302 Right _ -> unitOL (LD spFormat toc
1303 (AddrRegImm sp
1304 (ImmInt 20)))
1305 _ -> nilOL
1306 move_sp_up finalStack
1307 | delta > 64 = -- TODO: fix-up stack back-chain
1308 toOL [ADD sp sp (RIImm (ImmInt delta)),
1309 DELTA 0]
1310 | otherwise = nilOL
1311 where delta = stackDelta finalStack
1312
1313
1314 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1315 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
1316 accumCode accumUsed | isWord64 arg_ty
1317 && target32Bit (targetPlatform dflags) =
1318 do
1319 ChildCode64 code vr_lo <- iselExpr64 arg
1320 let vr_hi = getHiVRegFromLo vr_lo
1321
1322 case gcp of
1323 GCPAIX -> -- same as for Darwin
1324 do let storeWord vr (gpr:_) _ = MR gpr vr
1325 storeWord vr [] offset
1326 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1327 passArguments args
1328 (drop 2 gprs)
1329 fprs
1330 (stackOffset+8)
1331 (accumCode `appOL` code
1332 `snocOL` storeWord vr_hi gprs stackOffset
1333 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1334 ((take 2 gprs) ++ accumUsed)
1335 GCPDarwin ->
1336 do let storeWord vr (gpr:_) _ = MR gpr vr
1337 storeWord vr [] offset
1338 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1339 passArguments args
1340 (drop 2 gprs)
1341 fprs
1342 (stackOffset+8)
1343 (accumCode `appOL` code
1344 `snocOL` storeWord vr_hi gprs stackOffset
1345 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1346 ((take 2 gprs) ++ accumUsed)
1347 GCPLinux ->
1348 do let stackOffset' = roundTo 8 stackOffset
1349 stackCode = accumCode `appOL` code
1350 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1351 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1352 regCode hireg loreg =
1353 accumCode `appOL` code
1354 `snocOL` MR hireg vr_hi
1355 `snocOL` MR loreg vr_lo
1356
1357 case gprs of
1358 hireg : loreg : regs | even (length gprs) ->
1359 passArguments args regs fprs stackOffset
1360 (regCode hireg loreg) (hireg : loreg : accumUsed)
1361 _skipped : hireg : loreg : regs ->
1362 passArguments args regs fprs stackOffset
1363 (regCode hireg loreg) (hireg : loreg : accumUsed)
1364 _ -> -- only one or no regs left
1365 passArguments args [] fprs (stackOffset'+8)
1366 stackCode accumUsed
1367 GCPLinux64ELF _ -> panic "passArguments: 32 bit code"
1368
1369 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1370 | reg : _ <- regs = do
1371 register <- getRegister arg
1372 let code = case register of
1373 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1374 Any _ acode -> acode reg
1375 stackOffsetRes = case gcp of
1376 -- The Darwin ABI requires that we reserve
1377 -- stack slots for register parameters
1378 GCPDarwin -> stackOffset + stackBytes
1379 -- ... so does the PowerOpen ABI.
1380 GCPAIX -> stackOffset + stackBytes
1381 -- ... the SysV ABI 32-bit doesn't.
1382 GCPLinux -> stackOffset
1383 -- ... but SysV ABI 64-bit does.
1384 GCPLinux64ELF _ -> stackOffset + stackBytes
1385 passArguments args
1386 (drop nGprs gprs)
1387 (drop nFprs fprs)
1388 stackOffsetRes
1389 (accumCode `appOL` code)
1390 (reg : accumUsed)
1391 | otherwise = do
1392 (vr, code) <- getSomeReg arg
1393 passArguments args
1394 (drop nGprs gprs)
1395 (drop nFprs fprs)
1396 (stackOffset' + stackBytes)
1397 (accumCode `appOL` code `snocOL` ST (cmmTypeFormat rep) vr stackSlot)
1398 accumUsed
1399 where
1400 stackOffset' = case gcp of
1401 GCPDarwin ->
1402 -- stackOffset is at least 4-byte aligned
1403 -- The Darwin ABI is happy with that.
1404 stackOffset
1405 GCPAIX ->
1406 -- The 32bit PowerOPEN ABI is happy with
1407 -- 32bit-alignment as well...
1408 stackOffset
1409 GCPLinux
1410 -- ... the SysV ABI requires 8-byte
1411 -- alignment for doubles.
1412 | isFloatType rep && typeWidth rep == W64 ->
1413 roundTo 8 stackOffset
1414 | otherwise ->
1415 stackOffset
1416 GCPLinux64ELF _ ->
1417 -- everything on the stack is 8-byte
1418 -- aligned on a 64 bit system
1419 -- (except vector status, not used now)
1420 stackOffset
1421 stackSlot = AddrRegImm sp (ImmInt stackOffset')
1422 (nGprs, nFprs, stackBytes, regs)
1423 = case gcp of
1424 GCPAIX ->
1425 case cmmTypeFormat rep of
1426 II8 -> (1, 0, 4, gprs)
1427 II16 -> (1, 0, 4, gprs)
1428 II32 -> (1, 0, 4, gprs)
1429 -- The PowerOpen ABI requires that we skip a
1430 -- corresponding number of GPRs when we use
1431 -- the FPRs.
1432 --
1433 -- E.g. for a `double` two GPRs are skipped,
1434 -- whereas for a `float` one GPR is skipped
1435 -- when parameters are assigned to
1436 -- registers.
1437 --
1438 -- The PowerOpen ABI specification can be found at
1439 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1440 FF32 -> (1, 1, 4, fprs)
1441 FF64 -> (2, 1, 8, fprs)
1442 II64 -> panic "genCCall' passArguments II64"
1443 FF80 -> panic "genCCall' passArguments FF80"
1444 GCPDarwin ->
1445 case cmmTypeFormat rep of
1446 II8 -> (1, 0, 4, gprs)
1447 II16 -> (1, 0, 4, gprs)
1448 II32 -> (1, 0, 4, gprs)
1449 -- The Darwin ABI requires that we skip a
1450 -- corresponding number of GPRs when we use
1451 -- the FPRs.
1452 FF32 -> (1, 1, 4, fprs)
1453 FF64 -> (2, 1, 8, fprs)
1454 II64 -> panic "genCCall' passArguments II64"
1455 FF80 -> panic "genCCall' passArguments FF80"
1456 GCPLinux ->
1457 case cmmTypeFormat rep of
1458 II8 -> (1, 0, 4, gprs)
1459 II16 -> (1, 0, 4, gprs)
1460 II32 -> (1, 0, 4, gprs)
1461 -- ... the SysV ABI doesn't.
1462 FF32 -> (0, 1, 4, fprs)
1463 FF64 -> (0, 1, 8, fprs)
1464 II64 -> panic "genCCall' passArguments II64"
1465 FF80 -> panic "genCCall' passArguments FF80"
1466 GCPLinux64ELF _ ->
1467 case cmmTypeFormat rep of
1468 II8 -> (1, 0, 8, gprs)
1469 II16 -> (1, 0, 8, gprs)
1470 II32 -> (1, 0, 8, gprs)
1471 II64 -> (1, 0, 8, gprs)
1472 -- The ELFv1 ABI requires that we skip a
1473 -- corresponding number of GPRs when we use
1474 -- the FPRs.
1475 FF32 -> (1, 1, 8, fprs)
1476 FF64 -> (1, 1, 8, fprs)
1477 FF80 -> panic "genCCall' passArguments FF80"
1478
1479 moveResult reduceToFF32 =
1480 case dest_regs of
1481 [] -> nilOL
1482 [dest]
1483 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1484 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1485 | isWord64 rep && target32Bit (targetPlatform dflags)
1486 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1487 MR r_dest r4]
1488 | otherwise -> unitOL (MR r_dest r3)
1489 where rep = cmmRegType dflags (CmmLocal dest)
1490 r_dest = getRegisterReg platform (CmmLocal dest)
1491 _ -> panic "genCCall' moveResult: Bad dest_regs"
1492
1493 outOfLineMachOp mop =
1494 do
1495 dflags <- getDynFlags
1496 mopExpr <- cmmMakeDynamicReference dflags CallReference $
1497 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1498 let mopLabelOrExpr = case mopExpr of
1499 CmmLit (CmmLabel lbl) -> Left lbl
1500 _ -> Right mopExpr
1501 return (mopLabelOrExpr, reduce)
1502 where
1503 (functionName, reduce) = case mop of
1504 MO_F32_Exp -> (fsLit "exp", True)
1505 MO_F32_Log -> (fsLit "log", True)
1506 MO_F32_Sqrt -> (fsLit "sqrt", True)
1507
1508 MO_F32_Sin -> (fsLit "sin", True)
1509 MO_F32_Cos -> (fsLit "cos", True)
1510 MO_F32_Tan -> (fsLit "tan", True)
1511
1512 MO_F32_Asin -> (fsLit "asin", True)
1513 MO_F32_Acos -> (fsLit "acos", True)
1514 MO_F32_Atan -> (fsLit "atan", True)
1515
1516 MO_F32_Sinh -> (fsLit "sinh", True)
1517 MO_F32_Cosh -> (fsLit "cosh", True)
1518 MO_F32_Tanh -> (fsLit "tanh", True)
1519 MO_F32_Pwr -> (fsLit "pow", True)
1520
1521 MO_F64_Exp -> (fsLit "exp", False)
1522 MO_F64_Log -> (fsLit "log", False)
1523 MO_F64_Sqrt -> (fsLit "sqrt", False)
1524
1525 MO_F64_Sin -> (fsLit "sin", False)
1526 MO_F64_Cos -> (fsLit "cos", False)
1527 MO_F64_Tan -> (fsLit "tan", False)
1528
1529 MO_F64_Asin -> (fsLit "asin", False)
1530 MO_F64_Acos -> (fsLit "acos", False)
1531 MO_F64_Atan -> (fsLit "atan", False)
1532
1533 MO_F64_Sinh -> (fsLit "sinh", False)
1534 MO_F64_Cosh -> (fsLit "cosh", False)
1535 MO_F64_Tanh -> (fsLit "tanh", False)
1536 MO_F64_Pwr -> (fsLit "pow", False)
1537
1538 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
1539
1540 MO_Memcpy _ -> (fsLit "memcpy", False)
1541 MO_Memset _ -> (fsLit "memset", False)
1542 MO_Memmove _ -> (fsLit "memmove", False)
1543
1544 MO_BSwap w -> (fsLit $ bSwapLabel w, False)
1545 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
1546 MO_Clz w -> (fsLit $ clzLabel w, False)
1547 MO_Ctz w -> (fsLit $ ctzLabel w, False)
1548 MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
1549 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
1550 MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
1551 MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
1552
1553 MO_S_QuotRem {} -> unsupported
1554 MO_U_QuotRem {} -> unsupported
1555 MO_U_QuotRem2 {} -> unsupported
1556 MO_Add2 {} -> unsupported
1557 MO_SubWordC {} -> unsupported
1558 MO_AddIntC {} -> unsupported
1559 MO_SubIntC {} -> unsupported
1560 MO_U_Mul2 {} -> unsupported
1561 MO_WriteBarrier -> unsupported
1562 MO_Touch -> unsupported
1563 (MO_Prefetch_Data _ ) -> unsupported
1564 unsupported = panic ("outOfLineCmmOp: " ++ show mop
1565 ++ " not supported")
1566
1567 -- -----------------------------------------------------------------------------
1568 -- Generating a table-branch
1569
1570 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
1571 genSwitch dflags expr targets
1572 | OSAIX <- platformOS (targetPlatform dflags)
1573 = do
1574 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1575 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1576 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1577 tmp <- getNewRegNat fmt
1578 lbl <- getNewLabelNat
1579 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1580 (tableReg,t_code) <- getSomeReg $ dynRef
1581 let code = e_code `appOL` t_code `appOL` toOL [
1582 SL fmt tmp reg (RIImm (ImmInt sha)),
1583 LD fmt tmp (AddrRegReg tableReg tmp),
1584 MTCTR tmp,
1585 BCTR ids (Just lbl)
1586 ]
1587 return code
1588
1589 | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags)
1590 = do
1591 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1592 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1593 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1594 tmp <- getNewRegNat fmt
1595 lbl <- getNewLabelNat
1596 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1597 (tableReg,t_code) <- getSomeReg $ dynRef
1598 let code = e_code `appOL` t_code `appOL` toOL [
1599 SL fmt tmp reg (RIImm (ImmInt sha)),
1600 LD fmt tmp (AddrRegReg tableReg tmp),
1601 ADD tmp tmp (RIReg tableReg),
1602 MTCTR tmp,
1603 BCTR ids (Just lbl)
1604 ]
1605 return code
1606 | otherwise
1607 = do
1608 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1609 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1610 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1611 tmp <- getNewRegNat fmt
1612 lbl <- getNewLabelNat
1613 let code = e_code `appOL` toOL [
1614 SL fmt tmp reg (RIImm (ImmInt sha)),
1615 ADDIS tmp tmp (HA (ImmCLbl lbl)),
1616 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1617 MTCTR tmp,
1618 BCTR ids (Just lbl)
1619 ]
1620 return code
1621 where (offset, ids) = switchTargetsToTable targets
1622
1623 generateJumpTableForInstr :: DynFlags -> Instr
1624 -> Maybe (NatCmmDecl CmmStatics Instr)
1625 generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
1626 let jumpTable
1627 | (gopt Opt_PIC dflags)
1628 || (not $ target32Bit $ targetPlatform dflags)
1629 = map jumpTableEntryRel ids
1630 | otherwise = map (jumpTableEntry dflags) ids
1631 where jumpTableEntryRel Nothing
1632 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
1633 jumpTableEntryRel (Just blockid)
1634 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1635 where blockLabel = mkAsmTempLabel (getUnique blockid)
1636 in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
1637 generateJumpTableForInstr _ _ = Nothing
1638
1639 -- -----------------------------------------------------------------------------
1640 -- 'condIntReg' and 'condFltReg': condition codes into registers
1641
1642 -- Turn those condition codes into integers now (when they appear on
1643 -- the right hand side of an assignment).
1644
1645 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1646
1647 condReg :: NatM CondCode -> NatM Register
1648 condReg getCond = do
1649 CondCode _ cond cond_code <- getCond
1650 dflags <- getDynFlags
1651 let
1652 code dst = cond_code
1653 `appOL` negate_code
1654 `appOL` toOL [
1655 MFCR dst,
1656 RLWINM dst dst (bit + 1) 31 31
1657 ]
1658
1659 negate_code | do_negate = unitOL (CRNOR bit bit bit)
1660 | otherwise = nilOL
1661
1662 (bit, do_negate) = case cond of
1663 LTT -> (0, False)
1664 LE -> (1, True)
1665 EQQ -> (2, False)
1666 GE -> (0, True)
1667 GTT -> (1, False)
1668
1669 NE -> (2, True)
1670
1671 LU -> (0, False)
1672 LEU -> (1, True)
1673 GEU -> (0, True)
1674 GU -> (1, False)
1675 _ -> panic "PPC.CodeGen.codeReg: no match"
1676
1677 format = archWordFormat $ target32Bit $ targetPlatform dflags
1678 return (Any format code)
1679
1680 condIntReg cond x y = condReg (condIntCode cond x y)
1681 condFltReg cond x y = condReg (condFltCode cond x y)
1682
1683
1684
1685 -- -----------------------------------------------------------------------------
1686 -- 'trivial*Code': deal with trivial instructions
1687
1688 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1689 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1690 -- Only look for constants on the right hand side, because that's
1691 -- where the generic optimizer will have put them.
1692
1693 -- Similarly, for unary instructions, we don't have to worry about
1694 -- matching an StInt as the argument, because genericOpt will already
1695 -- have handled the constant-folding.
1696
1697
1698
1699 {-
1700 Wolfgang's PowerPC version of The Rules:
1701
1702 A slightly modified version of The Rules to take advantage of the fact
1703 that PowerPC instructions work on all registers and don't implicitly
1704 clobber any fixed registers.
1705
1706 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1707
1708 * If getRegister returns Any, then the code it generates may modify only:
1709 (a) fresh temporaries
1710 (b) the destination register
1711 It may *not* modify global registers, unless the global
1712 register happens to be the destination register.
1713 It may not clobber any other registers. In fact, only ccalls clobber any
1714 fixed registers.
1715 Also, it may not modify the counter register (used by genCCall).
1716
1717 Corollary: If a getRegister for a subexpression returns Fixed, you need
1718 not move it to a fresh temporary before evaluating the next subexpression.
1719 The Fixed register won't be modified.
1720 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1721
1722 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1723 the value of the destination register.
1724 -}
1725
1726 trivialCode
1727 :: Width
1728 -> Bool
1729 -> (Reg -> Reg -> RI -> Instr)
1730 -> CmmExpr
1731 -> CmmExpr
1732 -> NatM Register
1733
1734 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1735 | Just imm <- makeImmediate rep signed y
1736 = do
1737 (src1, code1) <- getSomeReg x
1738 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1739 return (Any (intFormat rep) code)
1740
1741 trivialCode rep _ instr x y = do
1742 (src1, code1) <- getSomeReg x
1743 (src2, code2) <- getSomeReg y
1744 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1745 return (Any (intFormat rep) code)
1746
1747 shiftCode
1748 :: Width
1749 -> (Format-> Reg -> Reg -> RI -> Instr)
1750 -> CmmExpr
1751 -> CmmExpr
1752 -> NatM Register
1753 shiftCode width instr x (CmmLit (CmmInt y _))
1754 | Just imm <- makeImmediate width False y
1755 = do
1756 (src1, code1) <- getSomeReg x
1757 let format = intFormat width
1758 let code dst = code1 `snocOL` instr format dst src1 (RIImm imm)
1759 return (Any format code)
1760
1761 shiftCode width instr x y = do
1762 (src1, code1) <- getSomeReg x
1763 (src2, code2) <- getSomeReg y
1764 let format = intFormat width
1765 let code dst = code1 `appOL` code2 `snocOL` instr format dst src1 (RIReg src2)
1766 return (Any format code)
1767
1768 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
1769 -> CmmExpr -> CmmExpr -> NatM Register
1770 trivialCodeNoImm' format instr x y = do
1771 (src1, code1) <- getSomeReg x
1772 (src2, code2) <- getSomeReg y
1773 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1774 return (Any format code)
1775
1776 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
1777 -> CmmExpr -> CmmExpr -> NatM Register
1778 trivialCodeNoImm format instr x y = trivialCodeNoImm' format (instr format) x y
1779
1780
1781 trivialUCode
1782 :: Format
1783 -> (Reg -> Reg -> Instr)
1784 -> CmmExpr
1785 -> NatM Register
1786 trivialUCode rep instr x = do
1787 (src, code) <- getSomeReg x
1788 let code' dst = code `snocOL` instr dst src
1789 return (Any rep code')
1790
1791 -- There is no "remainder" instruction on the PPC, so we have to do
1792 -- it the hard way.
1793 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1794
1795 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1796 -> CmmExpr -> CmmExpr -> NatM Register
1797 remainderCode rep div x y = do
1798 dflags <- getDynFlags
1799 let mull_instr = if target32Bit $ targetPlatform dflags then MULLW
1800 else MULLD
1801 (src1, code1) <- getSomeReg x
1802 (src2, code2) <- getSomeReg y
1803 let code dst = code1 `appOL` code2 `appOL` toOL [
1804 div dst src1 src2,
1805 mull_instr dst dst (RIReg src2),
1806 SUBF dst dst src1
1807 ]
1808 return (Any (intFormat rep) code)
1809
1810 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1811 coerceInt2FP fromRep toRep x = do
1812 dflags <- getDynFlags
1813 let arch = platformArch $ targetPlatform dflags
1814 coerceInt2FP' arch fromRep toRep x
1815
1816 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
1817 coerceInt2FP' ArchPPC fromRep toRep x = do
1818 (src, code) <- getSomeReg x
1819 lbl <- getNewLabelNat
1820 itmp <- getNewRegNat II32
1821 ftmp <- getNewRegNat FF64
1822 dflags <- getDynFlags
1823 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1824 Amode addr addr_code <- getAmode D dynRef
1825 let
1826 code' dst = code `appOL` maybe_exts `appOL` toOL [
1827 LDATA (Section ReadOnlyData lbl) $ Statics lbl
1828 [CmmStaticLit (CmmInt 0x43300000 W32),
1829 CmmStaticLit (CmmInt 0x80000000 W32)],
1830 XORIS itmp src (ImmInt 0x8000),
1831 ST II32 itmp (spRel dflags 3),
1832 LIS itmp (ImmInt 0x4330),
1833 ST II32 itmp (spRel dflags 2),
1834 LD FF64 ftmp (spRel dflags 2)
1835 ] `appOL` addr_code `appOL` toOL [
1836 LD FF64 dst addr,
1837 FSUB FF64 dst ftmp dst
1838 ] `appOL` maybe_frsp dst
1839
1840 maybe_exts = case fromRep of
1841 W8 -> unitOL $ EXTS II8 src src
1842 W16 -> unitOL $ EXTS II16 src src
1843 W32 -> nilOL
1844 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1845
1846 maybe_frsp dst
1847 = case toRep of
1848 W32 -> unitOL $ FRSP dst dst
1849 W64 -> nilOL
1850 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1851
1852 return (Any (floatFormat toRep) code')
1853
1854 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
1855 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
1856 -- set right before a call and restored right after return from the call.
1857 -- So it is fine.
1858 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
1859 (src, code) <- getSomeReg x
1860 dflags <- getDynFlags
1861 let
1862 code' dst = code `appOL` maybe_exts `appOL` toOL [
1863 ST II64 src (spRel dflags 3),
1864 LD FF64 dst (spRel dflags 3),
1865 FCFID dst dst
1866 ] `appOL` maybe_frsp dst
1867
1868 maybe_exts = case fromRep of
1869 W8 -> unitOL $ EXTS II8 src src
1870 W16 -> unitOL $ EXTS II16 src src
1871 W32 -> unitOL $ EXTS II32 src src
1872 W64 -> nilOL
1873 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1874
1875 maybe_frsp dst
1876 = case toRep of
1877 W32 -> unitOL $ FRSP dst dst
1878 W64 -> nilOL
1879 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1880
1881 return (Any (floatFormat toRep) code')
1882
1883 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
1884
1885
1886 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1887 coerceFP2Int fromRep toRep x = do
1888 dflags <- getDynFlags
1889 let arch = platformArch $ targetPlatform dflags
1890 coerceFP2Int' arch fromRep toRep x
1891
1892 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
1893 coerceFP2Int' ArchPPC _ toRep x = do
1894 dflags <- getDynFlags
1895 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1896 (src, code) <- getSomeReg x
1897 tmp <- getNewRegNat FF64
1898 let
1899 code' dst = code `appOL` toOL [
1900 -- convert to int in FP reg
1901 FCTIWZ tmp src,
1902 -- store value (64bit) from FP to stack
1903 ST FF64 tmp (spRel dflags 2),
1904 -- read low word of value (high word is undefined)
1905 LD II32 dst (spRel dflags 3)]
1906 return (Any (intFormat toRep) code')
1907
1908 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
1909 dflags <- getDynFlags
1910 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
1911 (src, code) <- getSomeReg x
1912 tmp <- getNewRegNat FF64
1913 let
1914 code' dst = code `appOL` toOL [
1915 -- convert to int in FP reg
1916 FCTIDZ tmp src,
1917 -- store value (64bit) from FP to compiler word on stack
1918 ST FF64 tmp (spRel dflags 3),
1919 LD II64 dst (spRel dflags 3)]
1920 return (Any (intFormat toRep) code')
1921
1922 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
1923
1924 -- Note [.LCTOC1 in PPC PIC code]
1925 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
1926 -- to make the most of the PPC's 16-bit displacements.
1927 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
1928 -- first element will have '-32768' offset against .LCTOC1.
1929
1930 -- Note [implicit register in PPC PIC code]
1931 -- PPC generates calls by labels in assembly
1932 -- in form of:
1933 -- bl puts+32768@plt
1934 -- in this form it's not seen directly (by GHC NCG)
1935 -- that r30 (PicBaseReg) is used,
1936 -- but r30 is a required part of PLT code setup:
1937 -- puts+32768@plt:
1938 -- lwz r11,-30484(r30) ; offset in .LCTOC1
1939 -- mtctr r11
1940 -- bctr