PPC NCG: Fix float parameter passing on 64-bit.
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, (b) the type signatures, and
13 -- (c) the #if blah_TARGET_ARCH} things, the
14 -- structure should not be too overwhelming.
15
16 module PPC.CodeGen (
17 cmmTopCodeGen,
18 generateJumpTableForInstr,
19 InstrBlock
20 )
21
22 where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26 #include "../includes/MachDeps.h"
27
28 -- NCG stuff:
29 import CodeGen.Platform
30 import PPC.Instr
31 import PPC.Cond
32 import PPC.Regs
33 import CPrim
34 import NCGMonad
35 import Instruction
36 import PIC
37 import Format
38 import RegClass
39 import Reg
40 import TargetReg
41 import Platform
42
43 -- Our intermediate code:
44 import BlockId
45 import PprCmm ( pprExpr )
46 import Cmm
47 import CmmUtils
48 import CmmSwitch
49 import CLabel
50 import Hoopl
51
52 -- The rest:
53 import OrdList
54 import Outputable
55 import Unique
56 import DynFlags
57
58 import Control.Monad ( mapAndUnzipM, when )
59 import Data.Bits
60 import Data.Word
61
62 import BasicTypes
63 import FastString
64 import Util
65
66 -- -----------------------------------------------------------------------------
67 -- Top-level of the instruction selector
68
69 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
70 -- They are really trees of insns to facilitate fast appending, where a
71 -- left-to-right traversal (pre-order?) yields the insns in the correct
72 -- order.
73
74 cmmTopCodeGen
75 :: RawCmmDecl
76 -> NatM [NatCmmDecl CmmStatics Instr]
77
78 cmmTopCodeGen (CmmProc info lab live graph) = do
79 let blocks = toBlockListEntryFirst graph
80 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
81 dflags <- getDynFlags
82 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
83 tops = proc : concat statics
84 os = platformOS $ targetPlatform dflags
85 arch = platformArch $ targetPlatform dflags
86 case arch of
87 ArchPPC | os == OSAIX -> return tops
88 | otherwise -> do
89 picBaseMb <- getPicBaseMaybeNat
90 case picBaseMb of
91 Just picBase -> initializePicBase_ppc arch os picBase tops
92 Nothing -> return tops
93 ArchPPC_64 ELF_V1 -> return tops
94 -- generating function descriptor is handled in
95 -- pretty printer
96 ArchPPC_64 ELF_V2 -> return tops
97 -- generating function prologue is handled in
98 -- pretty printer
99 _ -> panic "PPC.cmmTopCodeGen: unknown arch"
100
101 cmmTopCodeGen (CmmData sec dat) = do
102 return [CmmData sec dat] -- no translation, we just use CmmStatic
103
104 basicBlockCodeGen
105 :: Block CmmNode C C
106 -> NatM ( [NatBasicBlock Instr]
107 , [NatCmmDecl CmmStatics Instr])
108
109 basicBlockCodeGen block = do
110 let (_, nodes, tail) = blockSplit block
111 id = entryLabel block
112 stmts = blockToList nodes
113 mid_instrs <- stmtsToInstrs stmts
114 tail_instrs <- stmtToInstrs tail
115 let instrs = mid_instrs `appOL` tail_instrs
116 -- code generation may introduce new basic block boundaries, which
117 -- are indicated by the NEWBLOCK instruction. We must split up the
118 -- instruction stream into basic blocks again. Also, we extract
119 -- LDATAs here too.
120 let
121 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
122
123 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
124 = ([], BasicBlock id instrs : blocks, statics)
125 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
126 = (instrs, blocks, CmmData sec dat:statics)
127 mkBlocks instr (instrs,blocks,statics)
128 = (instr:instrs, blocks, statics)
129 return (BasicBlock id top : other_blocks, statics)
130
131 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
132 stmtsToInstrs stmts
133 = do instrss <- mapM stmtToInstrs stmts
134 return (concatOL instrss)
135
136 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
137 stmtToInstrs stmt = do
138 dflags <- getDynFlags
139 case stmt of
140 CmmComment s -> return (unitOL (COMMENT s))
141 CmmTick {} -> return nilOL
142 CmmUnwind {} -> return nilOL
143
144 CmmAssign reg src
145 | isFloatType ty -> assignReg_FltCode format reg src
146 | target32Bit (targetPlatform dflags) &&
147 isWord64 ty -> assignReg_I64Code reg src
148 | otherwise -> assignReg_IntCode format reg src
149 where ty = cmmRegType dflags reg
150 format = cmmTypeFormat ty
151
152 CmmStore addr src
153 | isFloatType ty -> assignMem_FltCode format addr src
154 | target32Bit (targetPlatform dflags) &&
155 isWord64 ty -> assignMem_I64Code addr src
156 | otherwise -> assignMem_IntCode format addr src
157 where ty = cmmExprType dflags src
158 format = cmmTypeFormat ty
159
160 CmmUnsafeForeignCall target result_regs args
161 -> genCCall target result_regs args
162
163 CmmBranch id -> genBranch id
164 CmmCondBranch arg true false _ -> do
165 b1 <- genCondJump true arg
166 b2 <- genBranch false
167 return (b1 `appOL` b2)
168 CmmSwitch arg ids -> do dflags <- getDynFlags
169 genSwitch dflags arg ids
170 CmmCall { cml_target = arg } -> genJump arg
171 _ ->
172 panic "stmtToInstrs: statement should have been cps'd away"
173
174
175 --------------------------------------------------------------------------------
176 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
177 -- They are really trees of insns to facilitate fast appending, where a
178 -- left-to-right traversal yields the insns in the correct order.
179 --
180 type InstrBlock
181 = OrdList Instr
182
183
184 -- | Register's passed up the tree. If the stix code forces the register
185 -- to live in a pre-decided machine register, it comes out as @Fixed@;
186 -- otherwise, it comes out as @Any@, and the parent can decide which
187 -- register to put it in.
188 --
189 data Register
190 = Fixed Format Reg InstrBlock
191 | Any Format (Reg -> InstrBlock)
192
193
194 swizzleRegisterRep :: Register -> Format -> Register
195 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
196 swizzleRegisterRep (Any _ codefn) format = Any format codefn
197
198
199 -- | Grab the Reg for a CmmReg
200 getRegisterReg :: Platform -> CmmReg -> Reg
201
202 getRegisterReg _ (CmmLocal (LocalReg u pk))
203 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
204
205 getRegisterReg platform (CmmGlobal mid)
206 = case globalRegMaybe platform mid of
207 Just reg -> RegReal reg
208 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
209 -- By this stage, the only MagicIds remaining should be the
210 -- ones which map to a real machine register on this
211 -- platform. Hence ...
212
213 -- | Convert a BlockId to some CmmStatic data
214 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
215 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
216 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
217 where blockLabel = mkAsmTempLabel (getUnique blockid)
218
219
220
221 -- -----------------------------------------------------------------------------
222 -- General things for putting together code sequences
223
224 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
225 -- CmmExprs into CmmRegOff?
226 mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
227 mangleIndexTree dflags (CmmRegOff reg off)
228 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
229 where width = typeWidth (cmmRegType dflags reg)
230
231 mangleIndexTree _ _
232 = panic "PPC.CodeGen.mangleIndexTree: no match"
233
234 -- -----------------------------------------------------------------------------
235 -- Code gen for 64-bit arithmetic on 32-bit platforms
236
237 {-
238 Simple support for generating 64-bit code (ie, 64 bit values and 64
239 bit assignments) on 32-bit platforms. Unlike the main code generator
240 we merely shoot for generating working code as simply as possible, and
241 pay little attention to code quality. Specifically, there is no
242 attempt to deal cleverly with the fixed-vs-floating register
243 distinction; all values are generated into (pairs of) floating
244 registers, even if this would mean some redundant reg-reg moves as a
245 result. Only one of the VRegUniques is returned, since it will be
246 of the VRegUniqueLo form, and the upper-half VReg can be determined
247 by applying getHiVRegFromLo to it.
248 -}
249
250 data ChildCode64 -- a.k.a "Register64"
251 = ChildCode64
252 InstrBlock -- code
253 Reg -- the lower 32-bit temporary which contains the
254 -- result; use getHiVRegFromLo to find the other
255 -- VRegUnique. Rules of this simplified insn
256 -- selection game are therefore that the returned
257 -- Reg may be modified
258
259
260 -- | Compute an expression into a register, but
261 -- we don't mind which one it is.
262 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
263 getSomeReg expr = do
264 r <- getRegister expr
265 case r of
266 Any rep code -> do
267 tmp <- getNewRegNat rep
268 return (tmp, code tmp)
269 Fixed _ reg code ->
270 return (reg, code)
271
272 getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
273 getI64Amodes addrTree = do
274 Amode hi_addr addr_code <- getAmode D addrTree
275 case addrOffset hi_addr 4 of
276 Just lo_addr -> return (hi_addr, lo_addr, addr_code)
277 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
278 return (AddrRegImm hi_ptr (ImmInt 0),
279 AddrRegImm hi_ptr (ImmInt 4),
280 code)
281
282
283 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
284 assignMem_I64Code addrTree valueTree = do
285 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
286 ChildCode64 vcode rlo <- iselExpr64 valueTree
287 let
288 rhi = getHiVRegFromLo rlo
289
290 -- Big-endian store
291 mov_hi = ST II32 rhi hi_addr
292 mov_lo = ST II32 rlo lo_addr
293 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
294
295
296 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
297 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
298 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
299 let
300 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
301 r_dst_hi = getHiVRegFromLo r_dst_lo
302 r_src_hi = getHiVRegFromLo r_src_lo
303 mov_lo = MR r_dst_lo r_src_lo
304 mov_hi = MR r_dst_hi r_src_hi
305 return (
306 vcode `snocOL` mov_lo `snocOL` mov_hi
307 )
308
309 assignReg_I64Code _ _
310 = panic "assignReg_I64Code(powerpc): invalid lvalue"
311
312
313 iselExpr64 :: CmmExpr -> NatM ChildCode64
314 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
315 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
316 (rlo, rhi) <- getNewRegPairNat II32
317 let mov_hi = LD II32 rhi hi_addr
318 mov_lo = LD II32 rlo lo_addr
319 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
320 rlo
321
322 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
323 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
324
325 iselExpr64 (CmmLit (CmmInt i _)) = do
326 (rlo,rhi) <- getNewRegPairNat II32
327 let
328 half0 = fromIntegral (fromIntegral i :: Word16)
329 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
330 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
331 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
332
333 code = toOL [
334 LIS rlo (ImmInt half1),
335 OR rlo rlo (RIImm $ ImmInt half0),
336 LIS rhi (ImmInt half3),
337 OR rhi rhi (RIImm $ ImmInt half2)
338 ]
339 return (ChildCode64 code rlo)
340
341 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
342 ChildCode64 code1 r1lo <- iselExpr64 e1
343 ChildCode64 code2 r2lo <- iselExpr64 e2
344 (rlo,rhi) <- getNewRegPairNat II32
345 let
346 r1hi = getHiVRegFromLo r1lo
347 r2hi = getHiVRegFromLo r2lo
348 code = code1 `appOL`
349 code2 `appOL`
350 toOL [ ADDC rlo r1lo r2lo,
351 ADDE rhi r1hi r2hi ]
352 return (ChildCode64 code rlo)
353
354 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
355 ChildCode64 code1 r1lo <- iselExpr64 e1
356 ChildCode64 code2 r2lo <- iselExpr64 e2
357 (rlo,rhi) <- getNewRegPairNat II32
358 let
359 r1hi = getHiVRegFromLo r1lo
360 r2hi = getHiVRegFromLo r2lo
361 code = code1 `appOL`
362 code2 `appOL`
363 toOL [ SUBFC rlo r2lo r1lo,
364 SUBFE rhi r2hi r1hi ]
365 return (ChildCode64 code rlo)
366
367 iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
368 (expr_reg,expr_code) <- getSomeReg expr
369 (rlo, rhi) <- getNewRegPairNat II32
370 let mov_hi = LI rhi (ImmInt 0)
371 mov_lo = MR rlo expr_reg
372 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
373 rlo
374 iselExpr64 expr
375 = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
376
377
378
379 getRegister :: CmmExpr -> NatM Register
380 getRegister e = do dflags <- getDynFlags
381 getRegister' dflags e
382
383 getRegister' :: DynFlags -> CmmExpr -> NatM Register
384
385 getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
386 | OSAIX <- platformOS (targetPlatform dflags) = do
387 let code dst = toOL [ LD II32 dst tocAddr ]
388 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
389 return (Any II32 code)
390 | target32Bit (targetPlatform dflags) = do
391 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
392 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
393 reg nilOL)
394 | otherwise = return (Fixed II64 toc nilOL)
395
396 getRegister' dflags (CmmReg reg)
397 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
398 (getRegisterReg (targetPlatform dflags) reg) nilOL)
399
400 getRegister' dflags tree@(CmmRegOff _ _)
401 = getRegister' dflags (mangleIndexTree dflags tree)
402
403 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
404 -- TO_W_(x), TO_W_(x >> 32)
405
406 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
407 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
408 | target32Bit (targetPlatform dflags) = do
409 ChildCode64 code rlo <- iselExpr64 x
410 return $ Fixed II32 (getHiVRegFromLo rlo) code
411
412 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
413 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
414 | target32Bit (targetPlatform dflags) = do
415 ChildCode64 code rlo <- iselExpr64 x
416 return $ Fixed II32 (getHiVRegFromLo rlo) code
417
418 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
419 | target32Bit (targetPlatform dflags) = do
420 ChildCode64 code rlo <- iselExpr64 x
421 return $ Fixed II32 rlo code
422
423 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
424 | target32Bit (targetPlatform dflags) = do
425 ChildCode64 code rlo <- iselExpr64 x
426 return $ Fixed II32 rlo code
427
428 getRegister' dflags (CmmLoad mem pk)
429 | not (isWord64 pk) = do
430 let platform = targetPlatform dflags
431 Amode addr addr_code <- getAmode D mem
432 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
433 addr_code `snocOL` LD format dst addr
434 return (Any format code)
435 | not (target32Bit (targetPlatform dflags)) = do
436 Amode addr addr_code <- getAmode DS mem
437 let code dst = addr_code `snocOL` LD II64 dst addr
438 return (Any II64 code)
439
440 where format = cmmTypeFormat pk
441
442 -- catch simple cases of zero- or sign-extended load
443 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
444 Amode addr addr_code <- getAmode D mem
445 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
446
447 getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
448 Amode addr addr_code <- getAmode D mem
449 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
450
451 -- Note: there is no Load Byte Arithmetic instruction, so no signed case here
452
453 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
454 Amode addr addr_code <- getAmode D mem
455 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
456
457 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
458 Amode addr addr_code <- getAmode D mem
459 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
460
461 getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
462 Amode addr addr_code <- getAmode D mem
463 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
464
465 getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
466 Amode addr addr_code <- getAmode D mem
467 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
468
469 getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
470 Amode addr addr_code <- getAmode D mem
471 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
472
473 getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
474 Amode addr addr_code <- getAmode D mem
475 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
476
477 getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
478 = case mop of
479 MO_Not rep -> triv_ucode_int rep NOT
480
481 MO_F_Neg w -> triv_ucode_float w FNEG
482 MO_S_Neg w -> triv_ucode_int w NEG
483
484 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
485 MO_FF_Conv W32 W64 -> conversionNop FF64 x
486
487 MO_FS_Conv from to -> coerceFP2Int from to x
488 MO_SF_Conv from to -> coerceInt2FP from to x
489
490 MO_SS_Conv from to
491 | from == to -> conversionNop (intFormat to) x
492
493 -- narrowing is a nop: we treat the high bits as undefined
494 MO_SS_Conv W64 to
495 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit int register"
496 | otherwise -> conversionNop (intFormat to) x
497 MO_SS_Conv W32 to
498 | arch32 -> conversionNop (intFormat to) x
499 | otherwise -> case to of
500 W64 -> triv_ucode_int to (EXTS II32)
501 W16 -> conversionNop II16 x
502 W8 -> conversionNop II8 x
503 _ -> panic "PPC.CodeGen.getRegister: no match"
504 MO_SS_Conv W16 W8 -> conversionNop II8 x
505 MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8)
506 MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16)
507
508 MO_UU_Conv from to
509 | from == to -> conversionNop (intFormat to) x
510 -- narrowing is a nop: we treat the high bits as undefined
511 MO_UU_Conv W64 to
512 | arch32 -> panic "PPC.CodeGen.getRegister no 64 bit target"
513 | otherwise -> conversionNop (intFormat to) x
514 MO_UU_Conv W32 to
515 | arch32 -> conversionNop (intFormat to) x
516 | otherwise ->
517 case to of
518 W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
519 W16 -> conversionNop II16 x
520 W8 -> conversionNop II8 x
521 _ -> panic "PPC.CodeGen.getRegister: no match"
522 MO_UU_Conv W16 W8 -> conversionNop II8 x
523 MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
524 MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
525 _ -> panic "PPC.CodeGen.getRegister: no match"
526
527 where
528 triv_ucode_int width instr = trivialUCode (intFormat width) instr x
529 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
530
531 conversionNop new_format expr
532 = do e_code <- getRegister' dflags expr
533 return (swizzleRegisterRep e_code new_format)
534 arch32 = target32Bit $ targetPlatform dflags
535
536 getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
537 = case mop of
538 MO_F_Eq _ -> condFltReg EQQ x y
539 MO_F_Ne _ -> condFltReg NE x y
540 MO_F_Gt _ -> condFltReg GTT x y
541 MO_F_Ge _ -> condFltReg GE x y
542 MO_F_Lt _ -> condFltReg LTT x y
543 MO_F_Le _ -> condFltReg LE x y
544
545 MO_Eq rep -> condIntReg EQQ (extendUExpr dflags rep x)
546 (extendUExpr dflags rep y)
547 MO_Ne rep -> condIntReg NE (extendUExpr dflags rep x)
548 (extendUExpr dflags rep y)
549
550 MO_S_Gt rep -> condIntReg GTT (extendSExpr dflags rep x)
551 (extendSExpr dflags rep y)
552 MO_S_Ge rep -> condIntReg GE (extendSExpr dflags rep x)
553 (extendSExpr dflags rep y)
554 MO_S_Lt rep -> condIntReg LTT (extendSExpr dflags rep x)
555 (extendSExpr dflags rep y)
556 MO_S_Le rep -> condIntReg LE (extendSExpr dflags rep x)
557 (extendSExpr dflags rep y)
558
559 MO_U_Gt rep -> condIntReg GU (extendUExpr dflags rep x)
560 (extendUExpr dflags rep y)
561 MO_U_Ge rep -> condIntReg GEU (extendUExpr dflags rep x)
562 (extendUExpr dflags rep y)
563 MO_U_Lt rep -> condIntReg LU (extendUExpr dflags rep x)
564 (extendUExpr dflags rep y)
565 MO_U_Le rep -> condIntReg LEU (extendUExpr dflags rep x)
566 (extendUExpr dflags rep y)
567
568 MO_F_Add w -> triv_float w FADD
569 MO_F_Sub w -> triv_float w FSUB
570 MO_F_Mul w -> triv_float w FMUL
571 MO_F_Quot w -> triv_float w FDIV
572
573 -- optimize addition with 32-bit immediate
574 -- (needed for PIC)
575 MO_Add W32 ->
576 case y of
577 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
578 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
579 CmmLit lit
580 -> do
581 (src, srcCode) <- getSomeReg x
582 let imm = litToImm lit
583 code dst = srcCode `appOL` toOL [
584 ADDIS dst src (HA imm),
585 ADD dst dst (RIImm (LO imm))
586 ]
587 return (Any II32 code)
588 _ -> trivialCode W32 True ADD x y
589
590 MO_Add rep -> trivialCode rep True ADD x y
591 MO_Sub rep ->
592 case y of -- subfi ('substract from' with immediate) doesn't exist
593 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
594 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
595 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
596
597 MO_Mul rep
598 | arch32 -> trivialCode rep True MULLW x y
599 | otherwise -> trivialCode rep True MULLD x y
600
601 MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
602 MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
603
604 MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
605 MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
606
607 MO_S_Quot rep
608 | arch32 -> trivialCodeNoImm' (intFormat rep) DIVW
609 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
610 | otherwise -> trivialCodeNoImm' (intFormat rep) DIVD
611 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
612 MO_U_Quot rep
613 | arch32 -> trivialCodeNoImm' (intFormat rep) DIVWU
614 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
615 | otherwise -> trivialCodeNoImm' (intFormat rep) DIVDU
616 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
617
618 MO_S_Rem rep
619 | arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x)
620 (extendSExpr dflags rep y)
621 | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
622 (extendSExpr dflags rep y)
623 MO_U_Rem rep
624 | arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x)
625 (extendSExpr dflags rep y)
626 | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
627 (extendSExpr dflags rep y)
628
629 MO_And rep -> case y of
630 (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
631 -> do
632 (src, srcCode) <- getSomeReg x
633 let clear_mask = if imm == -4 then 2 else 3
634 fmt = intFormat rep
635 code dst = srcCode
636 `appOL` unitOL (CLRRI fmt dst src clear_mask)
637 return (Any fmt code)
638 _ -> trivialCode rep False AND x y
639 MO_Or rep -> trivialCode rep False OR x y
640 MO_Xor rep -> trivialCode rep False XOR x y
641
642 MO_Shl rep -> shiftCode rep SL x y
643 MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
644 MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
645 _ -> panic "PPC.CodeGen.getRegister: no match"
646
647 where
648 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
649 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
650
651 arch32 = target32Bit $ targetPlatform dflags
652
653 getRegister' _ (CmmLit (CmmInt i rep))
654 | Just imm <- makeImmediate rep True i
655 = let
656 code dst = unitOL (LI dst imm)
657 in
658 return (Any (intFormat rep) code)
659
660 getRegister' _ (CmmLit (CmmFloat f frep)) = do
661 lbl <- getNewLabelNat
662 dflags <- getDynFlags
663 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
664 Amode addr addr_code <- getAmode D dynRef
665 let format = floatFormat frep
666 code dst =
667 LDATA (Section ReadOnlyData lbl)
668 (Statics lbl [CmmStaticLit (CmmFloat f frep)])
669 `consOL` (addr_code `snocOL` LD format dst addr)
670 return (Any format code)
671
672 getRegister' dflags (CmmLit lit)
673 | target32Bit (targetPlatform dflags)
674 = let rep = cmmLitType dflags lit
675 imm = litToImm lit
676 code dst = toOL [
677 LIS dst (HA imm),
678 ADD dst dst (RIImm (LO imm))
679 ]
680 in return (Any (cmmTypeFormat rep) code)
681 | otherwise
682 = do lbl <- getNewLabelNat
683 dflags <- getDynFlags
684 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
685 Amode addr addr_code <- getAmode D dynRef
686 let rep = cmmLitType dflags lit
687 format = cmmTypeFormat rep
688 code dst =
689 LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
690 `consOL` (addr_code `snocOL` LD format dst addr)
691 return (Any format code)
692
693 getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
694
695 -- extend?Rep: wrap integer expression of type rep
696 -- in a conversion to II32 or II64 resp.
697 extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
698 extendSExpr dflags W32 x
699 | target32Bit (targetPlatform dflags) = x
700
701 extendSExpr dflags W64 x
702 | not (target32Bit (targetPlatform dflags)) = x
703
704 extendSExpr dflags rep x =
705 let size = if target32Bit $ targetPlatform dflags
706 then W32
707 else W64
708 in CmmMachOp (MO_SS_Conv rep size) [x]
709
710 extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
711 extendUExpr dflags W32 x
712 | target32Bit (targetPlatform dflags) = x
713 extendUExpr dflags W64 x
714 | not (target32Bit (targetPlatform dflags)) = x
715 extendUExpr dflags rep x =
716 let size = if target32Bit $ targetPlatform dflags
717 then W32
718 else W64
719 in CmmMachOp (MO_UU_Conv rep size) [x]
720
721 -- -----------------------------------------------------------------------------
722 -- The 'Amode' type: Memory addressing modes passed up the tree.
723
724 data Amode
725 = Amode AddrMode InstrBlock
726
727 {-
728 Now, given a tree (the argument to an CmmLoad) that references memory,
729 produce a suitable addressing mode.
730
731 A Rule of the Game (tm) for Amodes: use of the addr bit must
732 immediately follow use of the code part, since the code part puts
733 values in registers which the addr then refers to. So you can't put
734 anything in between, lest it overwrite some of those registers. If
735 you need to do some other computation between the code part and use of
736 the addr bit, first store the effective address from the amode in a
737 temporary, then do the other computation, and then use the temporary:
738
739 code
740 LEA amode, tmp
741 ... other computation ...
742 ... (tmp) ...
743 -}
744
745 data InstrForm = D | DS
746
747 getAmode :: InstrForm -> CmmExpr -> NatM Amode
748 getAmode inf tree@(CmmRegOff _ _)
749 = do dflags <- getDynFlags
750 getAmode inf (mangleIndexTree dflags tree)
751
752 getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
753 | Just off <- makeImmediate W32 True (-i)
754 = do
755 (reg, code) <- getSomeReg x
756 return (Amode (AddrRegImm reg off) code)
757
758
759 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
760 | Just off <- makeImmediate W32 True i
761 = do
762 (reg, code) <- getSomeReg x
763 return (Amode (AddrRegImm reg off) code)
764
765 getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
766 | Just off <- makeImmediate W64 True (-i)
767 = do
768 (reg, code) <- getSomeReg x
769 return (Amode (AddrRegImm reg off) code)
770
771
772 getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
773 | Just off <- makeImmediate W64 True i
774 = do
775 (reg, code) <- getSomeReg x
776 return (Amode (AddrRegImm reg off) code)
777
778 getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
779 | Just off <- makeImmediate W64 True (-i)
780 = do
781 (reg, code) <- getSomeReg x
782 (reg', off', code') <-
783 if i `mod` 4 == 0
784 then do return (reg, off, code)
785 else do
786 tmp <- getNewRegNat II64
787 return (tmp, ImmInt 0,
788 code `snocOL` ADD tmp reg (RIImm off))
789 return (Amode (AddrRegImm reg' off') code')
790
791 getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
792 | Just off <- makeImmediate W64 True i
793 = do
794 (reg, code) <- getSomeReg x
795 (reg', off', code') <-
796 if i `mod` 4 == 0
797 then do return (reg, off, code)
798 else do
799 tmp <- getNewRegNat II64
800 return (tmp, ImmInt 0,
801 code `snocOL` ADD tmp reg (RIImm off))
802 return (Amode (AddrRegImm reg' off') code')
803
804 -- optimize addition with 32-bit immediate
805 -- (needed for PIC)
806 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
807 = do
808 dflags <- getDynFlags
809 (src, srcCode) <- getSomeReg x
810 let imm = litToImm lit
811 case () of
812 _ | OSAIX <- platformOS (targetPlatform dflags)
813 , isCmmLabelType lit ->
814 -- HA16/LO16 relocations on labels not supported on AIX
815 return (Amode (AddrRegImm src imm) srcCode)
816 | otherwise -> do
817 tmp <- getNewRegNat II32
818 let code = srcCode `snocOL` ADDIS tmp src (HA imm)
819 return (Amode (AddrRegImm tmp (LO imm)) code)
820 where
821 isCmmLabelType (CmmLabel {}) = True
822 isCmmLabelType (CmmLabelOff {}) = True
823 isCmmLabelType (CmmLabelDiffOff {}) = True
824 isCmmLabelType _ = False
825
826 getAmode _ (CmmLit lit)
827 = do
828 dflags <- getDynFlags
829 case platformArch $ targetPlatform dflags of
830 ArchPPC -> do
831 tmp <- getNewRegNat II32
832 let imm = litToImm lit
833 code = unitOL (LIS tmp (HA imm))
834 return (Amode (AddrRegImm tmp (LO imm)) code)
835 _ -> do -- TODO: Load from TOC,
836 -- see getRegister' _ (CmmLit lit)
837 tmp <- getNewRegNat II64
838 let imm = litToImm lit
839 code = toOL [
840 LIS tmp (HIGHESTA imm),
841 OR tmp tmp (RIImm (HIGHERA imm)),
842 SL II64 tmp tmp (RIImm (ImmInt 32)),
843 ORIS tmp tmp (HA imm)
844 ]
845 return (Amode (AddrRegImm tmp (LO imm)) code)
846
847 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
848 = do
849 (regX, codeX) <- getSomeReg x
850 (regY, codeY) <- getSomeReg y
851 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
852
853 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
854 = do
855 (regX, codeX) <- getSomeReg x
856 (regY, codeY) <- getSomeReg y
857 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
858
859 getAmode _ other
860 = do
861 (reg, code) <- getSomeReg other
862 let
863 off = ImmInt 0
864 return (Amode (AddrRegImm reg off) code)
865
866
867 -- The 'CondCode' type: Condition codes passed up the tree.
868 data CondCode
869 = CondCode Bool Cond InstrBlock
870
871 -- Set up a condition code for a conditional branch.
872
873 getCondCode :: CmmExpr -> NatM CondCode
874
875 -- almost the same as everywhere else - but we need to
876 -- extend small integers to 32 bit or 64 bit first
877
878 getCondCode (CmmMachOp mop [x, y])
879 = do
880 dflags <- getDynFlags
881 case mop of
882 MO_F_Eq W32 -> condFltCode EQQ x y
883 MO_F_Ne W32 -> condFltCode NE x y
884 MO_F_Gt W32 -> condFltCode GTT x y
885 MO_F_Ge W32 -> condFltCode GE x y
886 MO_F_Lt W32 -> condFltCode LTT x y
887 MO_F_Le W32 -> condFltCode LE x y
888
889 MO_F_Eq W64 -> condFltCode EQQ x y
890 MO_F_Ne W64 -> condFltCode NE x y
891 MO_F_Gt W64 -> condFltCode GTT x y
892 MO_F_Ge W64 -> condFltCode GE x y
893 MO_F_Lt W64 -> condFltCode LTT x y
894 MO_F_Le W64 -> condFltCode LE x y
895
896 MO_Eq rep -> condIntCode EQQ (extendUExpr dflags rep x)
897 (extendUExpr dflags rep y)
898 MO_Ne rep -> condIntCode NE (extendUExpr dflags rep x)
899 (extendUExpr dflags rep y)
900
901 MO_S_Gt rep -> condIntCode GTT (extendSExpr dflags rep x)
902 (extendSExpr dflags rep y)
903 MO_S_Ge rep -> condIntCode GE (extendSExpr dflags rep x)
904 (extendSExpr dflags rep y)
905 MO_S_Lt rep -> condIntCode LTT (extendSExpr dflags rep x)
906 (extendSExpr dflags rep y)
907 MO_S_Le rep -> condIntCode LE (extendSExpr dflags rep x)
908 (extendSExpr dflags rep y)
909
910 MO_U_Gt rep -> condIntCode GU (extendSExpr dflags rep x)
911 (extendSExpr dflags rep y)
912 MO_U_Ge rep -> condIntCode GEU (extendSExpr dflags rep x)
913 (extendSExpr dflags rep y)
914 MO_U_Lt rep -> condIntCode LU (extendSExpr dflags rep x)
915 (extendSExpr dflags rep y)
916 MO_U_Le rep -> condIntCode LEU (extendSExpr dflags rep x)
917 (extendSExpr dflags rep y)
918
919 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
920
921 getCondCode _ = panic "getCondCode(2)(powerpc)"
922
923
924 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
925 -- passed back up the tree.
926
927 condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
928
929 -- optimize pointer tag checks. Operation andi. sets condition register
930 -- so cmpi ..., 0 is redundant.
931 condIntCode cond (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
932 (CmmLit (CmmInt 0 _))
933 | not $ condUnsigned cond,
934 Just src2 <- makeImmediate rep False imm
935 = do
936 (src1, code) <- getSomeReg x
937 let code' = code `snocOL` AND r0 src1 (RIImm src2)
938 return (CondCode False cond code')
939
940 condIntCode cond x (CmmLit (CmmInt y rep))
941 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
942 = do
943 (src1, code) <- getSomeReg x
944 dflags <- getDynFlags
945 let format = archWordFormat $ target32Bit $ targetPlatform dflags
946 code' = code `snocOL`
947 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
948 return (CondCode False cond code')
949
950 condIntCode cond x y = do
951 (src1, code1) <- getSomeReg x
952 (src2, code2) <- getSomeReg y
953 dflags <- getDynFlags
954 let format = archWordFormat $ target32Bit $ targetPlatform dflags
955 code' = code1 `appOL` code2 `snocOL`
956 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
957 return (CondCode False cond code')
958
959 condFltCode cond x y = do
960 (src1, code1) <- getSomeReg x
961 (src2, code2) <- getSomeReg y
962 let
963 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
964 code'' = case cond of -- twiddle CR to handle unordered case
965 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
966 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
967 _ -> code'
968 where
969 ltbit = 0 ; eqbit = 2 ; gtbit = 1
970 return (CondCode True cond code'')
971
972
973
974 -- -----------------------------------------------------------------------------
975 -- Generating assignments
976
977 -- Assignments are really at the heart of the whole code generation
978 -- business. Almost all top-level nodes of any real importance are
979 -- assignments, which correspond to loads, stores, or register
980 -- transfers. If we're really lucky, some of the register transfers
981 -- will go away, because we can use the destination register to
982 -- complete the code generation for the right hand side. This only
983 -- fails when the right hand side is forced into a fixed register
984 -- (e.g. the result of a call).
985
986 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
987 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
988
989 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
990 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
991
992 assignMem_IntCode pk addr src = do
993 (srcReg, code) <- getSomeReg src
994 Amode dstAddr addr_code <- case pk of
995 II64 -> getAmode DS addr
996 _ -> getAmode D addr
997 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
998
999 -- dst is a reg, but src could be anything
1000 assignReg_IntCode _ reg src
1001 = do
1002 dflags <- getDynFlags
1003 let dst = getRegisterReg (targetPlatform dflags) reg
1004 r <- getRegister src
1005 return $ case r of
1006 Any _ code -> code dst
1007 Fixed _ freg fcode -> fcode `snocOL` MR dst freg
1008
1009
1010
1011 -- Easy, isn't it?
1012 assignMem_FltCode = assignMem_IntCode
1013 assignReg_FltCode = assignReg_IntCode
1014
1015
1016
1017 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
1018
1019 genJump (CmmLit (CmmLabel lbl))
1020 = return (unitOL $ JMP lbl)
1021
1022 genJump tree
1023 = do
1024 dflags <- getDynFlags
1025 genJump' tree (platformToGCP (targetPlatform dflags))
1026
1027 genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
1028
1029 genJump' tree (GCPLinux64ELF 1)
1030 = do
1031 (target,code) <- getSomeReg tree
1032 return (code
1033 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1034 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1035 `snocOL` MTCTR r11
1036 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1037 `snocOL` BCTR [] Nothing)
1038
1039 genJump' tree (GCPLinux64ELF 2)
1040 = do
1041 (target,code) <- getSomeReg tree
1042 return (code
1043 `snocOL` MR r12 target
1044 `snocOL` MTCTR r12
1045 `snocOL` BCTR [] Nothing)
1046
1047 genJump' tree _
1048 = do
1049 (target,code) <- getSomeReg tree
1050 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
1051
1052 -- -----------------------------------------------------------------------------
1053 -- Unconditional branches
1054 genBranch :: BlockId -> NatM InstrBlock
1055 genBranch = return . toOL . mkJumpInstr
1056
1057
1058 -- -----------------------------------------------------------------------------
1059 -- Conditional jumps
1060
1061 {-
1062 Conditional jumps are always to local labels, so we can use branch
1063 instructions. We peek at the arguments to decide what kind of
1064 comparison to do.
1065 -}
1066
1067
1068 genCondJump
1069 :: BlockId -- the branch target
1070 -> CmmExpr -- the condition on which to branch
1071 -> NatM InstrBlock
1072
1073 genCondJump id bool = do
1074 CondCode _ cond code <- getCondCode bool
1075 return (code `snocOL` BCC cond id)
1076
1077
1078
1079 -- -----------------------------------------------------------------------------
1080 -- Generating C calls
1081
1082 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1083 -- @get_arg@, which moves the arguments to the correct registers/stack
1084 -- locations. Apart from that, the code is easy.
1085 --
1086 -- (If applicable) Do not fill the delay slots here; you will confuse the
1087 -- register allocator.
1088
1089 genCCall :: ForeignTarget -- function to call
1090 -> [CmmFormal] -- where to put the result
1091 -> [CmmActual] -- arguments (of mixed type)
1092 -> NatM InstrBlock
1093 genCCall target dest_regs argsAndHints
1094 = do dflags <- getDynFlags
1095 genCCall' dflags (platformToGCP (targetPlatform dflags))
1096 target dest_regs argsAndHints
1097
1098 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
1099 data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF !Int | GCPAIX
1100 deriving Eq
1101
1102 platformToGCP :: Platform -> GenCCallPlatform
1103 platformToGCP platform = case platformOS platform of
1104 OSLinux -> case platformArch platform of
1105 ArchPPC -> GCPLinux
1106 ArchPPC_64 ELF_V1 -> GCPLinux64ELF 1
1107 ArchPPC_64 ELF_V2 -> GCPLinux64ELF 2
1108 _ -> panic "PPC.CodeGen.platformToGCP: Unknown Linux"
1109 OSAIX -> GCPAIX
1110 OSDarwin -> GCPDarwin
1111 _ -> panic "PPC.CodeGen.platformToGCP: not defined for this OS"
1112
1113
1114 genCCall'
1115 :: DynFlags
1116 -> GenCCallPlatform
1117 -> ForeignTarget -- function to call
1118 -> [CmmFormal] -- where to put the result
1119 -> [CmmActual] -- arguments (of mixed type)
1120 -> NatM InstrBlock
1121
1122 {-
1123 The PowerPC calling convention for Darwin/Mac OS X
1124 is described in Apple's document
1125 "Inside Mac OS X - Mach-O Runtime Architecture".
1126
1127 PowerPC Linux uses the System V Release 4 Calling Convention
1128 for PowerPC. It is described in the
1129 "System V Application Binary Interface PowerPC Processor Supplement".
1130
1131 Both conventions are similar:
1132 Parameters may be passed in general-purpose registers starting at r3, in
1133 floating point registers starting at f1, or on the stack.
1134
1135 But there are substantial differences:
1136 * The number of registers used for parameter passing and the exact set of
1137 nonvolatile registers differs (see MachRegs.hs).
1138 * On Darwin, stack space is always reserved for parameters, even if they are
1139 passed in registers. The called routine may choose to save parameters from
1140 registers to the corresponding space on the stack.
1141 * On Darwin, a corresponding amount of GPRs is skipped when a floating point
1142 parameter is passed in an FPR.
1143 * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1144 starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1145 Darwin just treats an I64 like two separate II32s (high word first).
1146 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1147 4-byte aligned like everything else on Darwin.
1148 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1149 PowerPC Linux does not agree, so neither do we.
1150
1151 PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1152 64-bit PowerPC. It is specified in
1153 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9".
1154
1155 According to all conventions, the parameter area should be part of the
1156 caller's stack frame, allocated in the caller's prologue code (large enough
1157 to hold the parameter lists for all called routines). The NCG already
1158 uses the stack for register spilling, leaving 64 bytes free at the top.
1159 If we need a larger parameter area than that, we just allocate a new stack
1160 frame just before ccalling.
1161 -}
1162
1163
1164 genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
1165 = return $ unitOL LWSYNC
1166
1167 genCCall' _ _ (PrimTarget MO_Touch) _ _
1168 = return $ nilOL
1169
1170 genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
1171 = return $ nilOL
1172
1173 genCCall' dflags gcp target dest_regs args
1174 = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
1175 -- we rely on argument promotion in the codeGen
1176 do
1177 (finalStack,passArgumentsCode,usedRegs) <- passArguments
1178 (zip args argReps)
1179 allArgRegs
1180 (allFPArgRegs platform)
1181 initialStackOffset
1182 (toOL []) []
1183
1184 (labelOrExpr, reduceToFF32) <- case target of
1185 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1186 uses_pic_base_implicitly
1187 return (Left lbl, False)
1188 ForeignTarget expr _ -> do
1189 uses_pic_base_implicitly
1190 return (Right expr, False)
1191 PrimTarget mop -> outOfLineMachOp mop
1192
1193 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1194 `appOL` toc_before
1195 codeAfter = toc_after labelOrExpr `appOL` move_sp_up finalStack
1196 `appOL` moveResult reduceToFF32
1197
1198 case labelOrExpr of
1199 Left lbl -> do -- the linker does all the work for us
1200 return ( codeBefore
1201 `snocOL` BL lbl usedRegs
1202 `appOL` codeAfter)
1203 Right dyn -> do -- implement call through function pointer
1204 (dynReg, dynCode) <- getSomeReg dyn
1205 case gcp of
1206 GCPLinux64ELF 1 -> return ( dynCode
1207 `appOL` codeBefore
1208 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1209 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1210 `snocOL` MTCTR r11
1211 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1212 `snocOL` BCTRL usedRegs
1213 `appOL` codeAfter)
1214 GCPLinux64ELF 2 -> return ( dynCode
1215 `appOL` codeBefore
1216 `snocOL` MR r12 dynReg
1217 `snocOL` MTCTR r12
1218 `snocOL` BCTRL usedRegs
1219 `appOL` codeAfter)
1220 GCPAIX -> return ( dynCode
1221 -- AIX/XCOFF follows the PowerOPEN ABI
1222 -- which is quite similiar to LinuxPPC64/ELFv1
1223 `appOL` codeBefore
1224 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1225 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1226 `snocOL` MTCTR r11
1227 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1228 `snocOL` BCTRL usedRegs
1229 `appOL` codeAfter)
1230 _ -> return ( dynCode
1231 `snocOL` MTCTR dynReg
1232 `appOL` codeBefore
1233 `snocOL` BCTRL usedRegs
1234 `appOL` codeAfter)
1235 where
1236 platform = targetPlatform dflags
1237
1238 uses_pic_base_implicitly = do
1239 -- See Note [implicit register in PPC PIC code]
1240 -- on why we claim to use PIC register here
1241 when (gopt Opt_PIC dflags && target32Bit platform) $ do
1242 _ <- getPicBaseNat $ archWordFormat True
1243 return ()
1244
1245 initialStackOffset = case gcp of
1246 GCPAIX -> 24
1247 GCPDarwin -> 24
1248 GCPLinux -> 8
1249 GCPLinux64ELF 1 -> 48
1250 GCPLinux64ELF 2 -> 32
1251 _ -> panic "genCall': unknown calling convention"
1252 -- size of linkage area + size of arguments, in bytes
1253 stackDelta finalStack = case gcp of
1254 GCPAIX ->
1255 roundTo 16 $ (24 +) $ max 32 $ sum $
1256 map (widthInBytes . typeWidth) argReps
1257 GCPDarwin ->
1258 roundTo 16 $ (24 +) $ max 32 $ sum $
1259 map (widthInBytes . typeWidth) argReps
1260 GCPLinux -> roundTo 16 finalStack
1261 GCPLinux64ELF 1 ->
1262 roundTo 16 $ (48 +) $ max 64 $ sum $
1263 map (roundTo 8 . widthInBytes . typeWidth)
1264 argReps
1265 GCPLinux64ELF 2 ->
1266 roundTo 16 $ (32 +) $ max 64 $ sum $
1267 map (roundTo 8 . widthInBytes . typeWidth)
1268 argReps
1269 _ -> panic "genCall': unknown calling conv."
1270
1271 argReps = map (cmmExprType dflags) args
1272
1273 roundTo a x | x `mod` a == 0 = x
1274 | otherwise = x + a - (x `mod` a)
1275
1276 spFormat = if target32Bit platform then II32 else II64
1277
1278 move_sp_down finalStack
1279 | delta > 64 =
1280 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1281 DELTA (-delta)]
1282 | otherwise = nilOL
1283 where delta = stackDelta finalStack
1284 toc_before = case gcp of
1285 GCPLinux64ELF 1 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 40))
1286 GCPLinux64ELF 2 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 24))
1287 GCPAIX -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 20))
1288 _ -> nilOL
1289 toc_after labelOrExpr = case gcp of
1290 GCPLinux64ELF 1 -> case labelOrExpr of
1291 Left _ -> toOL [ NOP ]
1292 Right _ -> toOL [ LD spFormat toc
1293 (AddrRegImm sp
1294 (ImmInt 40))
1295 ]
1296 GCPLinux64ELF 2 -> case labelOrExpr of
1297 Left _ -> toOL [ NOP ]
1298 Right _ -> toOL [ LD spFormat toc
1299 (AddrRegImm sp
1300 (ImmInt 24))
1301 ]
1302 GCPAIX -> case labelOrExpr of
1303 Left _ -> unitOL NOP
1304 Right _ -> unitOL (LD spFormat toc
1305 (AddrRegImm sp
1306 (ImmInt 20)))
1307 _ -> nilOL
1308 move_sp_up finalStack
1309 | delta > 64 = -- TODO: fix-up stack back-chain
1310 toOL [ADD sp sp (RIImm (ImmInt delta)),
1311 DELTA 0]
1312 | otherwise = nilOL
1313 where delta = stackDelta finalStack
1314
1315
1316 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1317 passArguments ((arg,arg_ty):args) gprs fprs stackOffset
1318 accumCode accumUsed | isWord64 arg_ty
1319 && target32Bit (targetPlatform dflags) =
1320 do
1321 ChildCode64 code vr_lo <- iselExpr64 arg
1322 let vr_hi = getHiVRegFromLo vr_lo
1323
1324 case gcp of
1325 GCPAIX -> -- same as for Darwin
1326 do let storeWord vr (gpr:_) _ = MR gpr vr
1327 storeWord vr [] offset
1328 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1329 passArguments args
1330 (drop 2 gprs)
1331 fprs
1332 (stackOffset+8)
1333 (accumCode `appOL` code
1334 `snocOL` storeWord vr_hi gprs stackOffset
1335 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1336 ((take 2 gprs) ++ accumUsed)
1337 GCPDarwin ->
1338 do let storeWord vr (gpr:_) _ = MR gpr vr
1339 storeWord vr [] offset
1340 = ST II32 vr (AddrRegImm sp (ImmInt offset))
1341 passArguments args
1342 (drop 2 gprs)
1343 fprs
1344 (stackOffset+8)
1345 (accumCode `appOL` code
1346 `snocOL` storeWord vr_hi gprs stackOffset
1347 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1348 ((take 2 gprs) ++ accumUsed)
1349 GCPLinux ->
1350 do let stackOffset' = roundTo 8 stackOffset
1351 stackCode = accumCode `appOL` code
1352 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1353 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1354 regCode hireg loreg =
1355 accumCode `appOL` code
1356 `snocOL` MR hireg vr_hi
1357 `snocOL` MR loreg vr_lo
1358
1359 case gprs of
1360 hireg : loreg : regs | even (length gprs) ->
1361 passArguments args regs fprs stackOffset
1362 (regCode hireg loreg) (hireg : loreg : accumUsed)
1363 _skipped : hireg : loreg : regs ->
1364 passArguments args regs fprs stackOffset
1365 (regCode hireg loreg) (hireg : loreg : accumUsed)
1366 _ -> -- only one or no regs left
1367 passArguments args [] fprs (stackOffset'+8)
1368 stackCode accumUsed
1369 GCPLinux64ELF _ -> panic "passArguments: 32 bit code"
1370
1371 passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
1372 | reg : _ <- regs = do
1373 register <- getRegister arg
1374 let code = case register of
1375 Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1376 Any _ acode -> acode reg
1377 stackOffsetRes = case gcp of
1378 -- The Darwin ABI requires that we reserve
1379 -- stack slots for register parameters
1380 GCPDarwin -> stackOffset + stackBytes
1381 -- ... so does the PowerOpen ABI.
1382 GCPAIX -> stackOffset + stackBytes
1383 -- ... the SysV ABI 32-bit doesn't.
1384 GCPLinux -> stackOffset
1385 -- ... but SysV ABI 64-bit does.
1386 GCPLinux64ELF _ -> stackOffset + stackBytes
1387 passArguments args
1388 (drop nGprs gprs)
1389 (drop nFprs fprs)
1390 stackOffsetRes
1391 (accumCode `appOL` code)
1392 (reg : accumUsed)
1393 | otherwise = do
1394 (vr, code) <- getSomeReg arg
1395 passArguments args
1396 (drop nGprs gprs)
1397 (drop nFprs fprs)
1398 (stackOffset' + stackBytes)
1399 (accumCode `appOL` code `snocOL` ST (cmmTypeFormat rep) vr stackSlot)
1400 accumUsed
1401 where
1402 stackOffset' = case gcp of
1403 GCPDarwin ->
1404 -- stackOffset is at least 4-byte aligned
1405 -- The Darwin ABI is happy with that.
1406 stackOffset
1407 GCPAIX ->
1408 -- The 32bit PowerOPEN ABI is happy with
1409 -- 32bit-alignment as well...
1410 stackOffset
1411 GCPLinux
1412 -- ... the SysV ABI requires 8-byte
1413 -- alignment for doubles.
1414 | isFloatType rep && typeWidth rep == W64 ->
1415 roundTo 8 stackOffset
1416 | otherwise ->
1417 stackOffset
1418 GCPLinux64ELF _ ->
1419 -- Everything on the stack is mapped to
1420 -- 8-byte aligned doublewords
1421 stackOffset
1422 stackOffset''
1423 | isFloatType rep && typeWidth rep == W32 =
1424 case gcp of
1425 -- The ELF v1 ABI Section 3.2.3 requires:
1426 -- "Single precision floating point values
1427 -- are mapped to the second word in a single
1428 -- doubleword"
1429 GCPLinux64ELF 1 -> stackOffset' + 4
1430 _ -> stackOffset'
1431 | otherwise = stackOffset'
1432
1433 stackSlot = AddrRegImm sp (ImmInt stackOffset'')
1434 (nGprs, nFprs, stackBytes, regs)
1435 = case gcp of
1436 GCPAIX ->
1437 case cmmTypeFormat rep of
1438 II8 -> (1, 0, 4, gprs)
1439 II16 -> (1, 0, 4, gprs)
1440 II32 -> (1, 0, 4, gprs)
1441 -- The PowerOpen ABI requires that we skip a
1442 -- corresponding number of GPRs when we use
1443 -- the FPRs.
1444 --
1445 -- E.g. for a `double` two GPRs are skipped,
1446 -- whereas for a `float` one GPR is skipped
1447 -- when parameters are assigned to
1448 -- registers.
1449 --
1450 -- The PowerOpen ABI specification can be found at
1451 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1452 FF32 -> (1, 1, 4, fprs)
1453 FF64 -> (2, 1, 8, fprs)
1454 II64 -> panic "genCCall' passArguments II64"
1455 FF80 -> panic "genCCall' passArguments FF80"
1456 GCPDarwin ->
1457 case cmmTypeFormat rep of
1458 II8 -> (1, 0, 4, gprs)
1459 II16 -> (1, 0, 4, gprs)
1460 II32 -> (1, 0, 4, gprs)
1461 -- The Darwin ABI requires that we skip a
1462 -- corresponding number of GPRs when we use
1463 -- the FPRs.
1464 FF32 -> (1, 1, 4, fprs)
1465 FF64 -> (2, 1, 8, fprs)
1466 II64 -> panic "genCCall' passArguments II64"
1467 FF80 -> panic "genCCall' passArguments FF80"
1468 GCPLinux ->
1469 case cmmTypeFormat rep of
1470 II8 -> (1, 0, 4, gprs)
1471 II16 -> (1, 0, 4, gprs)
1472 II32 -> (1, 0, 4, gprs)
1473 -- ... the SysV ABI doesn't.
1474 FF32 -> (0, 1, 4, fprs)
1475 FF64 -> (0, 1, 8, fprs)
1476 II64 -> panic "genCCall' passArguments II64"
1477 FF80 -> panic "genCCall' passArguments FF80"
1478 GCPLinux64ELF _ ->
1479 case cmmTypeFormat rep of
1480 II8 -> (1, 0, 8, gprs)
1481 II16 -> (1, 0, 8, gprs)
1482 II32 -> (1, 0, 8, gprs)
1483 II64 -> (1, 0, 8, gprs)
1484 -- The ELFv1 ABI requires that we skip a
1485 -- corresponding number of GPRs when we use
1486 -- the FPRs.
1487 FF32 -> (1, 1, 8, fprs)
1488 FF64 -> (1, 1, 8, fprs)
1489 FF80 -> panic "genCCall' passArguments FF80"
1490
1491 moveResult reduceToFF32 =
1492 case dest_regs of
1493 [] -> nilOL
1494 [dest]
1495 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
1496 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1497 | isWord64 rep && target32Bit (targetPlatform dflags)
1498 -> toOL [MR (getHiVRegFromLo r_dest) r3,
1499 MR r_dest r4]
1500 | otherwise -> unitOL (MR r_dest r3)
1501 where rep = cmmRegType dflags (CmmLocal dest)
1502 r_dest = getRegisterReg platform (CmmLocal dest)
1503 _ -> panic "genCCall' moveResult: Bad dest_regs"
1504
1505 outOfLineMachOp mop =
1506 do
1507 dflags <- getDynFlags
1508 mopExpr <- cmmMakeDynamicReference dflags CallReference $
1509 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1510 let mopLabelOrExpr = case mopExpr of
1511 CmmLit (CmmLabel lbl) -> Left lbl
1512 _ -> Right mopExpr
1513 return (mopLabelOrExpr, reduce)
1514 where
1515 (functionName, reduce) = case mop of
1516 MO_F32_Exp -> (fsLit "exp", True)
1517 MO_F32_Log -> (fsLit "log", True)
1518 MO_F32_Sqrt -> (fsLit "sqrt", True)
1519
1520 MO_F32_Sin -> (fsLit "sin", True)
1521 MO_F32_Cos -> (fsLit "cos", True)
1522 MO_F32_Tan -> (fsLit "tan", True)
1523
1524 MO_F32_Asin -> (fsLit "asin", True)
1525 MO_F32_Acos -> (fsLit "acos", True)
1526 MO_F32_Atan -> (fsLit "atan", True)
1527
1528 MO_F32_Sinh -> (fsLit "sinh", True)
1529 MO_F32_Cosh -> (fsLit "cosh", True)
1530 MO_F32_Tanh -> (fsLit "tanh", True)
1531 MO_F32_Pwr -> (fsLit "pow", True)
1532
1533 MO_F64_Exp -> (fsLit "exp", False)
1534 MO_F64_Log -> (fsLit "log", False)
1535 MO_F64_Sqrt -> (fsLit "sqrt", False)
1536
1537 MO_F64_Sin -> (fsLit "sin", False)
1538 MO_F64_Cos -> (fsLit "cos", False)
1539 MO_F64_Tan -> (fsLit "tan", False)
1540
1541 MO_F64_Asin -> (fsLit "asin", False)
1542 MO_F64_Acos -> (fsLit "acos", False)
1543 MO_F64_Atan -> (fsLit "atan", False)
1544
1545 MO_F64_Sinh -> (fsLit "sinh", False)
1546 MO_F64_Cosh -> (fsLit "cosh", False)
1547 MO_F64_Tanh -> (fsLit "tanh", False)
1548 MO_F64_Pwr -> (fsLit "pow", False)
1549
1550 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
1551
1552 MO_Memcpy _ -> (fsLit "memcpy", False)
1553 MO_Memset _ -> (fsLit "memset", False)
1554 MO_Memmove _ -> (fsLit "memmove", False)
1555
1556 MO_BSwap w -> (fsLit $ bSwapLabel w, False)
1557 MO_PopCnt w -> (fsLit $ popCntLabel w, False)
1558 MO_Clz w -> (fsLit $ clzLabel w, False)
1559 MO_Ctz w -> (fsLit $ ctzLabel w, False)
1560 MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
1561 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
1562 MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
1563 MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
1564
1565 MO_S_QuotRem {} -> unsupported
1566 MO_U_QuotRem {} -> unsupported
1567 MO_U_QuotRem2 {} -> unsupported
1568 MO_Add2 {} -> unsupported
1569 MO_SubWordC {} -> unsupported
1570 MO_AddIntC {} -> unsupported
1571 MO_SubIntC {} -> unsupported
1572 MO_U_Mul2 {} -> unsupported
1573 MO_WriteBarrier -> unsupported
1574 MO_Touch -> unsupported
1575 (MO_Prefetch_Data _ ) -> unsupported
1576 unsupported = panic ("outOfLineCmmOp: " ++ show mop
1577 ++ " not supported")
1578
1579 -- -----------------------------------------------------------------------------
1580 -- Generating a table-branch
1581
1582 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
1583 genSwitch dflags expr targets
1584 | OSAIX <- platformOS (targetPlatform dflags)
1585 = do
1586 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1587 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1588 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1589 tmp <- getNewRegNat fmt
1590 lbl <- getNewLabelNat
1591 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1592 (tableReg,t_code) <- getSomeReg $ dynRef
1593 let code = e_code `appOL` t_code `appOL` toOL [
1594 SL fmt tmp reg (RIImm (ImmInt sha)),
1595 LD fmt tmp (AddrRegReg tableReg tmp),
1596 MTCTR tmp,
1597 BCTR ids (Just lbl)
1598 ]
1599 return code
1600
1601 | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags)
1602 = do
1603 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1604 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1605 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1606 tmp <- getNewRegNat fmt
1607 lbl <- getNewLabelNat
1608 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1609 (tableReg,t_code) <- getSomeReg $ dynRef
1610 let code = e_code `appOL` t_code `appOL` toOL [
1611 SL fmt tmp reg (RIImm (ImmInt sha)),
1612 LD fmt tmp (AddrRegReg tableReg tmp),
1613 ADD tmp tmp (RIReg tableReg),
1614 MTCTR tmp,
1615 BCTR ids (Just lbl)
1616 ]
1617 return code
1618 | otherwise
1619 = do
1620 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
1621 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
1622 sha = if target32Bit $ targetPlatform dflags then 2 else 3
1623 tmp <- getNewRegNat fmt
1624 lbl <- getNewLabelNat
1625 let code = e_code `appOL` toOL [
1626 SL fmt tmp reg (RIImm (ImmInt sha)),
1627 ADDIS tmp tmp (HA (ImmCLbl lbl)),
1628 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
1629 MTCTR tmp,
1630 BCTR ids (Just lbl)
1631 ]
1632 return code
1633 where (offset, ids) = switchTargetsToTable targets
1634
1635 generateJumpTableForInstr :: DynFlags -> Instr
1636 -> Maybe (NatCmmDecl CmmStatics Instr)
1637 generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
1638 let jumpTable
1639 | (gopt Opt_PIC dflags)
1640 || (not $ target32Bit $ targetPlatform dflags)
1641 = map jumpTableEntryRel ids
1642 | otherwise = map (jumpTableEntry dflags) ids
1643 where jumpTableEntryRel Nothing
1644 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
1645 jumpTableEntryRel (Just blockid)
1646 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
1647 where blockLabel = mkAsmTempLabel (getUnique blockid)
1648 in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
1649 generateJumpTableForInstr _ _ = Nothing
1650
1651 -- -----------------------------------------------------------------------------
1652 -- 'condIntReg' and 'condFltReg': condition codes into registers
1653
1654 -- Turn those condition codes into integers now (when they appear on
1655 -- the right hand side of an assignment).
1656
1657 condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
1658
1659 condReg :: NatM CondCode -> NatM Register
1660 condReg getCond = do
1661 CondCode _ cond cond_code <- getCond
1662 dflags <- getDynFlags
1663 let
1664 code dst = cond_code
1665 `appOL` negate_code
1666 `appOL` toOL [
1667 MFCR dst,
1668 RLWINM dst dst (bit + 1) 31 31
1669 ]
1670
1671 negate_code | do_negate = unitOL (CRNOR bit bit bit)
1672 | otherwise = nilOL
1673
1674 (bit, do_negate) = case cond of
1675 LTT -> (0, False)
1676 LE -> (1, True)
1677 EQQ -> (2, False)
1678 GE -> (0, True)
1679 GTT -> (1, False)
1680
1681 NE -> (2, True)
1682
1683 LU -> (0, False)
1684 LEU -> (1, True)
1685 GEU -> (0, True)
1686 GU -> (1, False)
1687 _ -> panic "PPC.CodeGen.codeReg: no match"
1688
1689 format = archWordFormat $ target32Bit $ targetPlatform dflags
1690 return (Any format code)
1691
1692 condIntReg cond x y = condReg (condIntCode cond x y)
1693 condFltReg cond x y = condReg (condFltCode cond x y)
1694
1695
1696
1697 -- -----------------------------------------------------------------------------
1698 -- 'trivial*Code': deal with trivial instructions
1699
1700 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1701 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1702 -- Only look for constants on the right hand side, because that's
1703 -- where the generic optimizer will have put them.
1704
1705 -- Similarly, for unary instructions, we don't have to worry about
1706 -- matching an StInt as the argument, because genericOpt will already
1707 -- have handled the constant-folding.
1708
1709
1710
1711 {-
1712 Wolfgang's PowerPC version of The Rules:
1713
1714 A slightly modified version of The Rules to take advantage of the fact
1715 that PowerPC instructions work on all registers and don't implicitly
1716 clobber any fixed registers.
1717
1718 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1719
1720 * If getRegister returns Any, then the code it generates may modify only:
1721 (a) fresh temporaries
1722 (b) the destination register
1723 It may *not* modify global registers, unless the global
1724 register happens to be the destination register.
1725 It may not clobber any other registers. In fact, only ccalls clobber any
1726 fixed registers.
1727 Also, it may not modify the counter register (used by genCCall).
1728
1729 Corollary: If a getRegister for a subexpression returns Fixed, you need
1730 not move it to a fresh temporary before evaluating the next subexpression.
1731 The Fixed register won't be modified.
1732 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1733
1734 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1735 the value of the destination register.
1736 -}
1737
1738 trivialCode
1739 :: Width
1740 -> Bool
1741 -> (Reg -> Reg -> RI -> Instr)
1742 -> CmmExpr
1743 -> CmmExpr
1744 -> NatM Register
1745
1746 trivialCode rep signed instr x (CmmLit (CmmInt y _))
1747 | Just imm <- makeImmediate rep signed y
1748 = do
1749 (src1, code1) <- getSomeReg x
1750 let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
1751 return (Any (intFormat rep) code)
1752
1753 trivialCode rep _ instr x y = do
1754 (src1, code1) <- getSomeReg x
1755 (src2, code2) <- getSomeReg y
1756 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
1757 return (Any (intFormat rep) code)
1758
1759 shiftCode
1760 :: Width
1761 -> (Format-> Reg -> Reg -> RI -> Instr)
1762 -> CmmExpr
1763 -> CmmExpr
1764 -> NatM Register
1765 shiftCode width instr x (CmmLit (CmmInt y _))
1766 | Just imm <- makeImmediate width False y
1767 = do
1768 (src1, code1) <- getSomeReg x
1769 let format = intFormat width
1770 let code dst = code1 `snocOL` instr format dst src1 (RIImm imm)
1771 return (Any format code)
1772
1773 shiftCode width instr x y = do
1774 (src1, code1) <- getSomeReg x
1775 (src2, code2) <- getSomeReg y
1776 let format = intFormat width
1777 let code dst = code1 `appOL` code2 `snocOL` instr format dst src1 (RIReg src2)
1778 return (Any format code)
1779
1780 trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
1781 -> CmmExpr -> CmmExpr -> NatM Register
1782 trivialCodeNoImm' format instr x y = do
1783 (src1, code1) <- getSomeReg x
1784 (src2, code2) <- getSomeReg y
1785 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
1786 return (Any format code)
1787
1788 trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
1789 -> CmmExpr -> CmmExpr -> NatM Register
1790 trivialCodeNoImm format instr x y = trivialCodeNoImm' format (instr format) x y
1791
1792
1793 trivialUCode
1794 :: Format
1795 -> (Reg -> Reg -> Instr)
1796 -> CmmExpr
1797 -> NatM Register
1798 trivialUCode rep instr x = do
1799 (src, code) <- getSomeReg x
1800 let code' dst = code `snocOL` instr dst src
1801 return (Any rep code')
1802
1803 -- There is no "remainder" instruction on the PPC, so we have to do
1804 -- it the hard way.
1805 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1806
1807 remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
1808 -> CmmExpr -> CmmExpr -> NatM Register
1809 remainderCode rep div x y = do
1810 dflags <- getDynFlags
1811 let mull_instr = if target32Bit $ targetPlatform dflags then MULLW
1812 else MULLD
1813 (src1, code1) <- getSomeReg x
1814 (src2, code2) <- getSomeReg y
1815 let code dst = code1 `appOL` code2 `appOL` toOL [
1816 div dst src1 src2,
1817 mull_instr dst dst (RIReg src2),
1818 SUBF dst dst src1
1819 ]
1820 return (Any (intFormat rep) code)
1821
1822 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
1823 coerceInt2FP fromRep toRep x = do
1824 dflags <- getDynFlags
1825 let arch = platformArch $ targetPlatform dflags
1826 coerceInt2FP' arch fromRep toRep x
1827
1828 coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
1829 coerceInt2FP' ArchPPC fromRep toRep x = do
1830 (src, code) <- getSomeReg x
1831 lbl <- getNewLabelNat
1832 itmp <- getNewRegNat II32
1833 ftmp <- getNewRegNat FF64
1834 dflags <- getDynFlags
1835 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
1836 Amode addr addr_code <- getAmode D dynRef
1837 let
1838 code' dst = code `appOL` maybe_exts `appOL` toOL [
1839 LDATA (Section ReadOnlyData lbl) $ Statics lbl
1840 [CmmStaticLit (CmmInt 0x43300000 W32),
1841 CmmStaticLit (CmmInt 0x80000000 W32)],
1842 XORIS itmp src (ImmInt 0x8000),
1843 ST II32 itmp (spRel dflags 3),
1844 LIS itmp (ImmInt 0x4330),
1845 ST II32 itmp (spRel dflags 2),
1846 LD FF64 ftmp (spRel dflags 2)
1847 ] `appOL` addr_code `appOL` toOL [
1848 LD FF64 dst addr,
1849 FSUB FF64 dst ftmp dst
1850 ] `appOL` maybe_frsp dst
1851
1852 maybe_exts = case fromRep of
1853 W8 -> unitOL $ EXTS II8 src src
1854 W16 -> unitOL $ EXTS II16 src src
1855 W32 -> nilOL
1856 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1857
1858 maybe_frsp dst
1859 = case toRep of
1860 W32 -> unitOL $ FRSP dst dst
1861 W64 -> nilOL
1862 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1863
1864 return (Any (floatFormat toRep) code')
1865
1866 -- On an ELF v1 Linux we use the compiler doubleword in the stack frame
1867 -- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
1868 -- set right before a call and restored right after return from the call.
1869 -- So it is fine.
1870 coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
1871 (src, code) <- getSomeReg x
1872 dflags <- getDynFlags
1873 let
1874 code' dst = code `appOL` maybe_exts `appOL` toOL [
1875 ST II64 src (spRel dflags 3),
1876 LD FF64 dst (spRel dflags 3),
1877 FCFID dst dst
1878 ] `appOL` maybe_frsp dst
1879
1880 maybe_exts = case fromRep of
1881 W8 -> unitOL $ EXTS II8 src src
1882 W16 -> unitOL $ EXTS II16 src src
1883 W32 -> unitOL $ EXTS II32 src src
1884 W64 -> nilOL
1885 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1886
1887 maybe_frsp dst
1888 = case toRep of
1889 W32 -> unitOL $ FRSP dst dst
1890 W64 -> nilOL
1891 _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
1892
1893 return (Any (floatFormat toRep) code')
1894
1895 coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
1896
1897
1898 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
1899 coerceFP2Int fromRep toRep x = do
1900 dflags <- getDynFlags
1901 let arch = platformArch $ targetPlatform dflags
1902 coerceFP2Int' arch fromRep toRep x
1903
1904 coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
1905 coerceFP2Int' ArchPPC _ toRep x = do
1906 dflags <- getDynFlags
1907 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
1908 (src, code) <- getSomeReg x
1909 tmp <- getNewRegNat FF64
1910 let
1911 code' dst = code `appOL` toOL [
1912 -- convert to int in FP reg
1913 FCTIWZ tmp src,
1914 -- store value (64bit) from FP to stack
1915 ST FF64 tmp (spRel dflags 2),
1916 -- read low word of value (high word is undefined)
1917 LD II32 dst (spRel dflags 3)]
1918 return (Any (intFormat toRep) code')
1919
1920 coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
1921 dflags <- getDynFlags
1922 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
1923 (src, code) <- getSomeReg x
1924 tmp <- getNewRegNat FF64
1925 let
1926 code' dst = code `appOL` toOL [
1927 -- convert to int in FP reg
1928 FCTIDZ tmp src,
1929 -- store value (64bit) from FP to compiler word on stack
1930 ST FF64 tmp (spRel dflags 3),
1931 LD II64 dst (spRel dflags 3)]
1932 return (Any (intFormat toRep) code')
1933
1934 coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
1935
1936 -- Note [.LCTOC1 in PPC PIC code]
1937 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
1938 -- to make the most of the PPC's 16-bit displacements.
1939 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
1940 -- first element will have '-32768' offset against .LCTOC1.
1941
1942 -- Note [implicit register in PPC PIC code]
1943 -- PPC generates calls by labels in assembly
1944 -- in form of:
1945 -- bl puts+32768@plt
1946 -- in this form it's not seen directly (by GHC NCG)
1947 -- that r30 (PicBaseReg) is used,
1948 -- but r30 is a required part of PLT code setup:
1949 -- puts+32768@plt:
1950 -- lwz r11,-30484(r30) ; offset in .LCTOC1
1951 -- mtctr r11
1952 -- bctr