Fix todo in compiler/nativeGen: Rename Size to Format
[ghc.git] / compiler / nativeGen / PPC / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
-- This is a big module, but, if you pay attention to
-- (a) the sectioning, (b) the type signatures, and
-- (c) the #if blah_TARGET_ARCH things, the
-- structure should not be too overwhelming.
15
16 module PPC.CodeGen (
17 cmmTopCodeGen,
18 generateJumpTableForInstr,
19 InstrBlock
20 )
21
22 where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26 #include "../includes/MachDeps.h"
27
28 -- NCG stuff:
29 import CodeGen.Platform
30 import PPC.Instr
31 import PPC.Cond
32 import PPC.Regs
33 import CPrim
34 import NCGMonad
35 import Instruction
36 import PIC
37 import Format
38 import RegClass
39 import Reg
40 import TargetReg
41 import Platform
42
43 -- Our intermediate code:
44 import BlockId
45 import PprCmm ( pprExpr )
46 import Cmm
47 import CmmUtils
48 import CmmSwitch
49 import CLabel
50 import Hoopl
51
52 -- The rest:
53 import OrdList
54 import Outputable
55 import Unique
56 import DynFlags
57
58 import Control.Monad ( mapAndUnzipM, when )
59 import Data.Bits
60 import Data.Word
61
62 import BasicTypes
63 import FastString
64 import Util
65
66 -- -----------------------------------------------------------------------------
67 -- Top-level of the instruction selector
68
69 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
70 -- They are really trees of insns to facilitate fast appending, where a
71 -- left-to-right traversal (pre-order?) yields the insns in the correct
72 -- order.
73
-- Translate one top-level Cmm declaration into native declarations.
-- A procedure is translated block by block, and on 32-bit PowerPC the
-- PIC base register is initialised when one is in use.  Data sections
-- are passed through untouched.
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl CmmStatics Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen
                                        (toBlockListEntryFirst graph)
  dflags <- getDynFlags
  let platform = targetPlatform dflags
      arch     = platformArch platform
      tops     = CmmProc info lab live (ListGraph (concat nat_blocks))
                   : concat statics
  case arch of
    ArchPPC_64 ELF_V1 -> return tops
      -- generating function descriptor is handled in
      -- pretty printer
    ArchPPC_64 ELF_V2 -> return tops
      -- generating function prologue is handled in
      -- pretty printer
    ArchPPC ->
      getPicBaseMaybeNat >>=
        maybe (return tops)
              (\picBase ->
                 initializePicBase_ppc arch (platformOS platform) picBase tops)
    _ -> panic "PPC.cmmTopCodeGen: unknown arch"

cmmTopCodeGen (CmmData sec dat) =
  return [CmmData sec dat]  -- no translation, we just use CmmStatic
102
-- Generate native code for a single Cmm basic block.  Returns the
-- resulting native basic blocks (the first one carries the entry label
-- of the input block) together with any static data emitted on the way.
basicBlockCodeGen
        :: Block CmmNode C C
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl CmmStatics Instr])

basicBlockCodeGen block = do
  let (_, middle, last_node) = blockSplit block
      entry_id               = entryLabel block
  mid_instrs  <- stmtsToInstrs (blockToList middle)
  tail_instrs <- stmtToInstrs last_node
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let split instr (acc, blks, stats) = case instr of
        NEWBLOCK lbl  -> ([], BasicBlock lbl acc : blks, stats)
        LDATA sec dat -> (acc, blks, CmmData sec dat : stats)
        _             -> (instr : acc, blks, stats)
      (top, other_blocks, statics) =
        foldrOL split ([], [], []) (mid_instrs `appOL` tail_instrs)
  return (BasicBlock entry_id top : other_blocks, statics)
129
-- Select instructions for each node in turn and concatenate the results.
stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts = mapM stmtToInstrs stmts >>= return . concatOL
134
-- Select instructions for a single Cmm node.  Assignments and stores are
-- dispatched on the type of the value: floating point, 64-bit integer on
-- a 32-bit target, or plain integer.
stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  let is32Bit = target32Bit (targetPlatform dflags)
  case stmt of
    CmmComment s -> return (unitOL (COMMENT s))
    CmmTick {}   -> return nilOL
    CmmUnwind {} -> return nilOL

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode format reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code reg src
      | otherwise              -> assignReg_IntCode format reg src
      where
        ty     = cmmRegType dflags reg
        format = cmmTypeFormat ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code addr src
      | otherwise              -> assignMem_IntCode format addr src
      where
        ty     = cmmExprType dflags src
        format = cmmTypeFormat ty

    CmmUnsafeForeignCall target result_regs args
      -> genCCall target result_regs args

    CmmBranch bid -> genBranch bid

    CmmCondBranch arg true false -> do
      cond_jump <- genCondJump true arg
      fallback  <- genBranch false
      return (cond_jump `appOL` fallback)

    CmmSwitch arg ids -> genSwitch dflags arg ids

    CmmCall { cml_target = arg } -> genJump arg

    _ -> panic "stmtToInstrs: statement should have been cps'd away"
171
172
173 --------------------------------------------------------------------------------
-- | An 'InstrBlock' is the instruction sequence produced by the
-- instruction selectors.  It is really a tree of instructions, which
-- makes appending fast; a left-to-right traversal yields the
-- instructions in the correct order.
--
type InstrBlock = OrdList Instr
180
181
-- | Registers passed up the tree.  If the code forces the result to
--      live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
          -- ^ the result is in the given register once the code has run
        | Any   Format (Reg -> InstrBlock)
          -- ^ the code is parameterised over the destination register
190
191
-- Replace the 'Format' recorded in a 'Register', leaving the register
-- and the code untouched.
swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep register format = case register of
  Fixed _ reg code -> Fixed format reg code
  Any _ codefn     -> Any format codefn
195
196
-- | Grab the Reg for a CmmReg.  Local registers become virtual
-- registers; global registers must map to a real machine register.
getRegisterReg :: Platform -> CmmReg -> Reg
getRegisterReg platform cmmReg = case cmmReg of
  CmmLocal (LocalReg u pk) ->
    RegVirtual (mkVirtualReg u (cmmTypeFormat pk))

  CmmGlobal mid ->
    -- By this stage, the only MagicIds remaining should be the
    -- ones which map to a real machine register on this
    -- platform.  Hence the panic when there is no mapping.
    maybe (pprPanic "getRegisterReg-memory" (ppr (CmmGlobal mid)))
          RegReal
          (globalRegMaybe platform mid)
210
-- | Convert a BlockId to some CmmStatic data.  A missing entry becomes
-- a word-sized zero; a present one becomes the block's assembler label.
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags mbBlock = CmmStaticLit $ case mbBlock of
  Nothing      -> CmmInt 0 (wordWidth dflags)
  Just blockid -> CmmLabel (mkAsmTempLabel (getUnique blockid))
216
217
218
219 -- -----------------------------------------------------------------------------
220 -- General things for putting together code sequences
221
-- Expand CmmRegOff into an explicit addition of the register and the
-- offset literal.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
-- Panics on any expression other than a CmmRegOff.
mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
mangleIndexTree dflags expr = case expr of
  CmmRegOff reg off ->
    let width  = typeWidth (cmmRegType dflags reg)
        offset = CmmLit (CmmInt (fromIntegral off) width)
    in  CmmMachOp (MO_Add width) [CmmReg reg, offset]
  _ -> panic "PPC.CodeGen.mangleIndexTree: no match"
231
232 -- -----------------------------------------------------------------------------
233 -- Code gen for 64-bit arithmetic on 32-bit platforms
234
235 {-
236 Simple support for generating 64-bit code (ie, 64 bit values and 64
237 bit assignments) on 32-bit platforms. Unlike the main code generator
238 we merely shoot for generating working code as simply as possible, and
239 pay little attention to code quality. Specifically, there is no
240 attempt to deal cleverly with the fixed-vs-floating register
241 distinction; all values are generated into (pairs of) floating
242 registers, even if this would mean some redundant reg-reg moves as a
243 result. Only one of the VRegUniques is returned, since it will be
244 of the VRegUniqueLo form, and the upper-half VReg can be determined
245 by applying getHiVRegFromLo to it.
246 -}
247
-- Result of selecting instructions for a 64-bit value on a 32-bit
-- platform (a.k.a "Register64").
data ChildCode64
   = ChildCode64
        InstrBlock      -- the code computing the value
        Reg             -- the lower 32-bit temporary which contains the
                        -- result; use getHiVRegFromLo to find the other
                        -- VRegUnique.  Rules of this simplified insn
                        -- selection game are therefore that the returned
                        -- Reg may be modified
256
257
-- | Compute an expression into a register, but
--      we don't mind which one it is.  A fresh virtual register is
--      allocated when the expression does not dictate one.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = getRegister expr >>= \register ->
  case register of
    Fixed _ reg code -> return (reg, code)
    Any rep codefn   -> do
      tmp <- getNewRegNat rep
      return (tmp, codefn tmp)
269
-- Compute addressing modes for the high and low words of a 64-bit value
-- in memory (high word first, low word four bytes later).  If the
-- addressing mode cannot be offset by 4, the address is computed into a
-- register and plain register+immediate modes are used instead.
getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
getI64Amodes addrTree = do
  Amode hi_addr addr_code <- getAmode D addrTree
  case addrOffset hi_addr 4 of
    Just lo_addr -> return (hi_addr, lo_addr, addr_code)
    Nothing      -> do
      (hi_ptr, code) <- getSomeReg addrTree
      let at off = AddrRegImm hi_ptr (ImmInt off)
      return (at 0, at 4, code)
279
280
-- Store a 64-bit value to memory on a 32-bit platform as two 32-bit
-- stores: the low word is stored first, then the high word.
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
  ChildCode64 vcode rlo         <- iselExpr64 valueTree
  let rhi    = getHiVRegFromLo rlo
      -- Big-endian store
      stores = toOL [ ST II32 rlo lo_addr
                    , ST II32 rhi hi_addr ]
  return (vcode `appOL` addr_code `appOL` stores)
292
293
-- Assign a 64-bit value to a local register on a 32-bit platform by
-- moving the low and the high halves separately.  Only local registers
-- are valid destinations; anything else panics.
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code dst valueTree = case dst of
  CmmLocal (LocalReg u_dst _) -> do
    ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
    let r_dst_lo = RegVirtual (mkVirtualReg u_dst II32)
        moves    = toOL [ MR r_dst_lo r_src_lo
                        , MR (getHiVRegFromLo r_dst_lo)
                             (getHiVRegFromLo r_src_lo) ]
    return (vcode `appOL` moves)

  _ -> panic "assignReg_I64Code(powerpc): invalid lvalue"
309
310
-- Instruction selection for 64-bit expressions on a 32-bit platform.
-- The result lives in a pair of 32-bit virtual registers; only the low
-- one is returned (see 'ChildCode64').  Supported forms: 64-bit loads,
-- local registers, integer literals, addition, subtraction and unsigned
-- widening from 32 bits.  Anything else panics.
iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
    (rlo, rhi) <- getNewRegPairNat II32
    let loads = toOL [ LD II32 rlo lo_addr
                     , LD II32 rhi hi_addr ]
    return (ChildCode64 (addr_code `appOL` loads) rlo)

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
    = return (ChildCode64 nilOL (RegVirtual (mkVirtualReg vu II32)))

iselExpr64 (CmmLit (CmmInt i _)) = do
    (rlo, rhi) <- getNewRegPairNat II32
    let -- the 16-bit slice of the literal starting at the given bit
        halfWord n = fromIntegral (fromIntegral (i `shiftR` n) :: Word16)
        code = toOL [ LIS rlo (ImmInt (halfWord 16))
                    , OR rlo rlo (RIImm (ImmInt (halfWord 0)))
                    , LIS rhi (ImmInt (halfWord 48))
                    , OR rhi rhi (RIImm (ImmInt (halfWord 32)))
                    ]
    return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
    ChildCode64 code1 r1lo <- iselExpr64 e1
    ChildCode64 code2 r2lo <- iselExpr64 e2
    (rlo, rhi) <- getNewRegPairNat II32
    let add64 = toOL [ ADDC rlo r1lo r2lo
                     , ADDE rhi (getHiVRegFromLo r1lo)
                                (getHiVRegFromLo r2lo) ]
    return (ChildCode64 (code1 `appOL` code2 `appOL` add64) rlo)

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
    ChildCode64 code1 r1lo <- iselExpr64 e1
    ChildCode64 code2 r2lo <- iselExpr64 e2
    (rlo, rhi) <- getNewRegPairNat II32
    let sub64 = toOL [ SUBFC rlo r2lo r1lo
                     , SUBFE rhi (getHiVRegFromLo r2lo)
                                 (getHiVRegFromLo r1lo) ]
    return (ChildCode64 (code1 `appOL` code2 `appOL` sub64) rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
    (expr_reg, expr_code) <- getSomeReg expr
    (rlo, rhi) <- getNewRegPairNat II32
    let widen = toOL [ MR rlo expr_reg
                     , LI rhi (ImmInt 0) ]
    return (ChildCode64 (expr_code `appOL` widen) rlo)

iselExpr64 expr
    = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
374
375
376
-- Select instructions computing an expression into a 'Register',
-- fetching the DynFlags needed by getRegister'.
getRegister :: CmmExpr -> NatM Register
getRegister e = getDynFlags >>= \dflags -> getRegister' dflags e
380
getRegister' :: DynFlags -> CmmExpr -> NatM Register

-- The PIC base: the PIC base register on 32-bit targets, the TOC
-- register otherwise.
getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
  | is32Bit   = do
      reg <- getPicBaseNat wordFormat
      return (Fixed wordFormat reg nilOL)
  | otherwise = return (Fixed II64 toc nilOL)
  where
    is32Bit    = target32Bit (targetPlatform dflags)
    wordFormat = archWordFormat is32Bit

-- Any other register is already in place; no code is needed.
getRegister' dflags (CmmReg reg)
  = return (Fixed format machineReg nilOL)
  where
    format     = cmmTypeFormat (cmmRegType dflags reg)
    machineReg = getRegisterReg (targetPlatform dflags) reg

getRegister' dflags tree@(CmmRegOff _ _)
  = getRegister' dflags (mangleIndexTree dflags tree)
396
397 -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
398 -- TO_W_(x), TO_W_(x >> 32)
399
400 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
401 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
402 | target32Bit (targetPlatform dflags) = do
403 ChildCode64 code rlo <- iselExpr64 x
404 return $ Fixed II32 (getHiVRegFromLo rlo) code
405
406 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
407 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
408 | target32Bit (targetPlatform dflags) = do
409 ChildCode64 code rlo <- iselExpr64 x
410 return $ Fixed II32 (getHiVRegFromLo rlo) code
411
412 getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
413 | target32Bit (targetPlatform dflags) = do
414 ChildCode64 code rlo <- iselExpr64 x
415 return $ Fixed II32 rlo code
416
417 getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
418 | target32Bit (targetPlatform dflags) = do
419 ChildCode64 code rlo <- iselExpr64 x
420 return $ Fixed II32 rlo code
421
-- Loads.  Anything that is not a 64-bit word is loaded using a D-form
-- address; 64-bit words are handled here only on 64-bit targets, using
-- a DS-form address.  A 64-bit load on a 32-bit target falls through
-- to the later clauses.
getRegister' dflags (CmmLoad mem pk)
 | not (isWord64 pk) = do
        Amode addr addr_code <- getAmode D mem
        let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
                       addr_code `snocOL` LD format dst addr
        return (Any format code)
 | not is32Bit = do
        Amode addr addr_code <- getAmode DS mem
        return (Any II64 (\dst -> addr_code `snocOL` LD II64 dst addr))
  where
        format   = cmmTypeFormat pk
        platform = targetPlatform dflags
        is32Bit  = target32Bit platform
435
-- catch simple cases of zero- or sign-extended load:
-- the conversion is folded into the load instruction (LD for the
-- unsigned cases, LA for the signed ones).
getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    let load dst = addr_code `snocOL` LD II8 dst addr
    return (Any II32 load)

getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    let load dst = addr_code `snocOL` LD II8 dst addr
    return (Any II64 load)

-- Note: there is no Load Byte Arithmetic instruction, so no signed case here

getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    let load dst = addr_code `snocOL` LD II16 dst addr
    return (Any II32 load)

getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    let load dst = addr_code `snocOL` LA II16 dst addr
    return (Any II32 load)

getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    let load dst = addr_code `snocOL` LD II16 dst addr
    return (Any II64 load)

getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    let load dst = addr_code `snocOL` LA II16 dst addr
    return (Any II64 load)

getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    let load dst = addr_code `snocOL` LD II32 dst addr
    return (Any II64 load)

getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
    Amode addr addr_code <- getAmode D mem
    let load dst = addr_code `snocOL` LA II32 dst addr
    return (Any II64 load)
470
-- Unary MachOps: negation, complement and the width conversions.
-- The order of the alternatives matters: guarded alternatives fall
-- through to the later, more general ones.
getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
  = case mop of
      MO_Not rep   -> intUnary rep NOT

      MO_F_Neg w   -> floatUnary w FNEG
      MO_S_Neg w   -> intUnary w NEG

      MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
      MO_FF_Conv W32 W64 -> reformat FF64

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_SS_Conv from to
        | from == to    -> reformat (intFormat to)

        -- narrowing is a nop: we treat the high bits as undefined
      MO_SS_Conv W64 to
        | is32Bit   -> panic "PPC.CodeGen.getRegister no 64 bit int register"
        | otherwise -> reformat (intFormat to)
      MO_SS_Conv W32 to
        | is32Bit   -> reformat (intFormat to)
        | otherwise -> case to of
            W64 -> intUnary to (EXTS II32)
            W16 -> reformat II16
            W8  -> reformat II8
            _   -> panic "PPC.CodeGen.getRegister: no match"
      MO_SS_Conv W16 W8 -> reformat II8
      MO_SS_Conv W8  to -> intUnary to (EXTS II8)
      MO_SS_Conv W16 to -> intUnary to (EXTS II16)

      MO_UU_Conv from to
        | from == to -> reformat (intFormat to)
        -- narrowing is a nop: we treat the high bits as undefined
      MO_UU_Conv W64 to
        | is32Bit   -> panic "PPC.CodeGen.getRegister no 64 bit target"
        | otherwise -> reformat (intFormat to)
      MO_UU_Conv W32 to
        | is32Bit   -> reformat (intFormat to)
        | otherwise ->
          case to of
           W64 -> trivialCode to False AND x (CmmLit (CmmInt 4294967295 W64))
           W16 -> reformat II16
           W8  -> reformat II8
           _   -> panic "PPC.CodeGen.getRegister: no match"
      MO_UU_Conv W16 W8 -> reformat II8
      MO_UU_Conv W8 to  -> trivialCode to False AND x (CmmLit (CmmInt 255 W32))
      MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32))
      _ -> panic "PPC.CodeGen.getRegister: no match"

    where
        -- apply a one-operand instruction to x at the given width
        intUnary   width instr = trivialUCode (intFormat   width) instr x
        floatUnary width instr = trivialUCode (floatFormat width) instr x

        -- a conversion that needs no code: compute x and merely relabel
        -- the format of the resulting register
        reformat new_format
            = do e_code <- getRegister' dflags x
                 return (swizzleRegisterRep e_code new_format)

        is32Bit = target32Bit $ targetPlatform dflags
529
-- Dyadic MachOps: comparisons, arithmetic, logic and shifts.
--
-- Sub-word integer operands are widened to the native word width
-- before any operation whose result depends on the high bits
-- (comparisons, division, remainder, right shifts): signed operations
-- sign-extend with extendSExpr, unsigned operations zero-extend with
-- extendUExpr.
getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
  = case mop of
      MO_F_Eq _ -> condFltReg EQQ x y
      MO_F_Ne _ -> condFltReg NE  x y
      MO_F_Gt _ -> condFltReg GTT x y
      MO_F_Ge _ -> condFltReg GE  x y
      MO_F_Lt _ -> condFltReg LTT x y
      MO_F_Le _ -> condFltReg LE  x y

      MO_Eq rep -> condIntReg EQQ  (extendUExpr dflags rep x)
                                   (extendUExpr dflags rep y)
      MO_Ne rep -> condIntReg NE   (extendUExpr dflags rep x)
                                   (extendUExpr dflags rep y)

      MO_S_Gt rep -> condIntReg GTT  (extendSExpr dflags rep x)
                                     (extendSExpr dflags rep y)
      MO_S_Ge rep -> condIntReg GE   (extendSExpr dflags rep x)
                                     (extendSExpr dflags rep y)
      MO_S_Lt rep -> condIntReg LTT  (extendSExpr dflags rep x)
                                     (extendSExpr dflags rep y)
      MO_S_Le rep -> condIntReg LE   (extendSExpr dflags rep x)
                                     (extendSExpr dflags rep y)

      MO_U_Gt rep -> condIntReg GU   (extendUExpr dflags rep x)
                                     (extendUExpr dflags rep y)
      MO_U_Ge rep -> condIntReg GEU  (extendUExpr dflags rep x)
                                     (extendUExpr dflags rep y)
      MO_U_Lt rep -> condIntReg LU   (extendUExpr dflags rep x)
                                     (extendUExpr dflags rep y)
      MO_U_Le rep -> condIntReg LEU  (extendUExpr dflags rep x)
                                     (extendUExpr dflags rep y)

      MO_F_Add w  -> triv_float w FADD
      MO_F_Sub w  -> triv_float w FSUB
      MO_F_Mul w  -> triv_float w FMUL
      MO_F_Quot w -> triv_float w FDIV

         -- optimize addition with 32-bit immediate
         -- (needed for PIC)
      MO_Add W32 ->
        case y of
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
          CmmLit lit
            -> do
                (src, srcCode) <- getSomeReg x
                let imm = litToImm lit
                    -- add the high-adjusted half first, then the low half
                    code dst = srcCode `appOL` toOL [
                                    ADDIS dst src (HA imm),
                                    ADD dst dst (RIImm (LO imm))
                                ]
                return (Any II32 code)
          _ -> trivialCode W32 True ADD x y

      MO_Add rep -> trivialCode rep True ADD x y
      MO_Sub rep ->
        case y of    -- subfi ('subtract from' with immediate) doesn't exist
          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
          _ -> trivialCodeNoImm' (intFormat rep) SUBF y x

      MO_Mul rep
       | arch32    -> trivialCode rep True MULLW x y
       | otherwise -> trivialCode rep True MULLD x y

      MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
      MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y

      MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
      MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"

      MO_S_Quot rep
       | arch32     -> trivialCodeNoImm' (intFormat rep) DIVW
                (extendSExpr dflags rep x) (extendSExpr dflags rep y)
       | otherwise  -> trivialCodeNoImm' (intFormat rep) DIVD
                (extendSExpr dflags rep x) (extendSExpr dflags rep y)
      MO_U_Quot rep
       | arch32     -> trivialCodeNoImm' (intFormat rep) DIVWU
                (extendUExpr dflags rep x) (extendUExpr dflags rep y)
       | otherwise  -> trivialCodeNoImm' (intFormat rep) DIVDU
                (extendUExpr dflags rep x) (extendUExpr dflags rep y)

      MO_S_Rem rep
       | arch32    -> remainderCode rep DIVW (extendSExpr dflags rep x)
                                             (extendSExpr dflags rep y)
       | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
                                             (extendSExpr dflags rep y)
      -- The unsigned remainder divides with DIVWU/DIVDU, so its
      -- operands must be zero-extended, exactly as for MO_U_Quot.
      -- Sign-extending a sub-word operand whose top bit is set would
      -- feed a different unsigned value into the division.
      MO_U_Rem rep
       | arch32    -> remainderCode rep DIVWU (extendUExpr dflags rep x)
                                              (extendUExpr dflags rep y)
       | otherwise -> remainderCode rep DIVDU (extendUExpr dflags rep x)
                                              (extendUExpr dflags rep y)

      MO_And rep   -> trivialCode rep False AND x y
      MO_Or rep    -> trivialCode rep False OR x y
      MO_Xor rep   -> trivialCode rep False XOR x y

      MO_Shl rep   -> shiftCode rep SL x y
      MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
      MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
      _         -> panic "PPC.CodeGen.getRegister: no match"

  where
    -- three-register floating point instruction applied to x and y
    triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
    triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y

    arch32 = target32Bit $ targetPlatform dflags
637
-- Integer literal that fits a signed immediate: a single LI.
getRegister' _ (CmmLit (CmmInt i rep))
  | Just imm <- makeImmediate rep True i
  = return (Any (intFormat rep) (\dst -> unitOL (LI dst imm)))

-- Floating point literal: emitted as read-only data and loaded from there.
getRegister' _ (CmmLit (CmmFloat f frep)) = do
    lbl <- getNewLabelNat
    dflags <- getDynFlags
    dynRef <- cmmMakeDynamicReference dflags DataReference lbl
    Amode addr addr_code <- getAmode D dynRef
    let format   = floatFormat frep
        constant = LDATA ReadOnlyData
                         (Statics lbl [CmmStaticLit (CmmFloat f frep)])
        code dst = constant `consOL` (addr_code `snocOL` LD format dst addr)
    return (Any format code)

-- Any other literal: built from its high and low halves on 32-bit
-- targets, loaded from read-only data otherwise.
getRegister' dflags (CmmLit lit)
  | target32Bit (targetPlatform dflags)
  = let imm      = litToImm lit
        format   = cmmTypeFormat (cmmLitType dflags lit)
        code dst = toOL [ LIS dst (HA imm)
                        , ADD dst dst (RIImm (LO imm))
                        ]
    in  return (Any format code)
  | otherwise
  = do lbl <- getNewLabelNat
       dflags' <- getDynFlags
       dynRef <- cmmMakeDynamicReference dflags' DataReference lbl
       Amode addr addr_code <- getAmode D dynRef
       let format   = cmmTypeFormat (cmmLitType dflags' lit)
           constant = LDATA ReadOnlyData (Statics lbl [CmmStaticLit lit])
           code dst = constant `consOL` (addr_code `snocOL` LD format dst addr)
       return (Any format code)

getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
680
681 -- extend?Rep: wrap integer expression of type rep
682 -- in a conversion to II32 or II64 resp.
683 extendSExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
684 extendSExpr dflags W32 x
685 | target32Bit (targetPlatform dflags) = x
686
687 extendSExpr dflags W64 x
688 | not (target32Bit (targetPlatform dflags)) = x
689
690 extendSExpr dflags rep x =
691 let size = if target32Bit $ targetPlatform dflags
692 then W32
693 else W64
694 in CmmMachOp (MO_SS_Conv rep size) [x]
695
-- Zero-extending variant: wrap an integer expression of width rep in an
-- unsigned conversion to the native word width.  An expression that
-- already has the native word width is returned unchanged.
extendUExpr :: DynFlags -> Width -> CmmExpr -> CmmExpr
extendUExpr dflags rep x
  | rep == wordW = x
  | otherwise    = CmmMachOp (MO_UU_Conv rep wordW) [x]
  where
    wordW = if target32Bit (targetPlatform dflags) then W32 else W64
706
707 -- -----------------------------------------------------------------------------
708 -- The 'Amode' type: Memory addressing modes passed up the tree.
709
-- An addressing mode together with the code that must run immediately
-- before the address is used.
data Amode = Amode AddrMode InstrBlock
712
713 {-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.
716
717 A Rule of the Game (tm) for Amodes: use of the addr bit must
718 immediately follow use of the code part, since the code part puts
719 values in registers which the addr then refers to. So you can't put
720 anything in between, lest it overwrite some of those registers. If
721 you need to do some other computation between the code part and use of
722 the addr bit, first store the effective address from the amode in a
723 temporary, then do the other computation, and then use the temporary:
724
725 code
726 LEA amode, tmp
727 ... other computation ...
728 ... (tmp) ...
729 -}
730
-- The instruction form an address will be used with.  For 'DS',
-- getAmode only hands back an immediate offset that is a multiple of 4;
-- other offsets are first added into a temporary register.
data InstrForm
        = D
        | DS
732
-- Produce an addressing mode for an address expression, to be used with
-- an instruction of the given form.
getAmode :: InstrForm -> CmmExpr -> NatM Amode
getAmode inf tree@(CmmRegOff _ _)
  = getDynFlags >>= \dflags -> getAmode inf (mangleIndexTree dflags tree)

-- register +/- small constant, 32-bit
getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W32 True (-i)
  = getSomeReg x >>= \(reg, code) ->
      return (Amode (AddrRegImm reg off) code)


getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W32 True i
  = getSomeReg x >>= \(reg, code) ->
      return (Amode (AddrRegImm reg off) code)

-- register +/- small constant, 64-bit, D-form
getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True (-i)
  = getSomeReg x >>= \(reg, code) ->
      return (Amode (AddrRegImm reg off) code)


getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True i
  = getSomeReg x >>= \(reg, code) ->
      return (Amode (AddrRegImm reg off) code)
763
-- register +/- small constant, 64-bit, DS-form: the offset is used
-- directly only when it is a multiple of 4; otherwise it is added into
-- a fresh temporary, which is then addressed with offset 0.
getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True (-i)
  = do
        (reg, code) <- getSomeReg x
        if i `mod` 4 == 0
          then return (Amode (AddrRegImm reg off) code)
          else do
            tmp <- getNewRegNat II64
            return (Amode (AddrRegImm tmp (ImmInt 0))
                          (code `snocOL` ADD tmp reg (RIImm off)))

getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
  | Just off <- makeImmediate W64 True i
  = do
        (reg, code) <- getSomeReg x
        if i `mod` 4 == 0
          then return (Amode (AddrRegImm reg off) code)
          else do
            tmp <- getNewRegNat II64
            return (Amode (AddrRegImm tmp (ImmInt 0))
                          (code `snocOL` ADD tmp reg (RIImm off)))
789
790 -- optimize addition with 32-bit immediate
791 -- (needed for PIC)
792 getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
793 = do
794 tmp <- getNewRegNat II32
795 (src, srcCode) <- getSomeReg x
796 let imm = litToImm lit
797 code = srcCode `snocOL` ADDIS tmp src (HA imm)
798 return (Amode (AddrRegImm tmp (LO imm)) code)
799
800 getAmode _ (CmmLit lit)
801 = do
802 dflags <- getDynFlags
803 case platformArch $ targetPlatform dflags of
804 ArchPPC -> do
805 tmp <- getNewRegNat II32
806 let imm = litToImm lit
807 code = unitOL (LIS tmp (HA imm))
808 return (Amode (AddrRegImm tmp (LO imm)) code)
809 _ -> do -- TODO: Load from TOC,
810 -- see getRegister' _ (CmmLit lit)
811 tmp <- getNewRegNat II64
812 let imm = litToImm lit
813 code = toOL [
814 LIS tmp (HIGHESTA imm),
815 OR tmp tmp (RIImm (HIGHERA imm)),
816 SL II64 tmp tmp (RIImm (ImmInt 32)),
817 ORIS tmp tmp (HA imm)
818 ]
819 return (Amode (AddrRegImm tmp (LO imm)) code)
820
821 getAmode _ (CmmMachOp (MO_Add W32) [x, y])
822 = do
823 (regX, codeX) <- getSomeReg x
824 (regY, codeY) <- getSomeReg y
825 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
826
827 getAmode _ (CmmMachOp (MO_Add W64) [x, y])
828 = do
829 (regX, codeX) <- getSomeReg x
830 (regY, codeY) <- getSomeReg y
831 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
832
833 getAmode _ other
834 = do
835 (reg, code) <- getSomeReg other
836 let
837 off = ImmInt 0
838 return (Amode (AddrRegImm reg off) code)
839
840
841 -- The 'CondCode' type: Condition codes passed up the tree.
-- Fields: the flag is True for the floating point comparisons built by
-- condFltCode and False for the integer ones built by condIntCode; then
-- the condition to test; then the code that sets the condition register.
data CondCode = CondCode Bool Cond InstrBlock
844
-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- almost the same as everywhere else - but we need to
-- extend small integers to 32 bit or 64 bit first
--
-- NOTE(review): the unsigned orderings (GU/GEU/LU/LEU) sign-extend their
-- operands here, whereas getRegister' zero-extends them for the same
-- MachOps.  Sign extension keeps the unsigned order of two operands of
-- equal width, so the outcome should agree -- confirm before changing.

getCondCode (CmmMachOp mop [x, y])
  = do
    dflags <- getDynFlags
    let float cond       = condFltCode cond x y
        zeroExt cond rep = condIntCode cond (extendUExpr dflags rep x)
                                            (extendUExpr dflags rep y)
        signExt cond rep = condIntCode cond (extendSExpr dflags rep x)
                                            (extendSExpr dflags rep y)
    case mop of
      MO_F_Eq W32 -> float EQQ
      MO_F_Ne W32 -> float NE
      MO_F_Gt W32 -> float GTT
      MO_F_Ge W32 -> float GE
      MO_F_Lt W32 -> float LTT
      MO_F_Le W32 -> float LE

      MO_F_Eq W64 -> float EQQ
      MO_F_Ne W64 -> float NE
      MO_F_Gt W64 -> float GTT
      MO_F_Ge W64 -> float GE
      MO_F_Lt W64 -> float LTT
      MO_F_Le W64 -> float LE

      MO_Eq rep -> zeroExt EQQ rep
      MO_Ne rep -> zeroExt NE rep

      MO_S_Gt rep -> signExt GTT rep
      MO_S_Ge rep -> signExt GE rep
      MO_S_Lt rep -> signExt LTT rep
      MO_S_Le rep -> signExt LE rep

      MO_U_Gt rep -> signExt GU rep
      MO_U_Ge rep -> signExt GEU rep
      MO_U_Lt rep -> signExt LU rep
      MO_U_Le rep -> signExt LEU rep

      _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)

getCondCode _ = panic "getCondCode(2)(powerpc)"
896
897
898
899 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
900 -- passed back up the tree.
901
condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

--  ###FIXME: I16 and I8!
-- TODO: Is this still an issue? All arguments are extend?Expr'd.

-- Integer comparison.  When the right operand is a literal that
-- 'makeImmediate' accepts (signed for signed conditions, unsigned
-- otherwise) it is encoded directly in the compare instruction;
-- in every other case both operands are first brought into registers.
-- The compare itself is always done at the architecture word format,
-- using CMPL for unsigned conditions and CMP for signed ones.
condIntCode cond x (CmmLit (CmmInt y rep))
  | Just imm <- makeImmediate rep (not (condUnsigned cond)) y
  = do
        (lhs, lhsCode) <- getSomeReg x
        dflags <- getDynFlags
        let fmt = archWordFormat (target32Bit (targetPlatform dflags))
            cmp | condUnsigned cond = CMPL
                | otherwise         = CMP
        return (CondCode False cond (lhsCode `snocOL` cmp fmt lhs (RIImm imm)))

condIntCode cond x y = do
    (lhs, lhsCode) <- getSomeReg x
    (rhs, rhsCode) <- getSomeReg y
    dflags <- getDynFlags
    let fmt = archWordFormat (target32Bit (targetPlatform dflags))
        cmp | condUnsigned cond = CMPL
            | otherwise         = CMP
        operands = lhsCode `appOL` rhsCode
    return (CondCode False cond (operands `snocOL` cmp fmt lhs (RIReg rhs)))
924
-- Floating point comparison: evaluate both operands into registers and
-- emit an FCMP.  For GE and LE one extra CRNOR is appended that
-- rewrites a condition register bit from the other two, so that the
-- unordered case is handled (see the bit numbers below).
condFltCode cond x y = do
    (lhs, lhsCode) <- getSomeReg x
    (rhs, rhsCode) <- getSomeReg y
    let compared = lhsCode `appOL` rhsCode `snocOL` FCMP lhs rhs
    return (CondCode True cond (twiddle compared))
  where
    -- twiddle CR to handle unordered case
    twiddle instrs = case cond of
        GE -> instrs `snocOL` CRNOR ltbit eqbit gtbit
        LE -> instrs `snocOL` CRNOR gtbit eqbit ltbit
        _  -> instrs

    -- condition register bit numbers as used by this module
    ltbit = 0
    gtbit = 1
    eqbit = 2
937
938
939
940 -- -----------------------------------------------------------------------------
941 -- Generating assignments
942
943 -- Assignments are really at the heart of the whole code generation
944 -- business. Almost all top-level nodes of any real importance are
945 -- assignments, which correspond to loads, stores, or register
946 -- transfers. If we're really lucky, some of the register transfers
947 -- will go away, because we can use the destination register to
948 -- complete the code generation for the right hand side. This only
949 -- fails when the right hand side is forced into a fixed register
950 -- (e.g. the result of a call).
951
assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock

-- Store the value of @src@ to the memory location @addr@ using a
-- store of format @pk@.  The value is computed first, then the address;
-- 64-bit stores request a DS-form addressing mode, everything else a
-- D-form one.
assignMem_IntCode pk addr src = do
    (valReg, valCode) <- getSomeReg src
    Amode target targetCode <- getAmode addrForm addr
    return (valCode `appOL` targetCode `snocOL` ST pk valReg target)
  where
    addrForm = case pk of
        II64 -> DS
        _    -> D
964
-- The destination is always a register; the source is an arbitrary
-- expression.  If the source can be computed into any register we ask
-- for it directly in the destination, otherwise we copy it over from
-- the fixed register it lives in.  The format argument is ignored.
assignReg_IntCode _ reg src = do
    dflags <- getDynFlags
    let dst = getRegisterReg (targetPlatform dflags) reg
        into (Any _ code)         = code dst
        into (Fixed _ freg fcode) = fcode `snocOL` MR dst freg
    register <- getRegister src
    return (into register)
974
975
976
-- Floating point assignments need no special treatment: they are
-- handled by exactly the same code as the integer ones.
assignMem_FltCode pk addr src = assignMem_IntCode pk addr src
assignReg_FltCode pk reg src  = assignReg_IntCode pk reg src
980
981
982
-- | Generate an unconditional jump to the given target expression.
-- A jump to a literal label becomes a single JMP; anything else is
-- dispatched to 'genJump'' with the calling convention selected from
-- the target OS and architecture.
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock

genJump (CmmLit (CmmLabel lbl)) = return (unitOL (JMP lbl))

genJump tree = do
    dflags <- getDynFlags
    let platform = targetPlatform dflags
    case (platformOS platform, platformArch platform) of
        (OSLinux,  ArchPPC)           -> genJump' tree GCPLinux
        (OSLinux,  ArchPPC_64 ELF_V1) -> genJump' tree (GCPLinux64ELF 1)
        (OSLinux,  ArchPPC_64 ELF_V2) -> genJump' tree (GCPLinux64ELF 2)
        (OSLinux,  _) -> panic "PPC.CodeGen.genJump: Unknown Linux"
        (OSDarwin, _) -> genJump' tree GCPDarwin
        _             -> panic "PPC.CodeGen.genJump: not defined for this os"
1000
1001
-- | Jump to a computed address through the count register.
-- The target expression is evaluated into a register first; the
-- instructions that follow depend on the calling convention.
genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock

genJump' tree gcp = do
    (target, code) <- getSomeReg tree
    return (code `appOL` toOL (viaCTR target))
  where
    viaCTR target = case gcp of
        -- ELF v1: three doublewords are loaded from offsets 0, 8 and 16
        -- of the target (into r11, toc and r11 again); the first one is
        -- the address that is jumped to.
        GCPLinux64ELF 1 ->
            [ LD II64 r11 (AddrRegImm target (ImmInt 0))
            , LD II64 toc (AddrRegImm target (ImmInt 8))
            , MTCTR r11
            , LD II64 r11 (AddrRegImm target (ImmInt 16))
            , BCTR [] Nothing
            ]
        -- ELF v2: the target address is copied into r12 before jumping.
        GCPLinux64ELF 2 ->
            [ MR r12 target
            , MTCTR r12
            , BCTR [] Nothing
            ]
        -- Everything else: jump straight to the address in the register.
        _ ->
            [ MTCTR target
            , BCTR [] Nothing
            ]
1026
1027 -- -----------------------------------------------------------------------------
1028 -- Unconditional branches
-- | Unconditional branch to a local block, built from 'mkJumpInstr'.
genBranch :: BlockId -> NatM InstrBlock
genBranch blockId = return (toOL (mkJumpInstr blockId))
1031
1032
1033 -- -----------------------------------------------------------------------------
1034 -- Conditional jumps
1035
1036 {-
1037 Conditional jumps are always to local labels, so we can use branch
1038 instructions. We peek at the arguments to decide what kind of
1039 comparison to do.
1040 -}
1041
1042
-- | Conditional branch: compute the condition code for the boolean
-- expression, then append a BCC on that condition to the target block.
genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump target condition =
    getCondCode condition >>= \(CondCode _ cond code) ->
        return (code `snocOL` BCC cond target)
1051
1052
1053
1054 -- -----------------------------------------------------------------------------
1055 -- Generating C calls
1056
1057 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1058 -- @get_arg@, which moves the arguments to the correct registers/stack
1059 -- locations. Apart from that, the code is easy.
1060 --
1061 -- (If applicable) Do not fill the delay slots here; you will confuse the
1062 -- register allocator.
1063
-- | Generate code for a foreign or primitive call.  This only selects
-- the calling convention from the target OS and architecture; all real
-- work happens in 'genCCall''.
genCCall :: ForeignTarget      -- function to call
         -> [CmmFormal]        -- where to put the result
         -> [CmmActual]        -- arguments (of mixed type)
         -> NatM InstrBlock
genCCall target dest_regs argsAndHints = do
    dflags <- getDynFlags
    let platform = targetPlatform dflags
        callWith gcp = genCCall' dflags gcp target dest_regs argsAndHints
    case (platformOS platform, platformArch platform) of
        (OSLinux,  ArchPPC)           -> callWith GCPLinux
        (OSLinux,  ArchPPC_64 ELF_V1) -> callWith (GCPLinux64ELF 1)
        (OSLinux,  ArchPPC_64 ELF_V2) -> callWith (GCPLinux64ELF 2)
        (OSLinux,  _) -> panic "PPC.CodeGen.genCCall: Unknown Linux"
        (OSDarwin, _) -> callWith GCPDarwin
        _             -> panic "PPC.CodeGen.genCCall: not defined for this os"
1082
-- | The calling conventions distinguished by the call and jump
-- generators in this module.
data GenCCallPlatform
    = GCPLinux            -- ^ Linux on 32-bit PowerPC
    | GCPDarwin           -- ^ Darwin
    | GCPLinux64ELF Int   -- ^ Linux on 64-bit PowerPC; the 'Int' is the
                          --   ELF ABI version (1 or 2)
1084
-- | Generate a C call for one specific calling convention.
--
-- A few primitive targets are expanded inline (a write barrier becomes
-- LWSYNC; touch and prefetch produce no code).  Everything else is
-- compiled as: optionally allocate a larger stack frame, move the
-- arguments into registers / stack slots, save the TOC (64-bit ELF),
-- branch-and-link to the callee, restore the TOC, release the frame and
-- move the result into the destination register.
genCCall'
    :: DynFlags
    -> GenCCallPlatform
    -> ForeignTarget      -- function to call
    -> [CmmFormal]        -- where to put the result
    -> [CmmActual]        -- arguments (of mixed type)
    -> NatM InstrBlock

{-
    The PowerPC calling convention for Darwin/Mac OS X
    is described in Apple's document
    "Inside Mac OS X - Mach-O Runtime Architecture".

    PowerPC Linux uses the System V Release 4 Calling Convention
    for PowerPC. It is described in the
    "System V Application Binary Interface PowerPC Processor Supplement".

    Both conventions are similar:
    Parameters may be passed in general-purpose registers starting at r3, in
    floating point registers starting at f1, or on the stack.

    But there are substantial differences:
    * The number of registers used for parameter passing and the exact set of
      nonvolatile registers differs (see MachRegs.hs).
    * On Darwin, stack space is always reserved for parameters, even if they are
      passed in registers. The called routine may choose to save parameters from
      registers to the corresponding space on the stack.
    * On Darwin, a corresponding amount of GPRs is skipped when a floating point
      parameter is passed in an FPR.
    * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
      starting with an odd-numbered GPR. It may skip a GPR to achieve this.
      Darwin just treats an I64 like two separate II32s (high word first).
    * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
      4-byte aligned like everything else on Darwin.
    * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
      PowerPC Linux does not agree, so neither do we.

    PowerPC 64 Linux uses the System V Release 4 Calling Convention for
    64-bit PowerPC. It is specified in
    "64-bit PowerPC ELF Application Binary Interface Supplement 1.9".

    According to all conventions, the parameter area should be part of the
    caller's stack frame, allocated in the caller's prologue code (large enough
    to hold the parameter lists for all called routines). The NCG already
    uses the stack for register spilling, leaving 64 bytes free at the top.
    If we need a larger parameter area than that, we just allocate a new stack
    frame just before ccalling.
-}


genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
 = return $ unitOL LWSYNC

genCCall' _ _ (PrimTarget MO_Touch) _ _
 = return $ nilOL

genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
 = return $ nilOL

genCCall' dflags gcp target dest_regs args
  = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
        -- we rely on argument promotion in the codeGen
    do
        (finalStack,passArgumentsCode,usedRegs) <- passArguments
                                                        (zip args argReps)
                                                        allArgRegs
                                                        (allFPArgRegs platform)
                                                        initialStackOffset
                                                        (toOL []) []

        -- Either a label we can branch to directly, or an expression
        -- computing the callee's address; out-of-line MachOps also tell
        -- us whether a double result must be rounded to single.
        (labelOrExpr, reduceToFF32) <- case target of
            ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
                uses_pic_base_implicitly
                return (Left lbl, False)
            ForeignTarget expr _ -> do
                uses_pic_base_implicitly
                return (Right expr, False)
            PrimTarget mop -> outOfLineMachOp mop

        let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
                         `appOL` toc_before
            codeAfter = toc_after labelOrExpr `appOL` move_sp_up finalStack
                        `appOL` moveResult reduceToFF32

        case labelOrExpr of
            Left lbl -> do -- the linker does all the work for us
                return (         codeBefore
                        `snocOL` BL lbl usedRegs
                        `appOL`  codeAfter)
            Right dyn -> do -- implement call through function pointer
                (dynReg, dynCode) <- getSomeReg dyn
                case gcp of
                     GCPLinux64ELF 1 -> return ( dynCode
                       `appOL`  codeBefore
                       `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
                       `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
                       `snocOL` MTCTR r11
                       `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
                       `snocOL` BCTRL usedRegs
                       `appOL`  codeAfter)
                     GCPLinux64ELF 2 -> return ( dynCode
                       `appOL`  codeBefore
                       `snocOL` MR r12 dynReg
                       `snocOL` MTCTR r12
                       `snocOL` BCTRL usedRegs
                       `appOL`  codeAfter)
                     _ -> return ( dynCode
                       `snocOL` MTCTR dynReg
                       `appOL`  codeBefore
                       `snocOL` BCTRL usedRegs
                       `appOL`  codeAfter)
    where
        platform = targetPlatform dflags

        uses_pic_base_implicitly = do
            -- See Note [implicit register in PPC PIC code]
            -- on why we claim to use PIC register here
            when (gopt Opt_PIC dflags && target32Bit platform) $ do
                _ <- getPicBaseNat $ archWordFormat True
                return ()

        -- offset from sp at which the first stack argument is placed
        initialStackOffset = case gcp of
                             GCPDarwin       -> 24
                             GCPLinux        -> 8
                             GCPLinux64ELF 1 -> 48
                             GCPLinux64ELF 2 -> 32
                             _ -> panic "genCall': unknown calling convention"
            -- size of linkage area + size of arguments, in bytes
        stackDelta finalStack = case gcp of
                GCPDarwin ->
                    roundTo 16 $ (24 +) $ max 32 $ sum $
                    map (widthInBytes . typeWidth) argReps
                GCPLinux -> roundTo 16 finalStack
                -- On 64-bit ELF every argument occupies a full 8-byte
                -- slot (passArguments below advances the stack offset by
                -- 8 for every format), so each width has to be rounded
                -- up to 8 before summing; otherwise the frame would be
                -- too small when sub-doubleword arguments are passed.
                GCPLinux64ELF 1 ->
                    roundTo 16 $ (48 +) $ max 64 $ sum $
                    map (roundTo 8 . widthInBytes . typeWidth) argReps
                GCPLinux64ELF 2 ->
                    roundTo 16 $ (32 +) $ max 64 $ sum $
                    map (roundTo 8 . widthInBytes . typeWidth) argReps
                _ -> panic "genCall': unknown calling conv."

        argReps = map (cmmExprType dflags) args

        -- round x up to the next multiple of a
        roundTo a x | x `mod` a == 0 = x
                    | otherwise = x + a - (x `mod` a)

        spFormat = if target32Bit platform then II32 else II64

        -- A new frame is only allocated when more than 64 bytes are
        -- needed (see the comment on the parameter area above).
        move_sp_down finalStack
               | delta > 64 =
                        toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
                              DELTA (-delta)]
               | otherwise = nilOL
               where delta = stackDelta finalStack
        -- 64-bit ELF: save the TOC pointer in the frame before the call
        toc_before = case gcp of
                     GCPLinux64ELF 1 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 40))
                     GCPLinux64ELF 2 -> unitOL $ ST spFormat toc (AddrRegImm sp (ImmInt 24))
                     _               -> nilOL
        -- ... and reload it after an indirect call; a direct call is
        -- followed by a NOP instead.
        toc_after labelOrExpr = case gcp of
                                GCPLinux64ELF 1 -> case labelOrExpr of
                                                   Left _ -> toOL [ NOP ]
                                                   Right _ -> toOL [ LD spFormat toc
                                                                        (AddrRegImm sp
                                                                            (ImmInt 40))
                                                                   ]
                                GCPLinux64ELF 2 -> case labelOrExpr of
                                                   Left _ -> toOL [ NOP ]
                                                   Right _ -> toOL [ LD spFormat toc
                                                                        (AddrRegImm sp
                                                                            (ImmInt 24))
                                                                   ]
                                _               -> nilOL
        move_sp_up finalStack
               | delta > 64 =  -- TODO: fix-up stack back-chain
                        toOL [ADD sp sp (RIImm (ImmInt delta)),
                              DELTA 0]
               | otherwise = nilOL
               where delta = stackDelta finalStack


        -- passArguments walks the argument list, threading the still
        -- available GPRs and FPRs, the current stack offset, the code
        -- accumulated so far and the list of registers used.  It returns
        -- the final stack offset, the argument-passing code and the
        -- used registers.
        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
        -- 64-bit integers on a 32-bit target are passed as two words
        passArguments ((arg,arg_ty):args) gprs fprs stackOffset
               accumCode accumUsed | isWord64 arg_ty
                                     && target32Bit (targetPlatform dflags) =
            do
                ChildCode64 code vr_lo <- iselExpr64 arg
                let vr_hi = getHiVRegFromLo vr_lo

                case gcp of
                    GCPDarwin ->
                        do let storeWord vr (gpr:_) _ = MR gpr vr
                               storeWord vr [] offset
                                   = ST II32 vr (AddrRegImm sp (ImmInt offset))
                           passArguments args
                                         (drop 2 gprs)
                                         fprs
                                         (stackOffset+8)
                                         (accumCode `appOL` code
                                               `snocOL` storeWord vr_hi gprs stackOffset
                                               `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
                                         ((take 2 gprs) ++ accumUsed)
                    GCPLinux ->
                        do let stackOffset' = roundTo 8 stackOffset
                               stackCode = accumCode `appOL` code
                                   `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
                                   `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
                               regCode hireg loreg =
                                   accumCode `appOL` code
                                       `snocOL` MR hireg vr_hi
                                       `snocOL` MR loreg vr_lo

                           case gprs of
                               hireg : loreg : regs | even (length gprs) ->
                                   passArguments args regs fprs stackOffset
                                                 (regCode hireg loreg) (hireg : loreg : accumUsed)
                               _skipped : hireg : loreg : regs ->
                                   passArguments args regs fprs stackOffset
                                                 (regCode hireg loreg) (hireg : loreg : accumUsed)
                               _ -> -- only one or no regs left
                                   passArguments args [] fprs (stackOffset'+8)
                                                 stackCode accumUsed
                    GCPLinux64ELF _ -> panic "passArguments: 32 bit code"

        -- everything else: one register of the appropriate class if one
        -- is left, otherwise a stack slot
        passArguments ((arg,rep):args) gprs fprs stackOffset accumCode accumUsed
            | reg : _ <- regs = do
                register <- getRegister arg
                let code = case register of
                            Fixed _ freg fcode -> fcode `snocOL` MR reg freg
                            Any _ acode -> acode reg
                    stackOffsetRes = case gcp of
                                     -- The Darwin ABI requires that we reserve
                                     -- stack slots for register parameters
                                     GCPDarwin -> stackOffset + stackBytes
                                     -- ... the SysV ABI 32-bit doesn't.
                                     GCPLinux -> stackOffset
                                     -- ... but SysV ABI 64-bit does.
                                     GCPLinux64ELF _ -> stackOffset + stackBytes
                passArguments args
                              (drop nGprs gprs)
                              (drop nFprs fprs)
                              stackOffsetRes
                              (accumCode `appOL` code)
                              (reg : accumUsed)
            | otherwise = do
                (vr, code) <- getSomeReg arg
                passArguments args
                              (drop nGprs gprs)
                              (drop nFprs fprs)
                              (stackOffset' + stackBytes)
                              (accumCode `appOL` code `snocOL` ST (cmmTypeFormat rep) vr stackSlot)
                              accumUsed
            where
                stackOffset' = case gcp of
                               GCPDarwin ->
                                   -- stackOffset is at least 4-byte aligned
                                   -- The Darwin ABI is happy with that.
                                   stackOffset
                               GCPLinux
                                   -- ... the SysV ABI requires 8-byte
                                   -- alignment for doubles.
                                | isFloatType rep && typeWidth rep == W64 ->
                                   roundTo 8 stackOffset
                                | otherwise ->
                                   stackOffset
                               GCPLinux64ELF _ ->
                                   -- everything on the stack is 8-byte
                                   -- aligned on a 64 bit system
                                   -- (except vector status, not used now)
                                   stackOffset
                stackSlot = AddrRegImm sp (ImmInt stackOffset')
                -- (GPRs consumed, FPRs consumed, stack bytes occupied,
                --  register class the argument is taken from)
                (nGprs, nFprs, stackBytes, regs)
                    = case gcp of
                      GCPDarwin ->
                          case cmmTypeFormat rep of
                          II8  -> (1, 0, 4, gprs)
                          II16 -> (1, 0, 4, gprs)
                          II32 -> (1, 0, 4, gprs)
                          -- The Darwin ABI requires that we skip a
                          -- corresponding number of GPRs when we use
                          -- the FPRs.
                          FF32 -> (1, 1, 4, fprs)
                          FF64 -> (2, 1, 8, fprs)
                          II64 -> panic "genCCall' passArguments II64"
                          FF80 -> panic "genCCall' passArguments FF80"
                      GCPLinux ->
                          case cmmTypeFormat rep of
                          II8  -> (1, 0, 4, gprs)
                          II16 -> (1, 0, 4, gprs)
                          II32 -> (1, 0, 4, gprs)
                          -- ... the SysV ABI doesn't.
                          FF32 -> (0, 1, 4, fprs)
                          FF64 -> (0, 1, 8, fprs)
                          II64 -> panic "genCCall' passArguments II64"
                          FF80 -> panic "genCCall' passArguments FF80"
                      GCPLinux64ELF _ ->
                          case cmmTypeFormat rep of
                          II8  -> (1, 0, 8, gprs)
                          II16 -> (1, 0, 8, gprs)
                          II32 -> (1, 0, 8, gprs)
                          II64 -> (1, 0, 8, gprs)
                          -- The ELFv1 ABI requires that we skip a
                          -- corresponding number of GPRs when we use
                          -- the FPRs.
                          FF32 -> (1, 1, 8, fprs)
                          FF64 -> (1, 1, 8, fprs)
                          FF80 -> panic "genCCall' passArguments FF80"

        -- Copy the result out of f1 (floating point) or r3 (r3/r4 for a
        -- 64-bit integer on a 32-bit target) into the destination.
        moveResult reduceToFF32 =
            case dest_regs of
                [] -> nilOL
                [dest]
                    | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
                    | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
                    | isWord64 rep && target32Bit (targetPlatform dflags)
                       -> toOL [MR (getHiVRegFromLo r_dest) r3,
                                MR r_dest r4]
                    | otherwise -> unitOL (MR r_dest r3)
                    where rep = cmmRegType dflags (CmmLocal dest)
                          r_dest = getRegisterReg platform (CmmLocal dest)
                _ -> panic "genCCall' moveResult: Bad dest_regs"

        -- Map a MachOp to the C function implementing it, together with
        -- a flag saying whether the result has to be reduced to single
        -- precision (the F32 operations call the double-precision
        -- functions).
        outOfLineMachOp mop =
            do
                dflags <- getDynFlags
                mopExpr <- cmmMakeDynamicReference dflags CallReference $
                              mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
                let mopLabelOrExpr = case mopExpr of
                        CmmLit (CmmLabel lbl) -> Left lbl
                        _ -> Right mopExpr
                return (mopLabelOrExpr, reduce)
            where
                (functionName, reduce) = case mop of
                    MO_F32_Exp   -> (fsLit "exp", True)
                    MO_F32_Log   -> (fsLit "log", True)
                    MO_F32_Sqrt  -> (fsLit "sqrt", True)

                    MO_F32_Sin   -> (fsLit "sin", True)
                    MO_F32_Cos   -> (fsLit "cos", True)
                    MO_F32_Tan   -> (fsLit "tan", True)

                    MO_F32_Asin  -> (fsLit "asin", True)
                    MO_F32_Acos  -> (fsLit "acos", True)
                    MO_F32_Atan  -> (fsLit "atan", True)

                    MO_F32_Sinh  -> (fsLit "sinh", True)
                    MO_F32_Cosh  -> (fsLit "cosh", True)
                    MO_F32_Tanh  -> (fsLit "tanh", True)
                    MO_F32_Pwr   -> (fsLit "pow", True)

                    MO_F64_Exp   -> (fsLit "exp", False)
                    MO_F64_Log   -> (fsLit "log", False)
                    MO_F64_Sqrt  -> (fsLit "sqrt", False)

                    MO_F64_Sin   -> (fsLit "sin", False)
                    MO_F64_Cos   -> (fsLit "cos", False)
                    MO_F64_Tan   -> (fsLit "tan", False)

                    MO_F64_Asin  -> (fsLit "asin", False)
                    MO_F64_Acos  -> (fsLit "acos", False)
                    MO_F64_Atan  -> (fsLit "atan", False)

                    MO_F64_Sinh  -> (fsLit "sinh", False)
                    MO_F64_Cosh  -> (fsLit "cosh", False)
                    MO_F64_Tanh  -> (fsLit "tanh", False)
                    MO_F64_Pwr   -> (fsLit "pow", False)

                    MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)

                    MO_Memcpy _  -> (fsLit "memcpy", False)
                    MO_Memset _  -> (fsLit "memset", False)
                    MO_Memmove _ -> (fsLit "memmove", False)

                    MO_BSwap w   -> (fsLit $ bSwapLabel w, False)
                    MO_PopCnt w  -> (fsLit $ popCntLabel w, False)
                    MO_Clz w     -> (fsLit $ clzLabel w, False)
                    MO_Ctz w     -> (fsLit $ ctzLabel w, False)
                    MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
                    MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
                    MO_AtomicRead w  -> (fsLit $ atomicReadLabel w, False)
                    MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)

                    MO_S_QuotRem {}  -> unsupported
                    MO_U_QuotRem {}  -> unsupported
                    MO_U_QuotRem2 {} -> unsupported
                    MO_Add2 {}       -> unsupported
                    MO_AddIntC {}    -> unsupported
                    MO_SubIntC {}    -> unsupported
                    MO_U_Mul2 {}     -> unsupported
                    MO_WriteBarrier  -> unsupported
                    MO_Touch         -> unsupported
                    (MO_Prefetch_Data _ ) -> unsupported
                unsupported = panic ("outOfLineCmmOp: " ++ show mop
                                  ++ " not supported")
1478
1479 -- -----------------------------------------------------------------------------
1480 -- Generating a table-branch
1481
-- | Generate a table jump.  The scrutinee (adjusted by the table's
-- offset) is scaled to a word index, the table entry is loaded and
-- jumped to through the count register.
--
-- With PIC, or on a 64-bit target, the table holds offsets relative to
-- the table label, so the table address is materialised and added to
-- the loaded entry.  Otherwise the table holds absolute addresses and
-- is addressed with a HA/LO pair on the table label.
genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
genSwitch dflags expr targets = do
    (indexReg, indexCode) <- getSomeReg (cmmOffset dflags expr offset)
    tmp <- getNewRegNat fmt
    lbl <- getNewLabelNat
    let scaleIndex = SL fmt tmp indexReg (RIImm (ImmInt sha))
        dispatch   = [ MTCTR tmp, BCTR ids (Just lbl) ]
    if relativeTable
        then do
            dynRef <- cmmMakeDynamicReference dflags DataReference lbl
            (tableReg, tableCode) <- getSomeReg dynRef
            return $ indexCode `appOL` tableCode `appOL` toOL
                ( scaleIndex
                : LD fmt tmp (AddrRegReg tableReg tmp)
                : ADD tmp tmp (RIReg tableReg)
                : dispatch )
        else
            return $ indexCode `appOL` toOL
                ( scaleIndex
                : ADDIS tmp tmp (HA (ImmCLbl lbl))
                : LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl)))
                : dispatch )
  where
    (offset, ids) = switchTargetsToTable targets
    is32Bit       = target32Bit (targetPlatform dflags)
    relativeTable = gopt Opt_PIC dflags || not is32Bit
    fmt           = archWordFormat is32Bit
    -- shift amount: log2 of the size of a table entry
    sha           = if is32Bit then 2 else 3
1517
-- | Produce the read-only jump table belonging to a table-branch
-- instruction (a BCTR carrying a table label); any other instruction
-- has no table.  With PIC or on a 64-bit target the entries are label
-- differences relative to the table label (a zero word for missing
-- targets); otherwise they are built by 'jumpTableEntry'.
generateJumpTableForInstr :: DynFlags -> Instr
                          -> Maybe (NatCmmDecl CmmStatics Instr)
generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
    Just (CmmData ReadOnlyData (Statics lbl (map entry ids)))
  where
    relative = gopt Opt_PIC dflags
               || not (target32Bit (targetPlatform dflags))

    entry | relative  = relativeEntry
          | otherwise = jumpTableEntry dflags

    relativeEntry Nothing =
        CmmStaticLit (CmmInt 0 (wordWidth dflags))
    relativeEntry (Just blockid) =
        CmmStaticLit
            (CmmLabelDiffOff (mkAsmTempLabel (getUnique blockid)) lbl 0)
generateJumpTableForInstr _ _ = Nothing
1533
1534 -- -----------------------------------------------------------------------------
1535 -- 'condIntReg' and 'condFltReg': condition codes into registers
1536
1537 -- Turn those condition codes into integers now (when they appear on
1538 -- the right hand side of an assignment).
1539
condIntReg, condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register

-- | Turn a condition code computation into a register holding 0 or 1.
--
-- After the comparison code has run, the relevant condition register
-- bit is optionally inverted in place (CRNOR of the bit with itself),
-- the condition register is copied into the destination, and a rotate
-- with a one-bit mask (31..31) isolates the chosen bit.
condReg :: NatM CondCode -> NatM Register
condReg getCond = do
    CondCode _ cond cond_code <- getCond
    dflags <- getDynFlags
    let
        code dst = cond_code
            `appOL` negate_code
            `appOL` toOL [
                MFCR dst,
                RLWINM dst dst (bit + 1) 31 31
            ]

        negate_code | do_negate = unitOL (CRNOR bit bit bit)
                    | otherwise = nilOL

        -- which condition register bit to read, and whether the
        -- condition is the negation of that bit
        (bit, do_negate) = case cond of
            LTT -> (0, False)
            LE  -> (1, True)
            EQQ -> (2, False)
            GE  -> (0, True)
            GTT -> (1, False)

            NE  -> (2, True)

            LU  -> (0, False)
            LEU -> (1, True)
            GEU -> (0, True)
            GU  -> (1, False)
            _   -> panic "PPC.CodeGen.condReg: no match"

        format = archWordFormat $ target32Bit $ targetPlatform dflags
    return (Any format code)

-- Integer and floating point comparisons whose result is wanted in a
-- register rather than as a branch condition.
condIntReg cond x y = condReg (condIntCode cond x y)
condFltReg cond x y = condReg (condFltCode cond x y)
1577
1578
1579
1580 -- -----------------------------------------------------------------------------
1581 -- 'trivial*Code': deal with trivial instructions
1582
1583 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
1584 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
1585 -- Only look for constants on the right hand side, because that's
1586 -- where the generic optimizer will have put them.
1587
1588 -- Similarly, for unary instructions, we don't have to worry about
1589 -- matching an StInt as the argument, because genericOpt will already
1590 -- have handled the constant-folding.
1591
1592
1593
1594 {-
1595 Wolfgang's PowerPC version of The Rules:
1596
1597 A slightly modified version of The Rules to take advantage of the fact
1598 that PowerPC instructions work on all registers and don't implicitly
1599 clobber any fixed registers.
1600
1601 * The only expression for which getRegister returns Fixed is (CmmReg reg).
1602
1603 * If getRegister returns Any, then the code it generates may modify only:
1604 (a) fresh temporaries
1605 (b) the destination register
1606 It may *not* modify global registers, unless the global
1607 register happens to be the destination register.
1608 It may not clobber any other registers. In fact, only ccalls clobber any
1609 fixed registers.
1610 Also, it may not modify the counter register (used by genCCall).
1611
1612 Corollary: If a getRegister for a subexpression returns Fixed, you need
1613 not move it to a fresh temporary before evaluating the next subexpression.
1614 The Fixed register won't be modified.
1615 Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
1616
1617 * SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
1618 the value of the destination register.
1619 -}
1620
-- | Code for a dyadic integer instruction of the shape
-- @instr dst src1 (reg-or-immediate)@.  A literal right operand that
-- 'makeImmediate' accepts for the given width and signedness is
-- encoded as an immediate; otherwise both operands go into registers.
trivialCode
        :: Width
        -> Bool
        -> (Reg -> Reg -> RI -> Instr)
        -> CmmExpr
        -> CmmExpr
        -> NatM Register

trivialCode rep signed instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate rep signed y
    = do
        (lhs, lhsCode) <- getSomeReg x
        return $ Any (intFormat rep) $ \dst ->
            lhsCode `snocOL` instr dst lhs (RIImm imm)

trivialCode rep _ instr x y = do
    (lhs, lhsCode) <- getSomeReg x
    (rhs, rhsCode) <- getSomeReg y
    return $ Any (intFormat rep) $ \dst ->
        lhsCode `appOL` rhsCode `snocOL` instr dst lhs (RIReg rhs)
1641
-- | Like 'trivialCode', but for shift instructions, which additionally
-- take the operand format.  A literal shift amount is encoded as an
-- (unsigned) immediate when 'makeImmediate' allows it.
shiftCode
        :: Width
        -> (Format-> Reg -> Reg -> RI -> Instr)
        -> CmmExpr
        -> CmmExpr
        -> NatM Register
shiftCode width instr x (CmmLit (CmmInt y _))
    | Just imm <- makeImmediate width False y
    = do
        (lhs, lhsCode) <- getSomeReg x
        let fmt = intFormat width
        return $ Any fmt $ \dst ->
            lhsCode `snocOL` instr fmt dst lhs (RIImm imm)

shiftCode width instr x y = do
    (lhs, lhsCode) <- getSomeReg x
    (rhs, rhsCode) <- getSomeReg y
    let fmt = intFormat width
    return $ Any fmt $ \dst ->
        lhsCode `appOL` rhsCode `snocOL` instr fmt dst lhs (RIReg rhs)
1662
-- | Code for a dyadic instruction that only takes register operands:
-- both arguments are evaluated into registers and combined by @instr@.
trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm' format instr x y = do
    (lhs, lhsCode) <- getSomeReg x
    (rhs, rhsCode) <- getSomeReg y
    return $ Any format $ \dst ->
        lhsCode `appOL` rhsCode `snocOL` instr dst lhs rhs
1670
-- | Variant of 'trivialCodeNoImm'' for instructions that also take the
-- operand format as their first argument.
trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
                 -> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm format instr = trivialCodeNoImm' format (instr format)
1674
1675
-- | Code for a unary instruction: evaluate the operand into a register
-- and apply @instr@ from it to the destination.
trivialUCode
        :: Format
        -> (Reg -> Reg -> Instr)
        -> CmmExpr
        -> NatM Register
trivialUCode rep instr x = do
    (operand, operandCode) <- getSomeReg x
    return $ Any rep $ \dst ->
        operandCode `snocOL` instr dst operand
1685
1686 -- There is no "remainder" instruction on the PPC, so we have to do
1687 -- it the hard way.
1688 -- The "div" parameter is the division instruction to use (DIVW or DIVWU)
1689
-- | Remainder computed as @x - (x `div` y) * y@, using the supplied
-- division instruction and the word-sized multiply of the target
-- (MULLW on 32-bit, MULLD otherwise).
remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
              -> CmmExpr -> CmmExpr -> NatM Register
remainderCode rep div x y = do
    dflags <- getDynFlags
    let multiply
            | target32Bit (targetPlatform dflags) = MULLW
            | otherwise                           = MULLD
    (dividend, dividendCode) <- getSomeReg x
    (divisor,  divisorCode)  <- getSomeReg y
    let compute dst =
            [ div dst dividend divisor            -- dst := x / y
            , multiply dst dst (RIReg divisor)    -- dst := dst * y
            , SUBF dst dst dividend               -- subtract that from x
            ]
    return $ Any (intFormat rep) $ \dst ->
        dividendCode `appOL` divisorCode `appOL` toOL (compute dst)
1704
-- | Integer to floating point conversion; picks the implementation
-- for the target architecture.
coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
coerceInt2FP fromRep toRep x = do
    dflags <- getDynFlags
    coerceInt2FP' (platformArch (targetPlatform dflags)) fromRep toRep x
1710
-- | Architecture specific integer to floating point conversion.
coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register

-- 32-bit PowerPC.  A double is assembled in a stack slot whose high
-- word is 0x43300000 and whose low word is the source with its sign
-- bit flipped; subtracting the constant 0x43300000_80000000
-- (2^52 + 2^31 as a double), which is emitted as read-only data,
-- leaves the converted value.
coerceInt2FP' ArchPPC fromRep toRep x = do
    (src, code) <- getSomeReg x
    lbl <- getNewLabelNat
    itmp <- getNewRegNat II32
    ftmp <- getNewRegNat FF64
    dflags <- getDynFlags
    dynRef <- cmmMakeDynamicReference dflags DataReference lbl
    Amode addr addr_code <- getAmode D dynRef
    let
        -- sign-extend narrow sources to a full word first
        signExtend = case fromRep of
            W8  -> unitOL $ EXTS II8 src src
            W16 -> unitOL $ EXTS II16 src src
            W32 -> nilOL
            _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

        -- round to single precision when a 32-bit float is wanted
        roundResult dst = case toRep of
            W32 -> unitOL $ FRSP dst dst
            W64 -> nilOL
            _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

        magicConstant = LDATA ReadOnlyData $ Statics lbl
            [ CmmStaticLit (CmmInt 0x43300000 W32)
            , CmmStaticLit (CmmInt 0x80000000 W32)
            ]

        buildDouble = toOL
            [ magicConstant
            , XORIS itmp src (ImmInt 0x8000)
            , ST II32 itmp (spRel dflags 3)
            , LIS itmp (ImmInt 0x4330)
            , ST II32 itmp (spRel dflags 2)
            , LD FF64 ftmp (spRel dflags 2)
            ]

        subtractMagic dst = toOL
            [ LD FF64 dst addr
            , FSUB FF64 dst ftmp dst
            ]

        convert dst = code
            `appOL` signExtend
            `appOL` buildDouble
            `appOL` addr_code
            `appOL` subtractMagic dst
            `appOL` roundResult dst

    return (Any (floatFormat toRep) convert)

-- 64-bit PowerPC.  The (sign-extended) source is stored to a stack
-- slot, reloaded into a floating point register and converted there
-- with FCFID.
--
-- On ELF v1 Linux the slot used is the compiler doubleword of the
-- stack frame; on ELF v2 Linux the same slot is the TOC pointer
-- doubleword.  The latter is only set right before a call and restored
-- right after the return from the call, so using it here is fine.
coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
    (src, code) <- getSomeReg x
    dflags <- getDynFlags
    let
        signExtend = case fromRep of
            W8  -> unitOL $ EXTS II8 src src
            W16 -> unitOL $ EXTS II16 src src
            W32 -> unitOL $ EXTS II32 src src
            W64 -> nilOL
            _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

        roundResult dst = case toRep of
            W32 -> unitOL $ FRSP dst dst
            W64 -> nilOL
            _   -> panic "PPC.CodeGen.coerceInt2FP: no match"

        throughStack dst = toOL
            [ ST II64 src (spRel dflags 3)
            , LD FF64 dst (spRel dflags 3)
            , FCFID dst dst
            ]

        convert dst = code
            `appOL` signExtend
            `appOL` throughStack dst
            `appOL` roundResult dst

    return (Any (floatFormat toRep) convert)

coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
1779
1780
-- | Floating point to integer conversion; picks the implementation
-- for the target architecture.
coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int fromRep toRep x = do
    dflags <- getDynFlags
    coerceFP2Int' (platformArch (targetPlatform dflags)) fromRep toRep x
1786
-- | Architecture specific floating point to integer conversion.  The
-- value is converted inside a floating point register, written to the
-- stack and read back into an integer register.
coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
coerceFP2Int' ArchPPC _ toRep x = do
    dflags <- getDynFlags
    -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat FF64
    let viaStack dst =
            [ FCTIWZ tmp src                  -- convert to int in FP reg
            , ST FF64 tmp (spRel dflags 2)    -- store value (64bit) from FP to stack
            , LD II32 dst (spRel dflags 3)    -- read low word of value (high word is undefined)
            ]
    return $ Any (intFormat toRep) $ \dst ->
        code `appOL` toOL (viaStack dst)

coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
    dflags <- getDynFlags
    -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
    (src, code) <- getSomeReg x
    tmp <- getNewRegNat FF64
    let viaStack dst =
            [ FCTIDZ tmp src                  -- convert to int in FP reg
            , ST FF64 tmp (spRel dflags 3)    -- store value (64bit) from FP to compiler word on stack
            , LD II64 dst (spRel dflags 3)
            ]
    return $ Any (intFormat toRep) $ \dst ->
        code `appOL` toOL (viaStack dst)

coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
1818
1819 -- Note [.LCTOC1 in PPC PIC code]
1820 -- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
1821 -- to make the most of the PPC's 16-bit displacements.
1822 -- As 16-bit signed offset is used (usually via addi/lwz instructions)
1823 -- first element will have '-32768' offset against .LCTOC1.
1824
1825 -- Note [implicit register in PPC PIC code]
1826 -- PPC generates calls by labels in assembly
1827 -- in form of:
1828 -- bl puts+32768@plt
1829 -- in this form it's not seen directly (by GHC NCG)
1830 -- that r30 (PicBaseReg) is used,
1831 -- but r30 is a required part of PLT code setup:
1832 -- puts+32768@plt:
1833 -- lwz r11,-30484(r30) ; offset in .LCTOC1
1834 -- mtctr r11
1835 -- bctr