PPC NCG: Use liveness information in CmmCall
[ghc.git] / compiler / nativeGen / PPC / Instr.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Machine-dependent assembly language
6 --
7 -- (c) The University of Glasgow 1993-2004
8 --
9 -----------------------------------------------------------------------------
10
11 #include "HsVersions.h"
12 #include "nativeGen/NCG.h"
13
14 module PPC.Instr (
15 archWordFormat,
16 RI(..),
17 Instr(..),
18 stackFrameHeaderSize,
19 maxSpillSlots,
20 allocMoreStack,
21 makeFarBranches
22 )
23
24 where
25
26 import GhcPrelude
27
28 import PPC.Regs
29 import PPC.Cond
30 import Instruction
31 import Format
32 import TargetReg
33 import RegClass
34 import Reg
35
36 import CodeGen.Platform
37 import BlockId
38 import Hoopl.Collections
39 import Hoopl.Label
40 import DynFlags
41 import Cmm
42 import CmmInfo
43 import FastString
44 import CLabel
45 import Outputable
46 import Platform
47 import UniqFM (listToUFM, lookupUFM)
48 import UniqSupply
49
50 import Control.Monad (replicateM)
51 import Data.Maybe (fromMaybe)
52
--------------------------------------------------------------------------------
-- | Format of a PPC memory address (i.e. of a machine word): 'II32'
-- when the argument says the target is 32-bit, 'II64' otherwise.
archWordFormat :: Bool -> Format
archWordFormat is32Bit = if is32Bit then II32 else II64
60
61
-- | 'Instruction' instance for PowerPC.  Every class method is
-- implemented by the @ppc_@-prefixed function of the same name below.
instance Instruction Instr where
    -- register usage and renaming
    regUsageOfInstr         = ppc_regUsageOfInstr
    patchRegsOfInstr        = ppc_patchRegsOfInstr

    -- control flow
    isJumpishInstr          = ppc_isJumpishInstr
    jumpDestsOfInstr        = ppc_jumpDestsOfInstr
    patchJumpInstr          = ppc_patchJumpInstr
    mkJumpInstr             = ppc_mkJumpInstr

    -- spilling and reloading
    mkSpillInstr            = ppc_mkSpillInstr
    mkLoadInstr             = ppc_mkLoadInstr

    -- pseudo instructions
    takeDeltaInstr          = ppc_takeDeltaInstr
    isMetaInstr             = ppc_isMetaInstr

    -- register-to-register moves (the first class argument is not needed)
    mkRegRegMoveInstr _     = ppc_mkRegRegMoveInstr
    takeRegRegMoveInstr     = ppc_takeRegRegMoveInstr

    -- stack frame adjustment
    mkStackAllocInstr       = ppc_mkStackAllocInstr
    mkStackDeallocInstr     = ppc_mkStackDeallocInstr
78
79
-- | Instructions allocating @amount@ bytes of stack: the stack pointer
-- adjustment is the negated amount.
ppc_mkStackAllocInstr :: Platform -> Int -> [Instr]
ppc_mkStackAllocInstr platform amount =
    ppc_mkStackAllocInstr' platform (negate amount)
83
-- | Instructions releasing @amount@ bytes of stack: the stack pointer
-- adjustment is the amount itself.
ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr]
ppc_mkStackDeallocInstr platform amount =
    ppc_mkStackAllocInstr' platform amount
87
-- | Instructions moving the stack pointer by @amount@ bytes (negative
-- to allocate, positive to deallocate; see 'ppc_mkStackAllocInstr' and
-- 'ppc_mkStackDeallocInstr').
--
-- The word found at offset 0 from @sp@ is loaded into @r0@ and written
-- back with a store-with-update ('STU') at the adjusted address.  When
-- the amount does not fit a 16-bit immediate, the offset is first
-- built in the platform's temporary register from its high ('HA') and
-- low ('LO') halves and the register-indexed form of 'STU' is used.
ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr]
ppc_mkStackAllocInstr' platform amount
  | fits16Bits amount
  = [ LD fmt r0 (AddrRegImm sp zero)
    , STU fmt r0 (AddrRegImm sp immAmount)
    ]
  | otherwise
  = [ LD fmt r0 (AddrRegImm sp zero)
    , ADDIS tmp sp (HA immAmount)
    , ADD tmp tmp (RIImm (LO immAmount))
    , STU fmt r0 (AddrRegReg sp tmp)
    ]
  where
    -- 'platformWordSize' is already a byte count (4 or 8), which is
    -- exactly what 'widthFromBytes' expects.  Dividing it by 8 first
    -- would ask for a 0-byte width on 32-bit targets and for a 1-byte
    -- width on 64-bit targets instead of a full machine word.
    fmt       = intFormat (widthFromBytes (platformWordSize platform))
    zero      = ImmInt 0
    tmp       = tmpReg platform
    immAmount = ImmInt amount
105
106 --
107 -- See note [extra spill slots] in X86/Instr.hs
108 --
-- | Make room on the stack for @slots@ additional spill slots.
--
-- Data declarations are returned unchanged together with an empty
-- list.  For a procedure, the entry points are the keys of its info
-- table map plus the first block (when that is not already among
-- them).  Each entry point gets a fresh block id: the original block is
-- reduced to the stack allocation followed by an unconditional branch
-- to the fresh block, which receives the original body.  Inside every
-- body the deallocation is inserted in front of 'JMP' and in front of
-- a 'BCTR' that has neither local targets nor a jump table label;
-- local branch targets are redirected to the fresh blocks.
--
-- The result also contains the list of (entry point, fresh block) pairs.
allocMoreStack
  :: Platform
  -> Int
  -> NatCmmDecl statics PPC.Instr.Instr
  -> UniqSM (NatCmmDecl statics PPC.Instr.Instr, [(BlockId,BlockId)])

allocMoreStack _ _ top@(CmmData _ _) = return (top, [])
allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
    freshUniqs <- replicateM (length entryPoints) getUniqueM
    let retargetList = zip entryPoints (map mkBlockId freshUniqs)
    return ( CmmProc info lbl live (ListGraph (relocate retargetList))
           , retargetList )
  where
    infoLabels = mapKeys info

    entryPoints = case code of
        [] -> infoLabels
        BasicBlock firstId _ : _   -- first block is the entry point
            | firstId `elem` infoLabels -> infoLabels
            | otherwise                 -> firstId : infoLabels

    -- sp delta: bytes needed for the slots, rounded up to the stack alignment
    spillBytes = slots * spillSlotSize
    frameDelta = ((spillBytes + stackAlign - 1) `quot` stackAlign) * stackAlign

    allocInsns   = mkStackAllocInstr   platform frameDelta
    deallocInsns = mkStackDeallocInstr platform frameDelta

    relocate retargetList = concatMap splitBlock code
      where
        blockMap :: LabelMap BlockId
        blockMap = mapFromList retargetList

        redirect :: BlockId -> BlockId
        redirect b = fromMaybe b (mapLookup b blockMap)

        splitBlock (BasicBlock bid insns) =
            case mapLookup bid blockMap of
                Just freshId ->
                    [ BasicBlock bid (allocInsns ++ [BCC ALWAYS freshId Nothing])
                    , BasicBlock freshId body
                    ]
                Nothing ->
                    [ BasicBlock bid body ]
          where
            body = foldr fixup [] insns

        -- BCTR might or might not be a non-local jump.  For
        -- "labeled-goto" we use JMP, and for "computed-goto" we use
        -- MTCTR followed by BCTR.  See 'PPC.CodeGen.genJump'.
        --
        -- BL and BCTRL are call-like instructions rather than jumps,
        -- and are used only for C calls, so they fall through untouched.
        fixup insn rest = case insn of
            JMP _ _           -> deallocInsns ++ (insn : rest)
            BCTR [] Nothing _ -> deallocInsns ++ (insn : rest)
            BCTR ids label rs -> BCTR (map (fmap redirect) ids) label rs : rest
            BCCFAR cond b p   -> BCCFAR cond (redirect b) p : rest
            BCC    cond b p   -> BCC    cond (redirect b) p : rest
            _                 -> insn : rest
172
173
174 -- -----------------------------------------------------------------------------
175 -- Machine's assembly language
176
177 -- We have a few common "instructions" (nearly all the pseudo-ops) but
178 -- mostly all of 'Instr' is machine-specific.
179
-- | An operand that is either a register or an immediate.
data RI
    = RIReg Reg     -- register operand
    | RIImm Imm     -- immediate operand
184
-- | PowerPC instructions, together with a few pseudo-ops that are
-- only meaningful inside the native code generator.
data Instr
    -- Comment pseudo-op.
    = COMMENT FastString

    -- Some static data spat out during code generation.  Will be
    -- extracted before pretty-printing.
    | LDATA   Section CmmStatics

    -- Start a new basic block.  Useful during codegen, removed later.
    -- The preceding instruction should be a jump, as per the
    -- invariants for a BasicBlock (see Cmm).
    | NEWBLOCK BlockId

    -- Specify the current stack offset for the benefit of
    -- subsequent passes.
    | DELTA   Int

    -- Loads and stores.
    | LD      Format Reg AddrMode   -- load: format, dst, src
    | LDFAR   Format Reg AddrMode   -- load with 32 bit offset: format, dst, src
    | LDR     Format Reg AddrMode   -- load and reserve: format, dst, src
    | LA      Format Reg AddrMode   -- load arithmetic: format, dst, src
    | ST      Format Reg AddrMode   -- store: format, src, dst
    | STFAR   Format Reg AddrMode   -- store with 32 bit offset: format, src, dst
    | STU     Format Reg AddrMode   -- store with update: format, src, dst
    | STC     Format Reg AddrMode   -- store conditional: format, src, dst
    | LIS     Reg Imm               -- load immediate shifted: dst, src
    | LI      Reg Imm               -- load immediate: dst, src
    | MR      Reg Reg               -- move register: dst, src (also for fmr)

    -- Comparisons.
    | CMP     Format Reg RI         -- format, src1, src2
    | CMPL    Format Reg RI         -- format, src1, src2

    -- Branches.  The (Maybe Bool) is a hint:
    --    Just True:  branch likely taken
    --    Just False: branch likely not taken
    --    Nothing:    no hint
    | BCC     Cond BlockId (Maybe Bool)   -- cond, block, hint
    | BCCFAR  Cond BlockId (Maybe Bool)   -- cond, block, hint
    | JMP     CLabel [Reg]          -- same as branch, but with a CLabel
                                    -- instead of a block ID, and with the
                                    -- live global registers
    | MTCTR   Reg
    | BCTR    [Maybe BlockId] (Maybe CLabel) [Reg]
                                    -- with list of local destinations, and
                                    -- jump table location if necessary
    | BL      CLabel [Reg]          -- with list of argument regs
    | BCTRL   [Reg]

    -- Integer arithmetic.
    | ADD     Reg Reg RI            -- dst, src1, src2
    | ADDO    Reg Reg Reg           -- add and set overflow
    | ADDC    Reg Reg Reg           -- (carrying) dst, src1, src2
    | ADDE    Reg Reg Reg           -- (extended) dst, src1, src2
    | ADDZE   Reg Reg               -- (to zero extended) dst, src
    | ADDIS   Reg Reg Imm           -- add immediate shifted: dst, src1, src2
    | SUBF    Reg Reg Reg           -- dst, src1, src2 ; dst = src2 - src1
    | SUBFO   Reg Reg Reg           -- subtract from and set overflow
    | SUBFC   Reg Reg RI            -- (carrying) dst, src1, src2 ;
                                    -- dst = src2 - src1
    | SUBFE   Reg Reg Reg           -- (extended) dst, src1, src2 ;
                                    -- dst = src2 - src1
    | MULL    Format Reg Reg RI
    | MULLO   Format Reg Reg Reg    -- multiply and set overflow
    | MFOV    Format Reg            -- move overflow bit (1|33) to register;
                                    -- pseudo-instruction, pretty printed as
                                    --   mfxer dst
                                    --   extr[w|d]i dst, dst, 1, [1|33]
    | MULHU   Format Reg Reg Reg
    | DIV     Format Bool Reg Reg Reg

    -- Bitwise logic.
    | AND     Reg Reg RI            -- dst, src1, src2
    | ANDC    Reg Reg Reg           -- AND with complement, dst = src1 & ~ src2
    | NAND    Reg Reg Reg           -- dst, src1, src2
    | OR      Reg Reg RI            -- dst, src1, src2
    | ORIS    Reg Reg Imm           -- OR immediate shifted: dst, src1, src2
    | XOR     Reg Reg RI            -- dst, src1, src2
    | XORIS   Reg Reg Imm           -- XOR immediate shifted: dst, src1, src2

    | EXTS    Format Reg Reg
    | CNTLZ   Format Reg Reg

    | NEG     Reg Reg
    | NOT     Reg Reg

    -- Shifts and rotates.
    | SL      Format Reg Reg RI     -- shift left
    | SR      Format Reg Reg RI     -- shift right
    | SRA     Format Reg Reg RI     -- shift right arithmetic

    | RLWINM  Reg Reg Int Int Int   -- rotate left word immediate then AND with mask
    | CLRLI   Format Reg Reg Int    -- clear left immediate (extended mnemonic)
    | CLRRI   Format Reg Reg Int    -- clear right immediate (extended mnemonic)

    -- Floating point.
    | FADD    Format Reg Reg Reg
    | FSUB    Format Reg Reg Reg
    | FMUL    Format Reg Reg Reg
    | FDIV    Format Reg Reg Reg
    | FABS    Reg Reg               -- abs is the same for single and double
    | FNEG    Reg Reg               -- negate is the same for single and double prec.

    | FCMP    Reg Reg

    | FCTIWZ  Reg Reg               -- convert to integer word
    | FCTIDZ  Reg Reg               -- convert to integer double word
    | FCFID   Reg Reg               -- convert from integer double word
    | FRSP    Reg Reg               -- reduce to single precision
                                    -- (but destination is a FP register)

    -- Condition register.
    | CRNOR   Int Int Int           -- condition register nor
    | MFCR    Reg                   -- move from condition register

    -- Link register.
    | MFLR    Reg                   -- move from link register
    | FETCHPC Reg                   -- pseudo-instruction:
                                    -- bcl to next insn, mflr reg

    -- Synchronisation and padding.
    | HWSYNC                        -- heavy weight sync
    | ISYNC                         -- instruction synchronize
    | LWSYNC                        -- memory barrier
    | NOP                           -- no operation; PowerPC 64 bit needs this
                                    -- as place holder to reload the TOC pointer
305
-- | Get the registers that are being used by this instruction.
--
-- No trickery is needed for jumps and such: this just states precisely
-- which registers the instruction reads and which it writes.  The
-- consequences of control flow transfers, as far as register allocation
-- goes, are taken care of by the register allocator.
--
-- Registers rejected by 'interesting' are dropped from both lists, and
-- any instruction without an alternative of its own reports 'noUsage'.
ppc_regUsageOfInstr :: Platform -> Instr -> RegUsage
ppc_regUsageOfInstr platform instr
 = case instr of
    LD      _ dst addr      -> load dst addr
    LDFAR   _ dst addr      -> load dst addr
    LDR     _ dst addr      -> load dst addr
    LA      _ dst addr      -> load dst addr
    ST      _ src addr      -> store src addr
    STFAR   _ src addr      -> store src addr
    STU     _ src addr      -> store src addr
    STC     _ src addr      -> store src addr
    LIS     dst _           -> define dst
    LI      dst _           -> define dst
    MR      dst src         -> unary dst src
    CMP     _ a ri          -> rw (a : regRI ri) []
    CMPL    _ a ri          -> rw (a : regRI ri) []
    BCC     _ _ _           -> noUsage
    BCCFAR  _ _ _           -> noUsage
    JMP     _ liveRegs      -> rw liveRegs []
    MTCTR   src             -> rw [src] []
    BCTR    _ _ liveRegs    -> rw liveRegs []
    BL      _ args          -> rw args (callClobberedRegs platform)
    BCTRL   args            -> rw args (callClobberedRegs platform)

    ADD     dst a ri        -> binaryRI dst a ri
    ADDO    dst a b         -> binary dst a b
    ADDC    dst a b         -> binary dst a b
    ADDE    dst a b         -> binary dst a b
    ADDZE   dst src         -> unary dst src
    ADDIS   dst src _       -> unary dst src
    SUBF    dst a b         -> binary dst a b
    SUBFO   dst a b         -> binary dst a b
    SUBFC   dst a ri        -> binaryRI dst a ri
    SUBFE   dst a b         -> binary dst a b
    MULL    _ dst a ri      -> binaryRI dst a ri
    MULLO   _ dst a b       -> binary dst a b
    MFOV    _ dst           -> define dst
    MULHU   _ dst a b       -> binary dst a b
    DIV     _ _ dst a b     -> binary dst a b

    AND     dst a ri        -> binaryRI dst a ri
    ANDC    dst a b         -> binary dst a b
    NAND    dst a b         -> binary dst a b
    OR      dst a ri        -> binaryRI dst a ri
    ORIS    dst src _       -> unary dst src
    XOR     dst a ri        -> binaryRI dst a ri
    XORIS   dst src _       -> unary dst src
    EXTS    _ dst src       -> unary dst src
    CNTLZ   _ dst src       -> unary dst src
    NEG     dst src         -> unary dst src
    NOT     dst src         -> unary dst src
    SL      _ dst a ri      -> binaryRI dst a ri
    SR      _ dst a ri      -> binaryRI dst a ri
    SRA     _ dst a ri      -> binaryRI dst a ri
    RLWINM  dst src _ _ _   -> unary dst src
    CLRLI   _ dst src _     -> unary dst src
    CLRRI   _ dst src _     -> unary dst src

    FADD    _ dst a b       -> binary dst a b
    FSUB    _ dst a b       -> binary dst a b
    FMUL    _ dst a b       -> binary dst a b
    FDIV    _ dst a b       -> binary dst a b
    FABS    dst src         -> unary dst src
    FNEG    dst src         -> unary dst src
    FCMP    a b             -> rw [a, b] []
    FCTIWZ  dst src         -> unary dst src
    FCTIDZ  dst src         -> unary dst src
    FCFID   dst src         -> unary dst src
    FRSP    dst src         -> unary dst src
    MFCR    dst             -> define dst
    MFLR    dst             -> define dst
    FETCHPC dst             -> define dst
    _                       -> noUsage
  where
    -- Registers read and registers written, restricted to the
    -- registers the allocator cares about.
    rw reads writes = RU (filter keep reads) (filter keep writes)
    keep            = interesting platform

    -- Common read/write shapes.
    define dst          = rw [] [dst]
    unary dst src       = rw [src] [dst]
    binary dst a b      = rw [a, b] [dst]
    binaryRI dst a ri   = rw (a : regRI ri) [dst]
    load dst addr       = rw (regAddr addr) [dst]
    store src addr      = rw (src : regAddr addr) []

    -- Registers mentioned by an address.
    regAddr (AddrRegReg base index) = [base, index]
    regAddr (AddrRegImm base _)     = [base]

    -- Register mentioned by a register-or-immediate operand, if any.
    regRI (RIReg r) = [r]
    regRI _         = []
394
-- | Whether a register should show up in a 'RegUsage': virtual
-- registers always do, single real registers only when 'freeReg' says
-- so for this platform.  Register pairs do not exist on this
-- architecture and cause a panic.
interesting :: Platform -> Reg -> Bool
interesting platform reg = case reg of
    RegVirtual _              -> True
    RegReal (RealRegSingle i) -> freeReg platform i
    RegReal (RealRegPair{})   ->
        panic "PPC.Instr.interesting: no reg pairs on this arch"
400
401
402
-- | Apply a given mapping to all the register references in this
-- instruction.  The register lists carried by 'JMP', 'BCTR', 'BL' and
-- 'BCTRL' are left alone, and instructions without an equation of
-- their own are returned unchanged.
ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
ppc_patchRegsOfInstr instr env = patch instr
  where
    patch (LD    fmt r addr)    = LD    fmt (env r) (patchAddr addr)
    patch (LDFAR fmt r addr)    = LDFAR fmt (env r) (patchAddr addr)
    patch (LDR   fmt r addr)    = LDR   fmt (env r) (patchAddr addr)
    patch (LA    fmt r addr)    = LA    fmt (env r) (patchAddr addr)
    patch (ST    fmt r addr)    = ST    fmt (env r) (patchAddr addr)
    patch (STFAR fmt r addr)    = STFAR fmt (env r) (patchAddr addr)
    patch (STU   fmt r addr)    = STU   fmt (env r) (patchAddr addr)
    patch (STC   fmt r addr)    = STC   fmt (env r) (patchAddr addr)
    patch (LIS r imm)           = LIS (env r) imm
    patch (LI  r imm)           = LI  (env r) imm
    patch (MR dst src)          = MR (env dst) (env src)
    patch (CMP  fmt r ri)       = CMP  fmt (env r) (patchRI ri)
    patch (CMPL fmt r ri)       = CMPL fmt (env r) (patchRI ri)
    patch i@BCC{}               = i
    patch i@BCCFAR{}            = i
    patch i@JMP{}               = i     -- global regs will not be remapped
    patch (MTCTR r)             = MTCTR (env r)
    patch i@BCTR{}              = i
    patch i@BL{}                = i     -- argument regs
    patch i@BCTRL{}             = i     -- cannot be remapped
    patch (ADD   dst a ri)      = ADD   (env dst) (env a) (patchRI ri)
    patch (ADDO  dst a b)       = ADDO  (env dst) (env a) (env b)
    patch (ADDC  dst a b)       = ADDC  (env dst) (env a) (env b)
    patch (ADDE  dst a b)       = ADDE  (env dst) (env a) (env b)
    patch (ADDZE dst src)       = ADDZE (env dst) (env src)
    patch (ADDIS dst src imm)   = ADDIS (env dst) (env src) imm
    patch (SUBF  dst a b)       = SUBF  (env dst) (env a) (env b)
    patch (SUBFO dst a b)       = SUBFO (env dst) (env a) (env b)
    patch (SUBFC dst a ri)      = SUBFC (env dst) (env a) (patchRI ri)
    patch (SUBFE dst a b)       = SUBFE (env dst) (env a) (env b)
    patch (MULL  fmt dst a ri)  = MULL  fmt (env dst) (env a) (patchRI ri)
    patch (MULLO fmt dst a b)   = MULLO fmt (env dst) (env a) (env b)
    patch (MFOV  fmt r)         = MFOV  fmt (env r)
    patch (MULHU fmt dst a b)   = MULHU fmt (env dst) (env a) (env b)
    patch (DIV fmt sgn dst a b) = DIV fmt sgn (env dst) (env a) (env b)

    patch (AND   dst a ri)      = AND   (env dst) (env a) (patchRI ri)
    patch (ANDC  dst a b)       = ANDC  (env dst) (env a) (env b)
    patch (NAND  dst a b)       = NAND  (env dst) (env a) (env b)
    patch (OR    dst a ri)      = OR    (env dst) (env a) (patchRI ri)
    patch (ORIS  dst src imm)   = ORIS  (env dst) (env src) imm
    patch (XOR   dst a ri)      = XOR   (env dst) (env a) (patchRI ri)
    patch (XORIS dst src imm)   = XORIS (env dst) (env src) imm
    patch (EXTS  fmt dst src)   = EXTS  fmt (env dst) (env src)
    patch (CNTLZ fmt dst src)   = CNTLZ fmt (env dst) (env src)
    patch (NEG dst src)         = NEG (env dst) (env src)
    patch (NOT dst src)         = NOT (env dst) (env src)
    patch (SL  fmt dst a ri)    = SL  fmt (env dst) (env a) (patchRI ri)
    patch (SR  fmt dst a ri)    = SR  fmt (env dst) (env a) (patchRI ri)
    patch (SRA fmt dst a ri)    = SRA fmt (env dst) (env a) (patchRI ri)
    patch (RLWINM dst src sh mb me)
                                = RLWINM (env dst) (env src) sh mb me
    patch (CLRLI fmt dst src n) = CLRLI fmt (env dst) (env src) n
    patch (CLRRI fmt dst src n) = CLRRI fmt (env dst) (env src) n
    patch (FADD fmt dst a b)    = FADD fmt (env dst) (env a) (env b)
    patch (FSUB fmt dst a b)    = FSUB fmt (env dst) (env a) (env b)
    patch (FMUL fmt dst a b)    = FMUL fmt (env dst) (env a) (env b)
    patch (FDIV fmt dst a b)    = FDIV fmt (env dst) (env a) (env b)
    patch (FABS dst src)        = FABS (env dst) (env src)
    patch (FNEG dst src)        = FNEG (env dst) (env src)
    patch (FCMP a b)            = FCMP (env a) (env b)
    patch (FCTIWZ dst src)      = FCTIWZ (env dst) (env src)
    patch (FCTIDZ dst src)      = FCTIDZ (env dst) (env src)
    patch (FCFID  dst src)      = FCFID  (env dst) (env src)
    patch (FRSP   dst src)      = FRSP   (env dst) (env src)
    patch (MFCR r)              = MFCR (env r)
    patch (MFLR r)              = MFLR (env r)
    patch (FETCHPC r)           = FETCHPC (env r)
    patch other                 = other

    -- Rename the registers of an address.
    patchAddr (AddrRegReg base index) = AddrRegReg (env base) (env index)
    patchAddr (AddrRegImm base imm)   = AddrRegImm (env base) imm

    -- Rename the register of a register-or-immediate operand, if any.
    patchRI (RIReg r) = RIReg (env r)
    patchRI other     = other
490
491
492 --------------------------------------------------------------------------------
-- | Checks whether this instruction is a jump/branch instruction,
-- i.e. one that can change the flow of control in a way that the
-- register allocator needs to worry about.
ppc_isJumpishInstr :: Instr -> Bool
ppc_isJumpishInstr BCC{}    = True
ppc_isJumpishInstr BCCFAR{} = True
ppc_isJumpishInstr BCTR{}   = True
ppc_isJumpishInstr BCTRL{}  = True
ppc_isJumpishInstr BL{}     = True
ppc_isJumpishInstr JMP{}    = True
ppc_isJumpishInstr _        = False
506
507
-- | The blocks this instruction may branch to: the target of a 'BCC'
-- or 'BCCFAR', the known local destinations of a 'BCTR', and nothing
-- for every other instruction.
ppc_jumpDestsOfInstr :: Instr -> [BlockId]
ppc_jumpDestsOfInstr (BCC    _ dest _)    = [dest]
ppc_jumpDestsOfInstr (BCCFAR _ dest _)    = [dest]
ppc_jumpDestsOfInstr (BCTR targets _ _)   = [dest | Just dest <- targets]
ppc_jumpDestsOfInstr _                    = []
518
519
-- | Change the destination of this jump instruction.
-- Used in the linear allocator when adding fixup blocks for join
-- points.  Instructions other than 'BCC', 'BCCFAR' and 'BCTR' are
-- returned unchanged.
ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
ppc_patchJumpInstr (BCC    cc dest hint) patchF = BCC    cc (patchF dest) hint
ppc_patchJumpInstr (BCCFAR cc dest hint) patchF = BCCFAR cc (patchF dest) hint
ppc_patchJumpInstr (BCTR dests lbl rs)   patchF = BCTR (map (fmap patchF) dests) lbl rs
ppc_patchJumpInstr insn                  _      = insn
530
531
532 -- -----------------------------------------------------------------------------
533
-- | An instruction to spill a register into a spill slot.
--
-- The slot is addressed relative to @sp@ at the slot's byte offset
-- minus the current stack delta.  A plain 'ST' is used when
-- 'makeImmediate' accepts that offset, the 'STFAR' pseudo instruction
-- (32 bit offsets) otherwise.
ppc_mkSpillInstr
   :: DynFlags
   -> Reg       -- register to spill
   -> Int       -- current stack delta
   -> Int       -- spill slot to use
   -> Instr

ppc_mkSpillInstr dflags reg delta slot
    = store fmt reg (AddrRegImm sp (ImmInt offset))
  where
    platform = targetPlatform dflags
    offset   = spillSlotToOffset dflags slot - delta

    -- integer registers are 32 bits wide on ArchPPC, 64 bits otherwise
    fmt = case targetClassOfReg platform reg of
            RcInteger
                | ArchPPC <- platformArch platform -> II32
                | otherwise                        -> II64
            RcDouble -> FF64
            _        -> panic "PPC.Instr.mkSpillInstr: no match"

    store = maybe STFAR (const ST) (makeImmediate W32 True offset)
558
559
-- | An instruction to reload a register from a spill slot.
--
-- The slot is addressed relative to @sp@ at the slot's byte offset
-- minus the current stack delta.  A plain 'LD' is used when
-- 'makeImmediate' accepts that offset, the 'LDFAR' pseudo instruction
-- (32 bit offsets) otherwise.
ppc_mkLoadInstr
   :: DynFlags
   -> Reg       -- register to load
   -> Int       -- current stack delta
   -> Int       -- spill slot to use
   -> Instr

ppc_mkLoadInstr dflags reg delta slot
    = load fmt reg (AddrRegImm sp (ImmInt offset))
  where
    platform = targetPlatform dflags
    offset   = spillSlotToOffset dflags slot - delta

    -- integer registers are 32 bits wide on ArchPPC, 64 bits otherwise
    fmt = case targetClassOfReg platform reg of
            RcInteger
                | ArchPPC <- platformArch platform -> II32
                | otherwise                        -> II64
            RcDouble -> FF64
            _        -> panic "PPC.Instr.mkLoadInstr: no match"

    load = maybe LDFAR (const LD) (makeImmediate W32 True offset)
583
584
-- | The size of a minimal stackframe header including minimal
-- parameter save area.  AIX gets its own value; everywhere else the
-- size depends on the architecture (and, for 64-bit, on the ELF ABI
-- version).  Any other architecture causes a panic.
stackFrameHeaderSize :: DynFlags -> Int
stackFrameHeaderSize dflags
    | OSAIX <- platformOS platform = 24 + 8 * 4
    | otherwise =
        -- header + parameter save area
        case platformArch platform of
            ArchPPC           -> 64 -- TODO: check ABI spec
            ArchPPC_64 ELF_V1 -> 48 + 8 * 8
            ArchPPC_64 ELF_V2 -> 32 + 8 * 8
            _ -> panic "PPC.stackFrameHeaderSize: not defined for this OS"
  where
    platform = targetPlatform dflags
598
-- | The maximum number of bytes required to spill a register.
--
-- PPC32 has 32-bit GPRs and 64-bit FPRs, while PPC64 has 64-bit GPRs
-- and 64-bit FPRs, so unlike on x86 the maximum is 8 on every
-- platform.  AltiVec's vector registers are 128 bits wide, so this
-- must not be used to spill them.
spillSlotSize :: Int
spillSlotSize = 8
606
-- | The number of spill slots available without allocating more:
-- what is left of the reserved C stack bytes after the stack frame
-- header, counted in whole spill slots, minus one.
maxSpillSlots :: DynFlags -> Int
maxSpillSlots dflags = usableBytes `div` spillSlotSize - 1
  where
    usableBytes = rESERVED_C_STACK_BYTES dflags - stackFrameHeaderSize dflags
--     = 0 -- useful for testing allocMoreStack
613
-- | The number of bytes that the stack pointer should be aligned to.
-- This is 16 both on PPC32 and PPC64 ELF (see the ELF processor
-- specific supplements).
stackAlign :: Int
stackAlign = 16
619
-- | Convert a spill slot number to a *byte* offset, with no sign:
-- slots are laid out one after another behind the stack frame header.
spillSlotToOffset :: DynFlags -> Int -> Int
spillSlotToOffset dflags slot = headerBytes + slotBytes
  where
    headerBytes = stackFrameHeaderSize dflags
    slotBytes   = spillSlotSize * slot
624
625
626 --------------------------------------------------------------------------------
-- | See if this instruction is telling us the current C stack delta:
-- a 'DELTA' pseudo-op yields its value, anything else yields 'Nothing'.
ppc_takeDeltaInstr :: Instr -> Maybe Int
ppc_takeDeltaInstr (DELTA i) = Just i
ppc_takeDeltaInstr _         = Nothing
636
637
-- | Whether this is one of the pseudo-ops 'COMMENT', 'LDATA',
-- 'NEWBLOCK' or 'DELTA'.
ppc_isMetaInstr :: Instr -> Bool
ppc_isMetaInstr COMMENT{}  = True
ppc_isMetaInstr LDATA{}    = True
ppc_isMetaInstr NEWBLOCK{} = True
ppc_isMetaInstr DELTA{}    = True
ppc_isMetaInstr _          = False
649
650
-- | Copy the value in a register to another one.
-- Must work for all register classes.
--
-- The arguments come as source then destination, whereas 'MR' takes
-- the destination first.
ppc_mkRegRegMoveInstr :: Reg -> Reg -> Instr
ppc_mkRegRegMoveInstr src dst = MR dst src
660
661
-- | Make an unconditional jump instruction: a single 'BCC' with
-- condition 'ALWAYS' and no branch hint.
ppc_mkJumpInstr :: BlockId -> [Instr]
ppc_mkJumpInstr target = [BCC ALWAYS target Nothing]
669
670
-- | Take the source and destination from this reg -> reg move
-- instruction, or Nothing if it's not one.  The result is ordered
-- (source, destination).
ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
ppc_takeRegRegMoveInstr instr = case instr of
    MR dst src -> Just (src, dst)
    _          -> Nothing
676
677 -- -----------------------------------------------------------------------------
678 -- Making far branches
679
680 -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
681 -- big, we have to work around this limitation.
682
-- | Turn conditional branches whose target is too far away into
-- 'BCCFAR'.
--
-- Distances are measured in instructions, counting one per 'Instr'.
-- When the whole block list is shorter than the near limit it is
-- returned as is.  Otherwise every conditional 'BCC' whose distance to
-- the start of its target block reaches the limit becomes a 'BCCFAR';
-- unconditional 'BCC's stay near but lose their branch hint.
makeFarBranches
        :: LabelMap CmmStatics
        -> [NatBasicBlock Instr]
        -> [NatBasicBlock Instr]
makeFarBranches info_env blocks =
    if totalLength < nearLimit
        then blocks
        else zipWith widenBlock startAddrs blocks
  where
    -- start address of every block, followed by the total length
    startAddrs  = scanl (+) 0 [ length insns | BasicBlock _ insns <- blocks ]
    totalLength = last startAddrs

    startAddrOf = listToUFM $ zip (map blockId blocks) startAddrs

    -- 8192 instructions are allowed; let's keep some distance, as
    -- we have a few pseudo-insns that are pretty-printed as
    -- multiple instructions, and it's just not worth the effort
    -- to calculate things exactly
    nearLimit = 7000 - mapSize info_env * maxRetInfoTableSizeW

    widenBlock start (BasicBlock bid insns)
        = BasicBlock bid (zipWith widen [start ..] insns)

    widen _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing
    widen here (BCC cond tgt hint)
        | abs (here - tgtAddr) >= nearLimit = BCCFAR cond tgt hint
        | otherwise                         = BCC cond tgt hint
      where
        Just tgtAddr = lookupUFM startAddrOf tgt
    widen _ other = other