Generalize CmmUnwind and pass unwind information through NCG
[ghc.git] / compiler / nativeGen / X86 / Instr.hs
1 {-# LANGUAGE CPP, TypeFamilies #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Machine-dependent assembly language
6 --
7 -- (c) The University of Glasgow 1993-2004
8 --
9 -----------------------------------------------------------------------------
10
11 module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest,
12 getJumpDestBlockId, canShortcut, shortcutStatics,
13 shortcutJump, i386_insert_ffrees, allocMoreStack,
14 maxSpillSlots, archWordFormat)
15 where
16
17 #include "HsVersions.h"
18 #include "nativeGen/NCG.h"
19
20 import X86.Cond
21 import X86.Regs
22 import Instruction
23 import Format
24 import RegClass
25 import Reg
26 import TargetReg
27
28 import BlockId
29 import Hoopl
30 import CodeGen.Platform
31 import Cmm
32 import FastString
33 import Outputable
34 import Platform
35
36 import BasicTypes (Alignment)
37 import CLabel
38 import DynFlags
39 import UniqSet
40 import Unique
41 import UniqSupply
42 import Debug (UnwindTable)
43
44 import Control.Monad
45 import Data.Maybe (fromMaybe)
46
-- | The integer 'Format' of a machine word (and therefore of a memory
-- address) on the target: 'II32' when the argument says the target is
-- 32-bit, 'II64' otherwise.
archWordFormat :: Bool -> Format
archWordFormat is32Bit = if is32Bit then II32 else II64
53
-- | Plugs the x86 instruction set into the generic native code generator.
-- Every class method simply forwards to the corresponding @x86_@ function
-- defined in this module.
instance Instruction Instr where
        -- register usage and renaming
        regUsageOfInstr     = x86_regUsageOfInstr
        patchRegsOfInstr    = x86_patchRegsOfInstr

        -- control flow
        isJumpishInstr      = x86_isJumpishInstr
        jumpDestsOfInstr    = x86_jumpDestsOfInstr
        patchJumpInstr      = x86_patchJumpInstr
        mkJumpInstr         = x86_mkJumpInstr

        -- spilling and reloading
        mkSpillInstr        = x86_mkSpillInstr
        mkLoadInstr         = x86_mkLoadInstr

        -- pseudo-instructions
        takeDeltaInstr      = x86_takeDeltaInstr
        isMetaInstr         = x86_isMetaInstr

        -- register-to-register moves
        mkRegRegMoveInstr   = x86_mkRegRegMoveInstr
        takeRegRegMoveInstr = x86_takeRegRegMoveInstr

        -- C stack adjustment
        mkStackAllocInstr   = x86_mkStackAllocInstr
        mkStackDeallocInstr = x86_mkStackDeallocInstr
70
71
72 -- -----------------------------------------------------------------------------
73 -- Intel x86 instructions
74
75 {-
76 Intel, in their infinite wisdom, selected a stack model for floating
77 point registers on x86. That might have made sense back in 1979 --
78 nowadays we can see it for the nonsense it really is. A stack model
79 fits poorly with the existing nativeGen infrastructure, which assumes
80 flat integer and FP register sets. Prior to this commit, nativeGen
81 could not generate correct x86 FP code -- to do so would have meant
82 somehow working the register-stack paradigm into the register
83 allocator and spiller, which sounds very difficult.
84
85 We have decided to cheat, and go for a simple fix which requires no
86 infrastructure modifications, at the expense of generating ropey but
87 correct FP code. All notions of the x86 FP stack and its insns have
88 been removed. Instead, we pretend (to the instruction selector and
89 register allocator) that x86 has six floating point registers, %fake0
90 .. %fake5, which can be used in the usual flat manner. We further
91 claim that x86 has floating point instructions very similar to SPARC
92 and Alpha, that is, a simple 3-operand register-register arrangement.
93 Code generation and register allocation proceed on this basis.
94
95 When we come to print out the final assembly, our convenient fiction
96 is converted to dismal reality. Each fake instruction is
97 independently converted to a series of real x86 instructions.
98 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
99 arithmetic operations, the two operands are pushed onto the top of the
100 FP stack, the operation done, and the result copied back into the
101 relevant register. There are only six %fake registers because 2 are
102 needed for the translation, and x86 has 8 in total.
103
104 The translation is inefficient but is simple and it works. A cleverer
105 translation would handle a sequence of insns, simulating the FP stack
106 contents, would not impose a fixed mapping from %fake to %st regs, and
107 hopefully could avoid most of the redundant reg-reg moves of the
108 current translation.
109
110 We might as well make use of whatever unique FP facilities Intel have
111 chosen to bless us with (let's not be churlish, after all).
112 Hence GLDZ and GLD1. Bwahahahahahahaha!
113 -}
114
115 {-
116 Note [x86 Floating point precision]
117
118 Intel's internal floating point registers are by default 80 bit
119 extended precision. This means that all operations done on values in
120 registers are done at 80 bits, and unless the intermediate values are
121 truncated to the appropriate size (32 or 64 bits) by storing in
122 memory, calculations in registers will give different results from
123 calculations which pass intermediate values in memory (eg. via
124 function calls).
125
126 One solution is to set the FPU into 64 bit precision mode. Some OSs
127 do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
128 that this will only affect 64-bit precision arithmetic; 32-bit
129 calculations will still be done at 64-bit precision in registers. So
130 it doesn't solve the whole problem.
131
132 There's also the issue of what the C library is expecting in terms of
133 precision. It seems to be the case that glibc on Linux expects the
134 FPU to be set to 80 bit precision, so setting it to 64 bit could have
135 unexpected effects. Changing the default could have undesirable
136 effects on other 3rd-party library code too, so the right thing would
137 be to save/restore the FPU control word across Haskell code if we were
138 to do this.
139
140 gcc's -ffloat-store gives consistent results by always storing the
141 results of floating-point calculations in memory, which works for both
142 32 and 64-bit precision. However, it only affects the values of
143 user-declared floating point variables in C, not intermediate results.
144 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
145 flag).
146
147 Another problem is how to spill floating point registers in the
148 register allocator. Should we spill the whole 80 bits, or just 64?
149 On an OS which is set to 64 bit precision, spilling 64 is fine. On
150 Linux, spilling 64 bits will round the results of some operations.
151 This is what gcc does. Spilling at 80 bits requires taking up a full
152 128 bit slot (so we get alignment). We spill at 80-bits and ignore
153 the alignment problems.
154
155 In the future [edit: now available in GHC 7.0.1, with the -msse2
156 flag], we'll use the SSE registers for floating point. This requires
157 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
158 float ops), which means P4 or Xeon and above. Using SSE will solve
159 all these problems, because the SSE registers use fixed 32 bit or 64
160 bit precision.
161
162 --SDM 1/2003
163 -}
164
-- | x86 / x86_64 instructions as seen by the native code generator: real
-- machine instructions, the "fake" x87 floating point instructions described
-- above, and a handful of pseudo-ops.
data Instr
        -- Comment pseudo-op.
        = COMMENT FastString

        -- Source location pseudo-op: file, line, column, name.
        | LOCATION Int Int Int String

        -- Static data spat out in the middle of code generation.  It is
        -- extracted again before pretty-printing.
        | LDATA   Section (Alignment, CmmStatics)

        -- Start of a new basic block.  Useful during codegen, removed
        -- later.  The preceding instruction should be a jump, as per the
        -- invariants for a BasicBlock (see Cmm).
        | NEWBLOCK BlockId

        -- Unwinding information.
        -- See Note [Unwinding information in the NCG].
        | UNWIND BlockId UnwindTable

        -- Records the current stack offset for the benefit of subsequent
        -- passes.  The offset is the only thing carried here.
        | DELTA   Int

        -- Moves.
        | MOV         Format Operand Operand
        | CMOV   Cond Format Operand Reg
        | MOVZxL      Format Operand Operand -- format is the size of operand 1
        | MOVSxL      Format Operand Operand -- format is the size of operand 1
        -- x86_64 note: a plain mov into a 32-bit register always
        -- zero-extends into the 64-bit register, whereas the 8 and 16-bit
        -- movs leave the high bits of the register untouched.

        -- Load effective address (doubles as a very useful three-operand
        -- add instruction :-)
        | LEA         Format Operand Operand

        -- Integer arithmetic.
        | ADD         Format Operand Operand
        | ADC         Format Operand Operand
        | SUB         Format Operand Operand
        | SBB         Format Operand Operand

        | MUL         Format Operand Operand
        | MUL2        Format Operand         -- %edx:%eax = operand * %eax
        | IMUL        Format Operand Operand -- signed int mul
        | IMUL2       Format Operand         -- %edx:%eax = operand * %eax

        | DIV         Format Operand         -- eax := eax:edx/op, edx := eax:edx%op
        | IDIV        Format Operand         -- ditto, but signed

        -- Integer arithmetic where the effect on the condition register
        -- matters.  Used in specialized sequences such as MO_Add2.
        -- Do not rewrite these into "equivalent" instructions that
        -- affect the condition register differently! (See #9013.)
        | ADD_CC      Format Operand Operand
        | SUB_CC      Format Operand Operand

        -- Simple bit-twiddling.
        | AND         Format Operand Operand
        | OR          Format Operand Operand
        | XOR         Format Operand Operand
        | NOT         Format Operand
        | NEGI        Format Operand         -- NEG instruction (name clash with Cond)
        | BSWAP       Format Reg

        -- Shifts (the amount may be an immediate or %cl only).
        | SHL         Format Operand{-amount-} Operand
        | SAR         Format Operand{-amount-} Operand
        | SHR         Format Operand{-amount-} Operand

        | BT          Format Imm Operand
        | NOP

        -- x86 float arithmetic.
        -- We cheat by treating G{ABS,MOV,NEG} of doubles as single
        -- instructions right up until we spit them out.
        -- All the 3-operand fake fp insns are src1 src2 dst and are
        -- furthermore constrained to fp regs only.
        -- IMPORTANT: keep is_G_instr up to date with any changes here.
        | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
        | GLD         Format AddrMode Reg -- src, dst(fpreg)
        | GST         Format Reg AddrMode -- src(fpreg), dst

        | GLDZ        Reg -- dst(fpreg)
        | GLD1        Reg -- dst(fpreg)

        | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
        | GDTOI       Reg Reg -- src(fpreg), dst(intreg)

        | GITOF       Reg Reg -- src(intreg), dst(fpreg)
        | GITOD       Reg Reg -- src(intreg), dst(fpreg)

        | GDTOF       Reg Reg -- src(fpreg), dst(fpreg)

        | GADD        Format Reg Reg Reg -- src1, src2, dst
        | GDIV        Format Reg Reg Reg -- src1, src2, dst
        | GSUB        Format Reg Reg Reg -- src1, src2, dst
        | GMUL        Format Reg Reg Reg -- src1, src2, dst

        -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT].
        -- Compares src1 with src2 and sets the Zero flag iff the numbers
        -- are comparable and the comparison is True.  Subsequent code
        -- must test the %eflags zero flag regardless of the supplied Cond.
        | GCMP        Cond Reg Reg -- src1, src2

        | GABS        Format Reg Reg -- src, dst
        | GNEG        Format Reg Reg -- src, dst
        | GSQRT       Format Reg Reg -- src, dst
        | GSIN        Format CLabel CLabel Reg Reg -- src, dst
        | GCOS        Format CLabel CLabel Reg Reg -- src, dst
        | GTAN        Format CLabel CLabel Reg Reg -- src, dst

        | GFREE         -- do ffree on all x86 regs; an ugly hack


        -- SSE2 floating point: only a restricted set of the available
        -- SSE2 instructions is used for floating point.
        -- MOV is used for moving (either movss or movsd (movlpd better?)).
        | CVTSS2SD      Reg Reg            -- F32 to F64
        | CVTSD2SS      Reg Reg            -- F64 to F32
        | CVTTSS2SIQ    Format Operand Reg -- F32 to I32/I64 (with truncation)
        | CVTTSD2SIQ    Format Operand Reg -- F64 to I32/I64 (with truncation)
        | CVTSI2SS      Format Operand Reg -- I32/I64 to F32
        | CVTSI2SD      Format Operand Reg -- I32/I64 to F64

        -- ADD & SUB are used for arithmetic.  In both cases the operands
        -- are Operand Reg.

        -- SSE2 floating-point division:
        | FDIV          Format Operand Operand   -- divisor, dividend(dst)

        -- CMP is used for comparisons.  The ucomiss and ucomisd
        -- instructions compare single/double precision floating point
        -- respectively.

        | SQRT          Format Operand Reg      -- src, dst


        -- Comparison.
        | TEST          Format Operand Operand
        | CMP           Format Operand Operand
        | SETCC         Cond Operand

        -- Stack operations.
        | PUSH          Format Operand
        | POP           Format Operand
        -- both unused (SDM):
        --  | PUSHA
        --  | POPA

        -- Jumping around.
        | JMP         Operand [Reg] -- including live Regs at the call
        | JXX         Cond BlockId  -- includes unconditional branches
        | JXX_GBL     Cond Imm      -- non-local version of JXX
        -- Table jump
        | JMP_TBL     Operand   -- Address to jump to
                      [Maybe BlockId] -- Blocks in the jump table
                      Section   -- Data section jump table should be put in
                      CLabel    -- Label of jump table
        | CALL        (Either Imm Reg) [Reg]

        -- Other things.
        | CLTD Format            -- sign extend %eax into %edx:%eax

        | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
                                 -- pretty-prints as
                                 --       call 1f
                                 -- 1:    popl %reg
                                 --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
        | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
                                 -- pretty-prints as
                                 --       call 1f
                                 -- 1:    popl %reg

        -- Bit counting instructions.
        | POPCNT      Format Operand Reg -- [SSE4.2] count number of bits set to 1
        | BSF         Format Operand Reg -- bit scan forward
        | BSR         Format Operand Reg -- bit scan reverse

        -- Prefetch.
        | PREFETCH  PrefetchVariant Format Operand -- prefetch variant, addr size, address to prefetch
                                        -- variant can be NTA, Lvl0, Lvl1, or Lvl2

        | LOCK        Instr -- lock prefix
        | XADD        Format Operand Operand -- src (r), dst (r/m)
        | CMPXCHG     Format Operand Operand -- src (r), dst (r/m), eax implicit
        | MFENCE
353
-- | Which flavour of prefetch a 'PREFETCH' instruction stands for.
data PrefetchVariant
        = NTA
        | Lvl0
        | Lvl1
        | Lvl2
355
356
-- | An operand of an x86 instruction.
data Operand
        = OpReg  Reg            -- ^ a register
        | OpImm  Imm            -- ^ an immediate value
        | OpAddr AddrMode       -- ^ a memory reference
361
362
363
-- | Report which registers an instruction reads and which it writes, as a
-- 'RegUsage' (@RU read written@).  Registers that are not 'interesting' to
-- the register allocator are dropped from both lists.
--
-- Constructors without a case of their own (such as 'LDATA', 'NEWBLOCK' and
-- 'GFREE') reach the final catch-all and panic.
x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
x86_regUsageOfInstr platform instr
 = case instr of
    MOV    _ src dst    -> usageRW src dst
    -- dst keeps its old value when the condition fails, so it counts as read
    CMOV _ _ src dst    -> mkRU (use_R src [dst]) [dst]
    MOVZxL _ src dst    -> usageRW src dst
    MOVSxL _ src dst    -> usageRW src dst
    LEA    _ src dst    -> usageRW src dst
    ADD    _ src dst    -> usageRM src dst
    ADC    _ src dst    -> usageRM src dst
    SUB    _ src dst    -> usageRM src dst
    SBB    _ src dst    -> usageRM src dst
    IMUL   _ src dst    -> usageRM src dst
    IMUL2  _ src        -> mkRU (eax:use_R src []) [eax,edx]
    MUL    _ src dst    -> usageRM src dst
    MUL2   _ src        -> mkRU (eax:use_R src []) [eax,edx]
    DIV    _ op         -> mkRU (eax:edx:use_R op []) [eax,edx]
    IDIV   _ op         -> mkRU (eax:edx:use_R op []) [eax,edx]
    ADD_CC _ src dst    -> usageRM src dst
    SUB_CC _ src dst    -> usageRM src dst
    AND    _ src dst    -> usageRM src dst
    OR     _ src dst    -> usageRM src dst

    -- xor of a register with itself: the old value is irrelevant, so the
    -- register is reported as written only.
    XOR    _ (OpReg src) (OpReg dst)
        | src == dst    -> mkRU [] [dst]

    XOR    _ src dst    -> usageRM src dst
    NOT    _ op         -> usageM op
    BSWAP  _ reg        -> mkRU [reg] [reg]
    NEGI   _ op         -> usageM op
    SHL    _ imm dst    -> usageRM imm dst
    SAR    _ imm dst    -> usageRM imm dst
    SHR    _ imm dst    -> usageRM imm dst
    BT     _ _   src    -> mkRUR (use_R src [])

    PUSH   _ op         -> mkRUR (use_R op [])
    POP    _ op         -> mkRU [] (def_W op)
    TEST   _ src dst    -> mkRUR (use_R src $! use_R dst [])
    CMP    _ src dst    -> mkRUR (use_R src $! use_R dst [])
    SETCC  _ op         -> mkRU [] (def_W op)
    JXX    _ _          -> mkRU [] []
    JXX_GBL _ _         -> mkRU [] []
    JMP     op regs     -> mkRUR (use_R op regs)
    JMP_TBL op _ _ _    -> mkRUR (use_R op [])
    CALL (Left _)  params   -> mkRU params (callClobberedRegs platform)
    CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
    CLTD   _            -> mkRU [eax] [edx]
    NOP                 -> mkRU [] []

    GMOV   src dst      -> mkRU [src] [dst]
    GLD    _ src dst    -> mkRU (use_EA src []) [dst]
    GST    _ src dst    -> mkRUR (src : use_EA dst [])

    GLDZ   dst          -> mkRU [] [dst]
    GLD1   dst          -> mkRU [] [dst]

    GFTOI  src dst      -> mkRU [src] [dst]
    GDTOI  src dst      -> mkRU [src] [dst]

    GITOF  src dst      -> mkRU [src] [dst]
    GITOD  src dst      -> mkRU [src] [dst]

    GDTOF  src dst      -> mkRU [src] [dst]

    GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]

    GCMP   _ src1 src2   -> mkRUR [src1,src2]
    GABS   _ src dst     -> mkRU [src] [dst]
    GNEG   _ src dst     -> mkRU [src] [dst]
    GSQRT  _ src dst     -> mkRU [src] [dst]
    GSIN   _ _ _ src dst -> mkRU [src] [dst]
    GCOS   _ _ _ src dst -> mkRU [src] [dst]
    GTAN   _ _ _ src dst -> mkRU [src] [dst]

    CVTSS2SD   src dst  -> mkRU [src] [dst]
    CVTSD2SS   src dst  -> mkRU [src] [dst]
    CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SS   _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SD   _ src dst -> mkRU (use_R src []) [dst]
    FDIV _     src dst  -> usageRM src dst
    -- SQRT is (src operand, dst register), the same shape as the
    -- conversions above; without this case it would hit the panic below.
    SQRT _     src dst  -> mkRU (use_R src []) [dst]

    FETCHGOT reg        -> mkRU [] [reg]
    FETCHPC  reg        -> mkRU [] [reg]

    COMMENT _           -> noUsage
    LOCATION{}          -> noUsage
    UNWIND{}            -> noUsage
    DELTA   _           -> noUsage

    POPCNT _ src dst -> mkRU (use_R src []) [dst]
    BSF    _ src dst -> mkRU (use_R src []) [dst]
    BSR    _ src dst -> mkRU (use_R src []) [dst]

    -- note: might be a better way to do this
    PREFETCH _  _ src -> mkRU (use_R src []) []
    -- a lock prefix does not change what the wrapped instruction touches
    LOCK i              -> x86_regUsageOfInstr platform i
    XADD _ src dst      -> usageMM src dst
    CMPXCHG _ src dst   -> usageRMM src dst (OpReg eax)
    MFENCE -> noUsage

    _other              -> panic "regUsage: unrecognised instr"
 where
    -- # Definitions
    --
    -- Written: If the operand is a register, it's written. If it's an
    -- address, registers mentioned in the address are read.
    --
    -- Modified: If the operand is a register, it's both read and
    -- written. If it's an address, registers mentioned in the address
    -- are read.

    -- 2 operand form; first operand Read; second Written
    usageRW :: Operand -> Operand -> RegUsage
    usageRW op (OpReg reg)      = mkRU (use_R op []) [reg]
    usageRW op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
    usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"

    -- 2 operand form; first operand Read; second Modified
    usageRM :: Operand -> Operand -> RegUsage
    usageRM op (OpReg reg)      = mkRU (use_R op [reg]) [reg]
    usageRM op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
    usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"

    -- 2 operand form; first operand Modified; second Modified
    usageMM :: Operand -> Operand -> RegUsage
    usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
    usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
    usageMM _ _                     = panic "X86.RegInfo.usageMM: no match"

    -- 3 operand form; first operand Read; second Modified; third Modified
    usageRMM :: Operand -> Operand -> Operand -> RegUsage
    usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
    usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
    usageRMM _ _ _                               = panic "X86.RegInfo.usageRMM: no match"

    -- 1 operand form; operand Modified
    usageM :: Operand -> RegUsage
    usageM (OpReg reg)          = mkRU [reg] [reg]
    usageM (OpAddr ea)          = mkRUR (use_EA ea [])
    usageM _                    = panic "X86.RegInfo.usageM: no match"

    -- Registers defd when an operand is written.
    def_W (OpReg reg)           = [reg]
    def_W (OpAddr _ )           = []
    def_W _                     = panic "X86.RegInfo.def_W: no match"

    -- Registers used when an operand is read; they are consed onto the
    -- accumulator passed as the second argument.
    use_R (OpReg reg)  tl = reg : tl
    use_R (OpImm _)    tl = tl
    use_R (OpAddr ea)  tl = use_EA ea tl

    -- Registers used to compute an effective address.
    use_EA (ImmAddr _ _) tl = tl
    use_EA (AddrBaseIndex base index _) tl =
        use_base base $! use_index index tl
        where use_base (EABaseReg r)  acc = r : acc
              use_base _              acc = acc
              use_index EAIndexNone   acc = acc
              use_index (EAIndex i _) acc = i : acc

    -- Read-only usage: nothing is written.
    mkRUR src = src' `seq` RU src' []
        where src' = filter (interesting platform) src

    -- General usage; both lists are filtered and forced to WHNF first.
    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
        where src' = filter (interesting platform) src
              dst' = filter (interesting platform) dst
536
-- | Should the register allocator care about this register?  Virtual
-- registers always qualify; a real register qualifies exactly when
-- 'freeReg' says so for the platform.  Register pairs do not exist on
-- this architecture and cause a panic.
interesting :: Platform -> Reg -> Bool
interesting platform reg
  = case reg of
      RegVirtual _              -> True
      RegReal (RealRegSingle i) -> freeReg platform i
      RegReal (RealRegPair{})   -> panic "X86.interesting: no reg pairs on this arch"
542
543
544
-- | Apply the supplied renaming function to the registers mentioned in the
-- operands of an instruction.  Typically used to turn virtual registers
-- into real registers.
--
-- The live-register list of 'JMP' and the parameter list of 'CALL' are
-- passed through untouched.  Constructors without a case of their own
-- (such as 'LDATA', 'NEWBLOCK' and 'GFREE') reach the final catch-all and
-- panic.
x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
x86_patchRegsOfInstr instr env
 = case instr of
    MOV  fmt src dst     -> patch2 (MOV  fmt) src dst
    CMOV cc fmt src dst  -> CMOV cc fmt (patchOp src) (env dst)
    MOVZxL fmt src dst   -> patch2 (MOVZxL fmt) src dst
    MOVSxL fmt src dst   -> patch2 (MOVSxL fmt) src dst
    LEA  fmt src dst     -> patch2 (LEA  fmt) src dst
    ADD  fmt src dst     -> patch2 (ADD  fmt) src dst
    ADC  fmt src dst     -> patch2 (ADC  fmt) src dst
    SUB  fmt src dst     -> patch2 (SUB  fmt) src dst
    SBB  fmt src dst     -> patch2 (SBB  fmt) src dst
    IMUL fmt src dst     -> patch2 (IMUL fmt) src dst
    IMUL2 fmt src        -> patch1 (IMUL2 fmt) src
    MUL fmt src dst      -> patch2 (MUL fmt) src dst
    MUL2 fmt src         -> patch1 (MUL2 fmt) src
    IDIV fmt op          -> patch1 (IDIV fmt) op
    DIV fmt op           -> patch1 (DIV fmt) op
    ADD_CC fmt src dst   -> patch2 (ADD_CC fmt) src dst
    SUB_CC fmt src dst   -> patch2 (SUB_CC fmt) src dst
    AND  fmt src dst     -> patch2 (AND  fmt) src dst
    OR   fmt src dst     -> patch2 (OR   fmt) src dst
    XOR  fmt src dst     -> patch2 (XOR  fmt) src dst
    NOT  fmt op          -> patch1 (NOT  fmt) op
    BSWAP fmt reg        -> BSWAP fmt (env reg)
    NEGI fmt op          -> patch1 (NEGI fmt) op
    -- the shift amount is kept as it is; only the shifted operand is patched
    SHL  fmt imm dst     -> patch1 (SHL fmt imm) dst
    SAR  fmt imm dst     -> patch1 (SAR fmt imm) dst
    SHR  fmt imm dst     -> patch1 (SHR fmt imm) dst
    BT   fmt imm src     -> patch1 (BT  fmt imm) src
    TEST fmt src dst     -> patch2 (TEST fmt) src dst
    CMP  fmt src dst     -> patch2 (CMP  fmt) src dst
    PUSH fmt op          -> patch1 (PUSH fmt) op
    POP  fmt op          -> patch1 (POP  fmt) op
    SETCC cond op        -> patch1 (SETCC cond) op
    JMP op regs          -> JMP (patchOp op) regs
    JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl

    GMOV src dst         -> GMOV (env src) (env dst)
    GLD  fmt src dst     -> GLD fmt (lookupAddr src) (env dst)
    GST  fmt src dst     -> GST fmt (env src) (lookupAddr dst)

    GLDZ dst             -> GLDZ (env dst)
    GLD1 dst             -> GLD1 (env dst)

    GFTOI src dst        -> GFTOI (env src) (env dst)
    GDTOI src dst        -> GDTOI (env src) (env dst)

    GITOF src dst        -> GITOF (env src) (env dst)
    GITOD src dst        -> GITOD (env src) (env dst)

    GDTOF src dst        -> GDTOF (env src) (env dst)

    GADD fmt s1 s2 dst   -> GADD fmt (env s1) (env s2) (env dst)
    GSUB fmt s1 s2 dst   -> GSUB fmt (env s1) (env s2) (env dst)
    GMUL fmt s1 s2 dst   -> GMUL fmt (env s1) (env s2) (env dst)
    GDIV fmt s1 s2 dst   -> GDIV fmt (env s1) (env s2) (env dst)

    GCMP fmt src1 src2   -> GCMP fmt (env src1) (env src2)
    GABS fmt src dst     -> GABS fmt (env src) (env dst)
    GNEG fmt src dst     -> GNEG fmt (env src) (env dst)
    GSQRT fmt src dst    -> GSQRT fmt (env src) (env dst)
    GSIN fmt l1 l2 src dst       -> GSIN fmt l1 l2 (env src) (env dst)
    GCOS fmt l1 l2 src dst       -> GCOS fmt l1 l2 (env src) (env dst)
    GTAN fmt l1 l2 src dst       -> GTAN fmt l1 l2 (env src) (env dst)

    CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
    CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
    CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
    CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
    CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
    CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
    FDIV fmt src dst     -> FDIV fmt (patchOp src) (patchOp dst)
    -- SQRT is (src operand, dst register), the same shape as the
    -- conversions above; without this case it would hit the panic below.
    SQRT fmt src dst     -> SQRT fmt (patchOp src) (env dst)

    CALL (Left _)  _    -> instr
    CALL (Right reg) p  -> CALL (Right (env reg)) p

    FETCHGOT reg        -> FETCHGOT (env reg)
    FETCHPC  reg        -> FETCHPC  (env reg)

    NOP                 -> instr
    COMMENT _           -> instr
    LOCATION {}         -> instr
    UNWIND {}           -> instr
    DELTA _             -> instr

    JXX _ _             -> instr
    JXX_GBL _ _         -> instr
    CLTD _              -> instr

    POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
    BSF    fmt src dst -> BSF    fmt (patchOp src) (env dst)
    BSR    fmt src dst -> BSR    fmt (patchOp src) (env dst)

    PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)

    LOCK i               -> LOCK (x86_patchRegsOfInstr i env)
    XADD fmt src dst     -> patch2 (XADD fmt) src dst
    CMPXCHG fmt src dst  -> patch2 (CMPXCHG fmt) src dst
    MFENCE               -> instr

    _other              -> panic "patchRegs: unrecognised instr"

  where
    -- Rebuild a one-operand instruction, forcing the patched operand.
    patch1 :: (Operand -> a) -> Operand -> a
    patch1 insn op      = insn $! patchOp op
    -- Rebuild a two-operand instruction, forcing both patched operands.
    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst

    -- Rename the registers inside a single operand.
    patchOp (OpReg  reg) = OpReg $! env reg
    patchOp (OpImm  imm) = OpImm imm
    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea

    -- Rename the base and index registers of an addressing mode.
    lookupAddr (ImmAddr imm off) = ImmAddr imm off
    lookupAddr (AddrBaseIndex base index disp)
      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
      where
        lookupBase EABaseNone       = EABaseNone
        lookupBase EABaseRip        = EABaseRip
        lookupBase (EABaseReg r)    = EABaseReg $! env r

        lookupIndex EAIndexNone     = EAIndexNone
        lookupIndex (EAIndex r i)   = (EAIndex $! env r) i
670
671
672 --------------------------------------------------------------------------------
-- | Does this instruction transfer control?  True for jumps (local,
-- global and table jumps) and for calls.
x86_isJumpishInstr :: Instr -> Bool
x86_isJumpishInstr JMP{}     = True
x86_isJumpishInstr JXX{}     = True
x86_isJumpishInstr JXX_GBL{} = True
x86_isJumpishInstr JMP_TBL{} = True
x86_isJumpishInstr CALL{}    = True
x86_isJumpishInstr _         = False
684
685
-- | The local blocks an instruction may jump to: the target of a 'JXX', or
-- every populated entry of a 'JMP_TBL'.  All other instructions yield the
-- empty list.
x86_jumpDestsOfInstr :: Instr -> [BlockId]
x86_jumpDestsOfInstr (JXX _ target)        = [target]
x86_jumpDestsOfInstr (JMP_TBL _ slots _ _) = [ target | Just target <- slots ]
x86_jumpDestsOfInstr _                     = []
695
696
-- | Rewrite the local jump targets of an instruction with the given
-- function.  Only 'JXX' and the populated entries of a 'JMP_TBL' are
-- affected; every other instruction is returned unchanged.
x86_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
x86_patchJumpInstr (JXX cond target) rename
    = JXX cond (rename target)
x86_patchJumpInstr (JMP_TBL op slots section lbl) rename
    = JMP_TBL op [ fmap rename slot | slot <- slots ] section lbl
x86_patchJumpInstr insn _
    = insn
706
707
708
709
710 -- -----------------------------------------------------------------------------
-- | Build the instruction that stores a register into a spill slot.
-- The slot address is stack-pointer relative: the slot's byte offset
-- minus the current stack delta.
x86_mkSpillInstr
    :: DynFlags
    -> Reg      -- ^ register to spill
    -> Int      -- ^ current stack delta
    -> Int      -- ^ spill slot to use
    -> Instr
x86_mkSpillInstr dflags reg delta slot =
    case targetClassOfReg platform reg of
        RcInteger   -> MOV wordFmt (OpReg reg) (OpAddr slotAddr)
        -- covers RcFloat/RcDouble; stored with the 80-bit format
        RcDouble    -> GST FF80 reg slotAddr
        RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr slotAddr)
        _           -> panic "X86.mkSpillInstr: no match"
  where
    platform = targetPlatform dflags
    wordFmt  = archWordFormat (target32Bit platform)
    slotAddr = spRel dflags (spillSlotToOffset platform slot - delta)
730
-- | Build the instruction that reloads a register from a spill slot.
-- The slot address is stack-pointer relative: the slot's byte offset
-- minus the current stack delta.
x86_mkLoadInstr
    :: DynFlags
    -> Reg      -- ^ register to load
    -> Int      -- ^ current stack delta
    -> Int      -- ^ spill slot to use
    -> Instr
x86_mkLoadInstr dflags reg delta slot =
    case targetClassOfReg platform reg of
        RcInteger   -> MOV wordFmt (OpAddr slotAddr) (OpReg reg)
        -- covers RcFloat/RcDouble; loaded with the 80-bit format
        RcDouble    -> GLD FF80 slotAddr reg
        RcDoubleSSE -> MOV FF64 (OpAddr slotAddr) (OpReg reg)
        _           -> panic "X86.x86_mkLoadInstr"
  where
    platform = targetPlatform dflags
    wordFmt  = archWordFormat (target32Bit platform)
    slotAddr = spRel dflags (spillSlotToOffset platform slot - delta)
750
-- | Size in bytes of one spill slot: 12 on 32-bit targets, 8 otherwise.
spillSlotSize :: Platform -> Int
spillSlotSize platform
    | target32Bit platform = 12
    | otherwise            = 8
754
-- | Number of spill slots available without allocating more C stack:
-- the reserved C stack bytes less 64, divided by the slot size, minus one.
maxSpillSlots :: DynFlags -> Int
maxSpillSlots dflags = (usableBytes `div` slotBytes) - 1
  where
    usableBytes = rESERVED_C_STACK_BYTES dflags - 64
    slotBytes   = spillSlotSize (targetPlatform dflags)
--     = 0 -- useful for testing allocMoreStack
759
-- | Alignment, in bytes, that the stack pointer should be kept at.
stackAlign :: Int
stackAlign = 16
763
-- | Turn a spill slot number into an unsigned *byte* offset: slots start
-- 64 bytes in and are 'spillSlotSize' bytes apart.  Whether the slot lives
-- above or below the C stack pointer is decided on a per arch basis by
-- whoever applies the offset.
spillSlotToOffset :: Platform -> Int -> Int
spillSlotToOffset platform slot = slotBytes * slot + 64
  where
    slotBytes = spillSlotSize platform
770
771 --------------------------------------------------------------------------------
772
-- | If the instruction is a 'DELTA' pseudo-op, return the C stack delta it
-- records; otherwise return 'Nothing'.
x86_takeDeltaInstr :: Instr -> Maybe Int
x86_takeDeltaInstr (DELTA offset) = Just offset
x86_takeDeltaInstr _              = Nothing
782
783
-- | Is this one of the pseudo-ops ('COMMENT', 'LOCATION', 'LDATA',
-- 'NEWBLOCK', 'UNWIND', 'DELTA') rather than an actual instruction?
x86_isMetaInstr :: Instr -> Bool
x86_isMetaInstr COMMENT{}  = True
x86_isMetaInstr LOCATION{} = True
x86_isMetaInstr LDATA{}    = True
x86_isMetaInstr NEWBLOCK{} = True
x86_isMetaInstr UNWIND{}   = True
x86_isMetaInstr DELTA{}    = True
x86_isMetaInstr _          = False
797
798
799
-- | Make a reg-reg move instruction.
--   The instruction is chosen from the register class of the source:
--   integer registers get a word-sized 'MOV' ('II32' on x86, 'II64' on
--   x86_64), 'RcDouble' registers get a 'GMOV', and SSE registers get
--   an 'FF64' 'MOV'.
--
x86_mkRegRegMoveInstr
    :: Platform
    -> Reg
    -> Reg
    -> Instr
x86_mkRegRegMoveInstr platform src dst =
    case targetClassOfReg platform src of
        RcInteger ->
            case platformArch platform of
                ArchX86    -> move II32
                ArchX86_64 -> move II64
                _          -> panic "x86_mkRegRegMoveInstr: Bad arch"
        RcDouble    -> GMOV src dst
        RcDoubleSSE -> move FF64
        _           -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
  where
    move fmt = MOV fmt (OpReg src) (OpReg dst)
820
-- | If the instruction is a 'MOV' between two registers, return the
--      (source, destination) pair.  The register allocator tries to
--      eliminate reg->reg moves whenever it can, by assigning the src and
--      dest temporaries to the same real register.
--
x86_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
x86_takeRegRegMoveInstr instr
  = case instr of
      MOV _ (OpReg from) (OpReg to) -> Just (from, to)
      _                             -> Nothing
833
834
-- | Make an unconditional branch to the given block: a single 'JXX' with
-- the 'ALWAYS' condition.
x86_mkJumpInstr :: BlockId -> [Instr]
x86_mkJumpInstr target = [branch]
  where
    branch = JXX ALWAYS target
842
843
-- | Make the instruction that subtracts @amount@ from the stack pointer
-- (@esp@ on x86, @rsp@ on x86_64).  Panics on any other architecture.
x86_mkStackAllocInstr
    :: Platform
    -> Int
    -> Instr
x86_mkStackAllocInstr platform amount =
    case platformArch platform of
        ArchX86    -> grow II32 esp
        ArchX86_64 -> grow II64 rsp
        _          -> panic "x86_mkStackAllocInstr"
  where
    grow fmt sp = SUB fmt (OpImm (ImmInt amount)) (OpReg sp)
853
-- | Make the instruction that adds @amount@ back onto the stack pointer
-- (@esp@ on x86, @rsp@ on x86_64).  Panics on any other architecture.
x86_mkStackDeallocInstr
    :: Platform
    -> Int
    -> Instr
x86_mkStackDeallocInstr platform amount =
    case platformArch platform of
        ArchX86    -> shrink II32 esp
        ArchX86_64 -> shrink II64 rsp
        _          -> panic "x86_mkStackDeallocInstr"
  where
    shrink fmt sp = ADD fmt (OpImm (ImmInt amount)) (OpReg sp)
863
-- | If any block contains one of the fake x87 floating point instructions
-- (see 'is_G_instr'), put a 'GFREE' in front of every non-local transfer
-- in every block.  Otherwise the blocks are returned unchanged.
i386_insert_ffrees
    :: [GenBasicBlock Instr]
    -> [GenBasicBlock Instr]
i386_insert_ffrees blocks =
    if usesFakeFP then map addFrees blocks else blocks
  where
    usesFakeFP = any (\(BasicBlock _ instrs) -> any is_G_instr instrs) blocks

    addFrees (BasicBlock bid instrs) =
        BasicBlock bid (insertBeforeNonlocalTransfers GFREE instrs)
876
-- | Insert the given instruction immediately before every 'CALL' and
-- 'JMP' in the list.  A 'JXX_GBL' cannot be handled and panics.
insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
insertBeforeNonlocalTransfers insert insns = go insns
  where
    go []            = []
    go (insn : rest) =
        case insn of
            CALL _ _    -> insert : insn : go rest
            JMP _ _     -> insert : insn : go rest
            JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
            _           -> insn : go rest
885
886
-- | Is this one of the fake x87 floating point instructions?
--
-- This list has to be extended by hand whenever a new instruction is
-- added to the fake x86 FP instruction set.  Encountering a 'GFREE' here
-- is treated as an error and panics.
is_G_instr :: Instr -> Bool
is_G_instr GMOV{}  = True
is_G_instr GLD{}   = True
is_G_instr GST{}   = True
is_G_instr GLDZ{}  = True
is_G_instr GLD1{}  = True
is_G_instr GFTOI{} = True
is_G_instr GDTOI{} = True
is_G_instr GITOF{} = True
is_G_instr GITOD{} = True
is_G_instr GDTOF{} = True
is_G_instr GADD{}  = True
is_G_instr GDIV{}  = True
is_G_instr GSUB{}  = True
is_G_instr GMUL{}  = True
is_G_instr GCMP{}  = True
is_G_instr GABS{}  = True
is_G_instr GNEG{}  = True
is_G_instr GSQRT{} = True
is_G_instr GSIN{}  = True
is_G_instr GCOS{}  = True
is_G_instr GTAN{}  = True
is_G_instr GFREE   = panic "is_G_instr: GFREE (!)"
is_G_instr _       = False
915
916
917 --
918 -- Note [extra spill slots]
919 --
920 -- If the register allocator used more spill slots than we have
921 -- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
922 -- C stack space on entry and exit from this proc. Therefore we
923 -- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
924 -- before every non-local jump.
925 --
926 -- This became necessary when the new codegen started bundling entire
927 -- functions together into one proc, because the register allocator
928 -- assigns a different stack slot to each virtual reg within a proc.
929 -- To avoid using so many slots we could also:
930 --
--   - split up the proc into connected components before code generation
932 --
933 -- - rename the virtual regs, so that we re-use vreg names and hence
934 -- stack slots for non-overlapping vregs.
935 --
-- Note that when a block is both a non-local entry point (with an
-- info table) and a local branch target, we have to split it into
-- two, like so:
--
--    <info table>
--    L:
--       <code>
--
-- becomes
--
--    <info table>
--    L:
--       sub $N, %rsp
--       jmp Lnew
--    Lnew:
--       <code>
--
-- and all branches pointing to L are retargeted to point to Lnew.
-- Otherwise, we would repeat the %rsp adjustment for each branch to
-- L.
956 --
-- | Reserve extra stack for a proc whose register allocation needs it
-- (see Note [extra spill slots]).
--
-- Data declarations are returned unchanged.  For a proc:
--
--   * every entry block is split in two: the original label now only
--     performs the stack allocation and jumps to a freshly named block
--     that carries the (rewritten) original instructions;
--
--   * a stack deallocation is inserted before every 'JMP';
--
--   * all other instructions have their jump targets patched so that
--     branches to an entry block go to its fresh twin instead;
--
--   * 'JXX_GBL' is not supported and panics.
--
-- The adjustment is @slots * spillSlotSize platform@, rounded up to a
-- multiple of 'stackAlign'.
allocMoreStack
  :: Platform
  -> Int
  -> NatCmmDecl statics X86.Instr.Instr
  -> UniqSM (NatCmmDecl statics X86.Instr.Instr)
allocMoreStack _ _ decl@(CmmData _ _) = return decl
allocMoreStack platform slots decl@(CmmProc info lbl live (ListGraph code)) = do
    let entryIds = entryBlocks decl

    -- one fresh unique per entry block, used to name its new twin
    freshUniqs <- replicateM (length entryIds) getUniqueM

    let
        -- sp delta, rounded up to the stack alignment
        rawDelta = slots * spillSlotSize platform
        delta    = ((rawDelta + stackAlign - 1) `quot` stackAlign) * stackAlign

        allocInsn   = mkStackAllocInstr   platform delta
        deallocInsn = mkStackDeallocInstr platform delta

        -- entry block id -> id of the block that now holds its code
        redirected :: LabelMap BlockId
        redirected = mapFromList (zip entryIds (map mkBlockId freshUniqs))

        retarget bid = fromMaybe bid (mapLookup bid redirected)

        fixInsn insn rest = case insn of
            JMP _ _     -> deallocInsn : insn : rest
            JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
            _           -> x86_patchJumpInstr insn retarget : rest

        rewriteBlock (BasicBlock bid insns) =
            case mapLookup bid redirected of
              Just freshId -> [ BasicBlock bid [allocInsn, JXX ALWAYS freshId]
                              , BasicBlock freshId body ]
              Nothing      -> [ BasicBlock bid body ]
          where
            body = foldr fixInsn [] insns

    return (CmmProc info lbl live (ListGraph (concatMap rewriteBlock code)))
997
998
-- | The destination a jump can be shortcut to: either a block or an
-- immediate.
data JumpDest
  = DestBlockId BlockId   -- ^ a block, named by its 'BlockId'
  | DestImm Imm           -- ^ an immediate
1000
-- | Extract the 'BlockId' from a 'DestBlockId'; any other destination
-- gives 'Nothing'.
getJumpDestBlockId :: JumpDest -> Maybe BlockId
getJumpDestBlockId dest
  = case dest of
      DestBlockId bid -> Just bid
      _               -> Nothing
1004
-- | Report the destination of an instruction that is nothing but a jump:
-- an unconditional ('ALWAYS') 'JXX' gives a 'DestBlockId', a 'JMP' to an
-- immediate operand gives a 'DestImm'.  Everything else gives 'Nothing'.
canShortcut :: Instr -> Maybe JumpDest
canShortcut instr
  = case instr of
      JXX ALWAYS bid    -> Just (DestBlockId bid)
      JMP (OpImm imm) _ -> Just (DestImm imm)
      _                 -> Nothing
1009
1010
-- | Shortcut a sequence of branches.
--
-- Starting from a 'JXX', repeatedly ask @fn@ where the target block
-- leads and rewrite the jump accordingly: to a 'JXX' on the next block,
-- or to a 'JXX_GBL' when an immediate destination is reached.  The set of
-- already-visited blocks stops us from following cycles.  Instructions
-- other than 'JXX' are returned unchanged.
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump fn insn = go (setEmpty :: LabelSet) insn
  where
    go :: LabelSet -> Instr -> Instr
    go visited jump@(JXX cc target)
      | setMember target visited = jump
      | otherwise
      = case fn target of
          Nothing                 -> jump
          Just (DestBlockId next) -> go visited' (JXX cc next)
          Just (DestImm imm)      -> go visited' (JXX_GBL cc imm)
      where
        visited' = setInsert target visited
    go _ other = other
1023
-- | Apply the shortcut mapping to every static in a data section.
--
-- Jump tables need the mapping too, which is why it is applied to the
-- entries of a CmmData.  This lives here because it has to know about
-- 'JumpDest'.
shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
shortcutStatics fn (align, Statics lbl statics)
  = (align, Statics lbl shortened)
  where
    shortened = [ shortcutStatic fn static | static <- statics ]
1030
-- | Shortcut a label.  When 'maybeAsmTemp' recognises the label, the
-- corresponding block is followed via 'shortBlockId' (starting with an
-- empty seen-set); any other label is returned as is.
shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
  = case maybeAsmTemp lab of
      Just uq -> shortBlockId fn emptyUniqSet (mkBlockId uq)
      Nothing -> lab
1035
-- | Shortcut the label inside a single static, if it has one.
-- Only 'CmmLabel' and 'CmmLabelDiffOff' literals are rewritten; every
-- other static is passed through unchanged.
shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn static
  = case static of
      CmmStaticLit (CmmLabel lab)
        -> CmmStaticLit (CmmLabel (shortcutLabel fn lab))
      -- Slightly dodgy: the second label is left alone, but that is
      -- compatible with how CmmLabelDiffOff is used for jump tables now.
      CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)
        -> CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
      _ -> static
1045
-- | Follow the chain of jump destinations starting at a block and return
-- the label it ends at.
--
--   * If the block's unique has been seen before (a cycle), or @fn@ has no
--     destination for it, the result is the block's own asm temp label.
--
--   * A 'DestBlockId' is followed recursively, with the current unique
--     added to the seen-set.
--
--   * A 'DestImm' holding an 'ImmCLbl' yields that label.
--
--   * Any other destination panics.
shortBlockId
        :: (BlockId -> Maybe JumpDest)
        -> UniqSet Unique
        -> BlockId
        -> CLabel
shortBlockId fn seen blockid
  | elementOfUniqSet uq seen = mkAsmTempLabel uq
  | otherwise
  = case fn blockid of
      Nothing                      -> mkAsmTempLabel uq
      Just (DestBlockId next)      -> shortBlockId fn (addOneToUniqSet seen uq) next
      Just (DestImm (ImmCLbl lbl)) -> lbl
      Just _                       -> panic "shortBlockId"
  where
    uq = getUnique blockid