Stop exporting, and stop using, functions marked as deprecated
[ghc.git] / compiler / nativeGen / X86 / Instr.hs
1 {-# LANGUAGE CPP, TypeFamilies #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Machine-dependent assembly language
6 --
7 -- (c) The University of Glasgow 1993-2004
8 --
9 -----------------------------------------------------------------------------
10
11 module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest,
12 getJumpDestBlockId, canShortcut, shortcutStatics,
13 shortcutJump, i386_insert_ffrees, allocMoreStack,
14 maxSpillSlots, archWordSize)
15 where
16
17 #include "HsVersions.h"
18 #include "nativeGen/NCG.h"
19
20 import X86.Cond
21 import X86.Regs
22 import Instruction
23 import Size
24 import RegClass
25 import Reg
26 import TargetReg
27
28 import BlockId
29 import CodeGen.Platform
30 import Cmm
31 import FastString
32 import FastBool
33 import Outputable
34 import Platform
35
36 import BasicTypes (Alignment)
37 import CLabel
38 import DynFlags
39 import UniqSet
40 import Unique
41 import UniqSupply
42
43 import Control.Monad
44 import Data.Maybe (fromMaybe)
45
-- | Operand size of an x86/x86_64 memory address: 'II32' when the
-- flag says the target is 32-bit, 'II64' otherwise.
archWordSize :: Bool -> Size
archWordSize is32Bit = if is32Bit then II32 else II64
52
-- | 'Instruction' instance for the x86 instruction set.
--
-- Each class method is bound directly to the @x86_@-prefixed function of
-- the same name defined in this module.
instance Instruction Instr where
    -- register usage and rewriting
    regUsageOfInstr     = x86_regUsageOfInstr
    patchRegsOfInstr    = x86_patchRegsOfInstr

    -- control flow
    isJumpishInstr      = x86_isJumpishInstr
    jumpDestsOfInstr    = x86_jumpDestsOfInstr
    patchJumpInstr      = x86_patchJumpInstr

    -- spilling and reloading
    mkSpillInstr        = x86_mkSpillInstr
    mkLoadInstr         = x86_mkLoadInstr

    -- meta instructions
    takeDeltaInstr      = x86_takeDeltaInstr
    isMetaInstr         = x86_isMetaInstr

    -- moves, jumps and stack adjustment
    mkRegRegMoveInstr   = x86_mkRegRegMoveInstr
    takeRegRegMoveInstr = x86_takeRegRegMoveInstr
    mkJumpInstr         = x86_mkJumpInstr
    mkStackAllocInstr   = x86_mkStackAllocInstr
    mkStackDeallocInstr = x86_mkStackDeallocInstr
69
70
71 -- -----------------------------------------------------------------------------
72 -- Intel x86 instructions
73
74 {-
75 Intel, in their infinite wisdom, selected a stack model for floating
76 point registers on x86. That might have made sense back in 1979 --
77 nowadays we can see it for the nonsense it really is. A stack model
78 fits poorly with the existing nativeGen infrastructure, which assumes
79 flat integer and FP register sets. Prior to this commit, nativeGen
80 could not generate correct x86 FP code -- to do so would have meant
81 somehow working the register-stack paradigm into the register
82 allocator and spiller, which sounds very difficult.
83
84 We have decided to cheat, and go for a simple fix which requires no
85 infrastructure modifications, at the expense of generating ropey but
86 correct FP code. All notions of the x86 FP stack and its insns have
87 been removed. Instead, we pretend (to the instruction selector and
88 register allocator) that x86 has six floating point registers, %fake0
89 .. %fake5, which can be used in the usual flat manner. We further
90 claim that x86 has floating point instructions very similar to SPARC
91 and Alpha, that is, a simple 3-operand register-register arrangement.
92 Code generation and register allocation proceed on this basis.
93
94 When we come to print out the final assembly, our convenient fiction
95 is converted to dismal reality. Each fake instruction is
96 independently converted to a series of real x86 instructions.
97 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
98 arithmetic operations, the two operands are pushed onto the top of the
99 FP stack, the operation done, and the result copied back into the
100 relevant register. There are only six %fake registers because 2 are
101 needed for the translation, and x86 has 8 in total.
102
103 The translation is inefficient but is simple and it works. A cleverer
104 translation would handle a sequence of insns, simulating the FP stack
105 contents, would not impose a fixed mapping from %fake to %st regs, and
106 hopefully could avoid most of the redundant reg-reg moves of the
107 current translation.
108
109 We might as well make use of whatever unique FP facilities Intel have
110 chosen to bless us with (let's not be churlish, after all).
111 Hence GLDZ and GLD1. Bwahahahahahahaha!
112 -}
113
114 {-
115 Note [x86 Floating point precision]
116
117 Intel's internal floating point registers are by default 80 bit
118 extended precision. This means that all operations done on values in
119 registers are done at 80 bits, and unless the intermediate values are
120 truncated to the appropriate size (32 or 64 bits) by storing in
121 memory, calculations in registers will give different results from
122 calculations which pass intermediate values in memory (eg. via
123 function calls).
124
125 One solution is to set the FPU into 64 bit precision mode. Some OSs
126 do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
127 that this will only affect 64-bit precision arithmetic; 32-bit
128 calculations will still be done at 64-bit precision in registers. So
129 it doesn't solve the whole problem.
130
131 There's also the issue of what the C library is expecting in terms of
132 precision. It seems to be the case that glibc on Linux expects the
133 FPU to be set to 80 bit precision, so setting it to 64 bit could have
134 unexpected effects. Changing the default could have undesirable
135 effects on other 3rd-party library code too, so the right thing would
136 be to save/restore the FPU control word across Haskell code if we were
137 to do this.
138
139 gcc's -ffloat-store gives consistent results by always storing the
140 results of floating-point calculations in memory, which works for both
141 32 and 64-bit precision. However, it only affects the values of
142 user-declared floating point variables in C, not intermediate results.
143 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
144 flag).
145
146 Another problem is how to spill floating point registers in the
147 register allocator. Should we spill the whole 80 bits, or just 64?
148 On an OS which is set to 64 bit precision, spilling 64 is fine. On
149 Linux, spilling 64 bits will round the results of some operations.
150 This is what gcc does. Spilling at 80 bits requires taking up a full
151 128 bit slot (so we get alignment). We spill at 80-bits and ignore
152 the alignment problems.
153
154 In the future [edit: now available in GHC 7.0.1, with the -msse2
155 flag], we'll use the SSE registers for floating point. This requires
156 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
157 float ops), which means P4 or Xeon and above. Using SSE will solve
158 all these problems, because the SSE registers use fixed 32 bit or 64
159 bit precision.
160
161 --SDM 1/2003
162 -}
163
data Instr
        -- Pseudo-op carrying a comment.
        = COMMENT FastString

        -- Static data emitted in the middle of code generation.
        -- It is extracted again before pretty-printing.
        | LDATA   Section (Alignment, CmmStatics)

        -- Marks the start of a new basic block.  Useful during codegen
        -- and removed later.  The preceding instruction should be a
        -- jump, as per the invariants for a BasicBlock (see Cmm).
        | NEWBLOCK BlockId

        -- Records the current stack offset for the benefit of
        -- subsequent passes.
        | DELTA   Int

        -- Moves.
        | MOV         Size Operand Operand
        | CMOV   Cond Size Operand Reg
        | MOVZxL      Size Operand Operand -- size is the size of operand 1
        | MOVSxL      Size Operand Operand -- size is the size of operand 1
        -- x86_64 note: a plain mov into a 32-bit register always
        -- zero-extends into the 64-bit register, whereas the 8 and
        -- 16-bit movs leave the high bits of the register untouched.

        -- Load effective address (doubles as a very useful
        -- three-operand add instruction).
        | LEA         Size Operand Operand

        -- Integer arithmetic.
        | ADD         Size Operand Operand
        | ADC         Size Operand Operand
        | SUB         Size Operand Operand

        | MUL         Size Operand Operand
        | MUL2        Size Operand         -- %edx:%eax = operand * %eax
        | IMUL        Size Operand Operand -- signed int mul
        | IMUL2       Size Operand         -- %edx:%eax = operand * %eax

        | DIV         Size Operand         -- eax := eax:edx/op, edx := eax:edx%op
        | IDIV        Size Operand         -- ditto, but signed

        -- Integer arithmetic where the effect on the condition register
        -- matters.  Used in specialized sequences such as MO_Add2.
        -- Do not rewrite these instructions to "equivalent" ones that
        -- have a different effect on the condition register! (See #9013.)
        | ADD_CC      Size Operand Operand
        | SUB_CC      Size Operand Operand

        -- Simple bit-twiddling.
        | AND         Size Operand Operand
        | OR          Size Operand Operand
        | XOR         Size Operand Operand
        | NOT         Size Operand
        | NEGI        Size Operand         -- NEG instruction (name clash with Cond)
        | BSWAP       Size Reg

        -- Shifts.  The first operand is the shift amount, which may be
        -- an immediate or %cl only.
        | SHL         Size Operand Operand
        | SAR         Size Operand Operand
        | SHR         Size Operand Operand

        | BT          Size Imm Operand
        | NOP

        -- x86 float arithmetic.
        -- We cheat by treating G{ABS,MOV,NEG} of doubles as single
        -- instructions right up until they are printed.  All the
        -- 3-operand fake fp insns are src1 src2 dst, and are further
        -- constrained to be fp regs only.
        -- IMPORTANT: keep is_G_instr up to date with any changes here.
        | GMOV        Reg Reg           -- src(fpreg), dst(fpreg)
        | GLD         Size AddrMode Reg -- src, dst(fpreg)
        | GST         Size Reg AddrMode -- src(fpreg), dst

        | GLDZ        Reg               -- dst(fpreg)
        | GLD1        Reg               -- dst(fpreg)

        | GFTOI       Reg Reg           -- src(fpreg), dst(intreg)
        | GDTOI       Reg Reg           -- src(fpreg), dst(intreg)

        | GITOF       Reg Reg           -- src(intreg), dst(fpreg)
        | GITOD       Reg Reg           -- src(intreg), dst(fpreg)

        | GDTOF       Reg Reg           -- src(fpreg), dst(fpreg)

        | GADD        Size Reg Reg Reg  -- src1, src2, dst
        | GDIV        Size Reg Reg Reg  -- src1, src2, dst
        | GSUB        Size Reg Reg Reg  -- src1, src2, dst
        | GMUL        Size Reg Reg Reg  -- src1, src2, dst

        -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT].
        -- Compares src1 with src2 and sets the Zero flag iff the numbers
        -- are comparable and the comparison is True.  Subsequent code
        -- must test the %eflags zero flag regardless of the supplied Cond.
        | GCMP        Cond Reg Reg      -- src1, src2

        | GABS        Size Reg Reg      -- src, dst
        | GNEG        Size Reg Reg      -- src, dst
        | GSQRT       Size Reg Reg      -- src, dst
        | GSIN        Size CLabel CLabel Reg Reg -- src, dst
        | GCOS        Size CLabel CLabel Reg Reg -- src, dst
        | GTAN        Size CLabel CLabel Reg Reg -- src, dst

        | GFREE         -- do ffree on all x86 regs; an ugly hack


        -- SSE2 floating point: only a restricted set of the available
        -- SSE2 instructions is used for floating-point.
        -- Use MOV for moving (either movss or movsd (movlpd better?)).
        | CVTSS2SD      Reg Reg            -- F32 to F64
        | CVTSD2SS      Reg Reg            -- F64 to F32
        | CVTTSS2SIQ    Size Operand Reg   -- F32 to I32/I64 (with truncation)
        | CVTTSD2SIQ    Size Operand Reg   -- F64 to I32/I64 (with truncation)
        | CVTSI2SS      Size Operand Reg   -- I32/I64 to F32
        | CVTSI2SD      Size Operand Reg   -- I32/I64 to F64

        -- Use ADD & SUB for arithmetic.  In both cases, operands
        -- are Operand Reg.

        -- SSE2 floating-point division:
        | FDIV          Size Operand Operand   -- divisor, dividend(dst)

        -- Use CMP for comparisons.  The ucomiss and ucomisd instructions
        -- compare single/double precision floating point respectively.

        | SQRT          Size Operand Reg        -- src, dst


        -- Comparison.
        | TEST          Size Operand Operand
        | CMP           Size Operand Operand
        | SETCC         Cond Operand

        -- Stack operations.
        | PUSH          Size Operand
        | POP           Size Operand
        -- PUSHA and POPA are both unused (SDM), so they have no
        -- constructors here.

        -- Jumping around.
        | JMP         Operand [Reg] -- including live Regs at the call
        | JXX         Cond BlockId  -- includes unconditional branches
        | JXX_GBL     Cond Imm      -- non-local version of JXX
        -- Table jump
        | JMP_TBL     Operand       -- Address to jump to
                      [Maybe BlockId] -- Blocks in the jump table
                      Section       -- Data section jump table should be put in
                      CLabel        -- Label of jump table
        | CALL        (Either Imm Reg) [Reg]

        -- Other things.
        | CLTD Size              -- sign extend %eax into %edx:%eax

        | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
                                 -- pretty-prints as
                                 --       call 1f
                                 -- 1:    popl %reg
                                 --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
        | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
                                 -- pretty-prints as
                                 --       call 1f
                                 -- 1:    popl %reg

        -- Bit counting instructions.
        | POPCNT      Size Operand Reg -- [SSE4.2] count number of bits set to 1
        | BSF         Size Operand Reg -- bit scan forward
        | BSR         Size Operand Reg -- bit scan reverse

        -- Prefetch.
        | PREFETCH  PrefetchVariant Size Operand -- prefetch Variant, addr size, address to prefetch
                                                 -- variant can be NTA, Lvl0, Lvl1, or Lvl2

        | LOCK        Instr -- lock prefix
        | XADD        Size Operand Operand -- src (r), dst (r/m)
        | CMPXCHG     Size Operand Operand -- src (r), dst (r/m), eax implicit
        | MFENCE
-- | The variant selector carried by the 'PREFETCH' instruction.
data PrefetchVariant
        = NTA
        | Lvl0
        | Lvl1
        | Lvl2
346
347
-- | An instruction operand: a register, an immediate value, or a
-- memory reference described by an addressing mode.
data Operand
        = OpReg  Reg            -- register
        | OpImm  Imm            -- immediate value
        | OpAddr AddrMode       -- memory reference
352
353
354
-- | Returns which registers are read and written as a (read, written)
-- pair.
--
-- Both lists are filtered through 'interesting', so real registers that
-- the allocator does not manage never appear in the result.  Any
-- instruction without a case below (e.g. 'LDATA', 'NEWBLOCK', 'GFREE')
-- falls through to the final @panic@.
x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
x86_regUsageOfInstr platform instr
 = case instr of
    MOV    _ src dst    -> usageRW src dst
    CMOV _ _ src dst    -> mkRU (use_R src [dst]) [dst]
    MOVZxL _ src dst    -> usageRW src dst
    MOVSxL _ src dst    -> usageRW src dst
    LEA    _ src dst    -> usageRW src dst
    ADD    _ src dst    -> usageRM src dst
    ADC    _ src dst    -> usageRM src dst
    SUB    _ src dst    -> usageRM src dst
    IMUL   _ src dst    -> usageRM src dst
    IMUL2  _ src        -> mkRU (eax:use_R src []) [eax,edx]
    MUL    _ src dst    -> usageRM src dst
    MUL2   _ src        -> mkRU (eax:use_R src []) [eax,edx]
    DIV    _ op         -> mkRU (eax:edx:use_R op []) [eax,edx]
    IDIV   _ op         -> mkRU (eax:edx:use_R op []) [eax,edx]
    ADD_CC _ src dst    -> usageRM src dst
    SUB_CC _ src dst    -> usageRM src dst
    AND    _ src dst    -> usageRM src dst
    OR     _ src dst    -> usageRM src dst

    -- XOR of a register with itself does not depend on the old value,
    -- so the register is only written, not read.
    XOR    _ (OpReg src) (OpReg dst)
        | src == dst    -> mkRU [] [dst]

    XOR    _ src dst    -> usageRM src dst
    NOT    _ op         -> usageM op
    BSWAP  _ reg        -> mkRU [reg] [reg]
    NEGI   _ op         -> usageM op
    SHL    _ imm dst    -> usageRM imm dst
    SAR    _ imm dst    -> usageRM imm dst
    SHR    _ imm dst    -> usageRM imm dst
    BT     _ _   src    -> mkRUR (use_R src [])

    PUSH   _ op         -> mkRUR (use_R op [])
    POP    _ op         -> mkRU [] (def_W op)
    TEST   _ src dst    -> mkRUR (use_R src $! use_R dst [])
    CMP    _ src dst    -> mkRUR (use_R src $! use_R dst [])
    SETCC  _ op         -> mkRU [] (def_W op)
    JXX    _ _          -> mkRU [] []
    JXX_GBL _ _         -> mkRU [] []
    JMP     op regs     -> mkRUR (use_R op regs)
    JMP_TBL op _ _ _    -> mkRUR (use_R op [])
    CALL (Left _)  params   -> mkRU params (callClobberedRegs platform)
    CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
    CLTD   _            -> mkRU [eax] [edx]
    NOP                 -> mkRU [] []

    GMOV   src dst      -> mkRU [src] [dst]
    GLD    _ src dst    -> mkRU (use_EA src []) [dst]
    GST    _ src dst    -> mkRUR (src : use_EA dst [])

    GLDZ   dst          -> mkRU [] [dst]
    GLD1   dst          -> mkRU [] [dst]

    GFTOI  src dst      -> mkRU [src] [dst]
    GDTOI  src dst      -> mkRU [src] [dst]

    GITOF  src dst      -> mkRU [src] [dst]
    GITOD  src dst      -> mkRU [src] [dst]

    GDTOF  src dst      -> mkRU [src] [dst]

    GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]

    GCMP   _ src1 src2   -> mkRUR [src1,src2]
    GABS   _ src dst     -> mkRU [src] [dst]
    GNEG   _ src dst     -> mkRU [src] [dst]
    GSQRT  _ src dst     -> mkRU [src] [dst]
    GSIN   _ _ _ src dst -> mkRU [src] [dst]
    GCOS   _ _ _ src dst -> mkRU [src] [dst]
    GTAN   _ _ _ src dst -> mkRU [src] [dst]

    CVTSS2SD   src dst  -> mkRU [src] [dst]
    CVTSD2SS   src dst  -> mkRU [src] [dst]
    CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SS   _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SD   _ src dst -> mkRU (use_R src []) [dst]
    FDIV _     src dst  -> usageRM src dst
    -- SQRT is declared as (src operand, dst register): the source is
    -- read and the destination written.  Without this case the
    -- instruction would hit the catch-all panic below.
    SQRT _     src dst  -> mkRU (use_R src []) [dst]

    FETCHGOT reg        -> mkRU [] [reg]
    FETCHPC  reg        -> mkRU [] [reg]

    COMMENT _           -> noUsage
    DELTA   _           -> noUsage

    POPCNT _ src dst -> mkRU (use_R src []) [dst]
    BSF    _ src dst -> mkRU (use_R src []) [dst]
    BSR    _ src dst -> mkRU (use_R src []) [dst]

    -- note: might be a better way to do this
    PREFETCH _  _ src -> mkRU (use_R src []) []
    -- A LOCK prefix has the same usage as the instruction it wraps.
    LOCK i              -> x86_regUsageOfInstr platform i
    XADD _ src dst      -> usageMM src dst
    CMPXCHG _ src dst   -> usageRMM src dst (OpReg eax)
    MFENCE -> noUsage

    _other              -> panic "regUsage: unrecognised instr"
 where
    -- # Definitions
    --
    -- Written: If the operand is a register, it's written. If it's an
    -- address, registers mentioned in the address are read.
    --
    -- Modified: If the operand is a register, it's both read and
    -- written. If it's an address, registers mentioned in the address
    -- are read.

    -- 2 operand form; first operand Read; second Written
    usageRW :: Operand -> Operand -> RegUsage
    usageRW op (OpReg reg)      = mkRU (use_R op []) [reg]
    usageRW op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
    usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"

    -- 2 operand form; first operand Read; second Modified
    usageRM :: Operand -> Operand -> RegUsage
    usageRM op (OpReg reg)      = mkRU (use_R op [reg]) [reg]
    usageRM op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
    usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"

    -- 2 operand form; first operand Modified; second Modified
    usageMM :: Operand -> Operand -> RegUsage
    usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
    usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
    usageMM _ _                     = panic "X86.RegInfo.usageMM: no match"

    -- 3 operand form; first operand Read; second Modified; third Modified
    usageRMM :: Operand -> Operand -> Operand -> RegUsage
    usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
    usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
    usageRMM _ _ _                               = panic "X86.RegInfo.usageRMM: no match"

    -- 1 operand form; operand Modified
    usageM :: Operand -> RegUsage
    usageM (OpReg reg)          = mkRU [reg] [reg]
    usageM (OpAddr ea)          = mkRUR (use_EA ea [])
    usageM _                    = panic "X86.RegInfo.usageM: no match"

    -- Registers defd when an operand is written.
    def_W (OpReg reg)           = [reg]
    def_W (OpAddr _ )           = []
    def_W _                     = panic "X86.RegInfo.def_W: no match"

    -- Registers used when an operand is read.
    use_R (OpReg reg)  tl = reg : tl
    use_R (OpImm _)    tl = tl
    use_R (OpAddr ea)  tl = use_EA ea tl

    -- Registers used to compute an effective address.
    use_EA (ImmAddr _ _) tl = tl
    use_EA (AddrBaseIndex base index _) tl =
        use_base base $! use_index index tl
        where use_base (EABaseReg r)  tl = r : tl
              use_base _              tl = tl
              use_index EAIndexNone   tl = tl
              use_index (EAIndex i _) tl = i : tl

    -- Read-only usage: keep the interesting registers and force the
    -- filtered list before building the RU.
    mkRUR src = src' `seq` RU src' []
        where src' = filter (interesting platform) src

    -- Read/write usage: as mkRUR, but for both lists.
    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
        where src' = filter (interesting platform) src
              dst' = filter (interesting platform) dst
524
-- | Should the register allocator care about this register?
--
-- Virtual registers always qualify.  A single real register qualifies
-- when 'freeReg' reports it as free on this platform.  Register pairs
-- do not exist on this architecture, so they are a panic.
interesting :: Platform -> Reg -> Bool
interesting platform reg
  = case reg of
      RegVirtual _              -> True
      RegReal (RealRegSingle i) -> isFastTrue (freeReg platform i)
      RegReal (RealRegPair{})   -> panic "X86.interesting: no reg pairs on this arch"
530
531
532
-- | Applies the supplied function to all registers in instructions.
-- Typically used to change virtual registers to real registers.
--
-- The register lists attached to 'JMP' and 'CALL' are left as they are;
-- only the jump\/call target is rewritten.  Any instruction without a
-- case below falls through to the final @panic@.
x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
x86_patchRegsOfInstr instr env
 = case instr of
    MOV  sz src dst     -> patch2 (MOV  sz) src dst
    CMOV cc sz src dst  -> CMOV cc sz (patchOp src) (env dst)
    MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
    MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
    LEA  sz src dst     -> patch2 (LEA  sz) src dst
    ADD  sz src dst     -> patch2 (ADD  sz) src dst
    ADC  sz src dst     -> patch2 (ADC  sz) src dst
    SUB  sz src dst     -> patch2 (SUB  sz) src dst
    IMUL sz src dst     -> patch2 (IMUL sz) src dst
    IMUL2 sz src        -> patch1 (IMUL2 sz) src
    MUL sz src dst      -> patch2 (MUL sz) src dst
    MUL2 sz src         -> patch1 (MUL2 sz) src
    IDIV sz op          -> patch1 (IDIV sz) op
    DIV sz op           -> patch1 (DIV sz) op
    ADD_CC sz src dst   -> patch2 (ADD_CC sz) src dst
    SUB_CC sz src dst   -> patch2 (SUB_CC sz) src dst
    AND  sz src dst     -> patch2 (AND  sz) src dst
    OR   sz src dst     -> patch2 (OR   sz) src dst
    XOR  sz src dst     -> patch2 (XOR  sz) src dst
    NOT  sz op          -> patch1 (NOT  sz) op
    BSWAP sz reg        -> BSWAP sz (env reg)
    NEGI sz op          -> patch1 (NEGI sz) op
    -- The shift amount is passed through unchanged; only the shifted
    -- operand is patched.
    SHL  sz imm dst     -> patch1 (SHL sz imm) dst
    SAR  sz imm dst     -> patch1 (SAR sz imm) dst
    SHR  sz imm dst     -> patch1 (SHR sz imm) dst
    BT   sz imm src     -> patch1 (BT  sz imm) src
    TEST sz src dst     -> patch2 (TEST sz) src dst
    CMP  sz src dst     -> patch2 (CMP  sz) src dst
    PUSH sz op          -> patch1 (PUSH sz) op
    POP  sz op          -> patch1 (POP  sz) op
    SETCC cond op       -> patch1 (SETCC cond) op
    JMP op regs         -> JMP (patchOp op) regs
    JMP_TBL op ids s lbl-> JMP_TBL (patchOp op) ids s lbl

    GMOV src dst        -> GMOV (env src) (env dst)
    GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
    GST  sz src dst     -> GST sz (env src) (lookupAddr dst)

    GLDZ dst            -> GLDZ (env dst)
    GLD1 dst            -> GLD1 (env dst)

    GFTOI src dst       -> GFTOI (env src) (env dst)
    GDTOI src dst       -> GDTOI (env src) (env dst)

    GITOF src dst       -> GITOF (env src) (env dst)
    GITOD src dst       -> GITOD (env src) (env dst)

    GDTOF src dst       -> GDTOF (env src) (env dst)

    GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
    GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
    GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
    GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)

    -- NB: the first field of GCMP is its Cond, passed through untouched.
    GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
    GABS sz src dst     -> GABS sz (env src) (env dst)
    GNEG sz src dst     -> GNEG sz (env src) (env dst)
    GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
    GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
    GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
    GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)

    CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
    CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
    CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
    CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
    CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
    CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
    FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
    -- SQRT takes a source operand and a destination register.  Without
    -- this case the instruction would hit the catch-all panic below.
    SQRT sz src dst     -> SQRT sz (patchOp src) (env dst)

    CALL (Left _)  _    -> instr
    CALL (Right reg) p  -> CALL (Right (env reg)) p

    FETCHGOT reg        -> FETCHGOT (env reg)
    FETCHPC  reg        -> FETCHPC  (env reg)

    NOP                 -> instr
    COMMENT _           -> instr
    DELTA _             -> instr

    JXX _ _             -> instr
    JXX_GBL _ _         -> instr
    CLTD _              -> instr

    POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst)
    BSF    sz src dst -> BSF    sz (patchOp src) (env dst)
    BSR    sz src dst -> BSR    sz (patchOp src) (env dst)

    PREFETCH lvl size src -> PREFETCH lvl size (patchOp src)

    -- Patch the wrapped instruction and re-apply the lock prefix.
    LOCK i               -> LOCK (x86_patchRegsOfInstr i env)
    XADD sz src dst      -> patch2 (XADD sz) src dst
    CMPXCHG sz src dst   -> patch2 (CMPXCHG sz) src dst
    MFENCE               -> instr

    _other              -> panic "patchRegs: unrecognised instr"

  where
    -- Rebuild a one-operand instruction, forcing the patched operand.
    patch1 :: (Operand -> a) -> Operand -> a
    patch1 insn op      = insn $! patchOp op
    -- Rebuild a two-operand instruction, forcing both patched operands.
    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst

    -- Apply 'env' to every register mentioned by an operand.
    patchOp (OpReg  reg) = OpReg $! env reg
    patchOp (OpImm  imm) = OpImm imm
    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea

    -- Apply 'env' to the base and index registers of an address.
    lookupAddr (ImmAddr imm off) = ImmAddr imm off
    lookupAddr (AddrBaseIndex base index disp)
      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
      where
        lookupBase EABaseNone       = EABaseNone
        lookupBase EABaseRip        = EABaseRip
        lookupBase (EABaseReg r)    = EABaseReg $! env r

        lookupIndex EAIndexNone     = EAIndexNone
        lookupIndex (EAIndex r i)   = (EAIndex $! env r) i
655
656
657 --------------------------------------------------------------------------------
-- | True for the control-transfer instructions: 'JMP', 'JXX',
-- 'JXX_GBL', 'JMP_TBL' and 'CALL'.
x86_isJumpishInstr :: Instr -> Bool
x86_isJumpishInstr JMP{}     = True
x86_isJumpishInstr JXX{}     = True
x86_isJumpishInstr JXX_GBL{} = True
x86_isJumpishInstr JMP_TBL{} = True
x86_isJumpishInstr CALL{}    = True
x86_isJumpishInstr _         = False
669
670
-- | The local blocks an instruction may jump to: the target of a
-- 'JXX', or every populated entry of a 'JMP_TBL' jump table.  All
-- other instructions report no destinations.
x86_jumpDestsOfInstr :: Instr -> [BlockId]
x86_jumpDestsOfInstr (JXX _ target)          = [target]
x86_jumpDestsOfInstr (JMP_TBL _ entries _ _) = [target | Just target <- entries]
x86_jumpDestsOfInstr _                       = []
680
681
-- | Rewrite the block ids an instruction jumps to.  Only 'JXX' and
-- 'JMP_TBL' carry block ids; any other instruction is returned as is.
x86_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
x86_patchJumpInstr (JXX cond target) rename
  = JXX cond (rename target)
x86_patchJumpInstr (JMP_TBL addr entries section lbl) rename
  = JMP_TBL addr (map (fmap rename) entries) section lbl
x86_patchJumpInstr other _
  = other
691
692
693
694
695 -- -----------------------------------------------------------------------------
-- | Build the instruction that stores a register into its spill slot.
--
-- The slot address is stack-pointer relative: the slot's byte offset
-- minus the current stack delta.  Integer registers are stored at the
-- architecture word size, x87 fake registers with 'GST' at 'FF80', and
-- SSE registers with a 'MOV' at 'FF64'.
x86_mkSpillInstr
    :: DynFlags
    -> Reg      -- ^ register to spill
    -> Int      -- ^ current stack delta
    -> Int      -- ^ spill slot to use
    -> Instr
x86_mkSpillInstr dflags reg delta slot
  = case targetClassOfReg platform reg of
      RcInteger   -> MOV (archWordSize is32Bit) (OpReg reg) (OpAddr slotAddr)
      RcDouble    -> GST FF80 reg slotAddr      -- RcFloat/RcDouble
      RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr slotAddr)
      _           -> panic "X86.mkSpillInstr: no match"
  where
    platform = targetPlatform dflags
    is32Bit  = target32Bit platform
    slotAddr = spRel dflags (spillSlotToOffset platform slot - delta)
715
-- | Build the instruction that reloads a register from its spill slot.
--
-- This is the counterpart of the spill instruction: the slot address is
-- computed the same way, with the memory operand as the source and the
-- register as the destination.
x86_mkLoadInstr
    :: DynFlags
    -> Reg      -- ^ register to load
    -> Int      -- ^ current stack delta
    -> Int      -- ^ spill slot to use
    -> Instr
x86_mkLoadInstr dflags reg delta slot
  = case targetClassOfReg platform reg of
      RcInteger   -> MOV (archWordSize is32Bit) (OpAddr slotAddr) (OpReg reg)
      RcDouble    -> GLD FF80 slotAddr reg      -- RcFloat/RcDouble
      RcDoubleSSE -> MOV FF64 (OpAddr slotAddr) (OpReg reg)
      _           -> panic "X86.x86_mkLoadInstr"
  where
    platform = targetPlatform dflags
    is32Bit  = target32Bit platform
    slotAddr = spRel dflags (spillSlotToOffset platform slot - delta)
735
-- | Size of one spill slot in bytes: 12 on 32-bit targets, 8 otherwise.
spillSlotSize :: Platform -> Int
spillSlotSize platform
  | target32Bit platform = 12
  | otherwise            = 8
739
-- | Number of spill slots that fit in the reserved C stack area, after
-- setting aside the first 64 bytes (the same 64-byte bias that
-- 'spillSlotToOffset' applies) and one further slot.
maxSpillSlots :: DynFlags -> Int
maxSpillSlots dflags
  = (usableBytes `div` slotBytes) - 1
--  = 0 -- useful for testing allocMoreStack
  where
    usableBytes = rESERVED_C_STACK_BYTES dflags - 64
    slotBytes   = spillSlotSize (targetPlatform dflags)
744
-- | Alignment, in bytes, that the stack pointer must be kept at.
stackAlign :: Int
stackAlign = 16
748
-- | Convert a spill slot number to an unsigned *byte* offset: slots are
-- laid out back to back after a fixed 64-byte bias.  Whether that offset
-- lies above or below the C stack pointer is decided per architecture by
-- the caller.
spillSlotToOffset :: Platform -> Int -> Int
spillSlotToOffset platform slot = slot * spillSlotSize platform + 64
755
756 --------------------------------------------------------------------------------
757
-- | If the instruction is a 'DELTA', return the C stack delta it
-- records; otherwise 'Nothing'.
x86_takeDeltaInstr :: Instr -> Maybe Int
x86_takeDeltaInstr (DELTA off) = Just off
x86_takeDeltaInstr _           = Nothing
767
768
-- | True for the pseudo-instructions 'COMMENT', 'LDATA', 'NEWBLOCK'
-- and 'DELTA'.
x86_isMetaInstr :: Instr -> Bool
x86_isMetaInstr COMMENT{}  = True
x86_isMetaInstr LDATA{}    = True
x86_isMetaInstr NEWBLOCK{} = True
x86_isMetaInstr DELTA{}    = True
x86_isMetaInstr _          = False
780
781
782
-- | Make a reg-reg move instruction.
--
-- The instruction is chosen from the register class of the source:
-- integer registers use a 'MOV' at the word size of the architecture,
-- x87 fake registers use 'GMOV', and SSE registers use a 'MOV' at 'FF64'.
x86_mkRegRegMoveInstr
    :: Platform
    -> Reg
    -> Reg
    -> Instr
x86_mkRegRegMoveInstr platform src dst
  = case targetClassOfReg platform src of
      RcInteger   -> integerMove
      RcDouble    -> GMOV src dst
      RcDoubleSSE -> moveAt FF64
      _           -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
  where
    moveAt size = MOV size (OpReg src) (OpReg dst)

    integerMove
      = case platformArch platform of
          ArchX86    -> moveAt II32
          ArchX86_64 -> moveAt II64
          _          -> panic "x86_mkRegRegMoveInstr: Bad arch"
803
-- | Recognise a reg-reg move, returning its (source, destination) pair.
-- The register allocator tries to eliminate such moves by assigning
-- both temporaries to the same real register.  Only a 'MOV' between
-- two register operands is recognised.
x86_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
x86_takeRegRegMoveInstr instr
  = case instr of
      MOV _ (OpReg from) (OpReg to) -> Just (from, to)
      _                             -> Nothing
816
817
-- | The instruction sequence for an unconditional branch to a block:
-- a single 'JXX' with the 'ALWAYS' condition.
x86_mkJumpInstr :: BlockId -> [Instr]
x86_mkJumpInstr target = JXX ALWAYS target : []
825
826
-- | Instruction that allocates the given number of bytes of stack by
-- subtracting it from the stack pointer ('esp' on x86, 'rsp' on x86_64).
x86_mkStackAllocInstr
    :: Platform
    -> Int
    -> Instr
x86_mkStackAllocInstr platform amount
  = case platformArch platform of
      ArchX86    -> grow II32 esp
      ArchX86_64 -> grow II64 rsp
      _          -> panic "x86_mkStackAllocInstr"
  where
    grow size sp = SUB size (OpImm (ImmInt amount)) (OpReg sp)
836
-- | Instruction that releases the given number of bytes of stack by
-- adding it back to the stack pointer ('esp' on x86, 'rsp' on x86_64).
x86_mkStackDeallocInstr
    :: Platform
    -> Int
    -> Instr
x86_mkStackDeallocInstr platform amount
  = case platformArch platform of
      ArchX86    -> shrink II32 esp
      ArchX86_64 -> shrink II64 rsp
      _          -> panic "x86_mkStackDeallocInstr"
  where
    shrink size sp = ADD size (OpImm (ImmInt amount)) (OpReg sp)
846
-- | If any block contains a fake x87 instruction (see 'is_G_instr'),
-- insert a 'GFREE' before every non-local transfer in every block.
-- Otherwise the blocks are returned untouched.
i386_insert_ffrees
    :: [GenBasicBlock Instr]
    -> [GenBasicBlock Instr]
i386_insert_ffrees blocks
  | usesFakeFP = map withGFREEs blocks
  | otherwise  = blocks
  where
    usesFakeFP = or [ any is_G_instr body | BasicBlock _ body <- blocks ]

    withGFREEs (BasicBlock lbl body)
      = BasicBlock lbl (insertBeforeNonlocalTransfers GFREE body)
859
-- | Put the given instruction in front of every 'CALL' and 'JMP' in
-- the list.  'JXX_GBL' is not supported and panics.
insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
insertBeforeNonlocalTransfers extra = concatMap guardTransfer
  where
    guardTransfer insn
      = case insn of
          CALL _ _    -> [extra, insn]
          JMP _ _     -> [extra, insn]
          JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
          _           -> [insn]
868
869
-- | Is this one of the fake x86 FP instructions (the @G@ family)?
--
-- If you ever add a new FP insn to the fake x86 FP insn set, you must
-- add it here too.  Meeting a 'GFREE' here is a panic.
is_G_instr :: Instr -> Bool
is_G_instr GMOV{}  = True
is_G_instr GLD{}   = True
is_G_instr GST{}   = True
is_G_instr GLDZ{}  = True
is_G_instr GLD1{}  = True
is_G_instr GFTOI{} = True
is_G_instr GDTOI{} = True
is_G_instr GITOF{} = True
is_G_instr GITOD{} = True
is_G_instr GDTOF{} = True
is_G_instr GADD{}  = True
is_G_instr GDIV{}  = True
is_G_instr GSUB{}  = True
is_G_instr GMUL{}  = True
is_G_instr GCMP{}  = True
is_G_instr GABS{}  = True
is_G_instr GNEG{}  = True
is_G_instr GSQRT{} = True
is_G_instr GSIN{}  = True
is_G_instr GCOS{}  = True
is_G_instr GTAN{}  = True
is_G_instr GFREE   = panic "is_G_instr: GFREE (!)"
is_G_instr _       = False
898
899
900 --
901 -- Note [extra spill slots]
902 --
903 -- If the register allocator used more spill slots than we have
904 -- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
905 -- C stack space on entry and exit from this proc. Therefore we
906 -- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
907 -- before every non-local jump.
908 --
909 -- This became necessary when the new codegen started bundling entire
910 -- functions together into one proc, because the register allocator
911 -- assigns a different stack slot to each virtual reg within a proc.
912 -- To avoid using so many slots we could also:
913 --
914 -- - split up the proc into connected components before code generation
915 --
916 -- - rename the virtual regs, so that we re-use vreg names and hence
917 -- stack slots for non-overlapping vregs.
918 --
919 -- Note that when a block is both a non-local entry point (with an
920 -- info table) and a local branch target, we have to split it into
921 -- two, like so:
922 --
923 -- <info table>
924 -- L:
925 -- <code>
926 --
927 -- becomes
928 --
929 -- <info table>
930 -- L:
931 -- subl $N, %rsp
932 -- jmp Lnew
933 -- Lnew:
934 -- <code>
935 --
936 -- and all branches pointing to L are retargeted to point to Lnew.
937 -- Otherwise, we would repeat the $rsp adjustment for each branch to
938 -- L.
939 --
-- | Make a proc allocate additional stack space on entry and release it
-- before leaving; see Note [extra spill slots].
--
-- Data sections are returned unchanged.  For a proc, every block listed
-- by 'entryBlocks' is split in two: the original label keeps only the
-- stack allocation followed by an unconditional jump to a freshly named
-- block, and that fresh block carries the original code.  In all blocks,
-- each 'JMP' is preceded by the matching deallocation, and every other
-- instruction is passed through 'x86_patchJumpInstr' so that jumps to an
-- entry label go to its fresh replacement instead.  A 'JXX_GBL' cannot
-- be handled and causes a panic.
allocMoreStack
  :: Platform
  -> Int
  -> NatCmmDecl statics X86.Instr.Instr
  -> UniqSM (NatCmmDecl statics X86.Instr.Instr)

allocMoreStack _ _ top@(CmmData _ _) = return top
allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
    let entries = entryBlocks proc

    -- one fresh unique per entry block, naming the block that takes
    -- over its code
    freshUniqs <- replicateM (length entries) getUniqueM

    let
      -- entry label -> label of the block that now holds its code
      new_blockmap :: BlockEnv BlockId
      new_blockmap = mapFromList (zip entries (map mkBlockId freshUniqs))

      -- redirect a jump target if it is one of the split entry blocks
      retarget dest = fromMaybe dest (mapLookup dest new_blockmap)

      fixInsn insn = case insn of
        JMP _ _     -> [dealloc, insn]
        JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
        _other      -> [x86_patchJumpInstr insn retarget]

      splitBlock (BasicBlock blockId insns) =
        case mapLookup blockId new_blockmap of
          Just freshId -> [ BasicBlock blockId [alloc, JXX ALWAYS freshId]
                          , BasicBlock freshId body ]
          Nothing      -> [ BasicBlock blockId body ]
        where
          body = concatMap fixInsn insns

    return (CmmProc info lbl live (ListGraph (concatMap splitBlock code)))
  where
    -- stack-pointer delta: the slots' total size, rounded up to a
    -- multiple of 'stackAlign'
    spillBytes = slots * spillSlotSize platform
    delta      = ((spillBytes + stackAlign - 1) `quot` stackAlign) * stackAlign

    alloc   = mkStackAllocInstr   platform delta
    dealloc = mkStackDeallocInstr platform delta
980
981
-- | Where a jump goes, as reported by 'canShortcut'.
data JumpDest
  = DestBlockId BlockId  -- ^ a basic block, identified by its 'BlockId'
  | DestImm Imm          -- ^ an immediate operand
983
-- | The 'BlockId' of a destination, when it is a block; 'Nothing' for
-- an immediate destination.
getJumpDestBlockId :: JumpDest -> Maybe BlockId
getJumpDestBlockId dest = case dest of
  DestBlockId bid -> Just bid
  DestImm _       -> Nothing
987
-- | If the instruction is an unconditional jump (a @JXX ALWAYS@ to a
-- block, or a 'JMP' to an immediate operand), return its destination;
-- otherwise 'Nothing'.
canShortcut :: Instr -> Maybe JumpDest
canShortcut insn = case insn of
  JXX ALWAYS target -> Just (DestBlockId target)
  JMP (OpImm imm) _ -> Just (DestImm imm)
  _                 -> Nothing
992
993
-- | Shortcut a sequence of branches.
--
-- Starting from a 'JXX', keep asking @fn@ where the current target
-- really leads and rewrite the jump accordingly: a block destination
-- continues the chase, an immediate destination turns the jump into a
-- 'JXX_GBL' with the same condition.  The set of blocks already visited
-- stops us from following cycles.  Any other instruction is returned
-- unchanged.
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump fn insn = chase (setEmpty :: BlockSet) insn
  where
    chase visited jump@(JXX cc target)
      | setMember target visited = jump
      | otherwise
      = case fn target of
          Nothing                 -> jump
          Just (DestBlockId next) -> chase visited' (JXX cc next)
          -- a JXX_GBL is not chased any further
          Just (DestImm imm)      -> JXX_GBL cc imm
      where
        visited' = setInsert target visited
    chase _ other = other
1006
-- | Apply the jump-shortcutting mapping to every static in a data
-- section.  Jump tables live in 'CmmData', so their entries need the
-- mapping as well.
--
-- Defined here because it knows about 'JumpDest'.
shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
shortcutStatics fn (align, Statics lbl statics)
  = (align, Statics lbl shortened)
  where
    shortened = [ shortcutStatic fn entry | entry <- statics ]
1013
-- | Shortcut a label: when 'maybeAsmTemp' yields a unique for it, resolve
-- the corresponding block through 'shortBlockId' (starting with an empty
-- visited set); any other label is returned as is.
shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
  = maybe lab resolve (maybeAsmTemp lab)
  where
    resolve uq = shortBlockId fn emptyUniqSet (mkBlockId uq)
1018
-- | Shortcut the label(s) inside a single static.  Only label literals
-- and label-difference literals are affected; everything else passes
-- through unchanged.
shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn static = case static of
  CmmStaticLit (CmmLabel lab)
    -> CmmStaticLit (CmmLabel (shortcutLabel fn lab))
  -- slightly dodgy, we're ignoring the second label, but this
  -- works with the way we use CmmLabelDiffOff for jump tables now.
  CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)
    -> CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
  _ -> static
1028
-- | Follow the shortcut mapping from a block until it stops, and return
-- the label of the final destination.
--
-- The walk ends with the block's own temp label when the block was
-- already visited (a cycle) or when @fn@ has no entry for it.  A block
-- destination continues the walk with the current block recorded as
-- seen; an immediate destination holding a label ('ImmCLbl') yields that
-- label.  Any other immediate destination is a panic.
shortBlockId
        :: (BlockId -> Maybe JumpDest)
        -> UniqSet Unique
        -> BlockId
        -> CLabel
shortBlockId fn seen blockid
  | elementOfUniqSet uq seen = mkAsmTempLabel uq
  | otherwise
  = case fn blockid of
      Nothing                      -> mkAsmTempLabel uq
      Just (DestBlockId blockid')  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
      Just (DestImm (ImmCLbl lbl)) -> lbl
      Just _other                  -> panic "shortBlockId"
  where
    uq = getUnique blockid