Fix todo in compiler/nativeGen: Rename Size to Format
[ghc.git] / compiler / nativeGen / X86 / Instr.hs
1 {-# LANGUAGE CPP, TypeFamilies #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Machine-dependent assembly language
6 --
7 -- (c) The University of Glasgow 1993-2004
8 --
9 -----------------------------------------------------------------------------
10
11 module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest,
12 getJumpDestBlockId, canShortcut, shortcutStatics,
13 shortcutJump, i386_insert_ffrees, allocMoreStack,
14 maxSpillSlots, archWordFormat)
15 where
16
17 #include "HsVersions.h"
18 #include "nativeGen/NCG.h"
19
20 import X86.Cond
21 import X86.Regs
22 import Instruction
23 import Format
24 import RegClass
25 import Reg
26 import TargetReg
27
28 import BlockId
29 import CodeGen.Platform
30 import Cmm
31 import FastString
32 import FastBool
33 import Outputable
34 import Platform
35
36 import BasicTypes (Alignment)
37 import CLabel
38 import DynFlags
39 import UniqSet
40 import Unique
41 import UniqSupply
42
43 import Control.Monad
44 import Data.Maybe (fromMaybe)
45
-- | The integer 'Format' of a native machine word (and therefore of a
-- memory address) on x86: 'II32' when targeting 32-bit x86, 'II64'
-- when targeting x86_64.
--
-- The argument is 'True' for a 32-bit target.
archWordFormat :: Bool -> Format
archWordFormat is32Bit = if is32Bit then II32 else II64
52
-- | 'Instruction' instance for the x86 instruction set.  Every class
-- method is delegated to the @x86_@-prefixed function of the same
-- name defined in this module.
instance Instruction Instr where
        -- register usage and renaming
        regUsageOfInstr         = x86_regUsageOfInstr
        patchRegsOfInstr        = x86_patchRegsOfInstr

        -- control flow
        isJumpishInstr          = x86_isJumpishInstr
        jumpDestsOfInstr        = x86_jumpDestsOfInstr
        patchJumpInstr          = x86_patchJumpInstr
        mkJumpInstr             = x86_mkJumpInstr

        -- spilling and reg-reg moves
        mkSpillInstr            = x86_mkSpillInstr
        mkLoadInstr             = x86_mkLoadInstr
        mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
        takeRegRegMoveInstr     = x86_takeRegRegMoveInstr

        -- pseudo-instructions
        takeDeltaInstr          = x86_takeDeltaInstr
        isMetaInstr             = x86_isMetaInstr

        -- C stack adjustment
        mkStackAllocInstr       = x86_mkStackAllocInstr
        mkStackDeallocInstr     = x86_mkStackDeallocInstr
69
70
71 -- -----------------------------------------------------------------------------
72 -- Intel x86 instructions
73
74 {-
75 Intel, in their infinite wisdom, selected a stack model for floating
76 point registers on x86. That might have made sense back in 1979 --
77 nowadays we can see it for the nonsense it really is. A stack model
78 fits poorly with the existing nativeGen infrastructure, which assumes
79 flat integer and FP register sets. Prior to this commit, nativeGen
80 could not generate correct x86 FP code -- to do so would have meant
81 somehow working the register-stack paradigm into the register
82 allocator and spiller, which sounds very difficult.
83
84 We have decided to cheat, and go for a simple fix which requires no
85 infrastructure modifications, at the expense of generating ropey but
86 correct FP code. All notions of the x86 FP stack and its insns have
87 been removed. Instead, we pretend (to the instruction selector and
88 register allocator) that x86 has six floating point registers, %fake0
89 .. %fake5, which can be used in the usual flat manner. We further
90 claim that x86 has floating point instructions very similar to SPARC
91 and Alpha, that is, a simple 3-operand register-register arrangement.
92 Code generation and register allocation proceed on this basis.
93
94 When we come to print out the final assembly, our convenient fiction
95 is converted to dismal reality. Each fake instruction is
96 independently converted to a series of real x86 instructions.
97 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
98 arithmetic operations, the two operands are pushed onto the top of the
99 FP stack, the operation done, and the result copied back into the
100 relevant register. There are only six %fake registers because 2 are
101 needed for the translation, and x86 has 8 in total.
102
103 The translation is inefficient but is simple and it works. A cleverer
104 translation would handle a sequence of insns, simulating the FP stack
105 contents, would not impose a fixed mapping from %fake to %st regs, and
106 hopefully could avoid most of the redundant reg-reg moves of the
107 current translation.
108
109 We might as well make use of whatever unique FP facilities Intel have
110 chosen to bless us with (let's not be churlish, after all).
111 Hence GLDZ and GLD1. Bwahahahahahahaha!
112 -}
113
114 {-
115 Note [x86 Floating point precision]
116
117 Intel's internal floating point registers are by default 80 bit
118 extended precision. This means that all operations done on values in
119 registers are done at 80 bits, and unless the intermediate values are
120 truncated to the appropriate size (32 or 64 bits) by storing in
121 memory, calculations in registers will give different results from
122 calculations which pass intermediate values in memory (eg. via
123 function calls).
124
125 One solution is to set the FPU into 64 bit precision mode. Some OSs
126 do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
127 that this will only affect 64-bit precision arithmetic; 32-bit
128 calculations will still be done at 64-bit precision in registers. So
129 it doesn't solve the whole problem.
130
131 There's also the issue of what the C library is expecting in terms of
132 precision. It seems to be the case that glibc on Linux expects the
133 FPU to be set to 80 bit precision, so setting it to 64 bit could have
134 unexpected effects. Changing the default could have undesirable
135 effects on other 3rd-party library code too, so the right thing would
136 be to save/restore the FPU control word across Haskell code if we were
137 to do this.
138
139 gcc's -ffloat-store gives consistent results by always storing the
140 results of floating-point calculations in memory, which works for both
141 32 and 64-bit precision. However, it only affects the values of
142 user-declared floating point variables in C, not intermediate results.
143 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
144 flag).
145
146 Another problem is how to spill floating point registers in the
147 register allocator. Should we spill the whole 80 bits, or just 64?
148 On an OS which is set to 64 bit precision, spilling 64 is fine. On
149 Linux, spilling 64 bits will round the results of some operations.
150 This is what gcc does. Spilling at 80 bits requires taking up a full
151 128 bit slot (so we get alignment). We spill at 80-bits and ignore
152 the alignment problems.
153
154 In the future [edit: now available in GHC 7.0.1, with the -msse2
155 flag], we'll use the SSE registers for floating point. This requires
156 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
157 float ops), which means P4 or Xeon and above. Using SSE will solve
158 all these problems, because the SSE registers use fixed 32 bit or 64
159 bit precision.
160
161 --SDM 1/2003
162 -}
163
-- | x86 / x86_64 instructions as manipulated by the native code
-- generator.  Besides real machine instructions this includes a few
-- pseudo-ops ('COMMENT', 'LOCATION', 'LDATA', 'NEWBLOCK', 'DELTA'),
-- the "fake" x87 floating point instructions (the @G...@ constructors,
-- see the long comment above), and pseudo-instructions for
-- position-independent code ('FETCHGOT', 'FETCHPC').
--
-- When adding a constructor, remember the functions in this module
-- that case on 'Instr' (register usage, register patching, 'is_G_instr').
data Instr
        -- comment pseudo-op
        = COMMENT FastString

        -- location pseudo-op (file, line, col, name)
        | LOCATION Int Int Int String

        -- some static data spat out during code
        -- generation.  Will be extracted before
        -- pretty-printing.
        | LDATA   Section (Alignment, CmmStatics)

        -- start a new basic block.  Useful during
        -- codegen, removed later.  Preceding
        -- instruction should be a jump, as per the
        -- invariants for a BasicBlock (see Cmm).
        | NEWBLOCK BlockId

        -- specify current stack offset for
        -- benefit of subsequent passes
        | DELTA   Int

        -- Moves.
        | MOV         Format Operand Operand
        | CMOV   Cond Format Operand Reg
        | MOVZxL      Format Operand Operand -- format is the size of operand 1
        | MOVSxL      Format Operand Operand -- format is the size of operand 1
        -- x86_64 note: plain mov into a 32-bit register always zero-extends
        -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
        -- don't affect the high bits of the register.

        -- Load effective address (also a very useful three-operand add instruction :-)
        | LEA         Format Operand Operand

        -- Int Arithmetic.
        | ADD         Format Operand Operand
        | ADC         Format Operand Operand
        | SUB         Format Operand Operand
        | SBB         Format Operand Operand

        | MUL         Format Operand Operand
        | MUL2        Format Operand         -- %edx:%eax = operand * %eax
        | IMUL        Format Operand Operand -- signed int mul
        | IMUL2       Format Operand         -- %edx:%eax = operand * %eax

        | DIV         Format Operand         -- eax := eax:edx/op, edx := eax:edx%op
        | IDIV        Format Operand         -- ditto, but signed

        -- Int Arithmetic, where the effects on the condition register
        -- are important. Used in specialized sequences such as MO_Add2.
        -- Do not rewrite these instructions to "equivalent" ones that
        -- have different effect on the condition register! (See #9013.)
        | ADD_CC      Format Operand Operand
        | SUB_CC      Format Operand Operand

        -- Simple bit-twiddling.
        | AND         Format Operand Operand
        | OR          Format Operand Operand
        | XOR         Format Operand Operand
        | NOT         Format Operand
        | NEGI        Format Operand         -- NEG instruction (name clash with Cond)
        | BSWAP       Format Reg

        -- Shifts (amount may be immediate or %cl only)
        | SHL         Format Operand{-amount-} Operand
        | SAR         Format Operand{-amount-} Operand
        | SHR         Format Operand{-amount-} Operand

        | BT          Format Imm Operand
        | NOP

        -- x86 Float Arithmetic.
        -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
        -- as single instructions right up until we spit them out.
        -- all the 3-operand fake fp insns are src1 src2 dst
        -- and furthermore are constrained to be fp regs only.
        -- IMPORTANT: keep is_G_insn up to date with any changes here
        | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
        | GLD         Format AddrMode Reg -- src, dst(fpreg)
        | GST         Format Reg AddrMode -- src(fpreg), dst

        | GLDZ        Reg -- dst(fpreg)
        | GLD1        Reg -- dst(fpreg)

        | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
        | GDTOI       Reg Reg -- src(fpreg), dst(intreg)

        | GITOF       Reg Reg -- src(intreg), dst(fpreg)
        | GITOD       Reg Reg -- src(intreg), dst(fpreg)

        | GDTOF       Reg Reg -- src(fpreg), dst(fpreg)

        | GADD        Format Reg Reg Reg -- src1, src2, dst
        | GDIV        Format Reg Reg Reg -- src1, src2, dst
        | GSUB        Format Reg Reg Reg -- src1, src2, dst
        | GMUL        Format Reg Reg Reg -- src1, src2, dst

        -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
        -- Compare src1 with src2; set the Zero flag iff the numbers are
        -- comparable and the comparison is True.  Subsequent code must
        -- test the %eflags zero flag regardless of the supplied Cond.
        | GCMP        Cond Reg Reg -- src1, src2

        | GABS        Format Reg Reg -- src, dst
        | GNEG        Format Reg Reg -- src, dst
        | GSQRT       Format Reg Reg -- src, dst
        | GSIN        Format CLabel CLabel Reg Reg -- src, dst
        | GCOS        Format CLabel CLabel Reg Reg -- src, dst
        | GTAN        Format CLabel CLabel Reg Reg -- src, dst

        | GFREE         -- do ffree on all x86 regs; an ugly hack


        -- SSE2 floating point: we use a restricted set of the available SSE2
        -- instructions for floating-point.
        -- use MOV for moving (either movss or movsd (movlpd better?))
        | CVTSS2SD      Reg Reg            -- F32 to F64
        | CVTSD2SS      Reg Reg            -- F64 to F32
        | CVTTSS2SIQ    Format Operand Reg -- F32 to I32/I64 (with truncation)
        | CVTTSD2SIQ    Format Operand Reg -- F64 to I32/I64 (with truncation)
        | CVTSI2SS      Format Operand Reg -- I32/I64 to F32
        | CVTSI2SD      Format Operand Reg -- I32/I64 to F64

        -- use ADD & SUB for arithmetic.  In both cases, operands
        -- are  Operand Reg.

        -- SSE2 floating-point division:
        | FDIV          Format Operand Operand   -- divisor, dividend(dst)

        -- use CMP for comparisons.  ucomiss and ucomisd instructions
        -- compare single/double prec floating point respectively.

        | SQRT          Format Operand Reg  -- src, dst


        -- Comparison
        | TEST          Format Operand Operand
        | CMP           Format Operand Operand
        | SETCC         Cond Operand

        -- Stack Operations.
        | PUSH          Format Operand
        | POP           Format Operand
        -- both unused (SDM):
        --      | PUSHA
        --      | POPA

        -- Jumping around.
        | JMP         Operand [Reg] -- including live Regs at the call
        | JXX         Cond BlockId  -- includes unconditional branches
        | JXX_GBL     Cond Imm      -- non-local version of JXX
        -- Table jump
        | JMP_TBL     Operand   -- Address to jump to
                      [Maybe BlockId] -- Blocks in the jump table
                      Section   -- Data section jump table should be put in
                      CLabel    -- Label of jump table
        | CALL        (Either Imm Reg) [Reg]

        -- Other things.
        | CLTD Format            -- sign extend %eax into %edx:%eax

        | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
                                 -- pretty-prints as
                                 --       call 1f
                                 -- 1:    popl %reg
                                 --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
        | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
                                 -- pretty-prints as
                                 --       call 1f
                                 -- 1:    popl %reg

        -- bit counting instructions
        | POPCNT      Format Operand Reg -- [SSE4.2] count number of bits set to 1
        | BSF         Format Operand Reg -- bit scan forward
        | BSR         Format Operand Reg -- bit scan reverse

        -- prefetch
        | PREFETCH  PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
                                        -- variant can be NTA, Lvl0, Lvl1, or Lvl2

        | LOCK        Instr -- lock prefix
        | XADD        Format Operand Operand -- src (r), dst (r/m)
        | CMPXCHG     Format Operand Operand -- src (r), dst (r/m), eax implicit
        | MFENCE
-- | The variant selector carried by a 'PREFETCH' instruction.
data PrefetchVariant
        = NTA
        | Lvl0
        | Lvl1
        | Lvl2
350
351
-- | An operand of an x86 instruction.
data Operand
        = OpReg  Reg            -- ^ a register
        | OpImm  Imm            -- ^ an immediate value
        | OpAddr AddrMode       -- ^ a memory reference
356
357
358
-- | Returns which registers are read and written as a (read, written)
-- pair.
--
-- Only registers that are 'interesting' to the register allocator are
-- reported; everything else is filtered out by 'mkRU' / 'mkRUR'.
--
-- Instructions that have no alternative here (e.g. 'LDATA',
-- 'NEWBLOCK', 'GFREE') fall through to the final panic.
x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
x86_regUsageOfInstr platform instr
 = case instr of
    MOV    _ src dst    -> usageRW src dst
    CMOV _ _ src dst    -> mkRU (use_R src [dst]) [dst]
    MOVZxL _ src dst    -> usageRW src dst
    MOVSxL _ src dst    -> usageRW src dst
    LEA    _ src dst    -> usageRW src dst
    ADD    _ src dst    -> usageRM src dst
    ADC    _ src dst    -> usageRM src dst
    SUB    _ src dst    -> usageRM src dst
    SBB    _ src dst    -> usageRM src dst
    IMUL   _ src dst    -> usageRM src dst
    IMUL2  _ src        -> mkRU (eax:use_R src []) [eax,edx]
    MUL    _ src dst    -> usageRM src dst
    MUL2   _ src        -> mkRU (eax:use_R src []) [eax,edx]
    DIV    _ op         -> mkRU (eax:edx:use_R op []) [eax,edx]
    IDIV   _ op         -> mkRU (eax:edx:use_R op []) [eax,edx]
    ADD_CC _ src dst    -> usageRM src dst
    SUB_CC _ src dst    -> usageRM src dst
    AND    _ src dst    -> usageRM src dst
    OR     _ src dst    -> usageRM src dst

    -- xor of a register with itself only defines it: the old value
    -- is not reported as read.
    XOR    _ (OpReg src) (OpReg dst)
        | src == dst    -> mkRU [] [dst]

    XOR    _ src dst    -> usageRM src dst
    NOT    _ op         -> usageM op
    BSWAP  _ reg        -> mkRU [reg] [reg]
    NEGI   _ op         -> usageM op
    SHL    _ imm dst    -> usageRM imm dst
    SAR    _ imm dst    -> usageRM imm dst
    SHR    _ imm dst    -> usageRM imm dst
    BT     _ _   src    -> mkRUR (use_R src [])

    PUSH   _ op         -> mkRUR (use_R op [])
    POP    _ op         -> mkRU [] (def_W op)
    TEST   _ src dst    -> mkRUR (use_R src $! use_R dst [])
    CMP    _ src dst    -> mkRUR (use_R src $! use_R dst [])
    SETCC  _ op         -> mkRU [] (def_W op)
    JXX    _ _          -> mkRU [] []
    JXX_GBL _ _         -> mkRU [] []
    JMP     op regs     -> mkRUR (use_R op regs)
    JMP_TBL op _ _ _    -> mkRUR (use_R op [])
    CALL (Left _)  params   -> mkRU params (callClobberedRegs platform)
    CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
    CLTD   _            -> mkRU [eax] [edx]
    NOP                 -> mkRU [] []

    GMOV   src dst      -> mkRU [src] [dst]
    GLD    _ src dst    -> mkRU (use_EA src []) [dst]
    GST    _ src dst    -> mkRUR (src : use_EA dst [])

    GLDZ   dst          -> mkRU [] [dst]
    GLD1   dst          -> mkRU [] [dst]

    GFTOI  src dst      -> mkRU [src] [dst]
    GDTOI  src dst      -> mkRU [src] [dst]

    GITOF  src dst      -> mkRU [src] [dst]
    GITOD  src dst      -> mkRU [src] [dst]

    GDTOF  src dst      -> mkRU [src] [dst]

    GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]

    GCMP   _ src1 src2   -> mkRUR [src1,src2]
    GABS   _ src dst     -> mkRU [src] [dst]
    GNEG   _ src dst     -> mkRU [src] [dst]
    GSQRT  _ src dst     -> mkRU [src] [dst]
    GSIN   _ _ _ src dst -> mkRU [src] [dst]
    GCOS   _ _ _ src dst -> mkRU [src] [dst]
    GTAN   _ _ _ src dst -> mkRU [src] [dst]

    CVTSS2SD   src dst  -> mkRU [src] [dst]
    CVTSD2SS   src dst  -> mkRU [src] [dst]
    CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SS   _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SD   _ src dst -> mkRU (use_R src []) [dst]
    FDIV _     src dst  -> usageRM src dst
    -- SQRT reads its source operand and writes its destination
    -- register (declared as "src, dst" in 'Instr').  It used to be
    -- missing here and hit the panic below.
    SQRT _     src dst  -> mkRU (use_R src []) [dst]

    FETCHGOT reg        -> mkRU [] [reg]
    FETCHPC  reg        -> mkRU [] [reg]

    COMMENT _           -> noUsage
    LOCATION{}          -> noUsage
    DELTA   _           -> noUsage

    POPCNT _ src dst    -> mkRU (use_R src []) [dst]
    BSF    _ src dst    -> mkRU (use_R src []) [dst]
    BSR    _ src dst    -> mkRU (use_R src []) [dst]

    -- note: might be a better way to do this
    PREFETCH _  _ src   -> mkRU (use_R src []) []
    -- a lock prefix has the register usage of the instruction it wraps
    LOCK i              -> x86_regUsageOfInstr platform i
    XADD _ src dst      -> usageMM src dst
    -- %eax is the implicit third operand of cmpxchg
    CMPXCHG _ src dst   -> usageRMM src dst (OpReg eax)
    MFENCE              -> noUsage

    _other              -> panic "regUsage: unrecognised instr"
 where
    -- # Definitions
    --
    -- Written: If the operand is a register, it's written. If it's an
    -- address, registers mentioned in the address are read.
    --
    -- Modified: If the operand is a register, it's both read and
    -- written. If it's an address, registers mentioned in the address
    -- are read.

    -- 2 operand form; first operand Read; second Written
    usageRW :: Operand -> Operand -> RegUsage
    usageRW op (OpReg reg)      = mkRU (use_R op []) [reg]
    usageRW op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
    usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"

    -- 2 operand form; first operand Read; second Modified
    usageRM :: Operand -> Operand -> RegUsage
    usageRM op (OpReg reg)      = mkRU (use_R op [reg]) [reg]
    usageRM op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
    usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"

    -- 2 operand form; first operand Modified; second Modified
    usageMM :: Operand -> Operand -> RegUsage
    usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
    usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
    usageMM _ _                     = panic "X86.RegInfo.usageMM: no match"

    -- 3 operand form; first operand Read; second Modified; third Modified
    usageRMM :: Operand -> Operand -> Operand -> RegUsage
    usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
    usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
    usageRMM _ _ _                               = panic "X86.RegInfo.usageRMM: no match"

    -- 1 operand form; operand Modified
    usageM :: Operand -> RegUsage
    usageM (OpReg reg)          = mkRU [reg] [reg]
    usageM (OpAddr ea)          = mkRUR (use_EA ea [])
    usageM _                    = panic "X86.RegInfo.usageM: no match"

    -- Registers defd when an operand is written.
    def_W (OpReg reg)           = [reg]
    def_W (OpAddr _ )           = []
    def_W _                     = panic "X86.RegInfo.def_W: no match"

    -- Registers used when an operand is read.
    -- (the second argument is a tail to cons the registers onto)
    use_R (OpReg reg)  tl = reg : tl
    use_R (OpImm _)    tl = tl
    use_R (OpAddr ea)  tl = use_EA ea tl

    -- Registers used to compute an effective address.
    use_EA (ImmAddr _ _) tl = tl
    use_EA (AddrBaseIndex base index _) tl =
        use_base base $! use_index index tl
        where use_base (EABaseReg r)  tl = r : tl
              use_base _              tl = tl
              use_index EAIndexNone   tl = tl
              use_index (EAIndex i _) tl = i : tl

    -- Read-only usage: keep just the interesting registers.
    mkRUR src = src' `seq` RU src' []
        where src' = filter (interesting platform) src

    -- Read/write usage: keep just the interesting registers.
    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
        where src' = filter (interesting platform) src
              dst' = filter (interesting platform) dst
530
-- | Should the register allocator track this register?
--
-- Virtual registers always are; a real register is tracked exactly
-- when 'freeReg' reports it as free on the given platform.  Register
-- pairs do not occur on x86, so meeting one is a panic.
interesting :: Platform -> Reg -> Bool
interesting platform reg
  = case reg of
      RegVirtual _              -> True
      RegReal (RealRegSingle i) -> isFastTrue (freeReg platform i)
      RegReal (RealRegPair{})   -> panic "X86.interesting: no reg pairs on this arch"
536
537
538
-- | Applies the supplied function to all registers in instructions.
-- Typically used to change virtual registers to real registers.
--
-- Registers inside address modes are renamed as well.  The live
-- register lists carried by 'JMP' and 'CALL' are left untouched.
--
-- Instructions that have no alternative here (e.g. 'LDATA',
-- 'NEWBLOCK', 'GFREE') fall through to the final panic.
x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
x86_patchRegsOfInstr instr env
 = case instr of
    MOV  fmt src dst     -> patch2 (MOV  fmt) src dst
    CMOV cc fmt src dst  -> CMOV cc fmt (patchOp src) (env dst)
    MOVZxL fmt src dst   -> patch2 (MOVZxL fmt) src dst
    MOVSxL fmt src dst   -> patch2 (MOVSxL fmt) src dst
    LEA  fmt src dst     -> patch2 (LEA  fmt) src dst
    ADD  fmt src dst     -> patch2 (ADD  fmt) src dst
    ADC  fmt src dst     -> patch2 (ADC  fmt) src dst
    SUB  fmt src dst     -> patch2 (SUB  fmt) src dst
    SBB  fmt src dst     -> patch2 (SBB  fmt) src dst
    IMUL fmt src dst     -> patch2 (IMUL fmt) src dst
    IMUL2 fmt src        -> patch1 (IMUL2 fmt) src
    MUL fmt src dst      -> patch2 (MUL fmt) src dst
    MUL2 fmt src         -> patch1 (MUL2 fmt) src
    IDIV fmt op          -> patch1 (IDIV fmt) op
    DIV fmt op           -> patch1 (DIV fmt) op
    ADD_CC fmt src dst   -> patch2 (ADD_CC fmt) src dst
    SUB_CC fmt src dst   -> patch2 (SUB_CC fmt) src dst
    AND  fmt src dst     -> patch2 (AND  fmt) src dst
    OR   fmt src dst     -> patch2 (OR   fmt) src dst
    XOR  fmt src dst     -> patch2 (XOR  fmt) src dst
    NOT  fmt op          -> patch1 (NOT  fmt) op
    BSWAP fmt reg        -> BSWAP fmt (env reg)
    NEGI fmt op          -> patch1 (NEGI fmt) op
    -- only the shifted operand is patched; the amount is left as is
    SHL  fmt imm dst     -> patch1 (SHL fmt imm) dst
    SAR  fmt imm dst     -> patch1 (SAR fmt imm) dst
    SHR  fmt imm dst     -> patch1 (SHR fmt imm) dst
    BT   fmt imm src     -> patch1 (BT  fmt imm) src
    TEST fmt src dst     -> patch2 (TEST fmt) src dst
    CMP  fmt src dst     -> patch2 (CMP  fmt) src dst
    PUSH fmt op          -> patch1 (PUSH fmt) op
    POP  fmt op          -> patch1 (POP  fmt) op
    SETCC cond op        -> patch1 (SETCC cond) op
    JMP op regs          -> JMP (patchOp op) regs
    JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl

    GMOV src dst         -> GMOV (env src) (env dst)
    GLD  fmt src dst     -> GLD fmt (lookupAddr src) (env dst)
    GST  fmt src dst     -> GST fmt (env src) (lookupAddr dst)

    GLDZ dst             -> GLDZ (env dst)
    GLD1 dst             -> GLD1 (env dst)

    GFTOI src dst        -> GFTOI (env src) (env dst)
    GDTOI src dst        -> GDTOI (env src) (env dst)

    GITOF src dst        -> GITOF (env src) (env dst)
    GITOD src dst        -> GITOD (env src) (env dst)

    GDTOF src dst        -> GDTOF (env src) (env dst)

    GADD fmt s1 s2 dst   -> GADD fmt (env s1) (env s2) (env dst)
    GSUB fmt s1 s2 dst   -> GSUB fmt (env s1) (env s2) (env dst)
    GMUL fmt s1 s2 dst   -> GMUL fmt (env s1) (env s2) (env dst)
    GDIV fmt s1 s2 dst   -> GDIV fmt (env s1) (env s2) (env dst)

    GCMP cond src1 src2    -> GCMP cond (env src1) (env src2)
    GABS fmt src dst       -> GABS fmt (env src) (env dst)
    GNEG fmt src dst       -> GNEG fmt (env src) (env dst)
    GSQRT fmt src dst      -> GSQRT fmt (env src) (env dst)
    GSIN fmt l1 l2 src dst -> GSIN fmt l1 l2 (env src) (env dst)
    GCOS fmt l1 l2 src dst -> GCOS fmt l1 l2 (env src) (env dst)
    GTAN fmt l1 l2 src dst -> GTAN fmt l1 l2 (env src) (env dst)

    CVTSS2SD src dst       -> CVTSS2SD (env src) (env dst)
    CVTSD2SS src dst       -> CVTSD2SS (env src) (env dst)
    CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
    CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
    CVTSI2SS fmt src dst   -> CVTSI2SS fmt (patchOp src) (env dst)
    CVTSI2SD fmt src dst   -> CVTSI2SD fmt (patchOp src) (env dst)
    FDIV fmt src dst       -> FDIV fmt (patchOp src) (patchOp dst)
    -- SQRT takes an operand source and a register destination.  It
    -- used to be missing here and hit the panic below.
    SQRT fmt src dst       -> SQRT fmt (patchOp src) (env dst)

    CALL (Left _)  _     -> instr
    CALL (Right reg) p   -> CALL (Right (env reg)) p

    FETCHGOT reg         -> FETCHGOT (env reg)
    FETCHPC  reg         -> FETCHPC  (env reg)

    NOP                  -> instr
    COMMENT _            -> instr
    LOCATION {}          -> instr
    DELTA _              -> instr

    JXX _ _              -> instr
    JXX_GBL _ _          -> instr
    CLTD _               -> instr

    POPCNT fmt src dst   -> POPCNT fmt (patchOp src) (env dst)
    BSF    fmt src dst   -> BSF    fmt (patchOp src) (env dst)
    BSR    fmt src dst   -> BSR    fmt (patchOp src) (env dst)

    PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)

    LOCK i               -> LOCK (x86_patchRegsOfInstr i env)
    XADD fmt src dst     -> patch2 (XADD fmt) src dst
    CMPXCHG fmt src dst  -> patch2 (CMPXCHG fmt) src dst
    MFENCE               -> instr

    _other               -> panic "patchRegs: unrecognised instr"

  where
    -- Rebuild a one- or two-operand instruction from patched operands,
    -- forcing each patched operand before applying the constructor.
    patch1 :: (Operand -> a) -> Operand -> a
    patch1 insn op      = insn $! patchOp op
    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst

    -- Rename the registers mentioned by an operand.
    patchOp (OpReg  reg) = OpReg $! env reg
    patchOp (OpImm  imm) = OpImm imm
    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea

    -- Rename the base and index registers of an address mode.
    lookupAddr (ImmAddr imm off) = ImmAddr imm off
    lookupAddr (AddrBaseIndex base index disp)
      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
      where
        lookupBase EABaseNone       = EABaseNone
        lookupBase EABaseRip        = EABaseRip
        lookupBase (EABaseReg r)    = EABaseReg $! env r

        lookupIndex EAIndexNone     = EAIndexNone
        lookupIndex (EAIndex r i)   = (EAIndex $! env r) i
663
664
665 --------------------------------------------------------------------------------
-- | Is this one of the control-transfer instructions: 'JMP', 'JXX',
-- 'JXX_GBL', 'JMP_TBL' or 'CALL'?
x86_isJumpishInstr :: Instr -> Bool
x86_isJumpishInstr JMP{}     = True
x86_isJumpishInstr JXX{}     = True
x86_isJumpishInstr JXX_GBL{} = True
x86_isJumpishInstr JMP_TBL{} = True
x86_isJumpishInstr CALL{}    = True
x86_isJumpishInstr _         = False
677
678
-- | The local blocks an instruction may branch to: the target of a
-- 'JXX', or every populated entry of a 'JMP_TBL'.  All other
-- instructions yield no destinations.
x86_jumpDestsOfInstr :: Instr -> [BlockId]
x86_jumpDestsOfInstr (JXX _ target)
  = [target]
x86_jumpDestsOfInstr (JMP_TBL _ entries _ _)
  = [target | Just target <- entries]
x86_jumpDestsOfInstr _
  = []
688
689
-- | Rewrite the local block targets of a 'JXX' or 'JMP_TBL' with the
-- given function; any other instruction is returned unchanged.
x86_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
x86_patchJumpInstr (JXX cond target) retarget
  = JXX cond (retarget target)
x86_patchJumpInstr (JMP_TBL addr entries section lbl) retarget
  = JMP_TBL addr [fmap retarget entry | entry <- entries] section lbl
x86_patchJumpInstr other _
  = other
699
700
701
702
703 -- -----------------------------------------------------------------------------
-- | Build the instruction that stores a register into its spill slot.
--
-- The slot address is relative to the stack pointer, corrected by the
-- current stack delta.  Integer registers are stored with a word-sized
-- 'MOV', x87 registers with an 80-bit 'GST', and SSE registers with a
-- 64-bit 'MOV'.  Any other register class is a panic.
x86_mkSpillInstr
    :: DynFlags
    -> Reg      -- register to spill
    -> Int      -- current stack delta
    -> Int      -- spill slot to use
    -> Instr
x86_mkSpillInstr dflags reg delta slot
  = case targetClassOfReg platform reg of
      RcInteger   -> MOV wordFmt (OpReg reg) (OpAddr slotAddr)
      RcDouble    -> GST FF80 reg slotAddr {- RcFloat/RcDouble -}
      RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr slotAddr)
      _           -> panic "X86.mkSpillInstr: no match"
  where
    platform = targetPlatform dflags
    wordFmt  = archWordFormat (target32Bit platform)
    slotAddr = spRel dflags (spillSlotToOffset platform slot - delta)
723
-- | Build the instruction that reloads a register from its spill slot.
--
-- Mirror image of 'x86_mkSpillInstr': the slot address is relative to
-- the stack pointer, corrected by the current stack delta.  Integer
-- registers are loaded with a word-sized 'MOV', x87 registers with an
-- 80-bit 'GLD', and SSE registers with a 64-bit 'MOV'.
x86_mkLoadInstr
    :: DynFlags
    -> Reg      -- register to load
    -> Int      -- current stack delta
    -> Int      -- spill slot to use
    -> Instr
x86_mkLoadInstr dflags reg delta slot
  = case targetClassOfReg platform reg of
      RcInteger   -> MOV wordFmt (OpAddr slotAddr) (OpReg reg)
      RcDouble    -> GLD FF80 slotAddr reg {- RcFloat/RcDouble -}
      RcDoubleSSE -> MOV FF64 (OpAddr slotAddr) (OpReg reg)
      _           -> panic "X86.x86_mkLoadInstr"
  where
    platform = targetPlatform dflags
    wordFmt  = archWordFormat (target32Bit platform)
    slotAddr = spRel dflags (spillSlotToOffset platform slot - delta)
743
-- | Size in bytes of one spill slot: 12 on 32-bit targets, 8 otherwise.
-- (Presumably 12 so that an 80-bit x87 spill fits -- confirm before
-- relying on this.)
spillSlotSize :: Platform -> Int
spillSlotSize platform
  | target32Bit platform = 12
  | otherwise            = 8
747
-- | Number of spill slots that fit in the reserved C stack area, after
-- setting aside the first 64 bytes (the same offset that
-- 'spillSlotToOffset' skips) and keeping one slot spare.
maxSpillSlots :: DynFlags -> Int
maxSpillSlots dflags = usableBytes `div` slotBytes - 1
  where
    usableBytes = rESERVED_C_STACK_BYTES dflags - 64
    slotBytes   = spillSlotSize (targetPlatform dflags)
--     = 0 -- useful for testing allocMoreStack
752
-- | Alignment, in bytes, that the stack pointer must be kept at.
stackAlign :: Int
stackAlign = 16
756
-- | Convert a spill slot number into an unsigned *byte* offset.
-- Whether that offset lies above or below the C stack pointer is
-- decided per architecture by the caller.  The first 64 bytes are
-- skipped; slots follow back to back, each 'spillSlotSize' bytes wide.
spillSlotToOffset :: Platform -> Int -> Int
spillSlotToOffset platform slot = reservedBytes + slot * slotBytes
  where
    reservedBytes = 64
    slotBytes     = spillSlotSize platform
763
764 --------------------------------------------------------------------------------
765
-- | If the instruction is a 'DELTA' pseudo-op, return the C stack
-- delta it records; otherwise 'Nothing'.
x86_takeDeltaInstr :: Instr -> Maybe Int
x86_takeDeltaInstr (DELTA delta) = Just delta
x86_takeDeltaInstr _             = Nothing
775
776
-- | Is this one of the pseudo-ops 'COMMENT', 'LOCATION', 'LDATA',
-- 'NEWBLOCK' or 'DELTA', rather than a real machine instruction?
x86_isMetaInstr :: Instr -> Bool
x86_isMetaInstr COMMENT{}  = True
x86_isMetaInstr LOCATION{} = True
x86_isMetaInstr LDATA{}    = True
x86_isMetaInstr NEWBLOCK{} = True
x86_isMetaInstr DELTA{}    = True
x86_isMetaInstr _          = False
789
790
791
-- | Make a reg-reg move instruction.
--
-- The kind of move is chosen from the class of the source register:
-- a word-sized 'MOV' for integer registers ('II32' on x86, 'II64' on
-- x86_64), 'GMOV' for x87 registers, and a 64-bit 'MOV' for SSE
-- registers.  Any other register class or architecture is a panic.
x86_mkRegRegMoveInstr
    :: Platform
    -> Reg
    -> Reg
    -> Instr
x86_mkRegRegMoveInstr platform src dst
  = case targetClassOfReg platform src of
      RcInteger   -> intMove (platformArch platform)
      RcDouble    -> GMOV src dst
      RcDoubleSSE -> move FF64
      _           -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
  where
    move fmt = MOV fmt (OpReg src) (OpReg dst)

    intMove ArchX86    = move II32
    intMove ArchX86_64 = move II64
    intMove _          = panic "x86_mkRegRegMoveInstr: Bad arch"
812
-- | Recognise a register-to-register 'MOV' and return its
-- (source, destination) pair.
--
-- The register allocator tries to eliminate such moves whenever it
-- can, by assigning the source and destination temporaries to the
-- same real register.
x86_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
x86_takeRegRegMoveInstr instr
  = case instr of
      MOV _ (OpReg from) (OpReg to) -> Just (from, to)
      _                             -> Nothing
825
826
-- | The instruction sequence for an unconditional branch to a local
-- block: a single 'JXX' with condition 'ALWAYS'.
x86_mkJumpInstr :: BlockId -> [Instr]
x86_mkJumpInstr target = JXX ALWAYS target : []
834
835
-- | Instruction that allocates the given number of bytes of C stack
-- by subtracting it from the stack pointer (@%esp@ on x86, @%rsp@ on
-- x86_64).  Panics on any other architecture.
x86_mkStackAllocInstr :: Platform -> Int -> Instr
x86_mkStackAllocInstr platform amount
  = case platformArch platform of
      ArchX86    -> grow II32 esp
      ArchX86_64 -> grow II64 rsp
      _          -> panic "x86_mkStackAllocInstr"
  where
    grow fmt sp = SUB fmt (OpImm (ImmInt amount)) (OpReg sp)
845
-- | Instruction that releases the given number of bytes of C stack by
-- adding it back to the stack pointer (@%esp@ on x86, @%rsp@ on
-- x86_64).  Panics on any other architecture.
x86_mkStackDeallocInstr :: Platform -> Int -> Instr
x86_mkStackDeallocInstr platform amount
  = case platformArch platform of
      ArchX86    -> shrink II32 esp
      ArchX86_64 -> shrink II64 rsp
      _          -> panic "x86_mkStackDeallocInstr"
  where
    shrink fmt sp = ADD fmt (OpImm (ImmInt amount)) (OpReg sp)
855
-- | If any block contains a fake x87 instruction (see 'is_G_instr'),
-- insert a 'GFREE' before every 'CALL' and 'JMP' in every block;
-- otherwise return the blocks untouched.
i386_insert_ffrees
        :: [GenBasicBlock Instr]
        -> [GenBasicBlock Instr]
i386_insert_ffrees blocks
  = if usesFakeFP then map addFrees blocks else blocks
  where
    usesFakeFP
      = or [ any is_G_instr insns | BasicBlock _ insns <- blocks ]

    addFrees (BasicBlock lbl insns)
      = BasicBlock lbl (insertBeforeNonlocalTransfers GFREE insns)
868
-- | Put the given instruction in front of every 'CALL' and 'JMP' in
-- the list.  A 'JXX_GBL' is not supported and causes a panic.
insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
insertBeforeNonlocalTransfers insert = go
  where
    go []            = []
    go (insn : rest) = case insn of
        CALL{}    -> insert : insn : go rest
        JMP{}     -> insert : insn : go rest
        JXX_GBL{} -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
        _         -> insn : go rest
877
878
-- | Is this one of the fake x87 floating point instructions (the
-- @G...@ constructors of 'Instr')?
--
-- This list must be extended whenever a new fake FP instruction is
-- added.  'GFREE' itself is not expected here and causes a panic.
is_G_instr :: Instr -> Bool
is_G_instr GMOV{}  = True
is_G_instr GLD{}   = True
is_G_instr GST{}   = True
is_G_instr GLDZ{}  = True
is_G_instr GLD1{}  = True
is_G_instr GFTOI{} = True
is_G_instr GDTOI{} = True
is_G_instr GITOF{} = True
is_G_instr GITOD{} = True
is_G_instr GDTOF{} = True
is_G_instr GADD{}  = True
is_G_instr GDIV{}  = True
is_G_instr GSUB{}  = True
is_G_instr GMUL{}  = True
is_G_instr GCMP{}  = True
is_G_instr GABS{}  = True
is_G_instr GNEG{}  = True
is_G_instr GSQRT{} = True
is_G_instr GSIN{}  = True
is_G_instr GCOS{}  = True
is_G_instr GTAN{}  = True
is_G_instr GFREE   = panic "is_G_instr: GFREE (!)"
is_G_instr _       = False
907
908
909 --
910 -- Note [extra spill slots]
911 --
912 -- If the register allocator used more spill slots than we have
913 -- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
914 -- C stack space on entry and exit from this proc. Therefore we
915 -- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
916 -- before every non-local jump.
917 --
918 -- This became necessary when the new codegen started bundling entire
919 -- functions together into one proc, because the register allocator
920 -- assigns a different stack slot to each virtual reg within a proc.
921 -- To avoid using so many slots we could also:
922 --
--    - split up the proc into connected components before code generation
924 --
925 -- - rename the virtual regs, so that we re-use vreg names and hence
926 -- stack slots for non-overlapping vregs.
927 --
928 -- Note that when a block is both a non-local entry point (with an
929 -- info table) and a local branch target, we have to split it into
930 -- two, like so:
931 --
932 -- <info table>
933 -- L:
934 -- <code>
935 --
936 -- becomes
937 --
938 -- <info table>
939 -- L:
--      subl $N, %rsp
941 -- jmp Lnew
942 -- Lnew:
943 -- <code>
944 --
-- and all branches pointing to L are retargeted to point to Lnew.
-- Otherwise, we would repeat the %rsp adjustment for each branch to
-- L.
948 --
-- | Reserve extra C stack for a proc that uses more spill slots than were
--   pre-allocated (see Note [extra spill slots]).
--
--   Data sections are returned unchanged.  For a proc:
--
--   * every entry block is split into a stub that runs the stack-allocation
--     instruction and jumps to a fresh block holding the original code;
--
--   * every 'JMP' gets the matching deallocation instruction in front of it;
--
--   * all remaining instructions go through 'x86_patchJumpInstr', so jumps
--     that targeted an entry block now target its fresh replacement.
--
--   A 'JXX_GBL' anywhere in the code is a panic.
allocMoreStack
  :: Platform
  -> Int
  -> NatCmmDecl statics X86.Instr.Instr
  -> UniqSM (NatCmmDecl statics X86.Instr.Instr)
allocMoreStack _ _ top@(CmmData _ _) = return top
allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
    let entryIds = entryBlocks proc

    -- one fresh unique per entry block, used to name its replacement
    freshUniqs <- replicateM (length entryIds) getUniqueM

    let -- stack pointer delta, rounded up to a multiple of stackAlign
        rawDelta = slots * spillSlotSize platform
        delta    = ((rawDelta + stackAlign - 1) `quot` stackAlign) * stackAlign

        allocInsn   = mkStackAllocInstr   platform delta
        deallocInsn = mkStackDeallocInstr platform delta

        -- entry block id -> id of the fresh block that takes over its code
        renamed :: BlockEnv BlockId
        renamed = mapFromList (zip entryIds (map mkBlockId freshUniqs))

        -- blocks that were not split keep their own id
        retarget b = fromMaybe b (mapLookup b renamed)

        fixInsn insn rest = case insn of
            JMP _ _     -> deallocInsn : insn : rest
            JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
            _           -> x86_patchJumpInstr insn retarget : rest

        splitBlock (BasicBlock bid insns) =
            case mapLookup bid renamed of
              Just freshId -> [ BasicBlock bid [allocInsn, JXX ALWAYS freshId]
                              , BasicBlock freshId body ]
              Nothing      -> [ BasicBlock bid body ]
          where
            body = foldr fixInsn [] insns

    return (CmmProc info lbl live (ListGraph (concatMap splitBlock code)))
989
990
-- | Where a jump can lead once it has been shortcut.
data JumpDest
  = DestBlockId BlockId  -- ^ a block, named by its 'BlockId'
  | DestImm Imm          -- ^ an immediate operand
992
-- | The 'BlockId' of a destination, when it is a 'DestBlockId'; 'Nothing'
--   for every other kind of destination.
getJumpDestBlockId :: JumpDest -> Maybe BlockId
getJumpDestBlockId dest = case dest of
  DestBlockId bid -> Just bid
  _               -> Nothing
996
-- | The destination of an instruction that can be shortcut: an
--   unconditional 'JXX' yields its block, a 'JMP' to an immediate yields
--   that immediate, and anything else yields 'Nothing'.
canShortcut :: Instr -> Maybe JumpDest
canShortcut insn = case insn of
  JXX ALWAYS target -> Just (DestBlockId target)
  JMP (OpImm imm) _ -> Just (DestImm imm)
  _                 -> Nothing
1001
1002
-- | Shortcut a chain of branches: follow @fn@ from the target of a 'JXX'
--   for as long as it maps the current target to a further destination.
--   The set of already-visited blocks stops us from going round a cycle.
--   Instructions other than 'JXX' are returned unchanged.
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump fn insn = go (setEmpty :: BlockSet) insn
  where
    go visited jump@(JXX cc target)
      | setMember target visited = jump
      | otherwise =
          case fn target of
            Nothing                 -> jump
            Just (DestBlockId next) -> go visited' (JXX cc next)
            Just (DestImm imm)      -> go visited' (JXX_GBL cc imm)
      where
        visited' = setInsert target visited
    go _ other = other
1015
-- | Apply the shortcut mapping to every entry of a static data section.
--
--   This lives here because it needs to know about 'JumpDest'.  Jump tables
--   have to be covered as well, which is why the mapping is also applied to
--   the entries of a CmmData.
shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
shortcutStatics fn (align, Statics lbl statics) = (align, Statics lbl statics')
  where
    statics' = map (shortcutStatic fn) statics
1022
-- | Shortcut a label.  When 'maybeAsmTemp' yields a unique for the label,
--   the block made from that unique is resolved with 'shortBlockId',
--   starting from an empty set of visited uniques; any other label is
--   returned as it is.
shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab =
  case maybeAsmTemp lab of
    Just uq -> shortBlockId fn emptyUniqSet (mkBlockId uq)
    Nothing -> lab
1027
-- | Shortcut the label inside a single static, where there is one.
--   Statics that are neither a 'CmmLabel' nor a 'CmmLabelDiffOff' literal
--   pass through unchanged.
shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn static = case static of
  CmmStaticLit (CmmLabel lab) ->
    CmmStaticLit (CmmLabel (shortcutLabel fn lab))
  -- Slightly dodgy: only the first label is shortcut and the second one is
  -- ignored, but this works with the way we use CmmLabelDiffOff for jump
  -- tables now.
  CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off) ->
    CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
  _ -> static
1037
-- | Resolve a block to the label at the end of its shortcut chain.
--
--   Follows @fn@ from @blockid@, recording the unique of every block
--   visited in @seen@.  The walk stops with the block's own temp label when
--   the block was already visited or @fn@ has no mapping for it, and with
--   the target label when @fn@ maps it to an 'ImmCLbl' immediate.  Any
--   other immediate destination is a panic.
shortBlockId
  :: (BlockId -> Maybe JumpDest)
  -> UniqSet Unique
  -> BlockId
  -> CLabel
shortBlockId fn seen blockid
  | elementOfUniqSet uq seen = ownLabel
  | otherwise =
      case fn blockid of
        Nothing                      -> ownLabel
        Just (DestBlockId next)      -> shortBlockId fn (addOneToUniqSet seen uq) next
        Just (DestImm (ImmCLbl lbl)) -> lbl
        _                            -> panic "shortBlockId"
  where
    uq       = getUnique blockid
    ownLabel = mkAsmTempLabel uq