{-# LANGUAGE CPP, TypeFamilies #-}

-----------------------------------------------------------------------------
--
-- Machine-dependent assembly language
--
-- (c) The University of Glasgow 1993-2004
--
-----------------------------------------------------------------------------

module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest,
                  getJumpDestBlockId, canShortcut, shortcutStatics,
                  shortcutJump, i386_insert_ffrees, allocMoreStack,
                  maxSpillSlots, archWordFormat)
where

#include "HsVersions.h"
#include "nativeGen/NCG.h"

import X86.Cond
import X86.Regs
import Instruction
import Format
import RegClass
import Reg
import TargetReg

import BlockId
import CodeGen.Platform
import Cmm
import FastString
import Outputable
import Platform

import BasicTypes (Alignment)
import CLabel
import DynFlags
import UniqSet
import Unique
import UniqSupply

import Control.Monad
import Data.Maybe (fromMaybe)

-- Format of an x86/x86_64 memory address, in bytes.
--
archWordFormat :: Bool -> Format
archWordFormat is32Bit
 | is32Bit   = II32
 | otherwise = II64
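
-- A small usage sketch (illustrative only): the Bool is normally
-- target32Bit platform, so
--
-- > archWordFormat True  == II32
-- > archWordFormat False == II64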

-- | Instruction instance for x86 instruction set.
instance Instruction Instr where
        regUsageOfInstr     = x86_regUsageOfInstr
        patchRegsOfInstr    = x86_patchRegsOfInstr
        isJumpishInstr      = x86_isJumpishInstr
        jumpDestsOfInstr    = x86_jumpDestsOfInstr
        patchJumpInstr      = x86_patchJumpInstr
        mkSpillInstr        = x86_mkSpillInstr
        mkLoadInstr         = x86_mkLoadInstr
        takeDeltaInstr      = x86_takeDeltaInstr
        isMetaInstr         = x86_isMetaInstr
        mkRegRegMoveInstr   = x86_mkRegRegMoveInstr
        takeRegRegMoveInstr = x86_takeRegRegMoveInstr
        mkJumpInstr         = x86_mkJumpInstr
        mkStackAllocInstr   = x86_mkStackAllocInstr
        mkStackDeallocInstr = x86_mkStackDeallocInstr


-- -----------------------------------------------------------------------------
-- Intel x86 instructions

{-
Intel, in their infinite wisdom, selected a stack model for floating
point registers on x86.  That might have made sense back in 1979 --
nowadays we can see it for the nonsense it really is.  A stack model
fits poorly with the existing nativeGen infrastructure, which assumes
flat integer and FP register sets.  Prior to this commit, nativeGen
could not generate correct x86 FP code -- to do so would have meant
somehow working the register-stack paradigm into the register
allocator and spiller, which sounds very difficult.

We have decided to cheat, and go for a simple fix which requires no
infrastructure modifications, at the expense of generating ropey but
correct FP code.  All notions of the x86 FP stack and its insns have
been removed.  Instead, we pretend (to the instruction selector and
register allocator) that x86 has six floating point registers, %fake0
.. %fake5, which can be used in the usual flat manner.  We further
claim that x86 has floating point instructions very similar to SPARC
and Alpha, that is, a simple 3-operand register-register arrangement.
Code generation and register allocation proceed on this basis.

When we come to print out the final assembly, our convenient fiction
is converted to dismal reality.  Each fake instruction is
independently converted to a series of real x86 instructions.
%fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
arithmetic operations, the two operands are pushed onto the top of the
FP stack, the operation done, and the result copied back into the
relevant register.  There are only six %fake registers because 2 are
needed for the translation, and x86 has 8 in total.

The translation is inefficient but is simple and it works.  A cleverer
translation would handle a sequence of insns, simulating the FP stack
contents, would not impose a fixed mapping from %fake to %st regs, and
hopefully could avoid most of the redundant reg-reg moves of the
current translation.

We might as well make use of whatever unique FP facilities Intel have
chosen to bless us with (let's not be churlish, after all).
Hence GLDZ and GLD1.  Bwahahahahahahaha!
-}
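
{-
An illustrative sketch of what this fiction costs us (not the literal
output of the pretty-printer; the real sequences live in X86.Ppr): a
single fake instruction such as

    GADD FF80 %fake0 %fake1 %fake2

is printed as a short x87 sequence that pushes copies of the two source
%st slots, does an fadd, and pops the result back into the %st slot
standing for %fake2.
-}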

{-
Note [x86 Floating point precision]

Intel's internal floating point registers are by default 80 bit
extended precision.  This means that all operations done on values in
registers are done at 80 bits, and unless the intermediate values are
truncated to the appropriate size (32 or 64 bits) by storing in
memory, calculations in registers will give different results from
calculations which pass intermediate values in memory (eg. via
function calls).

One solution is to set the FPU into 64 bit precision mode.  Some OSs
do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
that this will only affect 64-bit precision arithmetic; 32-bit
calculations will still be done at 64-bit precision in registers.  So
it doesn't solve the whole problem.

There's also the issue of what the C library is expecting in terms of
precision.  It seems to be the case that glibc on Linux expects the
FPU to be set to 80 bit precision, so setting it to 64 bit could have
unexpected effects.  Changing the default could have undesirable
effects on other 3rd-party library code too, so the right thing would
be to save/restore the FPU control word across Haskell code if we were
to do this.

gcc's -ffloat-store gives consistent results by always storing the
results of floating-point calculations in memory, which works for both
32 and 64-bit precision.  However, it only affects the values of
user-declared floating point variables in C, not intermediate results.
GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
flag).

Another problem is how to spill floating point registers in the
register allocator.  Should we spill the whole 80 bits, or just 64?
On an OS which is set to 64 bit precision, spilling 64 is fine.  On
Linux, spilling 64 bits will round the results of some operations.
This is what gcc does.  Spilling at 80 bits requires taking up a full
128 bit slot (so we get alignment).  We spill at 80-bits and ignore
the alignment problems.

In the future [edit: now available in GHC 7.0.1, with the -msse2
flag], we'll use the SSE registers for floating point.  This requires
a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
float ops), which means P4 or Xeon and above.  Using SSE will solve
all these problems, because the SSE registers use fixed 32 bit or 64
bit precision.

--SDM 1/2003
-}

data Instr
        -- comment pseudo-op
        = COMMENT FastString

        -- location pseudo-op (file, line, col, name)
        | LOCATION Int Int Int String

        -- some static data spat out during code
        -- generation.  Will be extracted before
        -- pretty-printing.
        | LDATA Section (Alignment, CmmStatics)

        -- start a new basic block.  Useful during
        -- codegen, removed later.  Preceding
        -- instruction should be a jump, as per the
        -- invariants for a BasicBlock (see Cmm).
        | NEWBLOCK BlockId

        -- specify current stack offset for
        -- benefit of subsequent passes
        | DELTA Int

        -- Moves.
        | MOV Format Operand Operand
        | CMOV Cond Format Operand Reg
        | MOVZxL Format Operand Operand -- format is the size of operand 1
        | MOVSxL Format Operand Operand -- format is the size of operand 1
        -- x86_64 note: plain mov into a 32-bit register always zero-extends
        -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
        -- don't affect the high bits of the register.

        -- Load effective address (also a very useful three-operand add instruction :-)
        | LEA Format Operand Operand

        -- Int Arithmetic.
        | ADD Format Operand Operand
        | ADC Format Operand Operand
        | SUB Format Operand Operand
        | SBB Format Operand Operand

        | MUL Format Operand Operand
        | MUL2 Format Operand -- %edx:%eax = operand * %rax
        | IMUL Format Operand Operand -- signed int mul
        | IMUL2 Format Operand -- %edx:%eax = operand * %eax

        | DIV Format Operand -- eax := edx:eax/op, edx := edx:eax%op
        | IDIV Format Operand -- ditto, but signed

        -- Int Arithmetic, where the effects on the condition register
        -- are important.  Used in specialized sequences such as MO_Add2.
        -- Do not rewrite these instructions to "equivalent" ones that
        -- have different effect on the condition register!  (See #9013.)
        | ADD_CC Format Operand Operand
        | SUB_CC Format Operand Operand

        -- Simple bit-twiddling.
        | AND Format Operand Operand
        | OR Format Operand Operand
        | XOR Format Operand Operand
        | NOT Format Operand
        | NEGI Format Operand -- NEG instruction (name clash with Cond)
        | BSWAP Format Reg

        -- Shifts (amount may be immediate or %cl only)
        | SHL Format Operand{-amount-} Operand
        | SAR Format Operand{-amount-} Operand
        | SHR Format Operand{-amount-} Operand

        | BT Format Imm Operand
        | NOP

        -- x86 Float Arithmetic.
        -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
        -- as single instructions right up until we spit them out.
        -- all the 3-operand fake fp insns are src1 src2 dst
        -- and furthermore are constrained to be fp regs only.
        -- IMPORTANT: keep is_G_insn up to date with any changes here
        | GMOV Reg Reg -- src(fpreg), dst(fpreg)
        | GLD Format AddrMode Reg -- src, dst(fpreg)
        | GST Format Reg AddrMode -- src(fpreg), dst

        | GLDZ Reg -- dst(fpreg)
        | GLD1 Reg -- dst(fpreg)

        | GFTOI Reg Reg -- src(fpreg), dst(intreg)
        | GDTOI Reg Reg -- src(fpreg), dst(intreg)

        | GITOF Reg Reg -- src(intreg), dst(fpreg)
        | GITOD Reg Reg -- src(intreg), dst(fpreg)

        | GDTOF Reg Reg -- src(fpreg), dst(fpreg)

        | GADD Format Reg Reg Reg -- src1, src2, dst
        | GDIV Format Reg Reg Reg -- src1, src2, dst
        | GSUB Format Reg Reg Reg -- src1, src2, dst
        | GMUL Format Reg Reg Reg -- src1, src2, dst

        -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
        -- Compare src1 with src2; set the Zero flag iff the numbers are
        -- comparable and the comparison is True.  Subsequent code must
        -- test the %eflags zero flag regardless of the supplied Cond.
        | GCMP Cond Reg Reg -- src1, src2

        | GABS Format Reg Reg -- src, dst
        | GNEG Format Reg Reg -- src, dst
        | GSQRT Format Reg Reg -- src, dst
        | GSIN Format CLabel CLabel Reg Reg -- src, dst
        | GCOS Format CLabel CLabel Reg Reg -- src, dst
        | GTAN Format CLabel CLabel Reg Reg -- src, dst

        | GFREE -- do ffree on all x86 regs; an ugly hack


        -- SSE2 floating point: we use a restricted set of the available SSE2
        -- instructions for floating-point.
        -- use MOV for moving (either movss or movsd (movlpd better?))
        | CVTSS2SD Reg Reg -- F32 to F64
        | CVTSD2SS Reg Reg -- F64 to F32
        | CVTTSS2SIQ Format Operand Reg -- F32 to I32/I64 (with truncation)
        | CVTTSD2SIQ Format Operand Reg -- F64 to I32/I64 (with truncation)
        | CVTSI2SS Format Operand Reg -- I32/I64 to F32
        | CVTSI2SD Format Operand Reg -- I32/I64 to F64

        -- use ADD & SUB for arithmetic.  In both cases, operands
        -- are Operand Reg.

        -- SSE2 floating-point division:
        | FDIV Format Operand Operand -- divisor, dividend(dst)

        -- use CMP for comparisons.  ucomiss and ucomisd instructions
        -- compare single/double prec floating point respectively.

        | SQRT Format Operand Reg -- src, dst


        -- Comparison
        | TEST Format Operand Operand
        | CMP Format Operand Operand
        | SETCC Cond Operand

        -- Stack Operations.
        | PUSH Format Operand
        | POP Format Operand
        -- both unused (SDM):
        -- | PUSHA
        -- | POPA

        -- Jumping around.
        | JMP Operand [Reg] -- including live Regs at the call
        | JXX Cond BlockId -- includes unconditional branches
        | JXX_GBL Cond Imm -- non-local version of JXX
        -- Table jump
        | JMP_TBL Operand           -- Address to jump to
                  [Maybe BlockId]   -- Blocks in the jump table
                  Section           -- Data section jump table should be put in
                  CLabel            -- Label of jump table
        | CALL (Either Imm Reg) [Reg]

        -- Other things.
        | CLTD Format -- sign extend %eax into %edx:%eax

        | FETCHGOT Reg -- pseudo-insn for ELF position-independent code
                       -- pretty-prints as
                       --       call 1f
                       -- 1:    popl %reg
                       --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
        | FETCHPC Reg  -- pseudo-insn for Darwin position-independent code
                       -- pretty-prints as
                       --       call 1f
                       -- 1:    popl %reg

        -- bit counting instructions
        | POPCNT Format Operand Reg -- [SSE4.2] count number of bits set to 1
        | BSF Format Operand Reg -- bit scan forward
        | BSR Format Operand Reg -- bit scan reverse

        -- prefetch
        | PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
                                                  -- variant can be NTA, Lvl0, Lvl1, or Lvl2

        | LOCK Instr -- lock prefix
        | XADD Format Operand Operand -- src (r), dst (r/m)
        | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit
        | MFENCE

data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2


data Operand
        = OpReg Reg            -- register
        | OpImm Imm            -- immediate value
        | OpAddr AddrMode      -- memory reference
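
-- A few illustrative Operand values (a sketch; rax/rbx and the exact
-- AddrMode constructors come from X86.Regs):
--
-- > OpReg rax
-- > OpImm (ImmInt 42)
-- > OpAddr (AddrBaseIndex (EABaseReg rbx) EAIndexNone (ImmInt 8))   -- 8(%rbx)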



-- | Returns which registers are read and written as a (read, written)
-- pair.
x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
x86_regUsageOfInstr platform instr
 = case instr of
    MOV _ src dst -> usageRW src dst
    CMOV _ _ src dst -> mkRU (use_R src [dst]) [dst]
    MOVZxL _ src dst -> usageRW src dst
    MOVSxL _ src dst -> usageRW src dst
    LEA _ src dst -> usageRW src dst
    ADD _ src dst -> usageRM src dst
    ADC _ src dst -> usageRM src dst
    SUB _ src dst -> usageRM src dst
    SBB _ src dst -> usageRM src dst
    IMUL _ src dst -> usageRM src dst
    IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
    MUL _ src dst -> usageRM src dst
    MUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
    DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
    IDIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
    ADD_CC _ src dst -> usageRM src dst
    SUB_CC _ src dst -> usageRM src dst
    AND _ src dst -> usageRM src dst
    OR _ src dst -> usageRM src dst

    XOR _ (OpReg src) (OpReg dst)
        | src == dst -> mkRU [] [dst]

    XOR _ src dst -> usageRM src dst
    NOT _ op -> usageM op
    BSWAP _ reg -> mkRU [reg] [reg]
    NEGI _ op -> usageM op
    SHL _ imm dst -> usageRM imm dst
    SAR _ imm dst -> usageRM imm dst
    SHR _ imm dst -> usageRM imm dst
    BT _ _ src -> mkRUR (use_R src [])

    PUSH _ op -> mkRUR (use_R op [])
    POP _ op -> mkRU [] (def_W op)
    TEST _ src dst -> mkRUR (use_R src $! use_R dst [])
    CMP _ src dst -> mkRUR (use_R src $! use_R dst [])
    SETCC _ op -> mkRU [] (def_W op)
    JXX _ _ -> mkRU [] []
    JXX_GBL _ _ -> mkRU [] []
    JMP op regs -> mkRUR (use_R op regs)
    JMP_TBL op _ _ _ -> mkRUR (use_R op [])
    CALL (Left _) params -> mkRU params (callClobberedRegs platform)
    CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
    CLTD _ -> mkRU [eax] [edx]
    NOP -> mkRU [] []

    GMOV src dst -> mkRU [src] [dst]
    GLD _ src dst -> mkRU (use_EA src []) [dst]
    GST _ src dst -> mkRUR (src : use_EA dst [])

    GLDZ dst -> mkRU [] [dst]
    GLD1 dst -> mkRU [] [dst]

    GFTOI src dst -> mkRU [src] [dst]
    GDTOI src dst -> mkRU [src] [dst]

    GITOF src dst -> mkRU [src] [dst]
    GITOD src dst -> mkRU [src] [dst]

    GDTOF src dst -> mkRU [src] [dst]

    GADD _ s1 s2 dst -> mkRU [s1,s2] [dst]
    GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst]
    GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst]
    GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst]

    GCMP _ src1 src2 -> mkRUR [src1,src2]
    GABS _ src dst -> mkRU [src] [dst]
    GNEG _ src dst -> mkRU [src] [dst]
    GSQRT _ src dst -> mkRU [src] [dst]
    GSIN _ _ _ src dst -> mkRU [src] [dst]
    GCOS _ _ _ src dst -> mkRU [src] [dst]
    GTAN _ _ _ src dst -> mkRU [src] [dst]

    CVTSS2SD src dst -> mkRU [src] [dst]
    CVTSD2SS src dst -> mkRU [src] [dst]
    CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SS _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SD _ src dst -> mkRU (use_R src []) [dst]
    FDIV _ src dst -> usageRM src dst

    FETCHGOT reg -> mkRU [] [reg]
    FETCHPC reg -> mkRU [] [reg]

    COMMENT _ -> noUsage
    LOCATION{} -> noUsage
    DELTA _ -> noUsage

    POPCNT _ src dst -> mkRU (use_R src []) [dst]
    BSF _ src dst -> mkRU (use_R src []) [dst]
    BSR _ src dst -> mkRU (use_R src []) [dst]

    -- note: there might be a better way to do this
    PREFETCH _ _ src -> mkRU (use_R src []) []
    LOCK i -> x86_regUsageOfInstr platform i
    XADD _ src dst -> usageMM src dst
    CMPXCHG _ src dst -> usageRMM src dst (OpReg eax)
    MFENCE -> noUsage

    _other -> panic "regUsage: unrecognised instr"
  where
    -- # Definitions
    --
    -- Written: If the operand is a register, it's written.  If it's an
    -- address, registers mentioned in the address are read.
    --
    -- Modified: If the operand is a register, it's both read and
    -- written.  If it's an address, registers mentioned in the address
    -- are read.
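    --
    -- For example (a sketch of the intended reading):
    --   ADD II64 (OpReg r1) (OpReg r2)   is usageRM: reads r1 and r2, writes r2
    --   MOV II64 (OpReg r1) (OpAddr a)   is usageRW: reads r1 and the regs in a,
    --                                    and writes no register (memory is not tracked)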

    -- 2 operand form; first operand Read; second Written
    usageRW :: Operand -> Operand -> RegUsage
    usageRW op (OpReg reg) = mkRU (use_R op []) [reg]
    usageRW op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
    usageRW _ _ = panic "X86.RegInfo.usageRW: no match"

    -- 2 operand form; first operand Read; second Modified
    usageRM :: Operand -> Operand -> RegUsage
    usageRM op (OpReg reg) = mkRU (use_R op [reg]) [reg]
    usageRM op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
    usageRM _ _ = panic "X86.RegInfo.usageRM: no match"

    -- 2 operand form; first operand Modified; second Modified
    usageMM :: Operand -> Operand -> RegUsage
    usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
    usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
    usageMM _ _ = panic "X86.RegInfo.usageMM: no match"

    -- 3 operand form; first operand Read; second Modified; third Modified
    usageRMM :: Operand -> Operand -> Operand -> RegUsage
    usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
    usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
    usageRMM _ _ _ = panic "X86.RegInfo.usageRMM: no match"

    -- 1 operand form; operand Modified
    usageM :: Operand -> RegUsage
    usageM (OpReg reg) = mkRU [reg] [reg]
    usageM (OpAddr ea) = mkRUR (use_EA ea [])
    usageM _ = panic "X86.RegInfo.usageM: no match"

    -- Registers defd when an operand is written.
    def_W (OpReg reg) = [reg]
    def_W (OpAddr _ ) = []
    def_W _ = panic "X86.RegInfo.def_W: no match"

    -- Registers used when an operand is read.
    use_R (OpReg reg) tl = reg : tl
    use_R (OpImm _) tl = tl
    use_R (OpAddr ea) tl = use_EA ea tl

    -- Registers used to compute an effective address.
    use_EA (ImmAddr _ _) tl = tl
    use_EA (AddrBaseIndex base index _) tl =
        use_base base $! use_index index tl
          where use_base (EABaseReg r) tl = r : tl
                use_base _ tl = tl
                use_index EAIndexNone tl = tl
                use_index (EAIndex i _) tl = i : tl

    mkRUR src = src' `seq` RU src' []
        where src' = filter (interesting platform) src

    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
        where src' = filter (interesting platform) src
              dst' = filter (interesting platform) dst

    -- | Is this register interesting for the register allocator?
    interesting :: Platform -> Reg -> Bool
    interesting _ (RegVirtual _) = True
    interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
    interesting _ (RegReal (RealRegPair{})) = panic "X86.interesting: no reg pairs on this arch"



-- | Applies the supplied function to all registers in instructions.
-- Typically used to change virtual registers to real registers.
x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
x86_patchRegsOfInstr instr env
 = case instr of
    MOV fmt src dst -> patch2 (MOV fmt) src dst
    CMOV cc fmt src dst -> CMOV cc fmt (patchOp src) (env dst)
    MOVZxL fmt src dst -> patch2 (MOVZxL fmt) src dst
    MOVSxL fmt src dst -> patch2 (MOVSxL fmt) src dst
    LEA fmt src dst -> patch2 (LEA fmt) src dst
    ADD fmt src dst -> patch2 (ADD fmt) src dst
    ADC fmt src dst -> patch2 (ADC fmt) src dst
    SUB fmt src dst -> patch2 (SUB fmt) src dst
    SBB fmt src dst -> patch2 (SBB fmt) src dst
    IMUL fmt src dst -> patch2 (IMUL fmt) src dst
    IMUL2 fmt src -> patch1 (IMUL2 fmt) src
    MUL fmt src dst -> patch2 (MUL fmt) src dst
    MUL2 fmt src -> patch1 (MUL2 fmt) src
    IDIV fmt op -> patch1 (IDIV fmt) op
    DIV fmt op -> patch1 (DIV fmt) op
    ADD_CC fmt src dst -> patch2 (ADD_CC fmt) src dst
    SUB_CC fmt src dst -> patch2 (SUB_CC fmt) src dst
    AND fmt src dst -> patch2 (AND fmt) src dst
    OR fmt src dst -> patch2 (OR fmt) src dst
    XOR fmt src dst -> patch2 (XOR fmt) src dst
    NOT fmt op -> patch1 (NOT fmt) op
    BSWAP fmt reg -> BSWAP fmt (env reg)
    NEGI fmt op -> patch1 (NEGI fmt) op
    SHL fmt imm dst -> patch1 (SHL fmt imm) dst
    SAR fmt imm dst -> patch1 (SAR fmt imm) dst
    SHR fmt imm dst -> patch1 (SHR fmt imm) dst
    BT fmt imm src -> patch1 (BT fmt imm) src
    TEST fmt src dst -> patch2 (TEST fmt) src dst
    CMP fmt src dst -> patch2 (CMP fmt) src dst
    PUSH fmt op -> patch1 (PUSH fmt) op
    POP fmt op -> patch1 (POP fmt) op
    SETCC cond op -> patch1 (SETCC cond) op
    JMP op regs -> JMP (patchOp op) regs
    JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl

    GMOV src dst -> GMOV (env src) (env dst)
    GLD fmt src dst -> GLD fmt (lookupAddr src) (env dst)
    GST fmt src dst -> GST fmt (env src) (lookupAddr dst)

    GLDZ dst -> GLDZ (env dst)
    GLD1 dst -> GLD1 (env dst)

    GFTOI src dst -> GFTOI (env src) (env dst)
    GDTOI src dst -> GDTOI (env src) (env dst)

    GITOF src dst -> GITOF (env src) (env dst)
    GITOD src dst -> GITOD (env src) (env dst)

    GDTOF src dst -> GDTOF (env src) (env dst)

    GADD fmt s1 s2 dst -> GADD fmt (env s1) (env s2) (env dst)
    GSUB fmt s1 s2 dst -> GSUB fmt (env s1) (env s2) (env dst)
    GMUL fmt s1 s2 dst -> GMUL fmt (env s1) (env s2) (env dst)
    GDIV fmt s1 s2 dst -> GDIV fmt (env s1) (env s2) (env dst)

    GCMP fmt src1 src2 -> GCMP fmt (env src1) (env src2)
    GABS fmt src dst -> GABS fmt (env src) (env dst)
    GNEG fmt src dst -> GNEG fmt (env src) (env dst)
    GSQRT fmt src dst -> GSQRT fmt (env src) (env dst)
    GSIN fmt l1 l2 src dst -> GSIN fmt l1 l2 (env src) (env dst)
    GCOS fmt l1 l2 src dst -> GCOS fmt l1 l2 (env src) (env dst)
    GTAN fmt l1 l2 src dst -> GTAN fmt l1 l2 (env src) (env dst)

    CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
    CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
    CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
    CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
    CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
    CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
    FDIV fmt src dst -> FDIV fmt (patchOp src) (patchOp dst)

    CALL (Left _) _ -> instr
    CALL (Right reg) p -> CALL (Right (env reg)) p

    FETCHGOT reg -> FETCHGOT (env reg)
    FETCHPC reg -> FETCHPC (env reg)

    NOP -> instr
    COMMENT _ -> instr
    LOCATION {} -> instr
    DELTA _ -> instr

    JXX _ _ -> instr
    JXX_GBL _ _ -> instr
    CLTD _ -> instr

    POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
    BSF fmt src dst -> BSF fmt (patchOp src) (env dst)
    BSR fmt src dst -> BSR fmt (patchOp src) (env dst)

    PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)

    LOCK i -> LOCK (x86_patchRegsOfInstr i env)
    XADD fmt src dst -> patch2 (XADD fmt) src dst
    CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst
    MFENCE -> instr

    _other -> panic "patchRegs: unrecognised instr"

  where
    patch1 :: (Operand -> a) -> Operand -> a
    patch1 insn op = insn $! patchOp op
    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst

    patchOp (OpReg reg) = OpReg $! env reg
    patchOp (OpImm imm) = OpImm imm
    patchOp (OpAddr ea) = OpAddr $! lookupAddr ea

    lookupAddr (ImmAddr imm off) = ImmAddr imm off
    lookupAddr (AddrBaseIndex base index disp)
      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
      where
        lookupBase EABaseNone = EABaseNone
        lookupBase EABaseRip = EABaseRip
        lookupBase (EABaseReg r) = EABaseReg $! env r

        lookupIndex EAIndexNone = EAIndexNone
        lookupIndex (EAIndex r i) = (EAIndex $! env r) i


--------------------------------------------------------------------------------
x86_isJumpishInstr
        :: Instr -> Bool

x86_isJumpishInstr instr
 = case instr of
        JMP{} -> True
        JXX{} -> True
        JXX_GBL{} -> True
        JMP_TBL{} -> True
        CALL{} -> True
        _ -> False


x86_jumpDestsOfInstr
        :: Instr
        -> [BlockId]

x86_jumpDestsOfInstr insn
 = case insn of
        JXX _ id -> [id]
        JMP_TBL _ ids _ _ -> [id | Just id <- ids]
        _ -> []


x86_patchJumpInstr
        :: Instr -> (BlockId -> BlockId) -> Instr

x86_patchJumpInstr insn patchF
 = case insn of
        JXX cc id -> JXX cc (patchF id)
        JMP_TBL op ids section lbl
          -> JMP_TBL op (map (fmap patchF) ids) section lbl
        _ -> insn




-- -----------------------------------------------------------------------------
-- | Make a spill instruction.
x86_mkSpillInstr
    :: DynFlags
    -> Reg      -- register to spill
    -> Int      -- current stack delta
    -> Int      -- spill slot to use
    -> Instr

x86_mkSpillInstr dflags reg delta slot
  = let off = spillSlotToOffset platform slot - delta
    in
    case targetClassOfReg platform reg of
        RcInteger -> MOV (archWordFormat is32Bit)
                         (OpReg reg) (OpAddr (spRel dflags off))
        RcDouble -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -}
        RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
        _ -> panic "X86.mkSpillInstr: no match"
    where platform = targetPlatform dflags
          is32Bit = target32Bit platform

-- | Make a spill reload instruction.
x86_mkLoadInstr
    :: DynFlags
    -> Reg      -- register to load
    -> Int      -- current stack delta
    -> Int      -- spill slot to use
    -> Instr

x86_mkLoadInstr dflags reg delta slot
  = let off = spillSlotToOffset platform slot - delta
    in
    case targetClassOfReg platform reg of
        RcInteger -> MOV (archWordFormat is32Bit)
                         (OpAddr (spRel dflags off)) (OpReg reg)
        RcDouble -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -}
        RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
        _ -> panic "X86.x86_mkLoadInstr"
    where platform = targetPlatform dflags
          is32Bit = target32Bit platform

spillSlotSize :: Platform -> Int
spillSlotSize platform = if is32Bit then 12 else 8
    where is32Bit = target32Bit platform

maxSpillSlots :: DynFlags -> Int
maxSpillSlots dflags
    = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize (targetPlatform dflags)) - 1
--  = 0 -- useful for testing allocMoreStack
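
-- A worked example (illustrative only; the real value comes from DynFlags):
-- assuming rESERVED_C_STACK_BYTES is 16384, a 64-bit target gets
-- (16384 - 64) `div` 8 - 1 = 2039 usable spill slots.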

-- number of bytes that the stack pointer should be aligned to
stackAlign :: Int
stackAlign = 16

-- convert a spill slot number to a *byte* offset, with no sign:
-- decide on a per arch basis whether you are spilling above or below
-- the C stack pointer.
spillSlotToOffset :: Platform -> Int -> Int
spillSlotToOffset platform slot
   = 64 + spillSlotSize platform * slot
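
-- For instance, following the definitions above: on a 64-bit target
-- slot 0 is at byte offset 64, slot 1 at 72, slot 2 at 80; on a 32-bit
-- target the slots are 12 bytes apart (64, 76, 88, ...).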

--------------------------------------------------------------------------------

-- | See if this instruction is telling us the current C stack delta
x86_takeDeltaInstr
        :: Instr
        -> Maybe Int

x86_takeDeltaInstr instr
 = case instr of
        DELTA i -> Just i
        _ -> Nothing


x86_isMetaInstr
        :: Instr
        -> Bool

x86_isMetaInstr instr
 = case instr of
        COMMENT{} -> True
        LOCATION{} -> True
        LDATA{} -> True
        NEWBLOCK{} -> True
        DELTA{} -> True
        _ -> False



-- | Make a reg-reg move instruction.
-- (On some architectures, e.g. SPARC v8, there are no instructions to
-- move directly between floating point and integer regs; such a move
-- has to go via memory.  On x86 every case below is a single move
-- instruction.)
--
x86_mkRegRegMoveInstr
    :: Platform
    -> Reg
    -> Reg
    -> Instr

x86_mkRegRegMoveInstr platform src dst
 = case targetClassOfReg platform src of
        RcInteger -> case platformArch platform of
                        ArchX86 -> MOV II32 (OpReg src) (OpReg dst)
                        ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst)
                        _ -> panic "x86_mkRegRegMoveInstr: Bad arch"
        RcDouble -> GMOV src dst
        RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
        _ -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"

-- | Check whether an instruction represents a reg-reg move.
-- The register allocator attempts to eliminate reg->reg moves whenever it can,
-- by assigning the src and dest temporaries to the same real register.
--
x86_takeRegRegMoveInstr
    :: Instr
    -> Maybe (Reg,Reg)

x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
    = Just (r1,r2)

x86_takeRegRegMoveInstr _ = Nothing


-- | Make an unconditional branch instruction.
x86_mkJumpInstr
    :: BlockId
    -> [Instr]

x86_mkJumpInstr id
    = [JXX ALWAYS id]


x86_mkStackAllocInstr
    :: Platform
    -> Int
    -> Instr
x86_mkStackAllocInstr platform amount
  = case platformArch platform of
      ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
      ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
      _ -> panic "x86_mkStackAllocInstr"

x86_mkStackDeallocInstr
    :: Platform
    -> Int
    -> Instr
x86_mkStackDeallocInstr platform amount
  = case platformArch platform of
      ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
      ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)
      _ -> panic "x86_mkStackDeallocInstr"
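
-- Roughly speaking (the exact syntax is the pretty-printer's business),
-- on a 64-bit target these come out as "subq $amount, %rsp" and
-- "addq $amount, %rsp" respectively.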

i386_insert_ffrees
    :: [GenBasicBlock Instr]
    -> [GenBasicBlock Instr]

i386_insert_ffrees blocks
   | any (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ]
   = map insertGFREEs blocks
   | otherwise
   = blocks
 where
   insertGFREEs (BasicBlock id insns)
     = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns)

insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
insertBeforeNonlocalTransfers insert insns
     = foldr p [] insns
  where p insn r = case insn of
                     CALL _ _ -> insert : insn : r
                     JMP _ _ -> insert : insn : r
                     JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
                     _ -> insn : r
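
-- A sketch of the effect on one block (schematic, not real syntax):
--     [insn1, insn2, CALL ...]   ==>   [insn1, insn2, GFREE, CALL ...]
-- i.e. GFREE lands immediately before each CALL/JMP, so the x87 register
-- stack is cleared before control leaves code that used the fake FP regs.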


-- if you ever add a new FP insn to the fake x86 FP insn set,
-- you must update this too
is_G_instr :: Instr -> Bool
is_G_instr instr
 = case instr of
        GMOV{} -> True
        GLD{} -> True
        GST{} -> True
        GLDZ{} -> True
        GLD1{} -> True
        GFTOI{} -> True
        GDTOI{} -> True
        GITOF{} -> True
        GITOD{} -> True
        GDTOF{} -> True
        GADD{} -> True
        GDIV{} -> True
        GSUB{} -> True
        GMUL{} -> True
        GCMP{} -> True
        GABS{} -> True
        GNEG{} -> True
        GSQRT{} -> True
        GSIN{} -> True
        GCOS{} -> True
        GTAN{} -> True
        GFREE -> panic "is_G_instr: GFREE (!)"
        _ -> False


--
-- Note [extra spill slots]
--
-- If the register allocator used more spill slots than we have
-- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
-- C stack space on entry and exit from this proc.  Therefore we
-- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
-- before every non-local jump.
--
-- This became necessary when the new codegen started bundling entire
-- functions together into one proc, because the register allocator
-- assigns a different stack slot to each virtual reg within a proc.
-- To avoid using so many slots we could also:
--
--   - split up the proc into connected components before code generation
--
--   - rename the virtual regs, so that we re-use vreg names and hence
--     stack slots for non-overlapping vregs.
--
-- Note that when a block is both a non-local entry point (with an
-- info table) and a local branch target, we have to split it into
-- two, like so:
--
--    <info table>
--    L:
--       <code>
--
-- becomes
--
--    <info table>
--    L:
--       subl $N, %rsp
--       jmp Lnew
--    Lnew:
--       <code>
--
-- and all branches pointing to L are retargeted to point to Lnew.
-- Otherwise, we would repeat the %rsp adjustment for each branch to
-- L.
--
allocMoreStack
  :: Platform
  -> Int
  -> NatCmmDecl statics X86.Instr.Instr
  -> UniqSM (NatCmmDecl statics X86.Instr.Instr)

allocMoreStack _ _ top@(CmmData _ _) = return top
allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
    let entries = entryBlocks proc

    uniqs <- replicateM (length entries) getUniqueM

    let
      delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
        where x = slots * spillSlotSize platform -- sp delta
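      -- e.g. (illustrative): 13 extra slots on a 64-bit target give
      -- x = 13 * 8 = 104, which rounds up to delta = 112 with stackAlign = 16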

      alloc = mkStackAllocInstr platform delta
      dealloc = mkStackDeallocInstr platform delta

      new_blockmap :: BlockEnv BlockId
      new_blockmap = mapFromList (zip entries (map mkBlockId uniqs))

      insert_stack_insns (BasicBlock id insns)
         | Just new_blockid <- mapLookup id new_blockmap
         = [ BasicBlock id [alloc, JXX ALWAYS new_blockid]
           , BasicBlock new_blockid block' ]
         | otherwise
         = [ BasicBlock id block' ]
         where
           block' = foldr insert_dealloc [] insns

      insert_dealloc insn r = case insn of
           JMP _ _ -> dealloc : insn : r
           JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
           _other -> x86_patchJumpInstr insn retarget : r
               where retarget b = fromMaybe b (mapLookup b new_blockmap)

      new_code = concatMap insert_stack_insns code
    -- in
    return (CmmProc info lbl live (ListGraph new_code))


data JumpDest = DestBlockId BlockId | DestImm Imm

getJumpDestBlockId :: JumpDest -> Maybe BlockId
getJumpDestBlockId (DestBlockId bid) = Just bid
getJumpDestBlockId _ = Nothing

canShortcut :: Instr -> Maybe JumpDest
canShortcut (JXX ALWAYS id) = Just (DestBlockId id)
canShortcut (JMP (OpImm imm) _) = Just (DestImm imm)
canShortcut _ = Nothing


-- This helper shortcuts a sequence of branches.
-- The blockset helps avoid following cycles.
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
  where shortcutJump' fn seen insn@(JXX cc id) =
          if setMember id seen then insn
          else case fn id of
                 Nothing -> insn
                 Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
                 Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm)
               where seen' = setInsert id seen
        shortcutJump' _ _ other = other
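
-- For example (a sketch): if fn maps B1 to DestBlockId B2 and B2 to
-- DestBlockId B3, then shortcutJump fn (JXX EQQ B1) yields JXX EQQ B3;
-- the 'seen' set stops the recursion if the chain of blocks ever cycles.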

-- Here because it knows about JumpDest
shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
shortcutStatics fn (align, Statics lbl statics)
  = (align, Statics lbl $ map (shortcutStatic fn) statics)
  -- we need to get the jump tables, so apply the mapping to the entries
  -- of a CmmData too.

shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
  | Just uq <- maybeAsmTemp lab = shortBlockId fn emptyUniqSet (mkBlockId uq)
  | otherwise                   = lab

shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn (CmmStaticLit (CmmLabel lab))
  = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
  = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
  -- slightly dodgy, we're ignoring the second label, but this
  -- works with the way we use CmmLabelDiffOff for jump tables now.
shortcutStatic _ other_static
  = other_static

shortBlockId
  :: (BlockId -> Maybe JumpDest)
  -> UniqSet Unique
  -> BlockId
  -> CLabel

shortBlockId fn seen blockid =
  case (elementOfUniqSet uq seen, fn blockid) of
    (True, _) -> mkAsmTempLabel uq
    (_, Nothing) -> mkAsmTempLabel uq
    (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
    (_, Just (DestImm (ImmCLbl lbl))) -> lbl
    (_, _other) -> panic "shortBlockId"
  where uq = getUnique blockid