-----------------------------------------------------------------------------
--
-- Machine-dependent assembly language
--
-- (c) The University of Glasgow 1993-2004
--
-----------------------------------------------------------------------------

#include "HsVersions.h"
#include "nativeGen/NCG.h"

module X86.Instr (Instr(..), Operand(..),
                  getJumpDestBlockId, canShortcut, shortcutStatics,
                  shortcutJump, i386_insert_ffrees,
                  maxSpillSlots, archWordSize)
where

import X86.Cond
import X86.Regs
import Instruction
import Size
import RegClass
import Reg
import TargetReg

import BlockId
import OldCmm
import FastString
import FastBool
import Outputable
import Platform
import Constants (rESERVED_C_STACK_BYTES)

import BasicTypes (Alignment)
import CLabel
import UniqSet
import Unique

-- Size of an x86/x86_64 memory address, in bytes.
--
archWordSize :: Bool -> Size
archWordSize is32Bit
 | is32Bit   = II32
 | otherwise = II64

-- | Instruction instance for x86 instruction set.
instance Instruction Instr where
        regUsageOfInstr         = x86_regUsageOfInstr
        patchRegsOfInstr        = x86_patchRegsOfInstr
        isJumpishInstr          = x86_isJumpishInstr
        jumpDestsOfInstr        = x86_jumpDestsOfInstr
        patchJumpInstr          = x86_patchJumpInstr
        mkSpillInstr            = x86_mkSpillInstr
        mkLoadInstr             = x86_mkLoadInstr
        takeDeltaInstr          = x86_takeDeltaInstr
        isMetaInstr             = x86_isMetaInstr
        mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
        takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
        mkJumpInstr             = x86_mkJumpInstr


-- -----------------------------------------------------------------------------
-- Intel x86 instructions

{-
Intel, in their infinite wisdom, selected a stack model for floating
point registers on x86. That might have made sense back in 1979 --
nowadays we can see it for the nonsense it really is. A stack model
fits poorly with the existing nativeGen infrastructure, which assumes
flat integer and FP register sets. Prior to this commit, nativeGen
could not generate correct x86 FP code -- to do so would have meant
somehow working the register-stack paradigm into the register
allocator and spiller, which sounds very difficult.

We have decided to cheat, and go for a simple fix which requires no
infrastructure modifications, at the expense of generating ropey but
correct FP code. All notions of the x86 FP stack and its insns have
been removed. Instead, we pretend (to the instruction selector and
register allocator) that x86 has six floating point registers, %fake0
.. %fake5, which can be used in the usual flat manner. We further
claim that x86 has floating point instructions very similar to SPARC
and Alpha, that is, a simple 3-operand register-register arrangement.
Code generation and register allocation proceed on this basis.

When we come to print out the final assembly, our convenient fiction
is converted to dismal reality. Each fake instruction is
independently converted to a series of real x86 instructions.
%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
arithmetic operations, the two operands are pushed onto the top of the
FP stack, the operation done, and the result copied back into the
relevant register. There are only six %fake registers because 2 are
needed for the translation, and x86 has 8 in total.

The translation is inefficient but is simple and it works. A cleverer
translation would handle a sequence of insns, simulating the FP stack
contents, would not impose a fixed mapping from %fake to %st regs, and
hopefully could avoid most of the redundant reg-reg moves of the
current translation.

We might as well make use of whatever unique FP facilities Intel have
chosen to bless us with (let's not be churlish, after all).
Hence GLDZ and GLD1. Bwahahahahahahaha!
-}
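
{-
Purely for illustration (the real fake-to-x87 translation is done by the
pretty-printer, not in this module): a single fake insn such as

        GADD FF64 %fake0 %fake1 %fake2

ends up as a short push/operate/pop sequence on the real register stack,
something along the lines of

        fld   %st(0)     -- push src1
        fadd  %st(2)     -- combine with src2
        fstp  %st(3)     -- pop the result into dst

The exact stack slots and opcodes chosen by the printer differ from this
sketch; the point is only that one flat 3-operand fake instruction becomes
several real x87 instructions.
-}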

{-
Note [x86 Floating point precision]

Intel's internal floating point registers are by default 80 bit
extended precision. This means that all operations done on values in
registers are done at 80 bits, and unless the intermediate values are
truncated to the appropriate size (32 or 64 bits) by storing in
memory, calculations in registers will give different results from
calculations which pass intermediate values in memory (eg. via
function calls).

One solution is to set the FPU into 64 bit precision mode. Some OSs
do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
that this will only affect 64-bit precision arithmetic; 32-bit
calculations will still be done at 64-bit precision in registers. So
it doesn't solve the whole problem.

There's also the issue of what the C library is expecting in terms of
precision. It seems to be the case that glibc on Linux expects the
FPU to be set to 80 bit precision, so setting it to 64 bit could have
unexpected effects. Changing the default could have undesirable
effects on other 3rd-party library code too, so the right thing would
be to save/restore the FPU control word across Haskell code if we were
to do this.

gcc's -ffloat-store gives consistent results by always storing the
results of floating-point calculations in memory, which works for both
32 and 64-bit precision. However, it only affects the values of
user-declared floating point variables in C, not intermediate results.
GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
flag).

Another problem is how to spill floating point registers in the
register allocator. Should we spill the whole 80 bits, or just 64?
On an OS which is set to 64 bit precision, spilling 64 is fine. On
Linux, spilling 64 bits will round the results of some operations.
This is what gcc does. Spilling at 80 bits requires taking up a full
128 bit slot (so we get alignment). We spill at 80-bits and ignore
the alignment problems.

In the future [edit: now available in GHC 7.0.1, with the -msse2
flag], we'll use the SSE registers for floating point. This requires
a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
float ops), which means P4 or Xeon and above. Using SSE will solve
all these problems, because the SSE registers use fixed 32 bit or 64
bit precision.

--SDM 1/2003
-}
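
{-
A concrete instance of the problem described above: 2^53 + 1 is not
representable as a 64-bit double, so

        (9007199254740992.0 + 1.0) - 9007199254740992.0

gives 0.0 if the intermediate sum is rounded to 64 bits (e.g. stored to
memory between the two operations), but 1.0 if the sum stays in an 80-bit
x87 register, whose 64-bit mantissa holds it exactly.
-}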

data Instr
        -- comment pseudo-op
        = COMMENT FastString

        -- some static data spat out during code
        -- generation. Will be extracted before
        -- pretty-printing.
        | LDATA   Section (Alignment, CmmStatics)

        -- start a new basic block. Useful during
        -- codegen, removed later. Preceding
        -- instruction should be a jump, as per the
        -- invariants for a BasicBlock (see Cmm).
        | NEWBLOCK BlockId

        -- specify current stack offset for
        -- benefit of subsequent passes
        | DELTA   Int

        -- Moves.
        | MOV         Size Operand Operand
        | MOVZxL      Size Operand Operand -- size is the size of operand 1
        | MOVSxL      Size Operand Operand -- size is the size of operand 1
        -- x86_64 note: plain mov into a 32-bit register always zero-extends
        -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
        -- don't affect the high bits of the register.

        -- Load effective address (also a very useful three-operand add instruction :-)
        | LEA         Size Operand Operand

        -- Int Arithmetic.
        | ADD         Size Operand Operand
        | ADC         Size Operand Operand
        | SUB         Size Operand Operand

        | MUL         Size Operand Operand
        | IMUL        Size Operand Operand      -- signed int mul
        | IMUL2       Size Operand              -- %edx:%eax = operand * %eax

        | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
        | IDIV        Size Operand              -- ditto, but signed

        -- Simple bit-twiddling.
        | AND         Size Operand Operand
        | OR          Size Operand Operand
        | XOR         Size Operand Operand
        | NOT         Size Operand
        | NEGI        Size Operand              -- NEG instruction (name clash with Cond)

        -- Shifts (amount may be immediate or %cl only)
        | SHL         Size Operand{-amount-} Operand
        | SAR         Size Operand{-amount-} Operand
        | SHR         Size Operand{-amount-} Operand

        | BT          Size Imm Operand
        | NOP

        -- x86 Float Arithmetic.
        -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
        -- as single instructions right up until we spit them out.
        -- all the 3-operand fake fp insns are src1 src2 dst
        -- and furthermore are constrained to be fp regs only.
        -- IMPORTANT: keep is_G_insn up to date with any changes here
        | GMOV        Reg Reg                   -- src(fpreg), dst(fpreg)
        | GLD         Size AddrMode Reg         -- src, dst(fpreg)
        | GST         Size Reg AddrMode         -- src(fpreg), dst

        | GLDZ        Reg                       -- dst(fpreg)
        | GLD1        Reg                       -- dst(fpreg)

        | GFTOI       Reg Reg                   -- src(fpreg), dst(intreg)
        | GDTOI       Reg Reg                   -- src(fpreg), dst(intreg)

        | GITOF       Reg Reg                   -- src(intreg), dst(fpreg)
        | GITOD       Reg Reg                   -- src(intreg), dst(fpreg)

        | GDTOF       Reg Reg                   -- src(fpreg), dst(fpreg)

        | GADD        Size Reg Reg Reg          -- src1, src2, dst
        | GDIV        Size Reg Reg Reg          -- src1, src2, dst
        | GSUB        Size Reg Reg Reg          -- src1, src2, dst
        | GMUL        Size Reg Reg Reg          -- src1, src2, dst

        -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
        -- Compare src1 with src2; set the Zero flag iff the numbers are
        -- comparable and the comparison is True. Subsequent code must
        -- test the %eflags zero flag regardless of the supplied Cond.
        | GCMP        Cond Reg Reg              -- src1, src2

        | GABS        Size Reg Reg              -- src, dst
        | GNEG        Size Reg Reg              -- src, dst
        | GSQRT       Size Reg Reg              -- src, dst
        | GSIN        Size CLabel CLabel Reg Reg -- src, dst
        | GCOS        Size CLabel CLabel Reg Reg -- src, dst
        | GTAN        Size CLabel CLabel Reg Reg -- src, dst

        | GFREE                                 -- do ffree on all x86 regs; an ugly hack


        -- SSE2 floating point: we use a restricted set of the available SSE2
        -- instructions for floating-point.
        -- use MOV for moving (either movss or movsd (movlpd better?))
        | CVTSS2SD      Reg Reg                 -- F32 to F64
        | CVTSD2SS      Reg Reg                 -- F64 to F32
        | CVTTSS2SIQ    Size Operand Reg        -- F32 to I32/I64 (with truncation)
        | CVTTSD2SIQ    Size Operand Reg        -- F64 to I32/I64 (with truncation)
        | CVTSI2SS      Size Operand Reg        -- I32/I64 to F32
        | CVTSI2SD      Size Operand Reg        -- I32/I64 to F64

        -- use ADD & SUB for arithmetic. In both cases, operands
        -- are Operand Reg.

        -- SSE2 floating-point division:
        | FDIV          Size Operand Operand    -- divisor, dividend(dst)

        -- use CMP for comparisons. ucomiss and ucomisd instructions
        -- compare single/double prec floating point respectively.

        | SQRT          Size Operand Reg        -- src, dst


        -- Comparison
        | TEST          Size Operand Operand
        | CMP           Size Operand Operand
        | SETCC         Cond Operand

        -- Stack Operations.
        | PUSH          Size Operand
        | POP           Size Operand
        -- both unused (SDM):
        --  | PUSHA
        --  | POPA

        -- Jumping around.
        | JMP         Operand
        | JXX         Cond BlockId              -- includes unconditional branches
        | JXX_GBL     Cond Imm                  -- non-local version of JXX
        -- Table jump
        | JMP_TBL     Operand                   -- Address to jump to
                      [Maybe BlockId]           -- Blocks in the jump table
                      Section                   -- Data section jump table should be put in
                      CLabel                    -- Label of jump table
        | CALL        (Either Imm Reg) [Reg]

        -- Other things.
        | CLTD Size                             -- sign extend %eax into %edx:%eax

        | FETCHGOT    Reg   -- pseudo-insn for ELF position-independent code
                            -- pretty-prints as
                            --       call 1f
                            -- 1:    popl %reg
                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
        | FETCHPC     Reg   -- pseudo-insn for Darwin position-independent code
                            -- pretty-prints as
                            --       call 1f
                            -- 1:    popl %reg

        -- SSE4.2
        | POPCNT      Size Operand Reg          -- src, dst

data Operand
        = OpReg  Reg            -- register
        | OpImm  Imm            -- immediate value
        | OpAddr AddrMode       -- memory reference

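-- Illustrative examples only (these exact instructions appear nowhere in
-- the compiler): the three operand forms cover register, immediate and
-- memory addressing, e.g. given a Platform value 'platform',
--
--      MOV II32 (OpImm (ImmInt 42))         (OpReg eax)   -- movl $42, %eax
--      MOV II32 (OpAddr (spRel platform 1)) (OpReg eax)   -- load from a stack slot
--
-- 'eax', 'ImmInt' and 'spRel' come from X86.Regs; the assembly shown is
-- only indicative of what the pretty-printer emits for these operands.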


x86_regUsageOfInstr :: Instr -> RegUsage
x86_regUsageOfInstr instr
 = case instr of
    MOV    _ src dst    -> usageRW src dst
    MOVZxL _ src dst    -> usageRW src dst
    MOVSxL _ src dst    -> usageRW src dst
    LEA    _ src dst    -> usageRW src dst
    ADD    _ src dst    -> usageRM src dst
    ADC    _ src dst    -> usageRM src dst
    SUB    _ src dst    -> usageRM src dst
    IMUL   _ src dst    -> usageRM src dst
    IMUL2  _ src        -> mkRU (eax:use_R src) [eax,edx]
    MUL    _ src dst    -> usageRM src dst
    DIV    _ op         -> mkRU (eax:edx:use_R op) [eax,edx]
    IDIV   _ op         -> mkRU (eax:edx:use_R op) [eax,edx]
    AND    _ src dst    -> usageRM src dst
    OR     _ src dst    -> usageRM src dst

    XOR    _ (OpReg src) (OpReg dst)
        | src == dst    -> mkRU [] [dst]

    XOR    _ src dst    -> usageRM src dst
    NOT    _ op         -> usageM op
    NEGI   _ op         -> usageM op
    SHL    _ imm dst    -> usageRM imm dst
    SAR    _ imm dst    -> usageRM imm dst
    SHR    _ imm dst    -> usageRM imm dst
    BT     _ _   src    -> mkRUR (use_R src)

    PUSH   _ op         -> mkRUR (use_R op)
    POP    _ op         -> mkRU [] (def_W op)
    TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
    CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
    SETCC  _ op         -> mkRU [] (def_W op)
    JXX    _ _          -> mkRU [] []
    JXX_GBL _ _         -> mkRU [] []
    JMP     op          -> mkRUR (use_R op)
    JMP_TBL op _ _ _    -> mkRUR (use_R op)
    CALL (Left _)  params   -> mkRU params callClobberedRegs
    CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
    CLTD   _            -> mkRU [eax] [edx]
    NOP                 -> mkRU [] []

    GMOV   src dst      -> mkRU [src] [dst]
    GLD    _ src dst    -> mkRU (use_EA src) [dst]
    GST    _ src dst    -> mkRUR (src : use_EA dst)

    GLDZ   dst          -> mkRU [] [dst]
    GLD1   dst          -> mkRU [] [dst]

    GFTOI  src dst      -> mkRU [src] [dst]
    GDTOI  src dst      -> mkRU [src] [dst]

    GITOF  src dst      -> mkRU [src] [dst]
    GITOD  src dst      -> mkRU [src] [dst]

    GDTOF  src dst      -> mkRU [src] [dst]

    GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]

    GCMP   _ src1 src2  -> mkRUR [src1,src2]
    GABS   _ src dst    -> mkRU [src] [dst]
    GNEG   _ src dst    -> mkRU [src] [dst]
    GSQRT  _ src dst    -> mkRU [src] [dst]
    GSIN   _ _ _ src dst -> mkRU [src] [dst]
    GCOS   _ _ _ src dst -> mkRU [src] [dst]
    GTAN   _ _ _ src dst -> mkRU [src] [dst]

    CVTSS2SD   src dst   -> mkRU [src] [dst]
    CVTSD2SS   src dst   -> mkRU [src] [dst]
    CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
    CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
    CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
    CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
    FDIV _     src dst   -> usageRM src dst

    FETCHGOT reg        -> mkRU [] [reg]
    FETCHPC  reg        -> mkRU [] [reg]

    COMMENT _           -> noUsage
    DELTA   _           -> noUsage

    POPCNT _ src dst    -> mkRU (use_R src) [dst]

    _other              -> panic "regUsage: unrecognised instr"

 where
    -- 2 operand form; first operand Read; second Written
    usageRW :: Operand -> Operand -> RegUsage
    usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
    usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
    usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"

    -- 2 operand form; first operand Read; second Modified
    usageRM :: Operand -> Operand -> RegUsage
    usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
    usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
    usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"

    -- 1 operand form; operand Modified
    usageM :: Operand -> RegUsage
    usageM (OpReg reg)          = mkRU [reg] [reg]
    usageM (OpAddr ea)          = mkRUR (use_EA ea)
    usageM _                    = panic "X86.RegInfo.usageM: no match"

    -- Registers defd when an operand is written.
    def_W (OpReg reg)           = [reg]
    def_W (OpAddr _ )           = []
    def_W _                     = panic "X86.RegInfo.def_W: no match"

    -- Registers used when an operand is read.
    use_R (OpReg reg)  = [reg]
    use_R (OpImm _)    = []
    use_R (OpAddr ea)  = use_EA ea

    -- Registers used to compute an effective address.
    use_EA (ImmAddr _ _) = []
    use_EA (AddrBaseIndex base index _) =
        use_base base $! use_index index
        where use_base (EABaseReg r) x = r : x
              use_base _           x   = x
              use_index EAIndexNone    = []
              use_index (EAIndex i _)  = [i]

    mkRUR src = src' `seq` RU src' []
        where src' = filter interesting src

    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
        where src' = filter interesting src
              dst' = filter interesting dst

    interesting :: Reg -> Bool
    interesting (RegVirtual _)              = True
    interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
    interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"


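-- For example, before the 'interesting' filter is applied:
--
--      ADD II32 (OpReg ebx) (OpReg eax)
--
-- is reported by usageRM as reads = [ebx, eax], writes = [eax], because the
-- destination of an ADD is both read and modified, whereas a MOV of the same
-- operands goes through usageRW and reads only [ebx] while writing [eax].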

x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
x86_patchRegsOfInstr instr env
 = case instr of
    MOV  sz src dst     -> patch2 (MOV  sz) src dst
    MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
    MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
    LEA  sz src dst     -> patch2 (LEA  sz) src dst
    ADD  sz src dst     -> patch2 (ADD  sz) src dst
    ADC  sz src dst     -> patch2 (ADC  sz) src dst
    SUB  sz src dst     -> patch2 (SUB  sz) src dst
    IMUL sz src dst     -> patch2 (IMUL sz) src dst
    IMUL2 sz src        -> patch1 (IMUL2 sz) src
    MUL sz src dst      -> patch2 (MUL sz) src dst
    IDIV sz op          -> patch1 (IDIV sz) op
    DIV sz op           -> patch1 (DIV sz) op
    AND  sz src dst     -> patch2 (AND  sz) src dst
    OR   sz src dst     -> patch2 (OR   sz) src dst
    XOR  sz src dst     -> patch2 (XOR  sz) src dst
    NOT  sz op          -> patch1 (NOT  sz) op
    NEGI sz op          -> patch1 (NEGI sz) op
    SHL  sz imm dst     -> patch1 (SHL sz imm) dst
    SAR  sz imm dst     -> patch1 (SAR sz imm) dst
    SHR  sz imm dst     -> patch1 (SHR sz imm) dst
    BT   sz imm src     -> patch1 (BT  sz imm) src
    TEST sz src dst     -> patch2 (TEST sz) src dst
    CMP  sz src dst     -> patch2 (CMP  sz) src dst
    PUSH sz op          -> patch1 (PUSH sz) op
    POP  sz op          -> patch1 (POP  sz) op
    SETCC cond op       -> patch1 (SETCC cond) op
    JMP op              -> patch1 JMP op
    JMP_TBL op ids s lbl-> JMP_TBL (patchOp op) ids s lbl

    GMOV src dst        -> GMOV (env src) (env dst)
    GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
    GST  sz src dst     -> GST sz (env src) (lookupAddr dst)

    GLDZ dst            -> GLDZ (env dst)
    GLD1 dst            -> GLD1 (env dst)

    GFTOI src dst       -> GFTOI (env src) (env dst)
    GDTOI src dst       -> GDTOI (env src) (env dst)

    GITOF src dst       -> GITOF (env src) (env dst)
    GITOD src dst       -> GITOD (env src) (env dst)

    GDTOF src dst       -> GDTOF (env src) (env dst)

    GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
    GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
    GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
    GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)

    GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
    GABS sz src dst     -> GABS sz (env src) (env dst)
    GNEG sz src dst     -> GNEG sz (env src) (env dst)
    GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
    GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
    GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
    GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)

    CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
    CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
    CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
    CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
    CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
    CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
    FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)

    CALL (Left _)  _    -> instr
    CALL (Right reg) p  -> CALL (Right (env reg)) p

    FETCHGOT reg        -> FETCHGOT (env reg)
    FETCHPC  reg        -> FETCHPC  (env reg)

    NOP                 -> instr
    COMMENT _           -> instr
    DELTA _             -> instr

    JXX _ _             -> instr
    JXX_GBL _ _         -> instr
    CLTD _              -> instr

    POPCNT sz src dst   -> POPCNT sz (patchOp src) (env dst)

    _other              -> panic "patchRegs: unrecognised instr"

 where
    patch1 :: (Operand -> a) -> Operand -> a
    patch1 insn op      = insn $! patchOp op
    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst

    patchOp (OpReg  reg) = OpReg $! env reg
    patchOp (OpImm  imm) = OpImm imm
    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea

    lookupAddr (ImmAddr imm off) = ImmAddr imm off
    lookupAddr (AddrBaseIndex base index disp)
      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
      where
        lookupBase EABaseNone       = EABaseNone
        lookupBase EABaseRip        = EABaseRip
        lookupBase (EABaseReg r)    = EABaseReg (env r)

        lookupIndex EAIndexNone     = EAIndexNone
        lookupIndex (EAIndex r i)   = EAIndex (env r) i

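-- For example, if 'env' maps a virtual register v to %ecx then
--
--      x86_patchRegsOfInstr (ADD II32 (OpReg v) (OpReg eax)) env
--
-- yields ADD II32 (OpReg ecx) (OpReg eax): every register occurrence,
-- including any buried inside an AddrBaseIndex operand, is rewritten
-- through 'env'.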

--------------------------------------------------------------------------------
x86_isJumpishInstr
        :: Instr -> Bool

x86_isJumpishInstr instr
 = case instr of
        JMP{}           -> True
        JXX{}           -> True
        JXX_GBL{}       -> True
        JMP_TBL{}       -> True
        CALL{}          -> True
        _               -> False


x86_jumpDestsOfInstr
        :: Instr
        -> [BlockId]

x86_jumpDestsOfInstr insn
 = case insn of
        JXX _ id          -> [id]
        JMP_TBL _ ids _ _ -> [id | Just id <- ids]
        _                 -> []


x86_patchJumpInstr
        :: Instr -> (BlockId -> BlockId) -> Instr

x86_patchJumpInstr insn patchF
 = case insn of
        JXX cc id       -> JXX cc (patchF id)
        JMP_TBL op ids section lbl
          -> JMP_TBL op (map (fmap patchF) ids) section lbl
        _               -> insn




-- -----------------------------------------------------------------------------
-- | Make a spill instruction.
x86_mkSpillInstr
        :: Platform
        -> Reg          -- register to spill
        -> Int          -- current stack delta
        -> Int          -- spill slot to use
        -> Instr

x86_mkSpillInstr platform reg delta slot
  = let off     = spillSlotToOffset is32Bit slot
    in
    let off_w = (off - delta) `div` (if is32Bit then 4 else 8)
    in case targetClassOfReg platform reg of
           RcInteger   -> MOV (archWordSize is32Bit)
                              (OpReg reg) (OpAddr (spRel platform off_w))
           RcDouble    -> GST FF80 reg (spRel platform off_w) {- RcFloat/RcDouble -}
           RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel platform off_w))
           _           -> panic "X86.mkSpillInstr: no match"
    where is32Bit = target32Bit platform

-- | Make a spill reload instruction.
x86_mkLoadInstr
        :: Platform
        -> Reg          -- register to load
        -> Int          -- current stack delta
        -> Int          -- spill slot to use
        -> Instr

x86_mkLoadInstr platform reg delta slot
  = let off     = spillSlotToOffset is32Bit slot
    in
    let off_w = (off-delta) `div` (if is32Bit then 4 else 8)
    in case targetClassOfReg platform reg of
           RcInteger   -> MOV (archWordSize is32Bit)
                              (OpAddr (spRel platform off_w)) (OpReg reg)
           RcDouble    -> GLD FF80 (spRel platform off_w) reg {- RcFloat/RcDouble -}
           RcDoubleSSE -> MOV FF64 (OpAddr (spRel platform off_w)) (OpReg reg)
           _           -> panic "X86.x86_mkLoadInstr"
    where is32Bit = target32Bit platform

spillSlotSize :: Bool -> Int
spillSlotSize is32Bit = if is32Bit then 12 else 8

maxSpillSlots :: Bool -> Int
maxSpillSlots is32Bit
    = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize is32Bit) - 1

-- convert a spill slot number to a *byte* offset, with no sign:
-- decide on a per arch basis whether you are spilling above or below
-- the C stack pointer.
spillSlotToOffset :: Bool -> Int -> Int
spillSlotToOffset is32Bit slot
   | slot >= 0 && slot < maxSpillSlots is32Bit
   = 64 + spillSlotSize is32Bit * slot
   | otherwise
   = pprPanic "spillSlotToOffset:"
              (   text "invalid spill location: " <> int slot
              $$  text "maxSpillSlots: "          <> int (maxSpillSlots is32Bit))
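
-- For example, on a 32-bit target (spillSlotSize = 12) slot 0 is at byte
-- offset 64, slot 1 at 76, slot 2 at 88, and so on; on a 64-bit target the
-- slots are 8 bytes apart from the same 64-byte base.  Offsets below 64 are
-- never used for spill slots.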

--------------------------------------------------------------------------------

-- | See if this instruction is telling us the current C stack delta
x86_takeDeltaInstr
        :: Instr
        -> Maybe Int

x86_takeDeltaInstr instr
 = case instr of
        DELTA i         -> Just i
        _               -> Nothing


x86_isMetaInstr
        :: Instr
        -> Bool

x86_isMetaInstr instr
 = case instr of
        COMMENT{}       -> True
        LDATA{}         -> True
        NEWBLOCK{}      -> True
        DELTA{}         -> True
        _               -> False



-- | Make a reg-reg move instruction.
-- The source and destination are assumed to be in the same register
-- class; cross-class moves (e.g. between integer and floating-point
-- registers) are not handled here.
--
x86_mkRegRegMoveInstr
        :: Platform
        -> Reg
        -> Reg
        -> Instr

x86_mkRegRegMoveInstr platform src dst
 = case targetClassOfReg platform src of
#if i386_TARGET_ARCH
        RcInteger   -> MOV II32 (OpReg src) (OpReg dst)
#else
        RcInteger   -> MOV II64 (OpReg src) (OpReg dst)
#endif
        RcDouble    -> GMOV src dst
        RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
        _           -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"

-- | Check whether an instruction represents a reg-reg move.
--      The register allocator attempts to eliminate reg->reg moves whenever it can,
--      by assigning the src and dest temporaries to the same real register.
--
x86_takeRegRegMoveInstr
        :: Instr
        -> Maybe (Reg,Reg)

x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
        = Just (r1,r2)

x86_takeRegRegMoveInstr _ = Nothing


-- | Make an unconditional branch instruction.
x86_mkJumpInstr
        :: BlockId
        -> [Instr]

x86_mkJumpInstr id
        = [JXX ALWAYS id]





i386_insert_ffrees
        :: [GenBasicBlock Instr]
        -> [GenBasicBlock Instr]

i386_insert_ffrees blocks
   | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
   = map ffree_before_nonlocal_transfers blocks

   | otherwise
   = blocks
 where
   ffree_before_nonlocal_transfers (BasicBlock id insns)
     = BasicBlock id (foldr p [] insns)
     where p insn r = case insn of
                        CALL _ _    -> GFREE : insn : r
                        JMP _       -> GFREE : insn : r
                        JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL"
                        _           -> insn : r
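
-- For example, a block that uses the fake FP regs and ends in a call,
--
--      ... ; GADD ... ; CALL (Left lbl) regs
--
-- is rewritten to
--
--      ... ; GADD ... ; GFREE ; CALL (Left lbl) regs
--
-- so that no stale x87 stack entries are live across the non-local
-- transfer.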

-- if you ever add a new FP insn to the fake x86 FP insn set,
-- you must update this too
is_G_instr :: Instr -> Bool
is_G_instr instr
   = case instr of
        GMOV{}          -> True
        GLD{}           -> True
        GST{}           -> True
        GLDZ{}          -> True
        GLD1{}          -> True
        GFTOI{}         -> True
        GDTOI{}         -> True
        GITOF{}         -> True
        GITOD{}         -> True
        GDTOF{}         -> True
        GADD{}          -> True
        GDIV{}          -> True
        GSUB{}          -> True
        GMUL{}          -> True
        GCMP{}          -> True
        GABS{}          -> True
        GNEG{}          -> True
        GSQRT{}         -> True
        GSIN{}          -> True
        GCOS{}          -> True
        GTAN{}          -> True
        GFREE           -> panic "is_G_instr: GFREE (!)"
        _               -> False


data JumpDest = DestBlockId BlockId | DestImm Imm

getJumpDestBlockId :: JumpDest -> Maybe BlockId
getJumpDestBlockId (DestBlockId bid) = Just bid
getJumpDestBlockId _                 = Nothing

canShortcut :: Instr -> Maybe JumpDest
canShortcut (JXX ALWAYS id)   = Just (DestBlockId id)
canShortcut (JMP (OpImm imm)) = Just (DestImm imm)
canShortcut _                 = Nothing


-- This helper shortcuts a sequence of branches.
-- The blockset helps avoid following cycles.
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
  where shortcutJump' fn seen insn@(JXX cc id) =
          if setMember id seen then insn
          else case fn id of
                 Nothing                -> insn
                 Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
                 Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
               where seen' = setInsert id seen
        shortcutJump' _ _ other = other
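
-- For example, if block B1 ends in JXX ALWAYS B2 and B2 consists solely of
-- JXX ALWAYS B3, then a lookup function mapping B2 to DestBlockId B3 lets
-- shortcutJump rewrite B1's branch to JXX ALWAYS B3 directly.  The 'seen'
-- set stops the rewrite from chasing a cycle such as B2 -> B3 -> B2.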

-- Here because it knows about JumpDest
shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
shortcutStatics fn (align, Statics lbl statics)
  = (align, Statics lbl $ map (shortcutStatic fn) statics)
  -- we need to get the jump tables, so apply the mapping to the entries
  -- of a CmmData too.

shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
  | Just uq <- maybeAsmTemp lab = shortBlockId fn emptyUniqSet (mkBlockId uq)
  | otherwise                   = lab

shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn (CmmStaticLit (CmmLabel lab))
  = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
  = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
  -- slightly dodgy, we're ignoring the second label, but this
  -- works with the way we use CmmLabelDiffOff for jump tables now.
shortcutStatic _ other_static
  = other_static

shortBlockId
        :: (BlockId -> Maybe JumpDest)
        -> UniqSet Unique
        -> BlockId
        -> CLabel

shortBlockId fn seen blockid =
  case (elementOfUniqSet uq seen, fn blockid) of
    (True, _)    -> mkAsmTempLabel uq
    (_, Nothing) -> mkAsmTempLabel uq
    (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
    (_, Just (DestImm (ImmCLbl lbl))) -> lbl
    (_, _other) -> panic "shortBlockId"
  where uq = getUnique blockid