d7dfd295765b61e389cac0287cdbcddc216a1894
[ghc.git] / compiler / nativeGen / X86 / Instr.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Machine-dependent assembly language
4 --
5 -- (c) The University of Glasgow 1993-2004
6 --
7 -----------------------------------------------------------------------------
8
9 #include "HsVersions.h"
10 #include "nativeGen/NCG.h"
11
12 module X86.Instr
13 where
14
15 import X86.Cond
16 import X86.Regs
17 import Instruction
18 import Size
19 import RegClass
20 import Reg
21 import TargetReg
22
23 import BlockId
24 import OldCmm
25 import FastString
26 import FastBool
27 import Outputable
28 import Platform
29 import Constants (rESERVED_C_STACK_BYTES)
30
31 import BasicTypes (Alignment)
32 import CLabel
33 import UniqSet
34 import Unique
35
36 -- Size of an x86/x86_64 memory address, in bytes.
37 --
38 archWordSize :: Bool -> Size
39 archWordSize is32Bit
40 | is32Bit = II32
41 | otherwise = II64
42
43 -- | Instruction instance for x86 instruction set.
44 instance Instruction Instr where
45 regUsageOfInstr = x86_regUsageOfInstr
46 patchRegsOfInstr = x86_patchRegsOfInstr
47 isJumpishInstr = x86_isJumpishInstr
48 jumpDestsOfInstr = x86_jumpDestsOfInstr
49 patchJumpInstr = x86_patchJumpInstr
50 mkSpillInstr = x86_mkSpillInstr
51 mkLoadInstr = x86_mkLoadInstr
52 takeDeltaInstr = x86_takeDeltaInstr
53 isMetaInstr = x86_isMetaInstr
54 mkRegRegMoveInstr = x86_mkRegRegMoveInstr
55 takeRegRegMoveInstr = x86_takeRegRegMoveInstr
56 mkJumpInstr = x86_mkJumpInstr
57
58
59 -- -----------------------------------------------------------------------------
60 -- Intel x86 instructions
61
62 {-
63 Intel, in their infinite wisdom, selected a stack model for floating
64 point registers on x86. That might have made sense back in 1979 --
65 nowadays we can see it for the nonsense it really is. A stack model
66 fits poorly with the existing nativeGen infrastructure, which assumes
67 flat integer and FP register sets. Prior to this commit, nativeGen
68 could not generate correct x86 FP code -- to do so would have meant
69 somehow working the register-stack paradigm into the register
70 allocator and spiller, which sounds very difficult.
71
72 We have decided to cheat, and go for a simple fix which requires no
73 infrastructure modifications, at the expense of generating ropey but
74 correct FP code. All notions of the x86 FP stack and its insns have
75 been removed. Instead, we pretend (to the instruction selector and
76 register allocator) that x86 has six floating point registers, %fake0
77 .. %fake5, which can be used in the usual flat manner. We further
78 claim that x86 has floating point instructions very similar to SPARC
79 and Alpha, that is, a simple 3-operand register-register arrangement.
80 Code generation and register allocation proceed on this basis.
81
82 When we come to print out the final assembly, our convenient fiction
83 is converted to dismal reality. Each fake instruction is
84 independently converted to a series of real x86 instructions.
85 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
86 arithmetic operations, the two operands are pushed onto the top of the
87 FP stack, the operation done, and the result copied back into the
88 relevant register. There are only six %fake registers because 2 are
89 needed for the translation, and x86 has 8 in total.
90
91 The translation is inefficient but is simple and it works. A cleverer
92 translation would handle a sequence of insns, simulating the FP stack
93 contents, would not impose a fixed mapping from %fake to %st regs, and
94 hopefully could avoid most of the redundant reg-reg moves of the
95 current translation.
96
97 We might as well make use of whatever unique FP facilities Intel have
98 chosen to bless us with (let's not be churlish, after all).
99 Hence GLDZ and GLD1. Bwahahahahahahaha!
100 -}
101
102 {-
103 Note [x86 Floating point precision]
104
105 Intel's internal floating point registers are by default 80 bit
106 extended precision. This means that all operations done on values in
107 registers are done at 80 bits, and unless the intermediate values are
108 truncated to the appropriate size (32 or 64 bits) by storing in
109 memory, calculations in registers will give different results from
110 calculations which pass intermediate values in memory (eg. via
111 function calls).
112
113 One solution is to set the FPU into 64 bit precision mode. Some OSs
114 do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
115 that this will only affect 64-bit precision arithmetic; 32-bit
116 calculations will still be done at 64-bit precision in registers. So
117 it doesn't solve the whole problem.
118
119 There's also the issue of what the C library is expecting in terms of
120 precision. It seems to be the case that glibc on Linux expects the
121 FPU to be set to 80 bit precision, so setting it to 64 bit could have
122 unexpected effects. Changing the default could have undesirable
123 effects on other 3rd-party library code too, so the right thing would
124 be to save/restore the FPU control word across Haskell code if we were
125 to do this.
126
127 gcc's -ffloat-store gives consistent results by always storing the
128 results of floating-point calculations in memory, which works for both
129 32 and 64-bit precision. However, it only affects the values of
130 user-declared floating point variables in C, not intermediate results.
131 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
132 flag).
133
134 Another problem is how to spill floating point registers in the
135 register allocator. Should we spill the whole 80 bits, or just 64?
136 On an OS which is set to 64 bit precision, spilling 64 is fine. On
137 Linux, spilling 64 bits will round the results of some operations.
138 This is what gcc does. Spilling at 80 bits requires taking up a full
139 128 bit slot (so we get alignment). We spill at 80-bits and ignore
140 the alignment problems.
141
142 In the future [edit: now available in GHC 7.0.1, with the -msse2
143 flag], we'll use the SSE registers for floating point. This requires
144 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
145 float ops), which means P4 or Xeon and above. Using SSE will solve
146 all these problems, because the SSE registers use fixed 32 bit or 64
147 bit precision.
148
149 --SDM 1/2003
150 -}
151
152 data Instr
153 -- comment pseudo-op
154 = COMMENT FastString
155
156 -- some static data spat out during code
157 -- generation. Will be extracted before
158 -- pretty-printing.
159 | LDATA Section (Alignment, CmmStatics)
160
161 -- start a new basic block. Useful during
162 -- codegen, removed later. Preceding
163 -- instruction should be a jump, as per the
164 -- invariants for a BasicBlock (see Cmm).
165 | NEWBLOCK BlockId
166
167 -- specify current stack offset for
168 -- benefit of subsequent passes
169 | DELTA Int
170
171 -- Moves.
172 | MOV Size Operand Operand
173 | MOVZxL Size Operand Operand -- size is the size of operand 1
174 | MOVSxL Size Operand Operand -- size is the size of operand 1
175 -- x86_64 note: plain mov into a 32-bit register always zero-extends
176 -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
177 -- don't affect the high bits of the register.
178
179 -- Load effective address (also a very useful three-operand add instruction :-)
180 | LEA Size Operand Operand
181
182 -- Int Arithmetic.
183 | ADD Size Operand Operand
184 | ADC Size Operand Operand
185 | SUB Size Operand Operand
186
187 | MUL Size Operand Operand
188 | IMUL Size Operand Operand -- signed int mul
189 | IMUL2 Size Operand -- %edx:%eax = operand * %eax
190
191 | DIV Size Operand -- eax := eax:edx/op, edx := eax:edx%op
192 | IDIV Size Operand -- ditto, but signed
193
194 -- Simple bit-twiddling.
195 | AND Size Operand Operand
196 | OR Size Operand Operand
197 | XOR Size Operand Operand
198 | NOT Size Operand
199 | NEGI Size Operand -- NEG instruction (name clash with Cond)
200
201 -- Shifts (amount may be immediate or %cl only)
202 | SHL Size Operand{-amount-} Operand
203 | SAR Size Operand{-amount-} Operand
204 | SHR Size Operand{-amount-} Operand
205
206 | BT Size Imm Operand
207 | NOP
208
209 -- x86 Float Arithmetic.
210 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
211 -- as single instructions right up until we spit them out.
212 -- all the 3-operand fake fp insns are src1 src2 dst
213 -- and furthermore are constrained to be fp regs only.
214 -- IMPORTANT: keep is_G_insn up to date with any changes here
215 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
216 | GLD Size AddrMode Reg -- src, dst(fpreg)
217 | GST Size Reg AddrMode -- src(fpreg), dst
218
219 | GLDZ Reg -- dst(fpreg)
220 | GLD1 Reg -- dst(fpreg)
221
222 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
223 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
224
225 | GITOF Reg Reg -- src(intreg), dst(fpreg)
226 | GITOD Reg Reg -- src(intreg), dst(fpreg)
227
228 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
229
230 | GADD Size Reg Reg Reg -- src1, src2, dst
231 | GDIV Size Reg Reg Reg -- src1, src2, dst
232 | GSUB Size Reg Reg Reg -- src1, src2, dst
233 | GMUL Size Reg Reg Reg -- src1, src2, dst
234
235 -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
236 -- Compare src1 with src2; set the Zero flag iff the numbers are
237 -- comparable and the comparison is True. Subsequent code must
238 -- test the %eflags zero flag regardless of the supplied Cond.
239 | GCMP Cond Reg Reg -- src1, src2
240
241 | GABS Size Reg Reg -- src, dst
242 | GNEG Size Reg Reg -- src, dst
243 | GSQRT Size Reg Reg -- src, dst
244 | GSIN Size CLabel CLabel Reg Reg -- src, dst
245 | GCOS Size CLabel CLabel Reg Reg -- src, dst
246 | GTAN Size CLabel CLabel Reg Reg -- src, dst
247
248 | GFREE -- do ffree on all x86 regs; an ugly hack
249
250
251 -- SSE2 floating point: we use a restricted set of the available SSE2
252 -- instructions for floating-point.
253 -- use MOV for moving (either movss or movsd (movlpd better?))
254 | CVTSS2SD Reg Reg -- F32 to F64
255 | CVTSD2SS Reg Reg -- F64 to F32
256 | CVTTSS2SIQ Size Operand Reg -- F32 to I32/I64 (with truncation)
257 | CVTTSD2SIQ Size Operand Reg -- F64 to I32/I64 (with truncation)
258 | CVTSI2SS Size Operand Reg -- I32/I64 to F32
259 | CVTSI2SD Size Operand Reg -- I32/I64 to F64
260
261 -- use ADD & SUB for arithmetic. In both cases, operands
262 -- are Operand Reg.
263
264 -- SSE2 floating-point division:
265 | FDIV Size Operand Operand -- divisor, dividend(dst)
266
267 -- use CMP for comparisons. ucomiss and ucomisd instructions
268 -- compare single/double prec floating point respectively.
269
270 | SQRT Size Operand Reg -- src, dst
271
272
273 -- Comparison
274 | TEST Size Operand Operand
275 | CMP Size Operand Operand
276 | SETCC Cond Operand
277
278 -- Stack Operations.
279 | PUSH Size Operand
280 | POP Size Operand
281 -- both unused (SDM):
282 -- | PUSHA
283 -- | POPA
284
285 -- Jumping around.
286 | JMP Operand
287 | JXX Cond BlockId -- includes unconditional branches
288 | JXX_GBL Cond Imm -- non-local version of JXX
289 -- Table jump
290 | JMP_TBL Operand -- Address to jump to
291 [Maybe BlockId] -- Blocks in the jump table
292 Section -- Data section jump table should be put in
293 CLabel -- Label of jump table
294 | CALL (Either Imm Reg) [Reg]
295
296 -- Other things.
297 | CLTD Size -- sign extend %eax into %edx:%eax
298
299 | FETCHGOT Reg -- pseudo-insn for ELF position-independent code
300 -- pretty-prints as
301 -- call 1f
302 -- 1: popl %reg
303 -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
304 | FETCHPC Reg -- pseudo-insn for Darwin position-independent code
305 -- pretty-prints as
306 -- call 1f
307 -- 1: popl %reg
308
309 -- SSE4.2
310 | POPCNT Size Operand Reg -- src, dst
311
312 data Operand
313 = OpReg Reg -- register
314 | OpImm Imm -- immediate value
315 | OpAddr AddrMode -- memory reference
316
317
318
319 x86_regUsageOfInstr :: Instr -> RegUsage
320 x86_regUsageOfInstr instr
321 = case instr of
322 MOV _ src dst -> usageRW src dst
323 MOVZxL _ src dst -> usageRW src dst
324 MOVSxL _ src dst -> usageRW src dst
325 LEA _ src dst -> usageRW src dst
326 ADD _ src dst -> usageRM src dst
327 ADC _ src dst -> usageRM src dst
328 SUB _ src dst -> usageRM src dst
329 IMUL _ src dst -> usageRM src dst
330 IMUL2 _ src -> mkRU (eax:use_R src) [eax,edx]
331 MUL _ src dst -> usageRM src dst
332 DIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
333 IDIV _ op -> mkRU (eax:edx:use_R op) [eax,edx]
334 AND _ src dst -> usageRM src dst
335 OR _ src dst -> usageRM src dst
336
337 XOR _ (OpReg src) (OpReg dst)
338 | src == dst -> mkRU [] [dst]
339
340 XOR _ src dst -> usageRM src dst
341 NOT _ op -> usageM op
342 NEGI _ op -> usageM op
343 SHL _ imm dst -> usageRM imm dst
344 SAR _ imm dst -> usageRM imm dst
345 SHR _ imm dst -> usageRM imm dst
346 BT _ _ src -> mkRUR (use_R src)
347
348 PUSH _ op -> mkRUR (use_R op)
349 POP _ op -> mkRU [] (def_W op)
350 TEST _ src dst -> mkRUR (use_R src ++ use_R dst)
351 CMP _ src dst -> mkRUR (use_R src ++ use_R dst)
352 SETCC _ op -> mkRU [] (def_W op)
353 JXX _ _ -> mkRU [] []
354 JXX_GBL _ _ -> mkRU [] []
355 JMP op -> mkRUR (use_R op)
356 JMP_TBL op _ _ _ -> mkRUR (use_R op)
357 CALL (Left _) params -> mkRU params callClobberedRegs
358 CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
359 CLTD _ -> mkRU [eax] [edx]
360 NOP -> mkRU [] []
361
362 GMOV src dst -> mkRU [src] [dst]
363 GLD _ src dst -> mkRU (use_EA src) [dst]
364 GST _ src dst -> mkRUR (src : use_EA dst)
365
366 GLDZ dst -> mkRU [] [dst]
367 GLD1 dst -> mkRU [] [dst]
368
369 GFTOI src dst -> mkRU [src] [dst]
370 GDTOI src dst -> mkRU [src] [dst]
371
372 GITOF src dst -> mkRU [src] [dst]
373 GITOD src dst -> mkRU [src] [dst]
374
375 GDTOF src dst -> mkRU [src] [dst]
376
377 GADD _ s1 s2 dst -> mkRU [s1,s2] [dst]
378 GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst]
379 GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst]
380 GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst]
381
382 GCMP _ src1 src2 -> mkRUR [src1,src2]
383 GABS _ src dst -> mkRU [src] [dst]
384 GNEG _ src dst -> mkRU [src] [dst]
385 GSQRT _ src dst -> mkRU [src] [dst]
386 GSIN _ _ _ src dst -> mkRU [src] [dst]
387 GCOS _ _ _ src dst -> mkRU [src] [dst]
388 GTAN _ _ _ src dst -> mkRU [src] [dst]
389
390 CVTSS2SD src dst -> mkRU [src] [dst]
391 CVTSD2SS src dst -> mkRU [src] [dst]
392 CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
393 CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
394 CVTSI2SS _ src dst -> mkRU (use_R src) [dst]
395 CVTSI2SD _ src dst -> mkRU (use_R src) [dst]
396 FDIV _ src dst -> usageRM src dst
397
398 FETCHGOT reg -> mkRU [] [reg]
399 FETCHPC reg -> mkRU [] [reg]
400
401 COMMENT _ -> noUsage
402 DELTA _ -> noUsage
403
404 POPCNT _ src dst -> mkRU (use_R src) [dst]
405
406 _other -> panic "regUsage: unrecognised instr"
407
408 where
409 -- 2 operand form; first operand Read; second Written
410 usageRW :: Operand -> Operand -> RegUsage
411 usageRW op (OpReg reg) = mkRU (use_R op) [reg]
412 usageRW op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
413 usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
414
415 -- 2 operand form; first operand Read; second Modified
416 usageRM :: Operand -> Operand -> RegUsage
417 usageRM op (OpReg reg) = mkRU (use_R op ++ [reg]) [reg]
418 usageRM op (OpAddr ea) = mkRUR (use_R op ++ use_EA ea)
419 usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
420
421 -- 1 operand form; operand Modified
422 usageM :: Operand -> RegUsage
423 usageM (OpReg reg) = mkRU [reg] [reg]
424 usageM (OpAddr ea) = mkRUR (use_EA ea)
425 usageM _ = panic "X86.RegInfo.usageM: no match"
426
427 -- Registers defd when an operand is written.
428 def_W (OpReg reg) = [reg]
429 def_W (OpAddr _ ) = []
430 def_W _ = panic "X86.RegInfo.def_W: no match"
431
432 -- Registers used when an operand is read.
433 use_R (OpReg reg) = [reg]
434 use_R (OpImm _) = []
435 use_R (OpAddr ea) = use_EA ea
436
437 -- Registers used to compute an effective address.
438 use_EA (ImmAddr _ _) = []
439 use_EA (AddrBaseIndex base index _) =
440 use_base base $! use_index index
441 where use_base (EABaseReg r) x = r : x
442 use_base _ x = x
443 use_index EAIndexNone = []
444 use_index (EAIndex i _) = [i]
445
446 mkRUR src = src' `seq` RU src' []
447 where src' = filter interesting src
448
449 mkRU src dst = src' `seq` dst' `seq` RU src' dst'
450 where src' = filter interesting src
451 dst' = filter interesting dst
452
453 interesting :: Reg -> Bool
454 interesting (RegVirtual _) = True
455 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
456 interesting (RegReal (RealRegPair{})) = panic "X86.interesting: no reg pairs on this arch"
457
458
459
460 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
461 x86_patchRegsOfInstr instr env
462 = case instr of
463 MOV sz src dst -> patch2 (MOV sz) src dst
464 MOVZxL sz src dst -> patch2 (MOVZxL sz) src dst
465 MOVSxL sz src dst -> patch2 (MOVSxL sz) src dst
466 LEA sz src dst -> patch2 (LEA sz) src dst
467 ADD sz src dst -> patch2 (ADD sz) src dst
468 ADC sz src dst -> patch2 (ADC sz) src dst
469 SUB sz src dst -> patch2 (SUB sz) src dst
470 IMUL sz src dst -> patch2 (IMUL sz) src dst
471 IMUL2 sz src -> patch1 (IMUL2 sz) src
472 MUL sz src dst -> patch2 (MUL sz) src dst
473 IDIV sz op -> patch1 (IDIV sz) op
474 DIV sz op -> patch1 (DIV sz) op
475 AND sz src dst -> patch2 (AND sz) src dst
476 OR sz src dst -> patch2 (OR sz) src dst
477 XOR sz src dst -> patch2 (XOR sz) src dst
478 NOT sz op -> patch1 (NOT sz) op
479 NEGI sz op -> patch1 (NEGI sz) op
480 SHL sz imm dst -> patch1 (SHL sz imm) dst
481 SAR sz imm dst -> patch1 (SAR sz imm) dst
482 SHR sz imm dst -> patch1 (SHR sz imm) dst
483 BT sz imm src -> patch1 (BT sz imm) src
484 TEST sz src dst -> patch2 (TEST sz) src dst
485 CMP sz src dst -> patch2 (CMP sz) src dst
486 PUSH sz op -> patch1 (PUSH sz) op
487 POP sz op -> patch1 (POP sz) op
488 SETCC cond op -> patch1 (SETCC cond) op
489 JMP op -> patch1 JMP op
490 JMP_TBL op ids s lbl-> JMP_TBL (patchOp op) ids s lbl
491
492 GMOV src dst -> GMOV (env src) (env dst)
493 GLD sz src dst -> GLD sz (lookupAddr src) (env dst)
494 GST sz src dst -> GST sz (env src) (lookupAddr dst)
495
496 GLDZ dst -> GLDZ (env dst)
497 GLD1 dst -> GLD1 (env dst)
498
499 GFTOI src dst -> GFTOI (env src) (env dst)
500 GDTOI src dst -> GDTOI (env src) (env dst)
501
502 GITOF src dst -> GITOF (env src) (env dst)
503 GITOD src dst -> GITOD (env src) (env dst)
504
505 GDTOF src dst -> GDTOF (env src) (env dst)
506
507 GADD sz s1 s2 dst -> GADD sz (env s1) (env s2) (env dst)
508 GSUB sz s1 s2 dst -> GSUB sz (env s1) (env s2) (env dst)
509 GMUL sz s1 s2 dst -> GMUL sz (env s1) (env s2) (env dst)
510 GDIV sz s1 s2 dst -> GDIV sz (env s1) (env s2) (env dst)
511
512 GCMP sz src1 src2 -> GCMP sz (env src1) (env src2)
513 GABS sz src dst -> GABS sz (env src) (env dst)
514 GNEG sz src dst -> GNEG sz (env src) (env dst)
515 GSQRT sz src dst -> GSQRT sz (env src) (env dst)
516 GSIN sz l1 l2 src dst -> GSIN sz l1 l2 (env src) (env dst)
517 GCOS sz l1 l2 src dst -> GCOS sz l1 l2 (env src) (env dst)
518 GTAN sz l1 l2 src dst -> GTAN sz l1 l2 (env src) (env dst)
519
520 CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
521 CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
522 CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
523 CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
524 CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
525 CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
526 FDIV sz src dst -> FDIV sz (patchOp src) (patchOp dst)
527
528 CALL (Left _) _ -> instr
529 CALL (Right reg) p -> CALL (Right (env reg)) p
530
531 FETCHGOT reg -> FETCHGOT (env reg)
532 FETCHPC reg -> FETCHPC (env reg)
533
534 NOP -> instr
535 COMMENT _ -> instr
536 DELTA _ -> instr
537
538 JXX _ _ -> instr
539 JXX_GBL _ _ -> instr
540 CLTD _ -> instr
541
542 POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst)
543
544 _other -> panic "patchRegs: unrecognised instr"
545
546 where
547 patch1 :: (Operand -> a) -> Operand -> a
548 patch1 insn op = insn $! patchOp op
549 patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
550 patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
551
552 patchOp (OpReg reg) = OpReg $! env reg
553 patchOp (OpImm imm) = OpImm imm
554 patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
555
556 lookupAddr (ImmAddr imm off) = ImmAddr imm off
557 lookupAddr (AddrBaseIndex base index disp)
558 = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
559 where
560 lookupBase EABaseNone = EABaseNone
561 lookupBase EABaseRip = EABaseRip
562 lookupBase (EABaseReg r) = EABaseReg (env r)
563
564 lookupIndex EAIndexNone = EAIndexNone
565 lookupIndex (EAIndex r i) = EAIndex (env r) i
566
567
568 --------------------------------------------------------------------------------
569 x86_isJumpishInstr
570 :: Instr -> Bool
571
572 x86_isJumpishInstr instr
573 = case instr of
574 JMP{} -> True
575 JXX{} -> True
576 JXX_GBL{} -> True
577 JMP_TBL{} -> True
578 CALL{} -> True
579 _ -> False
580
581
582 x86_jumpDestsOfInstr
583 :: Instr
584 -> [BlockId]
585
586 x86_jumpDestsOfInstr insn
587 = case insn of
588 JXX _ id -> [id]
589 JMP_TBL _ ids _ _ -> [id | Just id <- ids]
590 _ -> []
591
592
593 x86_patchJumpInstr
594 :: Instr -> (BlockId -> BlockId) -> Instr
595
596 x86_patchJumpInstr insn patchF
597 = case insn of
598 JXX cc id -> JXX cc (patchF id)
599 JMP_TBL op ids section lbl
600 -> JMP_TBL op (map (fmap patchF) ids) section lbl
601 _ -> insn
602
603
604
605
606 -- -----------------------------------------------------------------------------
607 -- | Make a spill instruction.
608 x86_mkSpillInstr
609 :: Platform
610 -> Reg -- register to spill
611 -> Int -- current stack delta
612 -> Int -- spill slot to use
613 -> Instr
614
615 x86_mkSpillInstr platform reg delta slot
616 = let off = spillSlotToOffset slot
617 in
618 let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
619 in case targetClassOfReg platform reg of
620 RcInteger -> MOV (archWordSize (target32Bit platform))
621 (OpReg reg) (OpAddr (spRel platform off_w))
622 RcDouble -> GST FF80 reg (spRel platform off_w) {- RcFloat/RcDouble -}
623 RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel platform off_w))
624 _ -> panic "X86.mkSpillInstr: no match"
625
626
627 -- | Make a spill reload instruction.
628 x86_mkLoadInstr
629 :: Platform
630 -> Reg -- register to load
631 -> Int -- current stack delta
632 -> Int -- spill slot to use
633 -> Instr
634
635 x86_mkLoadInstr platform reg delta slot
636 = let off = spillSlotToOffset slot
637 in
638 let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
639 in case targetClassOfReg platform reg of
640 RcInteger -> MOV (archWordSize (target32Bit platform))
641 (OpAddr (spRel platform off_w)) (OpReg reg)
642 RcDouble -> GLD FF80 (spRel platform off_w) reg {- RcFloat/RcDouble -}
643 RcDoubleSSE -> MOV FF64 (OpAddr (spRel platform off_w)) (OpReg reg)
644 _ -> panic "X86.x86_mkLoadInstr"
645
646 spillSlotSize :: Int
647 spillSlotSize = IF_ARCH_i386(12, 8)
648
649 maxSpillSlots :: Int
650 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
651
652 -- convert a spill slot number to a *byte* offset, with no sign:
653 -- decide on a per arch basis whether you are spilling above or below
654 -- the C stack pointer.
655 spillSlotToOffset :: Int -> Int
656 spillSlotToOffset slot
657 | slot >= 0 && slot < maxSpillSlots
658 = 64 + spillSlotSize * slot
659 | otherwise
660 = pprPanic "spillSlotToOffset:"
661 ( text "invalid spill location: " <> int slot
662 $$ text "maxSpillSlots: " <> int maxSpillSlots)
663
664 --------------------------------------------------------------------------------
665
666 -- | See if this instruction is telling us the current C stack delta
667 x86_takeDeltaInstr
668 :: Instr
669 -> Maybe Int
670
671 x86_takeDeltaInstr instr
672 = case instr of
673 DELTA i -> Just i
674 _ -> Nothing
675
676
677 x86_isMetaInstr
678 :: Instr
679 -> Bool
680
681 x86_isMetaInstr instr
682 = case instr of
683 COMMENT{} -> True
684 LDATA{} -> True
685 NEWBLOCK{} -> True
686 DELTA{} -> True
687 _ -> False
688
689
690
691 -- | Make a reg-reg move instruction.
692 -- On SPARC v8 there are no instructions to move directly between
693 -- floating point and integer regs. If we need to do that then we
694 -- have to go via memory.
695 --
696 x86_mkRegRegMoveInstr
697 :: Platform
698 -> Reg
699 -> Reg
700 -> Instr
701
702 x86_mkRegRegMoveInstr platform src dst
703 = case targetClassOfReg platform src of
704 #if i386_TARGET_ARCH
705 RcInteger -> MOV II32 (OpReg src) (OpReg dst)
706 #else
707 RcInteger -> MOV II64 (OpReg src) (OpReg dst)
708 #endif
709 RcDouble -> GMOV src dst
710 RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
711 _ -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
712
713 -- | Check whether an instruction represents a reg-reg move.
714 -- The register allocator attempts to eliminate reg->reg moves whenever it can,
715 -- by assigning the src and dest temporaries to the same real register.
716 --
717 x86_takeRegRegMoveInstr
718 :: Instr
719 -> Maybe (Reg,Reg)
720
721 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
722 = Just (r1,r2)
723
724 x86_takeRegRegMoveInstr _ = Nothing
725
726
727 -- | Make an unconditional branch instruction.
728 x86_mkJumpInstr
729 :: BlockId
730 -> [Instr]
731
732 x86_mkJumpInstr id
733 = [JXX ALWAYS id]
734
735
736
737
738
739 i386_insert_ffrees
740 :: [GenBasicBlock Instr]
741 -> [GenBasicBlock Instr]
742
743 i386_insert_ffrees blocks
744 | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
745 = map ffree_before_nonlocal_transfers blocks
746
747 | otherwise
748 = blocks
749 where
750 ffree_before_nonlocal_transfers (BasicBlock id insns)
751 = BasicBlock id (foldr p [] insns)
752 where p insn r = case insn of
753 CALL _ _ -> GFREE : insn : r
754 JMP _ -> GFREE : insn : r
755 JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL"
756 _ -> insn : r
757
758 -- if you ever add a new FP insn to the fake x86 FP insn set,
759 -- you must update this too
760 is_G_instr :: Instr -> Bool
761 is_G_instr instr
762 = case instr of
763 GMOV{} -> True
764 GLD{} -> True
765 GST{} -> True
766 GLDZ{} -> True
767 GLD1{} -> True
768 GFTOI{} -> True
769 GDTOI{} -> True
770 GITOF{} -> True
771 GITOD{} -> True
772 GDTOF{} -> True
773 GADD{} -> True
774 GDIV{} -> True
775 GSUB{} -> True
776 GMUL{} -> True
777 GCMP{} -> True
778 GABS{} -> True
779 GNEG{} -> True
780 GSQRT{} -> True
781 GSIN{} -> True
782 GCOS{} -> True
783 GTAN{} -> True
784 GFREE -> panic "is_G_instr: GFREE (!)"
785 _ -> False
786
787
788 data JumpDest = DestBlockId BlockId | DestImm Imm
789
790 getJumpDestBlockId :: JumpDest -> Maybe BlockId
791 getJumpDestBlockId (DestBlockId bid) = Just bid
792 getJumpDestBlockId _ = Nothing
793
794 canShortcut :: Instr -> Maybe JumpDest
795 canShortcut (JXX ALWAYS id) = Just (DestBlockId id)
796 canShortcut (JMP (OpImm imm)) = Just (DestImm imm)
797 canShortcut _ = Nothing
798
799
800 -- This helper shortcuts a sequence of branches.
801 -- The blockset helps avoid following cycles.
802 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
803 shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
804 where shortcutJump' fn seen insn@(JXX cc id) =
805 if setMember id seen then insn
806 else case fn id of
807 Nothing -> insn
808 Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
809 Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm)
810 where seen' = setInsert id seen
811 shortcutJump' _ _ other = other
812
813 -- Here because it knows about JumpDest
814 shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
815 shortcutStatics fn (align, Statics lbl statics)
816 = (align, Statics lbl $ map (shortcutStatic fn) statics)
817 -- we need to get the jump tables, so apply the mapping to the entries
818 -- of a CmmData too.
819
820 shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
821 shortcutLabel fn lab
822 | Just uq <- maybeAsmTemp lab = shortBlockId fn emptyUniqSet (mkBlockId uq)
823 | otherwise = lab
824
825 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
826 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
827 = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
828 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
829 = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
830 -- slightly dodgy, we're ignoring the second label, but this
831 -- works with the way we use CmmLabelDiffOff for jump tables now.
832 shortcutStatic _ other_static
833 = other_static
834
835 shortBlockId
836 :: (BlockId -> Maybe JumpDest)
837 -> UniqSet Unique
838 -> BlockId
839 -> CLabel
840
841 shortBlockId fn seen blockid =
842 case (elementOfUniqSet uq seen, fn blockid) of
843 (True, _) -> mkAsmTempLabel uq
844 (_, Nothing) -> mkAsmTempLabel uq
845 (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
846 (_, Just (DestImm (ImmCLbl lbl))) -> lbl
847 (_, _other) -> panic "shortBlockId"
848 where uq = getUnique blockid