compiler/nativeGen/X86/Instr.hs  (ghc.git, commit 5e790e481e3cfbd6c79b9a400e94ebce8de15424)
1 {-# LANGUAGE CPP, TypeFamilies #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Machine-dependent assembly language
6 --
7 -- (c) The University of Glasgow 1993-2004
8 --
9 -----------------------------------------------------------------------------
10
11 module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..),
12 getJumpDestBlockId, canShortcut, shortcutStatics,
13 shortcutJump, i386_insert_ffrees, allocMoreStack,
14 maxSpillSlots, archWordFormat )
15 where
16
17 #include "HsVersions.h"
18 #include "nativeGen/NCG.h"
19
20 import GhcPrelude
21
22 import X86.Cond
23 import X86.Regs
24 import Instruction
25 import Format
26 import RegClass
27 import Reg
28 import TargetReg
29
30 import BlockId
31 import Hoopl.Collections
32 import Hoopl.Label
33 import CodeGen.Platform
34 import Cmm
35 import FastString
36 import Outputable
37 import Platform
38
39 import BasicTypes (Alignment)
40 import CLabel
41 import DynFlags
42 import UniqSet
43 import Unique
44 import UniqSupply
45 import Debug (UnwindTable)
46
47 import Control.Monad
48 import Data.Maybe (fromMaybe)
49
50 -- The format of a native x86/x86_64 machine word, i.e. the width of a memory address.
51 --
52 archWordFormat :: Bool -> Format
53 archWordFormat is32Bit
54 | is32Bit = II32
55 | otherwise = II64
56
57 -- | Instruction instance for x86 instruction set.
58 instance Instruction Instr where
59 regUsageOfInstr = x86_regUsageOfInstr
60 patchRegsOfInstr = x86_patchRegsOfInstr
61 isJumpishInstr = x86_isJumpishInstr
62 jumpDestsOfInstr = x86_jumpDestsOfInstr
63 patchJumpInstr = x86_patchJumpInstr
64 mkSpillInstr = x86_mkSpillInstr
65 mkLoadInstr = x86_mkLoadInstr
66 takeDeltaInstr = x86_takeDeltaInstr
67 isMetaInstr = x86_isMetaInstr
68 mkRegRegMoveInstr = x86_mkRegRegMoveInstr
69 takeRegRegMoveInstr = x86_takeRegRegMoveInstr
70 mkJumpInstr = x86_mkJumpInstr
71 mkStackAllocInstr = x86_mkStackAllocInstr
72 mkStackDeallocInstr = x86_mkStackDeallocInstr
73
74
75 -- -----------------------------------------------------------------------------
76 -- Intel x86 instructions
77
78 {-
79 Intel, in their infinite wisdom, selected a stack model for floating
80 point registers on x86. That might have made sense back in 1979 --
81 nowadays we can see it for the nonsense it really is. A stack model
82 fits poorly with the existing nativeGen infrastructure, which assumes
83 flat integer and FP register sets. Prior to this commit, nativeGen
84 could not generate correct x86 FP code -- to do so would have meant
85 somehow working the register-stack paradigm into the register
86 allocator and spiller, which sounds very difficult.
87
88 We have decided to cheat, and go for a simple fix which requires no
89 infrastructure modifications, at the expense of generating ropey but
90 correct FP code. All notions of the x86 FP stack and its insns have
91 been removed. Instead, we pretend (to the instruction selector and
92 register allocator) that x86 has six floating point registers, %fake0
93 .. %fake5, which can be used in the usual flat manner. We further
94 claim that x86 has floating point instructions very similar to SPARC
95 and Alpha, that is, a simple 3-operand register-register arrangement.
96 Code generation and register allocation proceed on this basis.
97
98 When we come to print out the final assembly, our convenient fiction
99 is converted to dismal reality. Each fake instruction is
100 independently converted to a series of real x86 instructions.
101 %fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
102 arithmetic operations, the two operands are pushed onto the top of the
103 FP stack, the operation done, and the result copied back into the
104 relevant register. There are only six %fake registers because 2 are
105 needed for the translation, and x86 has 8 in total.
106
107 The translation is inefficient but is simple and it works. A cleverer
108 translation would handle a sequence of insns, simulating the FP stack
109 contents, would not impose a fixed mapping from %fake to %st regs, and
110 hopefully could avoid most of the redundant reg-reg moves of the
111 current translation.
112
113 We might as well make use of whatever unique FP facilities Intel have
114 chosen to bless us with (let's not be churlish, after all).
115 Hence GLDZ and GLD1. Bwahahahahahahaha!
116 -}
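
{- As a purely illustrative sketch (the real sequences are chosen when the
   fake insns are pretty-printed, in X86.Ppr), a fake three-operand add such as

        GADD FF64 %fake0 %fake1 %fake2

   could be lowered, under the fixed %fakeN -> %st(N) mapping, to

        fld   %st(0)            # push a copy of src1 (%fake0)
        fadd  %st(2), %st(0)    # %st(0) := %st(0) + src2 (%fake1)
        fstp  %st(3)            # pop the result into dst (%fake2)

   i.e. push, operate, and pop the result back into the destination slot.
-}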
117
118 {-
119 Note [x86 Floating point precision]
120
121 Intel's internal floating point registers are by default 80 bit
122 extended precision. This means that all operations done on values in
123 registers are done at 80 bits, and unless the intermediate values are
124 truncated to the appropriate size (32 or 64 bits) by storing in
125 memory, calculations in registers will give different results from
126 calculations which pass intermediate values in memory (eg. via
127 function calls).
128
129 One solution is to set the FPU into 64 bit precision mode. Some OSs
130 do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
131 that this will only affect 64-bit precision arithmetic; 32-bit
132 calculations will still be done at 64-bit precision in registers. So
133 it doesn't solve the whole problem.
134
135 There's also the issue of what the C library is expecting in terms of
136 precision. It seems to be the case that glibc on Linux expects the
137 FPU to be set to 80 bit precision, so setting it to 64 bit could have
138 unexpected effects. Changing the default could have undesirable
139 effects on other 3rd-party library code too, so the right thing would
140 be to save/restore the FPU control word across Haskell code if we were
141 to do this.
142
143 gcc's -ffloat-store gives consistent results by always storing the
144 results of floating-point calculations in memory, which works for both
145 32 and 64-bit precision. However, it only affects the values of
146 user-declared floating point variables in C, not intermediate results.
147 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
148 flag).
149
150 Another problem is how to spill floating point registers in the
151 register allocator. Should we spill the whole 80 bits, or just 64?
152 On an OS which is set to 64 bit precision, spilling 64 is fine. On
153 Linux, spilling 64 bits will round the results of some operations.
154 This is what gcc does. Spilling at 80 bits requires taking up a full
155 128 bit slot (so we get alignment). We spill at 80-bits and ignore
156 the alignment problems.
157
158 In the future [edit: now available in GHC 7.0.1, with the -msse2
159 flag], we'll use the SSE registers for floating point. This requires
160 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
161 float ops), which means P4 or Xeon and above. Using SSE will solve
162 all these problems, because the SSE registers use fixed 32 bit or 64
163 bit precision.
164
165 --SDM 1/2003
166 -}
167
168 data Instr
169 -- comment pseudo-op
170 = COMMENT FastString
171
172 -- location pseudo-op (file, line, col, name)
173 | LOCATION Int Int Int String
174
175 -- some static data spat out during code
176 -- generation. Will be extracted before
177 -- pretty-printing.
178 | LDATA Section (Alignment, CmmStatics)
179
180 -- start a new basic block. Useful during
181 -- codegen, removed later. Preceding
182 -- instruction should be a jump, as per the
183 -- invariants for a BasicBlock (see Cmm).
184 | NEWBLOCK BlockId
185
186 -- unwinding information
187 -- See Note [Unwinding information in the NCG].
188 | UNWIND CLabel UnwindTable
189
190 -- specify the current stack offset (in bytes) for the benefit of
191 -- subsequent passes, e.g. for computing spill-slot addresses.
192 | DELTA Int
193
194 -- Moves.
195 | MOV Format Operand Operand
196 | CMOV Cond Format Operand Reg
197 | MOVZxL Format Operand Operand -- format is the size of operand 1
198 | MOVSxL Format Operand Operand -- format is the size of operand 1
199 -- x86_64 note: plain mov into a 32-bit register always zero-extends
200 -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
201 -- don't affect the high bits of the register.
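-- For example (x86_64, AT&T syntax):
--      movl %eax, %ebx     -- zeroes bits 32..63 of %rbx as a side effect
--      movb %al,  %bl      -- leaves bits 8..63 of %rbx unchanged, so an
--                          -- explicit MOVZxL/MOVSxL is needed to widen it.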
202
203 -- Load effective address (also a very useful three-operand add instruction :-)
204 | LEA Format Operand Operand
205
206 -- Int Arithmetic.
207 | ADD Format Operand Operand
208 | ADC Format Operand Operand
209 | SUB Format Operand Operand
210 | SBB Format Operand Operand
211
212 | MUL Format Operand Operand
213 | MUL2 Format Operand -- %edx:%eax = operand * %rax
214 | IMUL Format Operand Operand -- signed int mul
215 | IMUL2 Format Operand -- %edx:%eax = operand * %eax
216
217 | DIV Format Operand -- eax := eax:edx/op, edx := eax:edx%op
218 | IDIV Format Operand -- ditto, but signed
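-- For instance, a signed 32-bit division is typically emitted as
--      cltd                -- sign-extend %eax into %edx:%eax (CLTD below)
--      idivl op            -- %eax := quotient, %edx := remainder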
219
220 -- Int Arithmetic, where the effects on the condition register
221 -- are important. Used in specialized sequences such as MO_Add2.
222 -- Do not rewrite these instructions to "equivalent" ones that
223 -- have different effect on the condition register! (See #9013.)
224 | ADD_CC Format Operand Operand
225 | SUB_CC Format Operand Operand
226
227 -- Simple bit-twiddling.
228 | AND Format Operand Operand
229 | OR Format Operand Operand
230 | XOR Format Operand Operand
231 | NOT Format Operand
232 | NEGI Format Operand -- NEG instruction (name clash with Cond)
233 | BSWAP Format Reg
234
235 -- Shifts (amount may be immediate or %cl only)
236 | SHL Format Operand{-amount-} Operand
237 | SAR Format Operand{-amount-} Operand
238 | SHR Format Operand{-amount-} Operand
239
240 | BT Format Imm Operand
241 | NOP
242
243 -- x86 Float Arithmetic.
244 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
245 -- as single instructions right up until we spit them out.
246 -- All the 3-operand fake fp insns are src1 src2 dst
247 -- and furthermore are constrained to be fp regs only.
248 -- IMPORTANT: keep is_G_instr up to date with any changes here
249 | GMOV Reg Reg -- src(fpreg), dst(fpreg)
250 | GLD Format AddrMode Reg -- src, dst(fpreg)
251 | GST Format Reg AddrMode -- src(fpreg), dst
252
253 | GLDZ Reg -- dst(fpreg)
254 | GLD1 Reg -- dst(fpreg)
255
256 | GFTOI Reg Reg -- src(fpreg), dst(intreg)
257 | GDTOI Reg Reg -- src(fpreg), dst(intreg)
258
259 | GITOF Reg Reg -- src(intreg), dst(fpreg)
260 | GITOD Reg Reg -- src(intreg), dst(fpreg)
261
262 | GDTOF Reg Reg -- src(fpreg), dst(fpreg)
263
264 | GADD Format Reg Reg Reg -- src1, src2, dst
265 | GDIV Format Reg Reg Reg -- src1, src2, dst
266 | GSUB Format Reg Reg Reg -- src1, src2, dst
267 | GMUL Format Reg Reg Reg -- src1, src2, dst
268
269 -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
270 -- Compare src1 with src2; set the Zero flag iff the numbers are
271 -- comparable and the comparison is True. Subsequent code must
272 -- test the %eflags zero flag regardless of the supplied Cond.
273 | GCMP Cond Reg Reg -- src1, src2
274
275 | GABS Format Reg Reg -- src, dst
276 | GNEG Format Reg Reg -- src, dst
277 | GSQRT Format Reg Reg -- src, dst
278 | GSIN Format CLabel CLabel Reg Reg -- src, dst
279 | GCOS Format CLabel CLabel Reg Reg -- src, dst
280 | GTAN Format CLabel CLabel Reg Reg -- src, dst
281
282 | GFREE -- do ffree on all x86 regs; an ugly hack
283
284
285 -- SSE2 floating point: we use a restricted set of the available SSE2
286 -- instructions for floating-point.
287 -- use MOV for moving (either movss or movsd (movlpd better?))
288 | CVTSS2SD Reg Reg -- F32 to F64
289 | CVTSD2SS Reg Reg -- F64 to F32
290 | CVTTSS2SIQ Format Operand Reg -- F32 to I32/I64 (with truncation)
291 | CVTTSD2SIQ Format Operand Reg -- F64 to I32/I64 (with truncation)
292 | CVTSI2SS Format Operand Reg -- I32/I64 to F32
293 | CVTSI2SD Format Operand Reg -- I32/I64 to F64
294
295 -- use ADD, SUB, and SQRT for arithmetic. In each case, operands
296 -- are Operand Reg.
297
298 -- SSE2 floating-point division:
299 | FDIV Format Operand Operand -- divisor, dividend(dst)
300
301 -- use CMP for comparisons. ucomiss and ucomisd instructions
302 -- compare single/double prec floating point respectively.
303
304 | SQRT Format Operand Reg -- src, dst
305
306
307 -- Comparison
308 | TEST Format Operand Operand
309 | CMP Format Operand Operand
310 | SETCC Cond Operand
311
312 -- Stack Operations.
313 | PUSH Format Operand
314 | POP Format Operand
315 -- both unused (SDM):
316 -- | PUSHA
317 -- | POPA
318
319 -- Jumping around.
320 | JMP Operand [Reg] -- including live Regs at the call
321 | JXX Cond BlockId -- includes unconditional branches
322 | JXX_GBL Cond Imm -- non-local version of JXX
323 -- Table jump
324 | JMP_TBL Operand -- Address to jump to
325 [Maybe JumpDest] -- Targets of the jump table
326 Section -- Data section jump table should be put in
327 CLabel -- Label of jump table
328 | CALL (Either Imm Reg) [Reg]
329
330 -- Other things.
331 | CLTD Format -- sign extend %eax into %edx:%eax
332
333 | FETCHGOT Reg -- pseudo-insn for ELF position-independent code
334 -- pretty-prints as
335 -- call 1f
336 -- 1: popl %reg
337 -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
338 | FETCHPC Reg -- pseudo-insn for Darwin position-independent code
339 -- pretty-prints as
340 -- call 1f
341 -- 1: popl %reg
342
343 -- bit counting instructions
344 | POPCNT Format Operand Reg -- [SSE4.2] count number of bits set to 1
345 | LZCNT Format Operand Reg -- [ABM] count number of leading zeros
346 | TZCNT Format Operand Reg -- [BMI1] count number of trailing zeros
347 | BSF Format Operand Reg -- bit scan forward
348 | BSR Format Operand Reg -- bit scan reverse
349
350 -- bit manipulation instructions
351 | PDEP Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask
352 | PEXT Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
353
354 -- prefetch
355 | PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
356 -- variant can be NTA, Lvl0, Lvl1, or Lvl2
357
358 | LOCK Instr -- lock prefix
359 | XADD Format Operand Operand -- src (r), dst (r/m)
360 | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit
361 | MFENCE
362
363 data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
364
365
366 data Operand
367 = OpReg Reg -- register
368 | OpImm Imm -- immediate value
369 | OpAddr AddrMode -- memory reference
370
371
372
373 -- | Returns which registers are read and written as a (read, written)
374 -- pair.
375 x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
376 x86_regUsageOfInstr platform instr
377 = case instr of
378 MOV _ src dst -> usageRW src dst
379 CMOV _ _ src dst -> mkRU (use_R src [dst]) [dst]
380 MOVZxL _ src dst -> usageRW src dst
381 MOVSxL _ src dst -> usageRW src dst
382 LEA _ src dst -> usageRW src dst
383 ADD _ src dst -> usageRM src dst
384 ADC _ src dst -> usageRM src dst
385 SUB _ src dst -> usageRM src dst
386 SBB _ src dst -> usageRM src dst
387 IMUL _ src dst -> usageRM src dst
388
389 -- The result of IMULB will be just in %ax
390 IMUL2 II8 src -> mkRU (eax:use_R src []) [eax]
391 -- The result of IMUL for wider values will be split between %dx/%edx/%rdx
392 -- and %ax/%eax/%rax.
393 IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
394
395 MUL _ src dst -> usageRM src dst
396 MUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
397 DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
398 IDIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
399 ADD_CC _ src dst -> usageRM src dst
400 SUB_CC _ src dst -> usageRM src dst
401 AND _ src dst -> usageRM src dst
402 OR _ src dst -> usageRM src dst
403
404 XOR _ (OpReg src) (OpReg dst)
405 | src == dst -> mkRU [] [dst]
406
407 XOR _ src dst -> usageRM src dst
408 NOT _ op -> usageM op
409 BSWAP _ reg -> mkRU [reg] [reg]
410 NEGI _ op -> usageM op
411 SHL _ imm dst -> usageRM imm dst
412 SAR _ imm dst -> usageRM imm dst
413 SHR _ imm dst -> usageRM imm dst
414 BT _ _ src -> mkRUR (use_R src [])
415
416 PUSH _ op -> mkRUR (use_R op [])
417 POP _ op -> mkRU [] (def_W op)
418 TEST _ src dst -> mkRUR (use_R src $! use_R dst [])
419 CMP _ src dst -> mkRUR (use_R src $! use_R dst [])
420 SETCC _ op -> mkRU [] (def_W op)
421 JXX _ _ -> mkRU [] []
422 JXX_GBL _ _ -> mkRU [] []
423 JMP op regs -> mkRUR (use_R op regs)
424 JMP_TBL op _ _ _ -> mkRUR (use_R op [])
425 CALL (Left _) params -> mkRU params (callClobberedRegs platform)
426 CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
427 CLTD _ -> mkRU [eax] [edx]
428 NOP -> mkRU [] []
429
430 GMOV src dst -> mkRU [src] [dst]
431 GLD _ src dst -> mkRU (use_EA src []) [dst]
432 GST _ src dst -> mkRUR (src : use_EA dst [])
433
434 GLDZ dst -> mkRU [] [dst]
435 GLD1 dst -> mkRU [] [dst]
436
437 GFTOI src dst -> mkRU [src] [dst]
438 GDTOI src dst -> mkRU [src] [dst]
439
440 GITOF src dst -> mkRU [src] [dst]
441 GITOD src dst -> mkRU [src] [dst]
442
443 GDTOF src dst -> mkRU [src] [dst]
444
445 GADD _ s1 s2 dst -> mkRU [s1,s2] [dst]
446 GSUB _ s1 s2 dst -> mkRU [s1,s2] [dst]
447 GMUL _ s1 s2 dst -> mkRU [s1,s2] [dst]
448 GDIV _ s1 s2 dst -> mkRU [s1,s2] [dst]
449
450 GCMP _ src1 src2 -> mkRUR [src1,src2]
451 GABS _ src dst -> mkRU [src] [dst]
452 GNEG _ src dst -> mkRU [src] [dst]
453 GSQRT _ src dst -> mkRU [src] [dst]
454 GSIN _ _ _ src dst -> mkRU [src] [dst]
455 GCOS _ _ _ src dst -> mkRU [src] [dst]
456 GTAN _ _ _ src dst -> mkRU [src] [dst]
457
458 CVTSS2SD src dst -> mkRU [src] [dst]
459 CVTSD2SS src dst -> mkRU [src] [dst]
460 CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
461 CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
462 CVTSI2SS _ src dst -> mkRU (use_R src []) [dst]
463 CVTSI2SD _ src dst -> mkRU (use_R src []) [dst]
464 FDIV _ src dst -> usageRM src dst
465 SQRT _ src dst -> mkRU (use_R src []) [dst]
466
467 FETCHGOT reg -> mkRU [] [reg]
468 FETCHPC reg -> mkRU [] [reg]
469
470 COMMENT _ -> noUsage
471 LOCATION{} -> noUsage
472 UNWIND{} -> noUsage
473 DELTA _ -> noUsage
474
475 POPCNT _ src dst -> mkRU (use_R src []) [dst]
476 LZCNT _ src dst -> mkRU (use_R src []) [dst]
477 TZCNT _ src dst -> mkRU (use_R src []) [dst]
478 BSF _ src dst -> mkRU (use_R src []) [dst]
479 BSR _ src dst -> mkRU (use_R src []) [dst]
480
481 PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
482 PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
483
484 -- note: there might be a better way to do this
485 PREFETCH _ _ src -> mkRU (use_R src []) []
486 LOCK i -> x86_regUsageOfInstr platform i
487 XADD _ src dst -> usageMM src dst
488 CMPXCHG _ src dst -> usageRMM src dst (OpReg eax)
489 MFENCE -> noUsage
490
491 _other -> panic "regUsage: unrecognised instr"
492 where
493 -- # Definitions
494 --
495 -- Written: If the operand is a register, it's written. If it's an
496 -- address, registers mentioned in the address are read.
497 --
498 -- Modified: If the operand is a register, it's both read and
499 -- written. If it's an address, registers mentioned in the address
500 -- are read.
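-- For example, "ADD fmt src (OpReg r)" is usageRM: it reads the regs in
-- src plus r, and writes r; "MOV fmt src (OpAddr ea)" is usageRW with an
-- address destination: it reads the regs in src and in ea, and writes nothing.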
501
502 -- 2 operand form; first operand Read; second Written
503 usageRW :: Operand -> Operand -> RegUsage
504 usageRW op (OpReg reg) = mkRU (use_R op []) [reg]
505 usageRW op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
506 usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
507
508 -- 2 operand form; first operand Read; second Modified
509 usageRM :: Operand -> Operand -> RegUsage
510 usageRM op (OpReg reg) = mkRU (use_R op [reg]) [reg]
511 usageRM op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
512 usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
513
514 -- 2 operand form; first operand Modified; second Modified
515 usageMM :: Operand -> Operand -> RegUsage
516 usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
517 usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
518 usageMM _ _ = panic "X86.RegInfo.usageMM: no match"
519
520 -- 3 operand form; first operand Read; second Modified; third Modified
521 usageRMM :: Operand -> Operand -> Operand -> RegUsage
522 usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
523 usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
524 usageRMM _ _ _ = panic "X86.RegInfo.usageRMM: no match"
525
526 -- 1 operand form; operand Modified
527 usageM :: Operand -> RegUsage
528 usageM (OpReg reg) = mkRU [reg] [reg]
529 usageM (OpAddr ea) = mkRUR (use_EA ea [])
530 usageM _ = panic "X86.RegInfo.usageM: no match"
531
532 -- Registers defd when an operand is written.
533 def_W (OpReg reg) = [reg]
534 def_W (OpAddr _ ) = []
535 def_W _ = panic "X86.RegInfo.def_W: no match"
536
537 -- Registers used when an operand is read.
538 use_R (OpReg reg) tl = reg : tl
539 use_R (OpImm _) tl = tl
540 use_R (OpAddr ea) tl = use_EA ea tl
541
542 -- Registers used to compute an effective address.
543 use_EA (ImmAddr _ _) tl = tl
544 use_EA (AddrBaseIndex base index _) tl =
545 use_base base $! use_index index tl
546 where use_base (EABaseReg r) tl = r : tl
547 use_base _ tl = tl
548 use_index EAIndexNone tl = tl
549 use_index (EAIndex i _) tl = i : tl
550
551 mkRUR src = src' `seq` RU src' []
552 where src' = filter (interesting platform) src
553
554 mkRU src dst = src' `seq` dst' `seq` RU src' dst'
555 where src' = filter (interesting platform) src
556 dst' = filter (interesting platform) dst
557
558 -- | Is this register interesting for the register allocator?
559 interesting :: Platform -> Reg -> Bool
560 interesting _ (RegVirtual _) = True
561 interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
562 interesting _ (RegReal (RealRegPair{})) = panic "X86.interesting: no reg pairs on this arch"
563
564
565
566 -- | Applies the supplied function to all registers in instructions.
567 -- Typically used to change virtual registers to real registers.
568 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
569 x86_patchRegsOfInstr instr env
570 = case instr of
571 MOV fmt src dst -> patch2 (MOV fmt) src dst
572 CMOV cc fmt src dst -> CMOV cc fmt (patchOp src) (env dst)
573 MOVZxL fmt src dst -> patch2 (MOVZxL fmt) src dst
574 MOVSxL fmt src dst -> patch2 (MOVSxL fmt) src dst
575 LEA fmt src dst -> patch2 (LEA fmt) src dst
576 ADD fmt src dst -> patch2 (ADD fmt) src dst
577 ADC fmt src dst -> patch2 (ADC fmt) src dst
578 SUB fmt src dst -> patch2 (SUB fmt) src dst
579 SBB fmt src dst -> patch2 (SBB fmt) src dst
580 IMUL fmt src dst -> patch2 (IMUL fmt) src dst
581 IMUL2 fmt src -> patch1 (IMUL2 fmt) src
582 MUL fmt src dst -> patch2 (MUL fmt) src dst
583 MUL2 fmt src -> patch1 (MUL2 fmt) src
584 IDIV fmt op -> patch1 (IDIV fmt) op
585 DIV fmt op -> patch1 (DIV fmt) op
586 ADD_CC fmt src dst -> patch2 (ADD_CC fmt) src dst
587 SUB_CC fmt src dst -> patch2 (SUB_CC fmt) src dst
588 AND fmt src dst -> patch2 (AND fmt) src dst
589 OR fmt src dst -> patch2 (OR fmt) src dst
590 XOR fmt src dst -> patch2 (XOR fmt) src dst
591 NOT fmt op -> patch1 (NOT fmt) op
592 BSWAP fmt reg -> BSWAP fmt (env reg)
593 NEGI fmt op -> patch1 (NEGI fmt) op
594 SHL fmt imm dst -> patch1 (SHL fmt imm) dst
595 SAR fmt imm dst -> patch1 (SAR fmt imm) dst
596 SHR fmt imm dst -> patch1 (SHR fmt imm) dst
597 BT fmt imm src -> patch1 (BT fmt imm) src
598 TEST fmt src dst -> patch2 (TEST fmt) src dst
599 CMP fmt src dst -> patch2 (CMP fmt) src dst
600 PUSH fmt op -> patch1 (PUSH fmt) op
601 POP fmt op -> patch1 (POP fmt) op
602 SETCC cond op -> patch1 (SETCC cond) op
603 JMP op regs -> JMP (patchOp op) regs
604 JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl
605
606 GMOV src dst -> GMOV (env src) (env dst)
607 GLD fmt src dst -> GLD fmt (lookupAddr src) (env dst)
608 GST fmt src dst -> GST fmt (env src) (lookupAddr dst)
609
610 GLDZ dst -> GLDZ (env dst)
611 GLD1 dst -> GLD1 (env dst)
612
613 GFTOI src dst -> GFTOI (env src) (env dst)
614 GDTOI src dst -> GDTOI (env src) (env dst)
615
616 GITOF src dst -> GITOF (env src) (env dst)
617 GITOD src dst -> GITOD (env src) (env dst)
618
619 GDTOF src dst -> GDTOF (env src) (env dst)
620
621 GADD fmt s1 s2 dst -> GADD fmt (env s1) (env s2) (env dst)
622 GSUB fmt s1 s2 dst -> GSUB fmt (env s1) (env s2) (env dst)
623 GMUL fmt s1 s2 dst -> GMUL fmt (env s1) (env s2) (env dst)
624 GDIV fmt s1 s2 dst -> GDIV fmt (env s1) (env s2) (env dst)
625
626 GCMP fmt src1 src2 -> GCMP fmt (env src1) (env src2)
627 GABS fmt src dst -> GABS fmt (env src) (env dst)
628 GNEG fmt src dst -> GNEG fmt (env src) (env dst)
629 GSQRT fmt src dst -> GSQRT fmt (env src) (env dst)
630 GSIN fmt l1 l2 src dst -> GSIN fmt l1 l2 (env src) (env dst)
631 GCOS fmt l1 l2 src dst -> GCOS fmt l1 l2 (env src) (env dst)
632 GTAN fmt l1 l2 src dst -> GTAN fmt l1 l2 (env src) (env dst)
633
634 CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
635 CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
636 CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
637 CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
638 CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
639 CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
640 FDIV fmt src dst -> FDIV fmt (patchOp src) (patchOp dst)
641 SQRT fmt src dst -> SQRT fmt (patchOp src) (env dst)
642
643 CALL (Left _) _ -> instr
644 CALL (Right reg) p -> CALL (Right (env reg)) p
645
646 FETCHGOT reg -> FETCHGOT (env reg)
647 FETCHPC reg -> FETCHPC (env reg)
648
649 NOP -> instr
650 COMMENT _ -> instr
651 LOCATION {} -> instr
652 UNWIND {} -> instr
653 DELTA _ -> instr
654
655 JXX _ _ -> instr
656 JXX_GBL _ _ -> instr
657 CLTD _ -> instr
658
659 POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
660 LZCNT fmt src dst -> LZCNT fmt (patchOp src) (env dst)
661 TZCNT fmt src dst -> TZCNT fmt (patchOp src) (env dst)
662 PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst)
663 PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst)
664 BSF fmt src dst -> BSF fmt (patchOp src) (env dst)
665 BSR fmt src dst -> BSR fmt (patchOp src) (env dst)
666
667 PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)
668
669 LOCK i -> LOCK (x86_patchRegsOfInstr i env)
670 XADD fmt src dst -> patch2 (XADD fmt) src dst
671 CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst
672 MFENCE -> instr
673
674 _other -> panic "patchRegs: unrecognised instr"
675
676 where
677 patch1 :: (Operand -> a) -> Operand -> a
678 patch1 insn op = insn $! patchOp op
679 patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
680 patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
681
682 patchOp (OpReg reg) = OpReg $! env reg
683 patchOp (OpImm imm) = OpImm imm
684 patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
685
686 lookupAddr (ImmAddr imm off) = ImmAddr imm off
687 lookupAddr (AddrBaseIndex base index disp)
688 = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
689 where
690 lookupBase EABaseNone = EABaseNone
691 lookupBase EABaseRip = EABaseRip
692 lookupBase (EABaseReg r) = EABaseReg $! env r
693
694 lookupIndex EAIndexNone = EAIndexNone
695 lookupIndex (EAIndex r i) = (EAIndex $! env r) i
696
697
698 --------------------------------------------------------------------------------
699 x86_isJumpishInstr
700 :: Instr -> Bool
701
702 x86_isJumpishInstr instr
703 = case instr of
704 JMP{} -> True
705 JXX{} -> True
706 JXX_GBL{} -> True
707 JMP_TBL{} -> True
708 CALL{} -> True
709 _ -> False
710
711
712 x86_jumpDestsOfInstr
713 :: Instr
714 -> [BlockId]
715
716 x86_jumpDestsOfInstr insn
717 = case insn of
718 JXX _ id -> [id]
719 JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
720 _ -> []
721
722
723 x86_patchJumpInstr
724 :: Instr -> (BlockId -> BlockId) -> Instr
725
726 x86_patchJumpInstr insn patchF
727 = case insn of
728 JXX cc id -> JXX cc (patchF id)
729 JMP_TBL op ids section lbl
730 -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
731 _ -> insn
732 where
733 patchJumpDest f (DestBlockId id) = DestBlockId (f id)
734 patchJumpDest _ dest = dest
735
736
737
738
739
740 -- -----------------------------------------------------------------------------
741 -- | Make a spill instruction.
742 x86_mkSpillInstr
743 :: DynFlags
744 -> Reg -- register to spill
745 -> Int -- current stack delta
746 -> Int -- spill slot to use
747 -> Instr
748
749 x86_mkSpillInstr dflags reg delta slot
750 = let off = spillSlotToOffset platform slot - delta
751 in
752 case targetClassOfReg platform reg of
753 RcInteger -> MOV (archWordFormat is32Bit)
754 (OpReg reg) (OpAddr (spRel dflags off))
755 RcDouble -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -}
756 RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
757 _ -> panic "X86.mkSpillInstr: no match"
758 where platform = targetPlatform dflags
759 is32Bit = target32Bit platform
760
761 -- | Make a spill reload instruction.
762 x86_mkLoadInstr
763 :: DynFlags
764 -> Reg -- register to load
765 -> Int -- current stack delta
766 -> Int -- spill slot to use
767 -> Instr
768
769 x86_mkLoadInstr dflags reg delta slot
770 = let off = spillSlotToOffset platform slot - delta
771 in
772 case targetClassOfReg platform reg of
773 RcInteger -> MOV (archWordFormat is32Bit)
774 (OpAddr (spRel dflags off)) (OpReg reg)
775 RcDouble -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -}
776 RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
777 _ -> panic "X86.x86_mkLoadInstr"
778 where platform = targetPlatform dflags
779 is32Bit = target32Bit platform
780
781 spillSlotSize :: Platform -> Int
782 spillSlotSize platform = if is32Bit then 12 else 8
783 where is32Bit = target32Bit platform
784
785 maxSpillSlots :: DynFlags -> Int
786 maxSpillSlots dflags
787 = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize (targetPlatform dflags)) - 1
788 -- = 0 -- useful for testing allocMoreStack
789
790 -- number of bytes that the stack pointer should be aligned to
791 stackAlign :: Int
792 stackAlign = 16
793
794 -- Convert a spill slot number to a *byte* offset (always non-negative);
795 -- it is decided on a per-arch basis whether spilling happens above or below
796 -- the C stack pointer.
797 spillSlotToOffset :: Platform -> Int -> Int
798 spillSlotToOffset platform slot
799 = 64 + spillSlotSize platform * slot
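-- For example, on x86_64 (spillSlotSize = 8) slot 0 lives at byte offset 64,
-- slot 1 at 72, and so on; the spill/reload code above then subtracts the
-- current DELTA so the address is expressed relative to the real %rsp.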
800
801 --------------------------------------------------------------------------------
802
803 -- | See if this instruction is telling us the current C stack delta
804 x86_takeDeltaInstr
805 :: Instr
806 -> Maybe Int
807
808 x86_takeDeltaInstr instr
809 = case instr of
810 DELTA i -> Just i
811 _ -> Nothing
812
813
814 x86_isMetaInstr
815 :: Instr
816 -> Bool
817
818 x86_isMetaInstr instr
819 = case instr of
820 COMMENT{} -> True
821 LOCATION{} -> True
822 LDATA{} -> True
823 NEWBLOCK{} -> True
824 UNWIND{} -> True
825 DELTA{} -> True
826 _ -> False
827
828
829
830 -- | Make a reg-reg move instruction.
831 -- (On some architectures, e.g. SPARC v8, there are no instructions to move
832 -- directly between floating point and integer regs; such a move would have
833 -- to go via memory. On x86 we only ever move within one register class here.)
834 --
835 x86_mkRegRegMoveInstr
836 :: Platform
837 -> Reg
838 -> Reg
839 -> Instr
840
841 x86_mkRegRegMoveInstr platform src dst
842 = case targetClassOfReg platform src of
843 RcInteger -> case platformArch platform of
844 ArchX86 -> MOV II32 (OpReg src) (OpReg dst)
845 ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst)
846 _ -> panic "x86_mkRegRegMoveInstr: Bad arch"
847 RcDouble -> GMOV src dst
848 RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
849 _ -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
850
851 -- | Check whether an instruction represents a reg-reg move.
852 -- The register allocator attempts to eliminate reg->reg moves whenever it can,
853 -- by assigning the src and dest temporaries to the same real register.
854 --
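-- For example, "MOV II64 (OpReg src) (OpReg dst)" is reported as
-- Just (src, dst); if the allocator manages to put src and dst in the same
-- real register, the move can be dropped altogether.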
855 x86_takeRegRegMoveInstr
856 :: Instr
857 -> Maybe (Reg,Reg)
858
859 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
860 = Just (r1,r2)
861
862 x86_takeRegRegMoveInstr _ = Nothing
863
864
865 -- | Make an unconditional branch instruction.
866 x86_mkJumpInstr
867 :: BlockId
868 -> [Instr]
869
870 x86_mkJumpInstr id
871 = [JXX ALWAYS id]
872
873 -- Note [Windows stack layout]
874 -- | On most OSes the kernel will place a guard page after the current stack
875 -- page. If you allocate more than a page's worth you may jump over this
876 -- guard page. Not only is this a security issue, but on certain OSes such
877 -- as Windows a new page won't be allocated if you don't hit the guard. This
878 -- will cause a segfault or access fault.
879 --
880 -- This function determines whether the current allocation amount requires a probe.
881 -- On Windows (for now) we emit a call to __chkstk_ms for this. For other OSes
882 -- this is not yet implemented.
883 -- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk
884 -- The Windows stack looks like this:
885 --
886 -- +-------------------+
887 -- | SP |
888 -- +-------------------+
889 -- | |
890 -- | GUARD PAGE |
891 -- | |
892 -- +-------------------+
893 -- | |
894 -- | |
895 -- | UNMAPPED |
896 -- | |
897 -- | |
898 -- +-------------------+
899 --
900 -- In essence each allocation larger than a page needs to be chunked and
901 -- a probe emitted after each page allocation. You have to hit the guard
902 -- page so the kernel can map in the next page, otherwise you'll segfault.
903 --
904 needs_probe_call :: Platform -> Int -> Bool
905 needs_probe_call platform amount
906 = case platformOS platform of
907 OSMinGW32 -> case platformArch platform of
908 ArchX86 -> amount > (4 * 1024)
909 ArchX86_64 -> amount > (8 * 1024)
910 _ -> False
911 _ -> False
912
913 x86_mkStackAllocInstr
914 :: Platform
915 -> Int
916 -> [Instr]
917 x86_mkStackAllocInstr platform amount
918 = case platformOS platform of
919 OSMinGW32 ->
920 -- These will clobber AX but this should be ok because
921 --
922 -- 1. It is the first thing we do when entering the closure and AX is
923 -- a caller-saved register on Windows, on both x86_64 and x86.
924 --
925 -- 2. The closures are only entered via a call or longjmp in which case
926 -- there are no expectations for volatile registers.
927 --
928 -- 3. When the target is a local branch point it is re-targeted
929 -- after the dealloc, preserving #2. See note [extra spill slots].
930 --
931 -- We emit a call because the stack probes are quite involved and
932 -- would bloat code size a lot. GHC doesn't really have an -Os.
933 -- __chkstk is guaranteed to leave all nonvolatile registers and AX
934 -- untouched. It's part of the standard prologue code for any Windows
935 -- function dropping the stack more than a page.
936 -- See Note [Windows stack layout]
937 case platformArch platform of
938 ArchX86 | needs_probe_call platform amount ->
939 [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax)
940 , CALL (Left $ strImmLit "___chkstk_ms") [eax]
941 , SUB II32 (OpReg eax) (OpReg esp)
942 ]
943 | otherwise ->
944 [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
945 , TEST II32 (OpReg esp) (OpReg esp)
946 ]
947 ArchX86_64 | needs_probe_call platform amount ->
948 [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
949 , CALL (Left $ strImmLit "__chkstk_ms") [rax]
950 , SUB II64 (OpReg rax) (OpReg rsp)
951 ]
952 | otherwise ->
953 [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
954 , TEST II64 (OpReg rsp) (OpReg rsp)
955 ]
956 _ -> panic "x86_mkStackAllocInstr"
957 _ ->
958 case platformArch platform of
959 ArchX86 -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ]
960 ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ]
961 _ -> panic "x86_mkStackAllocInstr"
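
-- As an illustration, on Windows/x86_64 an allocation large enough to need a
-- probe (say 40960 bytes) comes out roughly as
--      movq $40960, %rax
--      call __chkstk_ms
--      subq %rax, %rsp
-- while a small allocation subtracts the immediate from %rsp directly
-- (followed by a testq on Windows).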
962
963 x86_mkStackDeallocInstr
964 :: Platform
965 -> Int
966 -> [Instr]
967 x86_mkStackDeallocInstr platform amount
968 = case platformArch platform of
969 ArchX86 -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)]
970 ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)]
971 _ -> panic "x86_mkStackDeallocInstr"
972
973 i386_insert_ffrees
974 :: [GenBasicBlock Instr]
975 -> [GenBasicBlock Instr]
976
977 i386_insert_ffrees blocks
978 | any (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ]
979 = map insertGFREEs blocks
980 | otherwise
981 = blocks
982 where
983 insertGFREEs (BasicBlock id insns)
984 = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns)
985
986 insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
987 insertBeforeNonlocalTransfers insert insns
988 = foldr p [] insns
989 where p insn r = case insn of
990 CALL _ _ -> insert : insn : r
991 JMP _ _ -> insert : insn : r
992 JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
993 _ -> insn : r
994
995
996 -- if you ever add a new FP insn to the fake x86 FP insn set,
997 -- you must update this too
998 is_G_instr :: Instr -> Bool
999 is_G_instr instr
1000 = case instr of
1001 GMOV{} -> True
1002 GLD{} -> True
1003 GST{} -> True
1004 GLDZ{} -> True
1005 GLD1{} -> True
1006 GFTOI{} -> True
1007 GDTOI{} -> True
1008 GITOF{} -> True
1009 GITOD{} -> True
1010 GDTOF{} -> True
1011 GADD{} -> True
1012 GDIV{} -> True
1013 GSUB{} -> True
1014 GMUL{} -> True
1015 GCMP{} -> True
1016 GABS{} -> True
1017 GNEG{} -> True
1018 GSQRT{} -> True
1019 GSIN{} -> True
1020 GCOS{} -> True
1021 GTAN{} -> True
1022 GFREE -> panic "is_G_instr: GFREE (!)"
1023 _ -> False
1024
1025
1026 --
1027 -- Note [extra spill slots]
1028 --
1029 -- If the register allocator used more spill slots than we have
1030 -- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
1031 -- C stack space on entry and exit from this proc. Therefore we
1032 -- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
1033 -- before every non-local jump.
1034 --
1035 -- This became necessary when the new codegen started bundling entire
1036 -- functions together into one proc, because the register allocator
1037 -- assigns a different stack slot to each virtual reg within a proc.
1038 -- To avoid using so many slots we could also:
1039 --
1040 -- - split up the proc into connected components before code generation
1041 --
1042 -- - rename the virtual regs, so that we re-use vreg names and hence
1043 -- stack slots for non-overlapping vregs.
1044 --
1045 -- Note that when a block is both a non-local entry point (with an
1046 -- info table) and a local branch target, we have to split it into
1047 -- two, like so:
1048 --
1049 -- <info table>
1050 -- L:
1051 -- <code>
1052 --
1053 -- becomes
1054 --
1055 -- <info table>
1056 -- L:
1057 -- subl $N, %rsp
1058 -- jmp Lnew
1059 -- Lnew:
1060 -- <code>
1061 --
1062 -- and all branches pointing to L are retargeted to point to Lnew.
1063 -- Otherwise, we would repeat the %rsp adjustment for each branch to
1064 -- L.
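--
-- Correspondingly, a dealloc is inserted before every non-local jump in the
-- block: for example a tail call
--
--        jmp *%rbx
--
-- becomes
--
--        addq $N, %rsp   (addl on x86)
--        jmp *%rbx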
1065 --
1066 allocMoreStack
1067 :: Platform
1068 -> Int
1069 -> NatCmmDecl statics X86.Instr.Instr
1070 -> UniqSM (NatCmmDecl statics X86.Instr.Instr, [(BlockId,BlockId)])
1071
1072 allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
1073 allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
1074 let entries = entryBlocks proc
1075
1076 uniqs <- replicateM (length entries) getUniqueM
1077
1078 let
1079 delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
1080 where x = slots * spillSlotSize platform -- sp delta
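-- e.g. 3 spill slots on x86_64 give x = 24, which rounds up to delta = 32,
-- keeping the stack pointer 16-byte (stackAlign) aligned.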
1081
1082 alloc = mkStackAllocInstr platform delta
1083 dealloc = mkStackDeallocInstr platform delta
1084
1085 retargetList = (zip entries (map mkBlockId uniqs))
1086
1087 new_blockmap :: LabelMap BlockId
1088 new_blockmap = mapFromList retargetList
1089
1090 insert_stack_insns (BasicBlock id insns)
1091 | Just new_blockid <- mapLookup id new_blockmap
1092 = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid]
1093 , BasicBlock new_blockid block' ]
1094 | otherwise
1095 = [ BasicBlock id block' ]
1096 where
1097 block' = foldr insert_dealloc [] insns
1098
1099 insert_dealloc insn r = case insn of
1100 JMP _ _ -> dealloc ++ (insn : r)
1101 JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
1102 _other -> x86_patchJumpInstr insn retarget : r
1103 where retarget b = fromMaybe b (mapLookup b new_blockmap)
1104
1105 new_code = concatMap insert_stack_insns code
1106 -- in
1107 return (CmmProc info lbl live (ListGraph new_code), retargetList)
1108
1109 data JumpDest = DestBlockId BlockId | DestImm Imm
1110
1111 -- Debug Instance
1112 instance Outputable JumpDest where
1113 ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
1114 ppr (DestImm _imm) = text "jd<imm>:noShow"
1115
1116
1117 getJumpDestBlockId :: JumpDest -> Maybe BlockId
1118 getJumpDestBlockId (DestBlockId bid) = Just bid
1119 getJumpDestBlockId _ = Nothing
1120
1121 canShortcut :: Instr -> Maybe JumpDest
1122 canShortcut (JXX ALWAYS id) = Just (DestBlockId id)
1123 canShortcut (JMP (OpImm imm) _) = Just (DestImm imm)
1124 canShortcut _ = Nothing
1125
1126
1127 -- This helper shortcuts a sequence of branches.
1128 -- The blockset helps avoid following cycles.
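-- For example, if block B consists solely of "JXX ALWAYS C" and the mapping
-- gives fn B = Just (DestBlockId C), then an instruction "JXX NE B" is
-- rewritten to "JXX NE C" (chains are followed transitively); a cycle such
-- as B -> B is detected via the seen set and left unchanged.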
1129 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
1130 shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn
1131 where
1132 shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr
1133 shortcutJump' fn seen insn@(JXX cc id) =
1134 if setMember id seen then insn
1135 else case fn id of
1136 Nothing -> insn
1137 Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
1138 Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm)
1139 where seen' = setInsert id seen
1140 shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
1141 let updateBlock (Just (DestBlockId bid)) =
1142 case fn bid of
1143 Nothing -> Just (DestBlockId bid )
1144 Just dest -> Just dest
1145 updateBlock dest = dest
1146 blocks' = map updateBlock blocks
1147 in JMP_TBL addr blocks' section tblId
1148 shortcutJump' _ _ other = other
1149
1150 -- Here because it knows about JumpDest
1151 shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
1152 shortcutStatics fn (align, Statics lbl statics)
1153 = (align, Statics lbl $ map (shortcutStatic fn) statics)
1154 -- we need to get the jump tables, so apply the mapping to the entries
1155 -- of a CmmData too.
1156
1157 shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
1158 shortcutLabel fn lab
1159 | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId
1160 | otherwise = lab
1161
1162 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
1163 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
1164 = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
1165 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
1166 = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
1167 -- slightly dodgy, we're ignoring the second label, but this
1168 -- works with the way we use CmmLabelDiffOff for jump tables now.
1169 shortcutStatic _ other_static
1170 = other_static
1171
1172 shortBlockId
1173 :: (BlockId -> Maybe JumpDest)
1174 -> UniqSet Unique
1175 -> BlockId
1176 -> CLabel
1177
1178 shortBlockId fn seen blockid =
1179 case (elementOfUniqSet uq seen, fn blockid) of
1180 (True, _) -> blockLbl blockid
1181 (_, Nothing) -> blockLbl blockid
1182 (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
1183 (_, Just (DestImm (ImmCLbl lbl))) -> lbl
1184 (_, _other) -> panic "shortBlockId"
1185 where uq = getUnique blockid