{-# LANGUAGE CPP, TypeFamilies #-}

-----------------------------------------------------------------------------
--
-- Machine-dependent assembly language
--
-- (c) The University of Glasgow 1993-2004
--
-----------------------------------------------------------------------------

module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..),
                  getJumpDestBlockId, canShortcut, shortcutStatics,
                  shortcutJump, i386_insert_ffrees, allocMoreStack,
                  maxSpillSlots, archWordFormat )
where

#include "HsVersions.h"
#include "nativeGen/NCG.h"

import GhcPrelude

import X86.Cond
import X86.Regs
import Instruction
import Format
import RegClass
import Reg
import TargetReg

import BlockId
import Hoopl.Collections
import Hoopl.Label
import CodeGen.Platform
import Cmm
import FastString
import Outputable
import Platform

import BasicTypes (Alignment)
import CLabel
import DynFlags
import UniqSet
import Unique
import UniqSupply
import Debug (UnwindTable)

import Control.Monad
import Data.Maybe (fromMaybe)

-- Format of a machine word (and hence of a memory address) on x86/x86_64:
-- 32 bits wide on x86, 64 bits wide on x86_64.
--
archWordFormat :: Bool -> Format
archWordFormat is32Bit
 | is32Bit   = II32
 | otherwise = II64

-- | Instruction instance for x86 instruction set.
instance Instruction Instr where
        regUsageOfInstr         = x86_regUsageOfInstr
        patchRegsOfInstr        = x86_patchRegsOfInstr
        isJumpishInstr          = x86_isJumpishInstr
        jumpDestsOfInstr        = x86_jumpDestsOfInstr
        patchJumpInstr          = x86_patchJumpInstr
        mkSpillInstr            = x86_mkSpillInstr
        mkLoadInstr             = x86_mkLoadInstr
        takeDeltaInstr          = x86_takeDeltaInstr
        isMetaInstr             = x86_isMetaInstr
        mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
        takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
        mkJumpInstr             = x86_mkJumpInstr
        mkStackAllocInstr       = x86_mkStackAllocInstr
        mkStackDeallocInstr     = x86_mkStackDeallocInstr


-- -----------------------------------------------------------------------------
-- Intel x86 instructions

{-
Intel, in their infinite wisdom, selected a stack model for floating
point registers on x86. That might have made sense back in 1979 --
nowadays we can see it for the nonsense it really is. A stack model
fits poorly with the existing nativeGen infrastructure, which assumes
flat integer and FP register sets. Prior to this commit, nativeGen
could not generate correct x86 FP code -- to do so would have meant
somehow working the register-stack paradigm into the register
allocator and spiller, which sounds very difficult.

We have decided to cheat, and go for a simple fix which requires no
infrastructure modifications, at the expense of generating ropey but
correct FP code. All notions of the x86 FP stack and its insns have
been removed. Instead, we pretend (to the instruction selector and
register allocator) that x86 has six floating point registers, %fake0
.. %fake5, which can be used in the usual flat manner. We further
claim that x86 has floating point instructions very similar to SPARC
and Alpha, that is, a simple 3-operand register-register arrangement.
Code generation and register allocation proceed on this basis.

When we come to print out the final assembly, our convenient fiction
is converted to dismal reality. Each fake instruction is
independently converted to a series of real x86 instructions.
%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
arithmetic operations, the two operands are pushed onto the top of the
FP stack, the operation done, and the result copied back into the
relevant register. There are only six %fake registers because 2 are
needed for the translation, and x86 has 8 in total.

The translation is inefficient but is simple and it works. A cleverer
translation would handle a sequence of insns, simulating the FP stack
contents, would not impose a fixed mapping from %fake to %st regs, and
hopefully could avoid most of the redundant reg-reg moves of the
current translation.

We might as well make use of whatever unique FP facilities Intel have
chosen to bless us with (let's not be churlish, after all).
Hence GLDZ and GLD1. Bwahahahahahahaha!
-}

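{-
A rough, purely illustrative sketch of what this means (the real expansion
lives in the pretty-printer, and the register assignments here are made up):
a single fake instruction such as

        GADD FF80 %fake0 %fake1 %fake2   -- %fake2 := %fake0 + %fake1

with %fake0..%fake2 mapped to %st(0)..%st(2), might come out as something
like

        fld   %st(0)    -- push src1
        fld   %st(2)    -- push src2 (one slot deeper after the first push)
        faddp           -- add them, popping one entry
        fstp  %st(3)    -- pop the sum into the slot backing the dst

i.e. push the two operands, do the operation, and copy the result back into
the register backing the destination, exactly as described above.
-}
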
{-
Note [x86 Floating point precision]

Intel's internal floating point registers are by default 80 bit
extended precision. This means that all operations done on values in
registers are done at 80 bits, and unless the intermediate values are
truncated to the appropriate size (32 or 64 bits) by storing in
memory, calculations in registers will give different results from
calculations which pass intermediate values in memory (eg. via
function calls).

One solution is to set the FPU into 64 bit precision mode. Some OSs
do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
that this will only affect 64-bit precision arithmetic; 32-bit
calculations will still be done at 64-bit precision in registers. So
it doesn't solve the whole problem.

There's also the issue of what the C library is expecting in terms of
precision. It seems to be the case that glibc on Linux expects the
FPU to be set to 80 bit precision, so setting it to 64 bit could have
unexpected effects. Changing the default could have undesirable
effects on other 3rd-party library code too, so the right thing would
be to save/restore the FPU control word across Haskell code if we were
to do this.

gcc's -ffloat-store gives consistent results by always storing the
results of floating-point calculations in memory, which works for both
32 and 64-bit precision. However, it only affects the values of
user-declared floating point variables in C, not intermediate results.
GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
flag).

Another problem is how to spill floating point registers in the
register allocator. Should we spill the whole 80 bits, or just 64?
On an OS which is set to 64 bit precision, spilling 64 is fine. On
Linux, spilling 64 bits will round the results of some operations.
This is what gcc does. Spilling at 80 bits requires taking up a full
128 bit slot (so we get alignment). We spill at 80-bits and ignore
the alignment problems.

In the future [edit: now available in GHC 7.0.1, with the -msse2
flag], we'll use the SSE registers for floating point. This requires
a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
float ops), which means P4 or Xeon and above. Using SSE will solve
all these problems, because the SSE registers use fixed 32 bit or 64
bit precision.

--SDM 1/2003
-}

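{-
A concrete, illustrative example of the discrepancy: with 64-bit doubles,
(1.0e308 * 10.0) / 10.0 overflows to +Infinity if the intermediate product
is truncated to 64 bits (e.g. by being stored to memory between the two
operations), but yields 1.0e308 if the whole computation stays in an 80-bit
x87 register, because 1.0e309 is representable in extended precision.
-}
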
data Instr
        -- comment pseudo-op
        = COMMENT FastString

        -- location pseudo-op (file, line, col, name)
        | LOCATION Int Int Int String

        -- some static data spat out during code
        -- generation. Will be extracted before
        -- pretty-printing.
        | LDATA   Section (Alignment, CmmStatics)

        -- start a new basic block. Useful during
        -- codegen, removed later. Preceding
        -- instruction should be a jump, as per the
        -- invariants for a BasicBlock (see Cmm).
        | NEWBLOCK BlockId

        -- unwinding information
        -- See Note [Unwinding information in the NCG].
        | UNWIND CLabel UnwindTable

        -- specify current stack offset for benefit of subsequent passes,
        -- e.g. when generating unwinding information.
        | DELTA  Int

        -- Moves.
        | MOV         Format Operand Operand
        | CMOV Cond   Format Operand Reg
        | MOVZxL      Format Operand Operand -- format is the size of operand 1
        | MOVSxL      Format Operand Operand -- format is the size of operand 1
        -- x86_64 note: plain mov into a 32-bit register always zero-extends
        -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
        -- don't affect the high bits of the register.

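        -- For example, on x86_64 "movl %eax, %eax" really does clear bits
        -- 32-63 of %rax, whereas "movb %al, %al" leaves the rest of %rax
        -- untouched.
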
        -- Load effective address (also a very useful three-operand add instruction :-)
        | LEA         Format Operand Operand

        -- Int Arithmetic.
        | ADD         Format Operand Operand
        | ADC         Format Operand Operand
        | SUB         Format Operand Operand
        | SBB         Format Operand Operand

        | MUL         Format Operand Operand
        | MUL2        Format Operand          -- %edx:%eax = operand * %rax
        | IMUL        Format Operand Operand  -- signed int mul
        | IMUL2       Format Operand          -- %edx:%eax = operand * %eax

        | DIV         Format Operand          -- eax := eax:edx/op, edx := eax:edx%op
        | IDIV        Format Operand          -- ditto, but signed

        -- Int Arithmetic, where the effects on the condition register
        -- are important. Used in specialized sequences such as MO_Add2.
        -- Do not rewrite these instructions to "equivalent" ones that
        -- have different effect on the condition register! (See #9013.)
        | ADD_CC      Format Operand Operand
        | SUB_CC      Format Operand Operand

        -- Simple bit-twiddling.
        | AND         Format Operand Operand
        | OR          Format Operand Operand
        | XOR         Format Operand Operand
        | NOT         Format Operand
        | NEGI        Format Operand          -- NEG instruction (name clash with Cond)
        | BSWAP       Format Reg

        -- Shifts (amount may be immediate or %cl only)
        | SHL         Format Operand{-amount-} Operand
        | SAR         Format Operand{-amount-} Operand
        | SHR         Format Operand{-amount-} Operand

        | BT          Format Imm Operand
        | NOP

        -- x86 Float Arithmetic.
        -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
        -- as single instructions right up until we spit them out.
        -- all the 3-operand fake fp insns are src1 src2 dst
        -- and furthermore are constrained to be fp regs only.
        -- IMPORTANT: keep is_G_insn up to date with any changes here
        | GMOV        Reg Reg           -- src(fpreg), dst(fpreg)
        | GLD         Format AddrMode Reg -- src, dst(fpreg)
        | GST         Format Reg AddrMode -- src(fpreg), dst

        | GLDZ        Reg               -- dst(fpreg)
        | GLD1        Reg               -- dst(fpreg)

        | GFTOI       Reg Reg           -- src(fpreg), dst(intreg)
        | GDTOI       Reg Reg           -- src(fpreg), dst(intreg)

        | GITOF       Reg Reg           -- src(intreg), dst(fpreg)
        | GITOD       Reg Reg           -- src(intreg), dst(fpreg)

        | GDTOF       Reg Reg           -- src(fpreg), dst(fpreg)

        | GADD        Format Reg Reg Reg -- src1, src2, dst
        | GDIV        Format Reg Reg Reg -- src1, src2, dst
        | GSUB        Format Reg Reg Reg -- src1, src2, dst
        | GMUL        Format Reg Reg Reg -- src1, src2, dst

        -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
        -- Compare src1 with src2; set the Zero flag iff the numbers are
        -- comparable and the comparison is True. Subsequent code must
        -- test the %eflags zero flag regardless of the supplied Cond.
        | GCMP        Cond Reg Reg      -- src1, src2

        | GABS        Format Reg Reg    -- src, dst
        | GNEG        Format Reg Reg    -- src, dst
        | GSQRT       Format Reg Reg    -- src, dst
        | GSIN        Format CLabel CLabel Reg Reg -- src, dst
        | GCOS        Format CLabel CLabel Reg Reg -- src, dst
        | GTAN        Format CLabel CLabel Reg Reg -- src, dst

        | GFREE       -- do ffree on all x86 regs; an ugly hack


        -- SSE2 floating point: we use a restricted set of the available SSE2
        -- instructions for floating-point.
        -- use MOV for moving (either movss or movsd (movlpd better?))
        | CVTSS2SD    Reg Reg            -- F32 to F64
        | CVTSD2SS    Reg Reg            -- F64 to F32
        | CVTTSS2SIQ  Format Operand Reg -- F32 to I32/I64 (with truncation)
        | CVTTSD2SIQ  Format Operand Reg -- F64 to I32/I64 (with truncation)
        | CVTSI2SS    Format Operand Reg -- I32/I64 to F32
        | CVTSI2SD    Format Operand Reg -- I32/I64 to F64

        -- use ADD, SUB, and SQRT for arithmetic. In all cases the operands
        -- are registers (Operand Reg).

        -- SSE2 floating-point division:
        | FDIV        Format Operand Operand -- divisor, dividend(dst)

        -- use CMP for comparisons. ucomiss and ucomisd instructions
        -- compare single/double prec floating point respectively.

        | SQRT        Format Operand Reg -- src, dst


        -- Comparison
        | TEST        Format Operand Operand
        | CMP         Format Operand Operand
        | SETCC       Cond Operand

        -- Stack Operations.
        | PUSH        Format Operand
        | POP         Format Operand
        -- both unused (SDM):
        --      | PUSHA
        --      | POPA

        -- Jumping around.
        | JMP         Operand [Reg] -- including live Regs at the call
        | JXX         Cond BlockId  -- includes unconditional branches
        | JXX_GBL     Cond Imm      -- non-local version of JXX
        -- Table jump
        | JMP_TBL     Operand           -- Address to jump to
                      [Maybe JumpDest]  -- Targets of the jump table
                      Section           -- Data section jump table should be put in
                      CLabel            -- Label of jump table
        | CALL        (Either Imm Reg) [Reg]

        -- Other things.
        | CLTD Format            -- sign extend %eax into %edx:%eax

        | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
                                 -- pretty-prints as
                                 --       call 1f
                                 -- 1:    popl %reg
                                 --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
        | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
                                 -- pretty-prints as
                                 --       call 1f
                                 -- 1:    popl %reg

        -- bit counting instructions
        | POPCNT      Format Operand Reg -- [SSE4.2] count number of bits set to 1
        | BSF         Format Operand Reg -- bit scan forward
        | BSR         Format Operand Reg -- bit scan reverse

        -- bit manipulation instructions
        | PDEP        Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask
        | PEXT        Format Operand Operand Reg -- [BMI2] extract bits from the specified mask

        -- prefetch
        | PREFETCH  PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
                                                   -- variant can be NTA, Lvl0, Lvl1, or Lvl2

        | LOCK        Instr                  -- lock prefix
        | XADD        Format Operand Operand -- src (r), dst (r/m)
        | CMPXCHG     Format Operand Operand -- src (r), dst (r/m), eax implicit
        | MFENCE

data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2


data Operand
        = OpReg  Reg            -- register
        | OpImm  Imm            -- immediate value
        | OpAddr AddrMode       -- memory reference



-- | Returns which registers are read and written as a (read, written)
-- pair.
x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
x86_regUsageOfInstr platform instr
 = case instr of
    MOV    _ src dst    -> usageRW src dst
    CMOV _ _ src dst    -> mkRU (use_R src [dst]) [dst]
    MOVZxL _ src dst    -> usageRW src dst
    MOVSxL _ src dst    -> usageRW src dst
    LEA    _ src dst    -> usageRW src dst
    ADD    _ src dst    -> usageRM src dst
    ADC    _ src dst    -> usageRM src dst
    SUB    _ src dst    -> usageRM src dst
    SBB    _ src dst    -> usageRM src dst
    IMUL   _ src dst    -> usageRM src dst

    -- Result of IMULB will be just in %ax
    IMUL2  II8 src      -> mkRU (eax:use_R src []) [eax]
    -- Result of IMUL for wider values will be split between %dx/%edx/%rdx and
    -- %ax/%eax/%rax.
    IMUL2  _ src        -> mkRU (eax:use_R src []) [eax,edx]

    MUL    _ src dst    -> usageRM src dst
    MUL2   _ src        -> mkRU (eax:use_R src []) [eax,edx]
    DIV    _ op         -> mkRU (eax:edx:use_R op []) [eax,edx]
    IDIV   _ op         -> mkRU (eax:edx:use_R op []) [eax,edx]
    ADD_CC _ src dst    -> usageRM src dst
    SUB_CC _ src dst    -> usageRM src dst
    AND    _ src dst    -> usageRM src dst
    OR     _ src dst    -> usageRM src dst

    XOR    _ (OpReg src) (OpReg dst)
        | src == dst    -> mkRU [] [dst]

    XOR    _ src dst    -> usageRM src dst
    NOT    _ op         -> usageM op
    BSWAP  _ reg        -> mkRU [reg] [reg]
    NEGI   _ op         -> usageM op
    SHL    _ imm dst    -> usageRM imm dst
    SAR    _ imm dst    -> usageRM imm dst
    SHR    _ imm dst    -> usageRM imm dst
    BT     _ _ src      -> mkRUR (use_R src [])

    PUSH   _ op         -> mkRUR (use_R op [])
    POP    _ op         -> mkRU [] (def_W op)
    TEST   _ src dst    -> mkRUR (use_R src $! use_R dst [])
    CMP    _ src dst    -> mkRUR (use_R src $! use_R dst [])
    SETCC  _ op         -> mkRU [] (def_W op)
    JXX    _ _          -> mkRU [] []
    JXX_GBL _ _         -> mkRU [] []
    JMP     op regs     -> mkRUR (use_R op regs)
    JMP_TBL op _ _ _    -> mkRUR (use_R op [])
    CALL (Left _)  params   -> mkRU params (callClobberedRegs platform)
    CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
    CLTD   _            -> mkRU [eax] [edx]
    NOP                 -> mkRU [] []

    GMOV   src dst      -> mkRU [src] [dst]
    GLD    _ src dst    -> mkRU (use_EA src []) [dst]
    GST    _ src dst    -> mkRUR (src : use_EA dst [])

    GLDZ   dst          -> mkRU [] [dst]
    GLD1   dst          -> mkRU [] [dst]

    GFTOI  src dst      -> mkRU [src] [dst]
    GDTOI  src dst      -> mkRU [src] [dst]

    GITOF  src dst      -> mkRU [src] [dst]
    GITOD  src dst      -> mkRU [src] [dst]

    GDTOF  src dst      -> mkRU [src] [dst]

    GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
    GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]

    GCMP   _ src1 src2  -> mkRUR [src1,src2]
    GABS   _ src dst    -> mkRU [src] [dst]
    GNEG   _ src dst    -> mkRU [src] [dst]
    GSQRT  _ src dst    -> mkRU [src] [dst]
    GSIN   _ _ _ src dst -> mkRU [src] [dst]
    GCOS   _ _ _ src dst -> mkRU [src] [dst]
    GTAN   _ _ _ src dst -> mkRU [src] [dst]

    CVTSS2SD   src dst   -> mkRU [src] [dst]
    CVTSD2SS   src dst   -> mkRU [src] [dst]
    CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SS   _ src dst -> mkRU (use_R src []) [dst]
    CVTSI2SD   _ src dst -> mkRU (use_R src []) [dst]
    FDIV       _ src dst -> usageRM src dst
    SQRT       _ src dst -> mkRU (use_R src []) [dst]

    FETCHGOT reg        -> mkRU [] [reg]
    FETCHPC  reg        -> mkRU [] [reg]

    COMMENT _           -> noUsage
    LOCATION{}          -> noUsage
    UNWIND{}            -> noUsage
    DELTA   _           -> noUsage

    POPCNT _ src dst    -> mkRU (use_R src []) [dst]
    BSF    _ src dst    -> mkRU (use_R src []) [dst]
    BSR    _ src dst    -> mkRU (use_R src []) [dst]

    PDEP   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
    PEXT   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]

    -- note: there might be a better way to do this
    PREFETCH _ _ src    -> mkRU (use_R src []) []
    LOCK i              -> x86_regUsageOfInstr platform i
    XADD _ src dst      -> usageMM src dst
    CMPXCHG _ src dst   -> usageRMM src dst (OpReg eax)
    MFENCE              -> noUsage

    _other              -> panic "regUsage: unrecognised instr"
 where
    -- # Definitions
    --
    -- Written: If the operand is a register, it's written. If it's an
    -- address, registers mentioned in the address are read.
    --
    -- Modified: If the operand is a register, it's both read and
    -- written. If it's an address, registers mentioned in the address
    -- are read.

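    -- As an informal example: in "add %r1, %r2" the destination %r2 is both
    -- read and written (usageRM below), whereas in "mov %r1, 8(%r2)" the
    -- destination is an address, so %r2 is merely read (usageRW below).
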
    -- 2 operand form; first operand Read; second Written
    usageRW :: Operand -> Operand -> RegUsage
    usageRW op (OpReg reg)      = mkRU (use_R op []) [reg]
    usageRW op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
    usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"

    -- 2 operand form; first operand Read; second Modified
    usageRM :: Operand -> Operand -> RegUsage
    usageRM op (OpReg reg)      = mkRU (use_R op [reg]) [reg]
    usageRM op (OpAddr ea)      = mkRUR (use_R op $! use_EA ea [])
    usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"

    -- 2 operand form; first operand Modified; second Modified
    usageMM :: Operand -> Operand -> RegUsage
    usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
    usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
    usageMM _ _                     = panic "X86.RegInfo.usageMM: no match"

    -- 3 operand form; first operand Read; second Modified; third Modified
    usageRMM :: Operand -> Operand -> Operand -> RegUsage
    usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
    usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
    usageRMM _ _ _                               = panic "X86.RegInfo.usageRMM: no match"

    -- 1 operand form; operand Modified
    usageM :: Operand -> RegUsage
    usageM (OpReg reg)          = mkRU [reg] [reg]
    usageM (OpAddr ea)          = mkRUR (use_EA ea [])
    usageM _                    = panic "X86.RegInfo.usageM: no match"

    -- Registers defd when an operand is written.
    def_W (OpReg reg)           = [reg]
    def_W (OpAddr _ )           = []
    def_W _                     = panic "X86.RegInfo.def_W: no match"

    -- Registers used when an operand is read.
    use_R (OpReg reg)  tl = reg : tl
    use_R (OpImm _)    tl = tl
    use_R (OpAddr ea)  tl = use_EA ea tl

    -- Registers used to compute an effective address.
    use_EA (ImmAddr _ _) tl = tl
    use_EA (AddrBaseIndex base index _) tl =
        use_base base $! use_index index tl
        where use_base (EABaseReg r)  tl = r : tl
              use_base _              tl = tl
              use_index EAIndexNone   tl = tl
              use_index (EAIndex i _) tl = i : tl

    mkRUR src = src' `seq` RU src' []
        where src' = filter (interesting platform) src

    mkRU src dst = src' `seq` dst' `seq` RU src' dst'
        where src' = filter (interesting platform) src
              dst' = filter (interesting platform) dst

-- | Is this register interesting for the register allocator?
interesting :: Platform -> Reg -> Bool
interesting _        (RegVirtual _)              = True
interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
interesting _        (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"



-- | Applies the supplied function to all registers in instructions.
-- Typically used to change virtual registers to real registers.
x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
x86_patchRegsOfInstr instr env
 = case instr of
    MOV  fmt src dst     -> patch2 (MOV  fmt) src dst
    CMOV cc fmt src dst  -> CMOV cc fmt (patchOp src) (env dst)
    MOVZxL fmt src dst   -> patch2 (MOVZxL fmt) src dst
    MOVSxL fmt src dst   -> patch2 (MOVSxL fmt) src dst
    LEA  fmt src dst     -> patch2 (LEA  fmt) src dst
    ADD  fmt src dst     -> patch2 (ADD  fmt) src dst
    ADC  fmt src dst     -> patch2 (ADC  fmt) src dst
    SUB  fmt src dst     -> patch2 (SUB  fmt) src dst
    SBB  fmt src dst     -> patch2 (SBB  fmt) src dst
    IMUL fmt src dst     -> patch2 (IMUL fmt) src dst
    IMUL2 fmt src        -> patch1 (IMUL2 fmt) src
    MUL fmt src dst      -> patch2 (MUL fmt) src dst
    MUL2 fmt src         -> patch1 (MUL2 fmt) src
    IDIV fmt op          -> patch1 (IDIV fmt) op
    DIV fmt op           -> patch1 (DIV fmt) op
    ADD_CC fmt src dst   -> patch2 (ADD_CC fmt) src dst
    SUB_CC fmt src dst   -> patch2 (SUB_CC fmt) src dst
    AND fmt src dst      -> patch2 (AND fmt) src dst
    OR  fmt src dst      -> patch2 (OR  fmt) src dst
    XOR fmt src dst      -> patch2 (XOR fmt) src dst
    NOT fmt op           -> patch1 (NOT fmt) op
    BSWAP fmt reg        -> BSWAP fmt (env reg)
    NEGI fmt op          -> patch1 (NEGI fmt) op
    SHL fmt imm dst      -> patch1 (SHL fmt imm) dst
    SAR fmt imm dst      -> patch1 (SAR fmt imm) dst
    SHR fmt imm dst      -> patch1 (SHR fmt imm) dst
    BT  fmt imm src      -> patch1 (BT  fmt imm) src
    TEST fmt src dst     -> patch2 (TEST fmt) src dst
    CMP  fmt src dst     -> patch2 (CMP  fmt) src dst
    PUSH fmt op          -> patch1 (PUSH fmt) op
    POP  fmt op          -> patch1 (POP  fmt) op
    SETCC cond op        -> patch1 (SETCC cond) op
    JMP op regs          -> JMP (patchOp op) regs
    JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl

    GMOV src dst         -> GMOV (env src) (env dst)
    GLD  fmt src dst     -> GLD fmt (lookupAddr src) (env dst)
    GST  fmt src dst     -> GST fmt (env src) (lookupAddr dst)

    GLDZ dst             -> GLDZ (env dst)
    GLD1 dst             -> GLD1 (env dst)

    GFTOI src dst        -> GFTOI (env src) (env dst)
    GDTOI src dst        -> GDTOI (env src) (env dst)

    GITOF src dst        -> GITOF (env src) (env dst)
    GITOD src dst        -> GITOD (env src) (env dst)

    GDTOF src dst        -> GDTOF (env src) (env dst)

    GADD fmt s1 s2 dst   -> GADD fmt (env s1) (env s2) (env dst)
    GSUB fmt s1 s2 dst   -> GSUB fmt (env s1) (env s2) (env dst)
    GMUL fmt s1 s2 dst   -> GMUL fmt (env s1) (env s2) (env dst)
    GDIV fmt s1 s2 dst   -> GDIV fmt (env s1) (env s2) (env dst)

    GCMP fmt src1 src2   -> GCMP fmt (env src1) (env src2)
    GABS fmt src dst     -> GABS fmt (env src) (env dst)
    GNEG fmt src dst     -> GNEG fmt (env src) (env dst)
    GSQRT fmt src dst    -> GSQRT fmt (env src) (env dst)
    GSIN fmt l1 l2 src dst -> GSIN fmt l1 l2 (env src) (env dst)
    GCOS fmt l1 l2 src dst -> GCOS fmt l1 l2 (env src) (env dst)
    GTAN fmt l1 l2 src dst -> GTAN fmt l1 l2 (env src) (env dst)

    CVTSS2SD src dst     -> CVTSS2SD (env src) (env dst)
    CVTSD2SS src dst     -> CVTSD2SS (env src) (env dst)
    CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
    CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
    CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
    CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
    FDIV fmt src dst     -> FDIV fmt (patchOp src) (patchOp dst)
    SQRT fmt src dst     -> SQRT fmt (patchOp src) (env dst)

    CALL (Left _)  _     -> instr
    CALL (Right reg) p   -> CALL (Right (env reg)) p

    FETCHGOT reg         -> FETCHGOT (env reg)
    FETCHPC  reg         -> FETCHPC  (env reg)

    NOP                  -> instr
    COMMENT _            -> instr
    LOCATION {}          -> instr
    UNWIND {}            -> instr
    DELTA _              -> instr

    JXX _ _              -> instr
    JXX_GBL _ _          -> instr
    CLTD _               -> instr

    POPCNT fmt src dst      -> POPCNT fmt (patchOp src) (env dst)
    PDEP   fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst)
    PEXT   fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst)
    BSF    fmt src dst      -> BSF fmt (patchOp src) (env dst)
    BSR    fmt src dst      -> BSR fmt (patchOp src) (env dst)

    PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)

    LOCK i               -> LOCK (x86_patchRegsOfInstr i env)
    XADD fmt src dst     -> patch2 (XADD fmt) src dst
    CMPXCHG fmt src dst  -> patch2 (CMPXCHG fmt) src dst
    MFENCE               -> instr

    _other               -> panic "patchRegs: unrecognised instr"

 where
    patch1 :: (Operand -> a) -> Operand -> a
    patch1 insn op      = insn $! patchOp op
    patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
    patch2 insn src dst = (insn $! patchOp src) $! patchOp dst

    patchOp (OpReg  reg) = OpReg $! env reg
    patchOp (OpImm  imm) = OpImm imm
    patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea

    lookupAddr (ImmAddr imm off) = ImmAddr imm off
    lookupAddr (AddrBaseIndex base index disp)
      = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
      where
        lookupBase EABaseNone       = EABaseNone
        lookupBase EABaseRip        = EABaseRip
        lookupBase (EABaseReg r)    = EABaseReg $! env r

        lookupIndex EAIndexNone     = EAIndexNone
        lookupIndex (EAIndex r i)   = (EAIndex $! env r) i


--------------------------------------------------------------------------------
x86_isJumpishInstr
        :: Instr -> Bool

x86_isJumpishInstr instr
 = case instr of
        JMP{}           -> True
        JXX{}           -> True
        JXX_GBL{}       -> True
        JMP_TBL{}       -> True
        CALL{}          -> True
        _               -> False


x86_jumpDestsOfInstr
        :: Instr
        -> [BlockId]

x86_jumpDestsOfInstr insn
 = case insn of
        JXX _ id          -> [id]
        JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
        _                 -> []


x86_patchJumpInstr
        :: Instr -> (BlockId -> BlockId) -> Instr

x86_patchJumpInstr insn patchF
 = case insn of
        JXX cc id       -> JXX cc (patchF id)
        JMP_TBL op ids section lbl
          -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
        _               -> insn
    where
        patchJumpDest f (DestBlockId id) = DestBlockId (f id)
        patchJumpDest _ dest             = dest





-- -----------------------------------------------------------------------------
-- | Make a spill instruction.
x86_mkSpillInstr
    :: DynFlags
    -> Reg      -- register to spill
    -> Int      -- current stack delta
    -> Int      -- spill slot to use
    -> Instr

x86_mkSpillInstr dflags reg delta slot
  = let off = spillSlotToOffset platform slot - delta
    in
    case targetClassOfReg platform reg of
        RcInteger   -> MOV (archWordFormat is32Bit)
                           (OpReg reg) (OpAddr (spRel dflags off))
        RcDouble    -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -}
        RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
        _           -> panic "X86.mkSpillInstr: no match"
    where platform = targetPlatform dflags
          is32Bit  = target32Bit platform

-- | Make a spill reload instruction.
x86_mkLoadInstr
    :: DynFlags
    -> Reg      -- register to load
    -> Int      -- current stack delta
    -> Int      -- spill slot to use
    -> Instr

x86_mkLoadInstr dflags reg delta slot
  = let off = spillSlotToOffset platform slot - delta
    in
    case targetClassOfReg platform reg of
        RcInteger   -> MOV (archWordFormat is32Bit)
                           (OpAddr (spRel dflags off)) (OpReg reg)
        RcDouble    -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -}
        RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
        _           -> panic "X86.x86_mkLoadInstr"
    where platform = targetPlatform dflags
          is32Bit  = target32Bit platform

spillSlotSize :: Platform -> Int
spillSlotSize platform = if is32Bit then 12 else 8
    where is32Bit = target32Bit platform

maxSpillSlots :: DynFlags -> Int
maxSpillSlots dflags
    = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize (targetPlatform dflags)) - 1
    -- = 0 -- useful for testing allocMoreStack

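-- As a purely illustrative figure: assuming rESERVED_C_STACK_BYTES is 16k
-- and the 8-byte spill slots used on x86_64, this works out to
-- (16384 - 64) `div` 8 - 1 = 2039 usable spill slots.
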
-- number of bytes that the stack pointer should be aligned to
stackAlign :: Int
stackAlign = 16

-- convert a spill slot number to a *byte* offset, with no sign:
-- decide on a per arch basis whether you are spilling above or below
-- the C stack pointer.
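-- For example, with the 8-byte slots used on x86_64, slot 0 lives at byte
-- offset 64, slot 1 at offset 72, and so on (32-bit targets use 12-byte
-- slots instead).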
spillSlotToOffset :: Platform -> Int -> Int
spillSlotToOffset platform slot
   = 64 + spillSlotSize platform * slot

--------------------------------------------------------------------------------

-- | See if this instruction is telling us the current C stack delta
x86_takeDeltaInstr
        :: Instr
        -> Maybe Int

x86_takeDeltaInstr instr
 = case instr of
        DELTA i         -> Just i
        _               -> Nothing


x86_isMetaInstr
        :: Instr
        -> Bool

x86_isMetaInstr instr
 = case instr of
        COMMENT{}       -> True
        LOCATION{}      -> True
        LDATA{}         -> True
        NEWBLOCK{}      -> True
        UNWIND{}        -> True
        DELTA{}         -> True
        _               -> False



-- | Make a reg-reg move instruction.
-- On SPARC v8 there are no instructions to move directly between
-- floating point and integer regs. If we need to do that then we
-- have to go via memory.
--
x86_mkRegRegMoveInstr
    :: Platform
    -> Reg
    -> Reg
    -> Instr

x86_mkRegRegMoveInstr platform src dst
 = case targetClassOfReg platform src of
        RcInteger -> case platformArch platform of
                     ArchX86    -> MOV II32 (OpReg src) (OpReg dst)
                     ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst)
                     _          -> panic "x86_mkRegRegMoveInstr: Bad arch"
        RcDouble    -> GMOV src dst
        RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
        _           -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"

-- | Check whether an instruction represents a reg-reg move.
-- The register allocator attempts to eliminate reg->reg moves whenever it can,
-- by assigning the src and dest temporaries to the same real register.
--
x86_takeRegRegMoveInstr
    :: Instr
    -> Maybe (Reg,Reg)

x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
        = Just (r1,r2)

x86_takeRegRegMoveInstr _  = Nothing


-- | Make an unconditional branch instruction.
x86_mkJumpInstr
    :: BlockId
    -> [Instr]

x86_mkJumpInstr id
        = [JXX ALWAYS id]

-- Note [Windows stack layout]
-- | On most OSes the kernel will place a guard page after the current stack
-- page. If you allocate more than a page's worth you may jump over this
-- guard page. Not only is this a security issue, but on certain OSes such
-- as Windows a new page won't be allocated if you don't hit the guard. This
-- will cause a segfault or access fault.
--
-- This function defines if the current allocation amount requires a probe.
-- On Windows (for now) we emit a call to _chkstk for this. For other OSes
-- this is not yet implemented.
-- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk
-- The Windows stack looks like this:
--
--        +-------------------+
--        |        SP         |
--        +-------------------+
--        |                   |
--        |    GUARD PAGE     |
--        |                   |
--        +-------------------+
--        |                   |
--        |                   |
--        |     UNMAPPED      |
--        |                   |
--        |                   |
--        +-------------------+
--
-- In essence each allocation larger than a page size needs to be chunked and
-- a probe emitted after each page allocation. You have to hit the guard
-- page so the kernel can map in the next page, otherwise you'll segfault.
--
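-- As a rough sketch of what this means for the generated code: a large
-- allocation of, say, 40000 bytes on x86_64 Windows is emitted as
--
--        movq  $40000, %rax
--        call  __chkstk_ms
--        subq  %rax, %rsp
--
-- where __chkstk_ms walks the allocation one page at a time, touching each
-- page so the guard page is always hit in order. Smaller allocations simply
-- subtract from %rsp directly (see x86_mkStackAllocInstr below).
--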
needs_probe_call :: Platform -> Int -> Bool
needs_probe_call platform amount
  = case platformOS platform of
     OSMinGW32 -> case platformArch platform of
                    ArchX86    -> amount > (4 * 1024)
                    ArchX86_64 -> amount > (8 * 1024)
                    _          -> False
     _         -> False

x86_mkStackAllocInstr
        :: Platform
        -> Int
        -> [Instr]
x86_mkStackAllocInstr platform amount
  = case platformOS platform of
      OSMinGW32 ->
        -- These will clobber AX but this should be ok because
        --
        -- 1. It is the first thing we do when entering the closure and AX is
        --    a caller-saved register on Windows both on x86_64 and x86.
        --
        -- 2. The closures are only entered via a call or longjmp in which case
        --    there are no expectations for volatile registers.
        --
        -- 3. When the target is a local branch point it is re-targeted
        --    after the dealloc, preserving #2. See note [extra spill slots].
        --
        -- We emit a call because the stack probes are quite involved and
        -- would bloat code size a lot. GHC doesn't really have an -Os.
        -- __chkstk is guaranteed to leave all nonvolatile registers and AX
        -- untouched. It's part of the standard prologue code for any Windows
        -- function dropping the stack more than a page.
        -- See Note [Windows stack layout]
        case platformArch platform of
            ArchX86 | needs_probe_call platform amount ->
                       [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax)
                       , CALL (Left $ strImmLit "___chkstk_ms") [eax]
                       , SUB II32 (OpReg eax) (OpReg esp)
                       ]
                    | otherwise ->
                       [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
                       , TEST II32 (OpReg esp) (OpReg esp)
                       ]
            ArchX86_64 | needs_probe_call platform amount ->
                          [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
                          , CALL (Left $ strImmLit "__chkstk_ms") [rax]
                          , SUB II64 (OpReg rax) (OpReg rsp)
                          ]
                       | otherwise ->
                          [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
                          , TEST II64 (OpReg rsp) (OpReg rsp)
                          ]
            _ -> panic "x86_mkStackAllocInstr"
      _ ->
        case platformArch platform of
          ArchX86    -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ]
          ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ]
          _          -> panic "x86_mkStackAllocInstr"

x86_mkStackDeallocInstr
        :: Platform
        -> Int
        -> [Instr]
x86_mkStackDeallocInstr platform amount
  = case platformArch platform of
      ArchX86    -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)]
      ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)]
      _          -> panic "x86_mkStackDeallocInstr"

i386_insert_ffrees
        :: [GenBasicBlock Instr]
        -> [GenBasicBlock Instr]

i386_insert_ffrees blocks
   | any (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ]
   = map insertGFREEs blocks
   | otherwise
   = blocks
 where
   insertGFREEs (BasicBlock id insns)
     = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns)

insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
insertBeforeNonlocalTransfers insert insns
     = foldr p [] insns
     where p insn r = case insn of
                        CALL _ _    -> insert : insn : r
                        JMP _ _     -> insert : insn : r
                        JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
                        _           -> insn : r


-- if you ever add a new FP insn to the fake x86 FP insn set,
-- you must update this too
is_G_instr :: Instr -> Bool
is_G_instr instr
   = case instr of
        GMOV{}          -> True
        GLD{}           -> True
        GST{}           -> True
        GLDZ{}          -> True
        GLD1{}          -> True
        GFTOI{}         -> True
        GDTOI{}         -> True
        GITOF{}         -> True
        GITOD{}         -> True
        GDTOF{}         -> True
        GADD{}          -> True
        GDIV{}          -> True
        GSUB{}          -> True
        GMUL{}          -> True
        GCMP{}          -> True
        GABS{}          -> True
        GNEG{}          -> True
        GSQRT{}         -> True
        GSIN{}          -> True
        GCOS{}          -> True
        GTAN{}          -> True
        GFREE           -> panic "is_G_instr: GFREE (!)"
        _               -> False


--
-- Note [extra spill slots]
--
-- If the register allocator used more spill slots than we have
-- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
-- C stack space on entry and exit from this proc. Therefore we
-- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
-- before every non-local jump.
--
-- This became necessary when the new codegen started bundling entire
-- functions together into one proc, because the register allocator
-- assigns a different stack slot to each virtual reg within a proc.
-- To avoid using so many slots we could also:
--
--   - split up the proc into connected components before code generation
--
--   - rename the virtual regs, so that we re-use vreg names and hence
--     stack slots for non-overlapping vregs.
--
-- Note that when a block is both a non-local entry point (with an
-- info table) and a local branch target, we have to split it into
-- two, like so:
--
--    <info table>
--    L:
--       <code>
--
-- becomes
--
--    <info table>
--    L:
--       subl $N, %rsp
--       jmp Lnew
--    Lnew:
--       <code>
--
-- and all branches pointing to L are retargeted to point to Lnew.
-- Otherwise, we would repeat the $rsp adjustment for each branch to
-- L.
--
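-- As a small worked example of the delta computation in allocMoreStack
-- below: 13 extra spill slots of 8 bytes each (x86_64) give x = 104 bytes,
-- which is rounded up to delta = 112 so that the stack pointer stays
-- aligned to stackAlign (16) bytes.
--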
allocMoreStack
  :: Platform
  -> Int
  -> NatCmmDecl statics X86.Instr.Instr
  -> UniqSM (NatCmmDecl statics X86.Instr.Instr, [(BlockId,BlockId)])

allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
    let entries = entryBlocks proc

    uniqs <- replicateM (length entries) getUniqueM

    let
      delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
        where x = slots * spillSlotSize platform -- sp delta

      alloc   = mkStackAllocInstr   platform delta
      dealloc = mkStackDeallocInstr platform delta

      retargetList = (zip entries (map mkBlockId uniqs))

      new_blockmap :: LabelMap BlockId
      new_blockmap = mapFromList retargetList

      insert_stack_insns (BasicBlock id insns)
         | Just new_blockid <- mapLookup id new_blockmap
         = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid]
           , BasicBlock new_blockid block' ]
         | otherwise
         = [ BasicBlock id block' ]
         where
           block' = foldr insert_dealloc [] insns

           insert_dealloc insn r = case insn of
              JMP _ _     -> dealloc ++ (insn : r)
              JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
              _other      -> x86_patchJumpInstr insn retarget : r
               where retarget b = fromMaybe b (mapLookup b new_blockmap)

      new_code = concatMap insert_stack_insns code
    -- in
    return (CmmProc info lbl live (ListGraph new_code), retargetList)

data JumpDest = DestBlockId BlockId | DestImm Imm

-- Debug Instance
instance Outputable JumpDest where
  ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
  ppr (DestImm _imm)    = text "jd<imm>:noShow"


getJumpDestBlockId :: JumpDest -> Maybe BlockId
getJumpDestBlockId (DestBlockId bid) = Just bid
getJumpDestBlockId _                 = Nothing

canShortcut :: Instr -> Maybe JumpDest
canShortcut (JXX ALWAYS id)     = Just (DestBlockId id)
canShortcut (JMP (OpImm imm) _) = Just (DestImm imm)
canShortcut _                   = Nothing


-- This helper shortcuts a sequence of branches.
-- The blockset helps avoid following cycles.
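-- For example, if block A ends in "JXX ALWAYS B" and block B consists of
-- nothing but "JXX ALWAYS C", the jump in A can be retargeted straight to
-- C; the 'seen' set stops us chasing a cycle such as B -> C -> B forever.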
shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn
  where
    shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr
    shortcutJump' fn seen insn@(JXX cc id) =
        if setMember id seen then insn
        else case fn id of
            Nothing                -> insn
            Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
            Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
        where seen' = setInsert id seen
    shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
        let updateBlock (Just (DestBlockId bid)) =
                case fn bid of
                    Nothing   -> Just (DestBlockId bid)
                    Just dest -> Just dest
            updateBlock dest = dest
            blocks' = map updateBlock blocks
        in  JMP_TBL addr blocks' section tblId
    shortcutJump' _ _ other = other

-- Here because it knows about JumpDest
shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, CmmStatics) -> (Alignment, CmmStatics)
shortcutStatics fn (align, Statics lbl statics)
  = (align, Statics lbl $ map (shortcutStatic fn) statics)
  -- we need to get the jump tables, so apply the mapping to the entries
  -- of a CmmData too.

shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
  | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId
  | otherwise                              = lab

shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn (CmmStaticLit (CmmLabel lab))
  = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
  = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
  -- slightly dodgy, we're ignoring the second label, but this
  -- works with the way we use CmmLabelDiffOff for jump tables now.
shortcutStatic _ other_static
  = other_static

shortBlockId
        :: (BlockId -> Maybe JumpDest)
        -> UniqSet Unique
        -> BlockId
        -> CLabel

shortBlockId fn seen blockid =
  case (elementOfUniqSet uq seen, fn blockid) of
    (True, _)    -> blockLbl blockid
    (_, Nothing) -> blockLbl blockid
    (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
    (_, Just (DestImm (ImmCLbl lbl))) -> lbl
    (_, _other) -> panic "shortBlockId"
  where uq = getUnique blockid