Update Trac ticket URLs to point to GitLab
[ghc.git] / compiler / nativeGen / X86 / Ppr.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Pretty-printing assembly language
6 --
7 -- (c) The University of Glasgow 1993-2005
8 --
9 -----------------------------------------------------------------------------
10
11 {-# OPTIONS_GHC -fno-warn-orphans #-}
12 module X86.Ppr (
13 pprNatCmmDecl,
14 pprData,
15 pprInstr,
16 pprFormat,
17 pprImm,
18 pprDataItem,
19 )
20
21 where
22
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
25
26 import GhcPrelude
27
28 import X86.Regs
29 import X86.Instr
30 import X86.Cond
31 import Instruction
32 import Format
33 import Reg
34 import PprBase
35
36
37 import Hoopl.Collections
38 import Hoopl.Label
39 import BasicTypes (Alignment)
40 import DynFlags
41 import Cmm hiding (topInfoTable)
42 import BlockId
43 import CLabel
44 import Unique ( pprUniqueAlways )
45 import Platform
46 import FastString
47 import Outputable
48
49 import Data.Word
50 import Data.Bits
51
52 -- -----------------------------------------------------------------------------
53 -- Printing this stuff out
54 --
55 --
56 -- Note [Subsections Via Symbols]
57 --
58 -- If we are using the .subsections_via_symbols directive
59 -- (available on recent versions of Darwin),
60 -- we have to make sure that there is some kind of reference
61 -- from the entry code to a label on the _top_ of of the info table,
62 -- so that the linker will not think it is unreferenced and dead-strip
63 -- it. That's why the label is called a DeadStripPreventer (_dsp).
64 --
65 -- The LLVM code gen already creates `iTableSuf` symbols, where
66 -- the X86 would generate the DeadStripPreventer (_dsp) symbol.
67 -- Therefore all that is left for llvm code gen, is to ensure
68 -- that all the `iTableSuf` symbols are marked as used.
69 -- As of this writing the documentation regarding the
70 -- .subsections_via_symbols and -dead_strip can be found at
71 -- <https://developer.apple.com/library/mac/documentation/DeveloperTools/Reference/Assembler/040-Assembler_Directives/asm_directives.html#//apple_ref/doc/uid/TP30000823-TPXREF101>
72
73 pprProcAlignment :: SDoc
74 pprProcAlignment = sdocWithDynFlags $ \dflags ->
75 (maybe empty pprAlign . cmmProcAlignment $ dflags)
76
77 pprNatCmmDecl :: NatCmmDecl (Alignment, CmmStatics) Instr -> SDoc
78 pprNatCmmDecl (CmmData section dats) =
79 pprSectionAlign section $$ pprDatas dats
80
81 pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
82 sdocWithDynFlags $ \dflags ->
83 pprProcAlignment $$
84 case topInfoTable proc of
85 Nothing ->
86 -- special case for code without info table:
87 pprSectionAlign (Section Text lbl) $$
88 pprProcAlignment $$
89 pprLabel lbl $$ -- blocks guaranteed not null, so label needed
90 vcat (map (pprBasicBlock top_info) blocks) $$
91 (if debugLevel dflags > 0
92 then ppr (mkAsmTempEndLabel lbl) <> char ':' else empty) $$
93 pprSizeDecl lbl
94
95 Just (Statics info_lbl _) ->
96 sdocWithPlatform $ \platform ->
97 pprSectionAlign (Section Text info_lbl) $$
98 pprProcAlignment $$
99 (if platformHasSubsectionsViaSymbols platform
100 then ppr (mkDeadStripPreventer info_lbl) <> char ':'
101 else empty) $$
102 vcat (map (pprBasicBlock top_info) blocks) $$
103 -- above: Even the first block gets a label, because with branch-chain
104 -- elimination, it might be the target of a goto.
105 (if platformHasSubsectionsViaSymbols platform
106 then -- See Note [Subsections Via Symbols]
107 text "\t.long "
108 <+> ppr info_lbl
109 <+> char '-'
110 <+> ppr (mkDeadStripPreventer info_lbl)
111 else empty) $$
112 pprSizeDecl info_lbl
113
114 -- | Output the ELF .size directive.
115 pprSizeDecl :: CLabel -> SDoc
116 pprSizeDecl lbl
117 = sdocWithPlatform $ \platform ->
118 if osElfTarget (platformOS platform)
119 then text "\t.size" <+> ppr lbl <> ptext (sLit ", .-") <> ppr lbl
120 else empty
121
122 pprBasicBlock :: LabelMap CmmStatics -> NatBasicBlock Instr -> SDoc
123 pprBasicBlock info_env (BasicBlock blockid instrs)
124 = sdocWithDynFlags $ \dflags ->
125 maybe_infotable dflags $
126 pprLabel asmLbl $$
127 vcat (map pprInstr instrs) $$
128 (if debugLevel dflags > 0
129 then ppr (mkAsmTempEndLabel asmLbl) <> char ':' else empty)
130 where
131 asmLbl = blockLbl blockid
132 maybe_infotable dflags c = case mapLookup blockid info_env of
133 Nothing -> c
134 Just (Statics infoLbl info) ->
135 pprAlignForSection Text $$
136 infoTableLoc $$
137 vcat (map pprData info) $$
138 pprLabel infoLbl $$
139 c $$
140 (if debugLevel dflags > 0
141 then ppr (mkAsmTempEndLabel infoLbl) <> char ':' else empty)
142 -- Make sure the info table has the right .loc for the block
143 -- coming right after it. See [Note: Info Offset]
144 infoTableLoc = case instrs of
145 (l@LOCATION{} : _) -> pprInstr l
146 _other -> empty
147
148 pprDatas :: (Alignment, CmmStatics) -> SDoc
149 pprDatas (align, (Statics lbl dats))
150 = vcat (pprAlign align : pprLabel lbl : map pprData dats)
151
152 pprData :: CmmStatic -> SDoc
153 pprData (CmmString str) = pprBytes str
154
155 pprData (CmmUninitialised bytes)
156 = sdocWithPlatform $ \platform ->
157 if platformOS platform == OSDarwin then text ".space " <> int bytes
158 else text ".skip " <> int bytes
159
160 pprData (CmmStaticLit lit) = pprDataItem lit
161
162 pprGloblDecl :: CLabel -> SDoc
163 pprGloblDecl lbl
164 | not (externallyVisibleCLabel lbl) = empty
165 | otherwise = text ".globl " <> ppr lbl
166
167 pprLabelType' :: DynFlags -> CLabel -> SDoc
168 pprLabelType' dflags lbl =
169 if isCFunctionLabel lbl || functionOkInfoTable then
170 text "@function"
171 else
172 text "@object"
173 where
174 {-
175 NOTE: This is a bit hacky.
176
177 With the `tablesNextToCode` info tables look like this:
178 ```
179 <info table data>
180 label_info:
181 <info table code>
182 ```
183 So actually info table label points exactly to the code and we can mark
184 the label as @function. (This is required to make perf and potentially other
185 tools to work on Haskell binaries).
186 This usually works well but it can cause issues with a linker.
187 A linker uses different algorithms for the relocation depending on
188 the symbol type.For some reason, a linker will generate JUMP_SLOT relocation
189 when constructor info table is referenced from a data section.
190 This only happens with static constructor call so
191 we mark _con_info symbols as `@object` to avoid the issue with relocations.
192
193 @SimonMarlow hack explanation:
194 "The reasoning goes like this:
195
196 * The danger when we mark a symbol as `@function` is that the linker will
197 redirect it to point to the PLT and use a `JUMP_SLOT` relocation when
198 the symbol refers to something outside the current shared object.
199 A PLT / JUMP_SLOT reference only works for symbols that we jump to, not
200 for symbols representing data,, nor for info table symbol references which
201 we expect to point directly to the info table.
202 * GHC generates code that might refer to any info table symbol from the text
203 segment, but that's OK, because those will be explicit GOT references
204 generated by the code generator.
205 * When we refer to info tables from the data segment, it's either
206 * a FUN_STATIC/THUNK_STATIC local to this module
207 * a `con_info` that could be from anywhere
208
209 So, the only info table symbols that we might refer to from the data segment
210 of another shared object are `con_info` symbols, so those are the ones we
211 need to exclude from getting the @function treatment.
212 "
213
214 A good place to check for more
215 https://ghc.haskell.org/trac/ghc/wiki/Commentary/PositionIndependentCode
216
217 Another possible hack is to create an extra local function symbol for
218 every code-like thing to give the needed information for to the tools
219 but mess up with the relocation. https://phabricator.haskell.org/D4730
220 -}
221 functionOkInfoTable = tablesNextToCode dflags &&
222 isInfoTableLabel lbl && not (isConInfoTableLabel lbl)
223
224
225 pprTypeDecl :: CLabel -> SDoc
226 pprTypeDecl lbl
227 = sdocWithPlatform $ \platform ->
228 if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl
229 then
230 sdocWithDynFlags $ \df ->
231 text ".type " <> ppr lbl <> ptext (sLit ", ") <> pprLabelType' df lbl
232 else empty
233
234 pprLabel :: CLabel -> SDoc
235 pprLabel lbl = pprGloblDecl lbl
236 $$ pprTypeDecl lbl
237 $$ (ppr lbl <> char ':')
238
239 pprAlign :: Int -> SDoc
240 pprAlign bytes
241 = sdocWithPlatform $ \platform ->
242 text ".align " <> int (alignment platform)
243 where
244 alignment platform = if platformOS platform == OSDarwin
245 then log2 bytes
246 else bytes
247
248 log2 :: Int -> Int -- cache the common ones
249 log2 1 = 0
250 log2 2 = 1
251 log2 4 = 2
252 log2 8 = 3
253 log2 n = 1 + log2 (n `quot` 2)
254
255 -- -----------------------------------------------------------------------------
256 -- pprInstr: print an 'Instr'
257
258 instance Outputable Instr where
259 ppr instr = pprInstr instr
260
261
262 pprReg :: Format -> Reg -> SDoc
263 pprReg f r
264 = case r of
265 RegReal (RealRegSingle i) ->
266 sdocWithPlatform $ \platform ->
267 if target32Bit platform then ppr32_reg_no f i
268 else ppr64_reg_no f i
269 RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
270 RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
271 RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
272 RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
273 RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
274 RegVirtual (VirtualRegSSE u) -> text "%vSSE_" <> pprUniqueAlways u
275 where
276 ppr32_reg_no :: Format -> Int -> SDoc
277 ppr32_reg_no II8 = ppr32_reg_byte
278 ppr32_reg_no II16 = ppr32_reg_word
279 ppr32_reg_no _ = ppr32_reg_long
280
281 ppr32_reg_byte i = ptext
282 (case i of {
283 0 -> sLit "%al"; 1 -> sLit "%bl";
284 2 -> sLit "%cl"; 3 -> sLit "%dl";
285 _ -> sLit $ "very naughty I386 byte register: " ++ show i
286 })
287
288 ppr32_reg_word i = ptext
289 (case i of {
290 0 -> sLit "%ax"; 1 -> sLit "%bx";
291 2 -> sLit "%cx"; 3 -> sLit "%dx";
292 4 -> sLit "%si"; 5 -> sLit "%di";
293 6 -> sLit "%bp"; 7 -> sLit "%sp";
294 _ -> sLit "very naughty I386 word register"
295 })
296
297 ppr32_reg_long i = ptext
298 (case i of {
299 0 -> sLit "%eax"; 1 -> sLit "%ebx";
300 2 -> sLit "%ecx"; 3 -> sLit "%edx";
301 4 -> sLit "%esi"; 5 -> sLit "%edi";
302 6 -> sLit "%ebp"; 7 -> sLit "%esp";
303 _ -> ppr_reg_float i
304 })
305
306 ppr64_reg_no :: Format -> Int -> SDoc
307 ppr64_reg_no II8 = ppr64_reg_byte
308 ppr64_reg_no II16 = ppr64_reg_word
309 ppr64_reg_no II32 = ppr64_reg_long
310 ppr64_reg_no _ = ppr64_reg_quad
311
312 ppr64_reg_byte i = ptext
313 (case i of {
314 0 -> sLit "%al"; 1 -> sLit "%bl";
315 2 -> sLit "%cl"; 3 -> sLit "%dl";
316 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
317 6 -> sLit "%bpl"; 7 -> sLit "%spl";
318 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
319 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
320 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
321 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
322 _ -> sLit $ "very naughty x86_64 byte register: " ++ show i
323 })
324
325 ppr64_reg_word i = ptext
326 (case i of {
327 0 -> sLit "%ax"; 1 -> sLit "%bx";
328 2 -> sLit "%cx"; 3 -> sLit "%dx";
329 4 -> sLit "%si"; 5 -> sLit "%di";
330 6 -> sLit "%bp"; 7 -> sLit "%sp";
331 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
332 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
333 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
334 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
335 _ -> sLit "very naughty x86_64 word register"
336 })
337
338 ppr64_reg_long i = ptext
339 (case i of {
340 0 -> sLit "%eax"; 1 -> sLit "%ebx";
341 2 -> sLit "%ecx"; 3 -> sLit "%edx";
342 4 -> sLit "%esi"; 5 -> sLit "%edi";
343 6 -> sLit "%ebp"; 7 -> sLit "%esp";
344 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
345 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
346 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
347 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
348 _ -> sLit "very naughty x86_64 register"
349 })
350
351 ppr64_reg_quad i = ptext
352 (case i of {
353 0 -> sLit "%rax"; 1 -> sLit "%rbx";
354 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
355 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
356 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
357 8 -> sLit "%r8"; 9 -> sLit "%r9";
358 10 -> sLit "%r10"; 11 -> sLit "%r11";
359 12 -> sLit "%r12"; 13 -> sLit "%r13";
360 14 -> sLit "%r14"; 15 -> sLit "%r15";
361 _ -> ppr_reg_float i
362 })
363
364 ppr_reg_float :: Int -> PtrString
365 ppr_reg_float i = case i of
366 16 -> sLit "%fake0"; 17 -> sLit "%fake1"
367 18 -> sLit "%fake2"; 19 -> sLit "%fake3"
368 20 -> sLit "%fake4"; 21 -> sLit "%fake5"
369 24 -> sLit "%xmm0"; 25 -> sLit "%xmm1"
370 26 -> sLit "%xmm2"; 27 -> sLit "%xmm3"
371 28 -> sLit "%xmm4"; 29 -> sLit "%xmm5"
372 30 -> sLit "%xmm6"; 31 -> sLit "%xmm7"
373 32 -> sLit "%xmm8"; 33 -> sLit "%xmm9"
374 34 -> sLit "%xmm10"; 35 -> sLit "%xmm11"
375 36 -> sLit "%xmm12"; 37 -> sLit "%xmm13"
376 38 -> sLit "%xmm14"; 39 -> sLit "%xmm15"
377 _ -> sLit "very naughty x86 register"
378
379 pprFormat :: Format -> SDoc
380 pprFormat x
381 = ptext (case x of
382 II8 -> sLit "b"
383 II16 -> sLit "w"
384 II32 -> sLit "l"
385 II64 -> sLit "q"
386 FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
387 FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
388 FF80 -> sLit "t"
389 )
390
391 pprFormat_x87 :: Format -> SDoc
392 pprFormat_x87 x
393 = ptext $ case x of
394 FF32 -> sLit "s"
395 FF64 -> sLit "l"
396 FF80 -> sLit "t"
397 _ -> panic "X86.Ppr.pprFormat_x87"
398
399 pprCond :: Cond -> SDoc
400 pprCond c
401 = ptext (case c of {
402 GEU -> sLit "ae"; LU -> sLit "b";
403 EQQ -> sLit "e"; GTT -> sLit "g";
404 GE -> sLit "ge"; GU -> sLit "a";
405 LTT -> sLit "l"; LE -> sLit "le";
406 LEU -> sLit "be"; NE -> sLit "ne";
407 NEG -> sLit "s"; POS -> sLit "ns";
408 CARRY -> sLit "c"; OFLO -> sLit "o";
409 PARITY -> sLit "p"; NOTPARITY -> sLit "np";
410 ALWAYS -> sLit "mp"})
411
412
413 pprImm :: Imm -> SDoc
414 pprImm (ImmInt i) = int i
415 pprImm (ImmInteger i) = integer i
416 pprImm (ImmCLbl l) = ppr l
417 pprImm (ImmIndex l i) = ppr l <> char '+' <> int i
418 pprImm (ImmLit s) = s
419
420 pprImm (ImmFloat _) = text "naughty float immediate"
421 pprImm (ImmDouble _) = text "naughty double immediate"
422
423 pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
424 pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
425 <> lparen <> pprImm b <> rparen
426
427
428
429 pprAddr :: AddrMode -> SDoc
430 pprAddr (ImmAddr imm off)
431 = let pp_imm = pprImm imm
432 in
433 if (off == 0) then
434 pp_imm
435 else if (off < 0) then
436 pp_imm <> int off
437 else
438 pp_imm <> char '+' <> int off
439
440 pprAddr (AddrBaseIndex base index displacement)
441 = sdocWithPlatform $ \platform ->
442 let
443 pp_disp = ppr_disp displacement
444 pp_off p = pp_disp <> char '(' <> p <> char ')'
445 pp_reg r = pprReg (archWordFormat (target32Bit platform)) r
446 in
447 case (base, index) of
448 (EABaseNone, EAIndexNone) -> pp_disp
449 (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
450 (EABaseRip, EAIndexNone) -> pp_off (text "%rip")
451 (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
452 (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
453 <> comma <> int i)
454 _ -> panic "X86.Ppr.pprAddr: no match"
455
456 where
457 ppr_disp (ImmInt 0) = empty
458 ppr_disp imm = pprImm imm
459
460 -- | Print section header and appropriate alignment for that section.
461 pprSectionAlign :: Section -> SDoc
462 pprSectionAlign (Section (OtherSection _) _) =
463 panic "X86.Ppr.pprSectionAlign: unknown section"
464 pprSectionAlign sec@(Section seg _) =
465 sdocWithPlatform $ \platform ->
466 pprSectionHeader platform sec $$
467 pprAlignForSection seg
468
469 -- | Print appropriate alignment for the given section type.
470 pprAlignForSection :: SectionType -> SDoc
471 pprAlignForSection seg =
472 sdocWithPlatform $ \platform ->
473 text ".align " <>
474 case platformOS platform of
475 -- Darwin: alignments are given as shifts.
476 OSDarwin
477 | target32Bit platform ->
478 case seg of
479 ReadOnlyData16 -> int 4
480 CString -> int 1
481 _ -> int 2
482 | otherwise ->
483 case seg of
484 ReadOnlyData16 -> int 4
485 CString -> int 1
486 _ -> int 3
487 -- Other: alignments are given as bytes.
488 _
489 | target32Bit platform ->
490 case seg of
491 Text -> text "4,0x90"
492 ReadOnlyData16 -> int 16
493 CString -> int 1
494 _ -> int 4
495 | otherwise ->
496 case seg of
497 ReadOnlyData16 -> int 16
498 CString -> int 1
499 _ -> int 8
500
501 pprDataItem :: CmmLit -> SDoc
502 pprDataItem lit = sdocWithDynFlags $ \dflags -> pprDataItem' dflags lit
503
504 pprDataItem' :: DynFlags -> CmmLit -> SDoc
505 pprDataItem' dflags lit
506 = vcat (ppr_item (cmmTypeFormat $ cmmLitType dflags lit) lit)
507 where
508 platform = targetPlatform dflags
509 imm = litToImm lit
510
511 -- These seem to be common:
512 ppr_item II8 _ = [text "\t.byte\t" <> pprImm imm]
513 ppr_item II16 _ = [text "\t.word\t" <> pprImm imm]
514 ppr_item II32 _ = [text "\t.long\t" <> pprImm imm]
515
516 ppr_item FF32 (CmmFloat r _)
517 = let bs = floatToBytes (fromRational r)
518 in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
519
520 ppr_item FF64 (CmmFloat r _)
521 = let bs = doubleToBytes (fromRational r)
522 in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
523
524 ppr_item II64 _
525 = case platformOS platform of
526 OSDarwin
527 | target32Bit platform ->
528 case lit of
529 CmmInt x _ ->
530 [text "\t.long\t"
531 <> int (fromIntegral (fromIntegral x :: Word32)),
532 text "\t.long\t"
533 <> int (fromIntegral
534 (fromIntegral (x `shiftR` 32) :: Word32))]
535 _ -> panic "X86.Ppr.ppr_item: no match for II64"
536 | otherwise ->
537 [text "\t.quad\t" <> pprImm imm]
538 _
539 | target32Bit platform ->
540 [text "\t.quad\t" <> pprImm imm]
541 | otherwise ->
542 -- x86_64: binutils can't handle the R_X86_64_PC64
543 -- relocation type, which means we can't do
544 -- pc-relative 64-bit addresses. Fortunately we're
545 -- assuming the small memory model, in which all such
546 -- offsets will fit into 32 bits, so we have to stick
547 -- to 32-bit offset fields and modify the RTS
548 -- appropriately
549 --
550 -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
551 --
552 case lit of
553 -- A relative relocation:
554 CmmLabelDiffOff _ _ _ _ ->
555 [text "\t.long\t" <> pprImm imm,
556 text "\t.long\t0"]
557 _ ->
558 [text "\t.quad\t" <> pprImm imm]
559
560 ppr_item _ _
561 = panic "X86.Ppr.ppr_item: no match"
562
563
564 asmComment :: SDoc -> SDoc
565 asmComment c = whenPprDebug $ text "# " <> c
566
567 pprInstr :: Instr -> SDoc
568
569 pprInstr (COMMENT s)
570 = asmComment (ftext s)
571
572 pprInstr (LOCATION file line col _name)
573 = text "\t.loc " <> ppr file <+> ppr line <+> ppr col
574
575 pprInstr (DELTA d)
576 = asmComment $ text ("\tdelta = " ++ show d)
577
578 pprInstr (NEWBLOCK _)
579 = panic "PprMach.pprInstr: NEWBLOCK"
580
581 pprInstr (UNWIND lbl d)
582 = asmComment (text "\tunwind = " <> ppr d)
583 $$ ppr lbl <> colon
584
585 pprInstr (LDATA _ _)
586 = panic "PprMach.pprInstr: LDATA"
587
588 {-
589 pprInstr (SPILL reg slot)
590 = hcat [
591 text "\tSPILL",
592 char ' ',
593 pprUserReg reg,
594 comma,
595 text "SLOT" <> parens (int slot)]
596
597 pprInstr (RELOAD slot reg)
598 = hcat [
599 text "\tRELOAD",
600 char ' ',
601 text "SLOT" <> parens (int slot),
602 comma,
603 pprUserReg reg]
604 -}
605
606 -- Replace 'mov $0x0,%reg' by 'xor %reg,%reg', which is smaller and cheaper.
607 -- The code generator catches most of these already, but not all.
608 pprInstr (MOV format (OpImm (ImmInt 0)) dst@(OpReg _))
609 = pprInstr (XOR format' dst dst)
610 where format' = case format of
611 II64 -> II32 -- 32-bit version is equivalent, and smaller
612 _ -> format
613 pprInstr (MOV format src dst)
614 = pprFormatOpOp (sLit "mov") format src dst
615
616 pprInstr (CMOV cc format src dst)
617 = pprCondOpReg (sLit "cmov") format cc src dst
618
619 pprInstr (MOVZxL II32 src dst) = pprFormatOpOp (sLit "mov") II32 src dst
620 -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
621 -- movl. But we represent it as a MOVZxL instruction, because
622 -- the reg alloc would tend to throw away a plain reg-to-reg
623 -- move, and we still want it to do that.
624
625 pprInstr (MOVZxL formats src dst)
626 = pprFormatOpOpCoerce (sLit "movz") formats II32 src dst
627 -- zero-extension only needs to extend to 32 bits: on x86_64,
628 -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
629 -- instruction is shorter.
630
631 pprInstr (MOVSxL formats src dst)
632 = sdocWithPlatform $ \platform ->
633 pprFormatOpOpCoerce (sLit "movs") formats (archWordFormat (target32Bit platform)) src dst
634
635 -- here we do some patching, since the physical registers are only set late
636 -- in the code generation.
637 pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
638 | reg1 == reg3
639 = pprFormatOpOp (sLit "add") format (OpReg reg2) dst
640
641 pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
642 | reg2 == reg3
643 = pprFormatOpOp (sLit "add") format (OpReg reg1) dst
644
645 pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
646 | reg1 == reg3
647 = pprInstr (ADD format (OpImm displ) dst)
648
649 pprInstr (LEA format src dst) = pprFormatOpOp (sLit "lea") format src dst
650
651 pprInstr (ADD format (OpImm (ImmInt (-1))) dst)
652 = pprFormatOp (sLit "dec") format dst
653 pprInstr (ADD format (OpImm (ImmInt 1)) dst)
654 = pprFormatOp (sLit "inc") format dst
655 pprInstr (ADD format src dst) = pprFormatOpOp (sLit "add") format src dst
656 pprInstr (ADC format src dst) = pprFormatOpOp (sLit "adc") format src dst
657 pprInstr (SUB format src dst) = pprFormatOpOp (sLit "sub") format src dst
658 pprInstr (SBB format src dst) = pprFormatOpOp (sLit "sbb") format src dst
659 pprInstr (IMUL format op1 op2) = pprFormatOpOp (sLit "imul") format op1 op2
660
661 pprInstr (ADD_CC format src dst)
662 = pprFormatOpOp (sLit "add") format src dst
663 pprInstr (SUB_CC format src dst)
664 = pprFormatOpOp (sLit "sub") format src dst
665
666 {- A hack. The Intel documentation says that "The two and three
667 operand forms [of IMUL] may also be used with unsigned operands
668 because the lower half of the product is the same regardless if
669 (sic) the operands are signed or unsigned. The CF and OF flags,
670 however, cannot be used to determine if the upper half of the
671 result is non-zero." So there.
672 -}
673
674 -- Use a 32-bit instruction when possible as it saves a byte.
675 -- Notably, extracting the tag bits of a pointer has this form.
676 -- TODO: we could save a byte in a subsequent CMP instruction too,
677 -- but need something like a peephole pass for this
678 pprInstr (AND II64 src@(OpImm (ImmInteger mask)) dst)
679 | 0 <= mask && mask < 0xffffffff
680 = pprInstr (AND II32 src dst)
681 pprInstr (AND FF32 src dst) = pprOpOp (sLit "andps") FF32 src dst
682 pprInstr (AND FF64 src dst) = pprOpOp (sLit "andpd") FF64 src dst
683 pprInstr (AND format src dst) = pprFormatOpOp (sLit "and") format src dst
684 pprInstr (OR format src dst) = pprFormatOpOp (sLit "or") format src dst
685
686 pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
687 pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
688 pprInstr (XOR format src dst) = pprFormatOpOp (sLit "xor") format src dst
689
690 pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst)
691 pprInstr (LZCNT format src dst) = pprOpOp (sLit "lzcnt") format src (OpReg dst)
692 pprInstr (TZCNT format src dst) = pprOpOp (sLit "tzcnt") format src (OpReg dst)
693 pprInstr (BSF format src dst) = pprOpOp (sLit "bsf") format src (OpReg dst)
694 pprInstr (BSR format src dst) = pprOpOp (sLit "bsr") format src (OpReg dst)
695
696 pprInstr (PDEP format src mask dst) = pprFormatOpOpReg (sLit "pdep") format src mask dst
697 pprInstr (PEXT format src mask dst) = pprFormatOpOpReg (sLit "pext") format src mask dst
698
699 pprInstr (PREFETCH NTA format src ) = pprFormatOp_ (sLit "prefetchnta") format src
700 pprInstr (PREFETCH Lvl0 format src) = pprFormatOp_ (sLit "prefetcht0") format src
701 pprInstr (PREFETCH Lvl1 format src) = pprFormatOp_ (sLit "prefetcht1") format src
702 pprInstr (PREFETCH Lvl2 format src) = pprFormatOp_ (sLit "prefetcht2") format src
703
704 pprInstr (NOT format op) = pprFormatOp (sLit "not") format op
705 pprInstr (BSWAP format op) = pprFormatOp (sLit "bswap") format (OpReg op)
706 pprInstr (NEGI format op) = pprFormatOp (sLit "neg") format op
707
708 pprInstr (SHL format src dst) = pprShift (sLit "shl") format src dst
709 pprInstr (SAR format src dst) = pprShift (sLit "sar") format src dst
710 pprInstr (SHR format src dst) = pprShift (sLit "shr") format src dst
711
712 pprInstr (BT format imm src) = pprFormatImmOp (sLit "bt") format imm src
713
714 pprInstr (CMP format src dst)
715 | isFloatFormat format = pprFormatOpOp (sLit "ucomi") format src dst -- SSE2
716 | otherwise = pprFormatOpOp (sLit "cmp") format src dst
717
718 pprInstr (TEST format src dst) = sdocWithPlatform $ \platform ->
719 let format' = case (src,dst) of
720 -- Match instructions like 'test $0x3,%esi' or 'test $0x7,%rbx'.
721 -- We can replace them by equivalent, but smaller instructions
722 -- by reducing the size of the immediate operand as far as possible.
723 -- (We could handle masks larger than a single byte too,
724 -- but it would complicate the code considerably
725 -- and tag checks are by far the most common case.)
726 -- The mask must have the high bit clear for this smaller encoding
727 -- to be completely equivalent to the original; in particular so
728 -- that the signed comparison condition bits are the same as they
729 -- would be if doing a full word comparison. See #13425.
730 (OpImm (ImmInteger mask), OpReg dstReg)
731 | 0 <= mask && mask < 128 -> minSizeOfReg platform dstReg
732 _ -> format
733 in pprFormatOpOp (sLit "test") format' src dst
734 where
735 minSizeOfReg platform (RegReal (RealRegSingle i))
736 | target32Bit platform && i <= 3 = II8 -- al, bl, cl, dl
737 | target32Bit platform && i <= 7 = II16 -- si, di, bp, sp
738 | not (target32Bit platform) && i <= 15 = II8 -- al .. r15b
739 minSizeOfReg _ _ = format -- other
740
741 pprInstr (PUSH format op) = pprFormatOp (sLit "push") format op
742 pprInstr (POP format op) = pprFormatOp (sLit "pop") format op
743
744 -- both unused (SDM):
745 -- pprInstr PUSHA = text "\tpushal"
746 -- pprInstr POPA = text "\tpopal"
747
748 pprInstr NOP = text "\tnop"
749 pprInstr (CLTD II8) = text "\tcbtw"
750 pprInstr (CLTD II16) = text "\tcwtd"
751 pprInstr (CLTD II32) = text "\tcltd"
752 pprInstr (CLTD II64) = text "\tcqto"
753 pprInstr (CLTD x) = panic $ "pprInstr: " ++ show x
754
755 pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
756
757 pprInstr (JXX cond blockid)
758 = pprCondInstr (sLit "j") cond (ppr lab)
759 where lab = blockLbl blockid
760
761 pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
762
763 pprInstr (JMP (OpImm imm) _) = text "\tjmp " <> pprImm imm
764 pprInstr (JMP op _) = sdocWithPlatform $ \platform ->
765 text "\tjmp *"
766 <> pprOperand (archWordFormat (target32Bit platform)) op
767 pprInstr (JMP_TBL op _ _ _) = pprInstr (JMP op [])
768 pprInstr (CALL (Left imm) _) = text "\tcall " <> pprImm imm
769 pprInstr (CALL (Right reg) _) = sdocWithPlatform $ \platform ->
770 text "\tcall *"
771 <> pprReg (archWordFormat (target32Bit platform)) reg
772
773 pprInstr (IDIV fmt op) = pprFormatOp (sLit "idiv") fmt op
774 pprInstr (DIV fmt op) = pprFormatOp (sLit "div") fmt op
775 pprInstr (IMUL2 fmt op) = pprFormatOp (sLit "imul") fmt op
776
777 -- x86_64 only
778 pprInstr (MUL format op1 op2) = pprFormatOpOp (sLit "mul") format op1 op2
779 pprInstr (MUL2 format op) = pprFormatOp (sLit "mul") format op
780
781 pprInstr (FDIV format op1 op2) = pprFormatOpOp (sLit "div") format op1 op2
782 pprInstr (SQRT format op1 op2) = pprFormatOpReg (sLit "sqrt") format op1 op2
783
784 pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
785 pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
786 pprInstr (CVTTSS2SIQ fmt from to) = pprFormatFormatOpReg (sLit "cvttss2si") FF32 fmt from to
787 pprInstr (CVTTSD2SIQ fmt from to) = pprFormatFormatOpReg (sLit "cvttsd2si") FF64 fmt from to
788 pprInstr (CVTSI2SS fmt from to) = pprFormatOpReg (sLit "cvtsi2ss") fmt from to
789 pprInstr (CVTSI2SD fmt from to) = pprFormatOpReg (sLit "cvtsi2sd") fmt from to
790
791 -- FETCHGOT for PIC on ELF platforms
792 pprInstr (FETCHGOT reg)
793 = vcat [ text "\tcall 1f",
794 hcat [ text "1:\tpopl\t", pprReg II32 reg ],
795 hcat [ text "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), ",
796 pprReg II32 reg ]
797 ]
798
799 -- FETCHPC for PIC on Darwin/x86
800 -- get the instruction pointer into a register
801 -- (Terminology note: the IP is called Program Counter on PPC,
802 -- and it's a good thing to use the same name on both platforms)
803 pprInstr (FETCHPC reg)
804 = vcat [ text "\tcall 1f",
805 hcat [ text "1:\tpopl\t", pprReg II32 reg ]
806 ]
807
808
809 -- -----------------------------------------------------------------------------
810 -- i386 floating-point
811
812 -- Simulating a flat register set on the x86 FP stack is tricky.
813 -- you have to free %st(7) before pushing anything on the FP reg stack
814 -- so as to preclude the possibility of a FP stack overflow exception.
815 pprInstr g@(GMOV src dst)
816 | src == dst
817 = empty
818 | otherwise
819 = pprG g (hcat [gtab, gpush src 0, gsemi, gpop dst 1])
820
821 -- GLD fmt addr dst ==> FLDsz addr ; FSTP (dst+1)
822 pprInstr g@(GLD fmt addr dst)
823 = pprG g (hcat [gtab, text "fld", pprFormat_x87 fmt, gsp,
824 pprAddr addr, gsemi, gpop dst 1])
825
826 -- GST fmt src addr ==> FLD dst ; FSTPsz addr
827 pprInstr g@(GST fmt src addr)
828 | src == fake0 && fmt /= FF80 -- fstt instruction doesn't exist
829 = pprG g (hcat [gtab,
830 text "fst", pprFormat_x87 fmt, gsp, pprAddr addr])
831 | otherwise
832 = pprG g (hcat [gtab, gpush src 0, gsemi,
833 text "fstp", pprFormat_x87 fmt, gsp, pprAddr addr])
834
835 pprInstr g@(GLDZ dst)
836 = pprG g (hcat [gtab, text "fldz ; ", gpop dst 1])
837 pprInstr g@(GLD1 dst)
838 = pprG g (hcat [gtab, text "fld1 ; ", gpop dst 1])
839
840 pprInstr (GFTOI src dst)
841 = pprInstr (GDTOI src dst)
842
843 pprInstr g@(GDTOI src dst)
844 = pprG g (vcat [
845 hcat [gtab, text "subl $8, %esp ; fnstcw 4(%esp)"],
846 hcat [gtab, gpush src 0],
847 hcat [gtab, text "movzwl 4(%esp), ", reg,
848 text " ; orl $0xC00, ", reg],
849 hcat [gtab, text "movl ", reg, text ", 0(%esp) ; fldcw 0(%esp)"],
850 hcat [gtab, text "fistpl 0(%esp)"],
851 hcat [gtab, text "fldcw 4(%esp) ; movl 0(%esp), ", reg],
852 hcat [gtab, text "addl $8, %esp"]
853 ])
854 where
855 reg = pprReg II32 dst
856
857 pprInstr (GITOF src dst)
858 = pprInstr (GITOD src dst)
859
860 pprInstr g@(GITOD src dst)
861 = pprG g (hcat [gtab, text "pushl ", pprReg II32 src,
862 text " ; fildl (%esp) ; ",
863 gpop dst 1, text " ; addl $4,%esp"])
864
865 pprInstr g@(GDTOF src dst)
866 = pprG g (vcat [gtab <> gpush src 0,
867 gtab <> text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ;",
868 gtab <> gpop dst 1])
869
870 {- Gruesome swamp follows. If you're unfortunate enough to have ventured
871 this far into the jungle AND you give a Rat's Ass (tm) what's going
872 on, here's the deal. Generate code to do a floating point comparison
873 of src1 and src2, of kind cond, and set the Zero flag if true.
874
875 The complications are to do with handling NaNs correctly. We want the
876 property that if either argument is NaN, then the result of the
877 comparison is False ... except if we're comparing for inequality,
878 in which case the answer is True.
879
880 Here's how the general (non-inequality) case works. As an
881 example, consider generating the an equality test:
882
883 pushl %eax -- we need to mess with this
884 <get src1 to top of FPU stack>
885 fcomp <src2 location in FPU stack> and pop pushed src1
886 -- Result of comparison is in FPU Status Register bits
887 -- C3 C2 and C0
888 fstsw %ax -- Move FPU Status Reg to %ax
889 sahf -- move C3 C2 C0 from %ax to integer flag reg
890 -- now the serious magic begins
891 setpo %ah -- %ah = if comparable(neither arg was NaN) then 1 else 0
892 sete %al -- %al = if arg1 == arg2 then 1 else 0
893 andb %ah,%al -- %al &= %ah
894 -- so %al == 1 iff (comparable && same); else it holds 0
895 decb %al -- %al == 0, ZeroFlag=1 iff (comparable && same);
896 else %al == 0xFF, ZeroFlag=0
897 -- the zero flag is now set as we desire.
898 popl %eax
899
900 The special case of inequality differs thusly:
901
902 setpe %ah -- %ah = if incomparable(either arg was NaN) then 1 else 0
903 setne %al -- %al = if arg1 /= arg2 then 1 else 0
904 orb %ah,%al -- %al = if (incomparable || different) then 1 else 0
905 decb %al -- if (incomparable || different) then (%al == 0, ZF=1)
906 else (%al == 0xFF, ZF=0)
907 -}
908 pprInstr g@(GCMP cond src1 src2)
909 | case cond of { NE -> True; _ -> False }
910 = pprG g (vcat [
911 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
912 hcat [gtab, text "fcomp ", greg src2 1,
913 text "; fstsw %ax ; sahf ; setpe %ah"],
914 hcat [gtab, text "setne %al ; ",
915 text "orb %ah,%al ; decb %al ; popl %eax"]
916 ])
917 | otherwise
918 = pprG g (vcat [
919 hcat [gtab, text "pushl %eax ; ",gpush src1 0],
920 hcat [gtab, text "fcomp ", greg src2 1,
921 text "; fstsw %ax ; sahf ; setpo %ah"],
922 hcat [gtab, text "set", pprCond (fix_FP_cond cond), text " %al ; ",
923 text "andb %ah,%al ; decb %al ; popl %eax"]
924 ])
925 where
926 {- On the 486, the flags set by FP compare are the unsigned ones!
927 (This looks like a HACK to me. WDP 96/03)
928 -}
929 fix_FP_cond :: Cond -> Cond
930 fix_FP_cond GE = GEU
931 fix_FP_cond GTT = GU
932 fix_FP_cond LTT = LU
933 fix_FP_cond LE = LEU
934 fix_FP_cond EQQ = EQQ
935 fix_FP_cond NE = NE
936 fix_FP_cond _ = panic "X86.Ppr.fix_FP_cond: no match"
937 -- there should be no others
938
939
940 pprInstr g@(GABS _ src dst)
941 = pprG g (hcat [gtab, gpush src 0, text " ; fabs ; ", gpop dst 1])
942
943 pprInstr g@(GNEG _ src dst)
944 = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])
945
946 pprInstr g@(GSQRT fmt src dst)
947 = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt"] $$
948 hcat [gtab, gcoerceto fmt, gpop dst 1])
949
950 pprInstr g@(GSIN fmt l1 l2 src dst)
951 = pprG g (pprTrigOp "fsin" False l1 l2 src dst fmt)
952
953 pprInstr g@(GCOS fmt l1 l2 src dst)
954 = pprG g (pprTrigOp "fcos" False l1 l2 src dst fmt)
955
956 pprInstr g@(GTAN fmt l1 l2 src dst)
957 = pprG g (pprTrigOp "fptan" True l1 l2 src dst fmt)
958
959 -- In the translations for GADD, GMUL, GSUB and GDIV,
960 -- the first two cases are mere optimisations. The otherwise clause
961 -- generates correct code under all circumstances.
962
963 pprInstr g@(GADD _ src1 src2 dst)
964 | src1 == dst
965 = pprG g (text "\t#GADD-xxxcase1" $$
966 hcat [gtab, gpush src2 0,
967 text " ; faddp %st(0),", greg src1 1])
968 | src2 == dst
969 = pprG g (text "\t#GADD-xxxcase2" $$
970 hcat [gtab, gpush src1 0,
971 text " ; faddp %st(0),", greg src2 1])
972 | otherwise
973 = pprG g (hcat [gtab, gpush src1 0,
974 text " ; fadd ", greg src2 1, text ",%st(0)",
975 gsemi, gpop dst 1])
976
977
978 pprInstr g@(GMUL _ src1 src2 dst)
979 | src1 == dst
980 = pprG g (text "\t#GMUL-xxxcase1" $$
981 hcat [gtab, gpush src2 0,
982 text " ; fmulp %st(0),", greg src1 1])
983 | src2 == dst
984 = pprG g (text "\t#GMUL-xxxcase2" $$
985 hcat [gtab, gpush src1 0,
986 text " ; fmulp %st(0),", greg src2 1])
987 | otherwise
988 = pprG g (hcat [gtab, gpush src1 0,
989 text " ; fmul ", greg src2 1, text ",%st(0)",
990 gsemi, gpop dst 1])
991
992
993 pprInstr g@(GSUB _ src1 src2 dst)
994 | src1 == dst
995 = pprG g (text "\t#GSUB-xxxcase1" $$
996 hcat [gtab, gpush src2 0,
997 text " ; fsubrp %st(0),", greg src1 1])
998 | src2 == dst
999 = pprG g (text "\t#GSUB-xxxcase2" $$
1000 hcat [gtab, gpush src1 0,
1001 text " ; fsubp %st(0),", greg src2 1])
1002 | otherwise
1003 = pprG g (hcat [gtab, gpush src1 0,
1004 text " ; fsub ", greg src2 1, text ",%st(0)",
1005 gsemi, gpop dst 1])
1006
1007
1008 pprInstr g@(GDIV _ src1 src2 dst)
1009 | src1 == dst
1010 = pprG g (text "\t#GDIV-xxxcase1" $$
1011 hcat [gtab, gpush src2 0,
1012 text " ; fdivrp %st(0),", greg src1 1])
1013 | src2 == dst
1014 = pprG g (text "\t#GDIV-xxxcase2" $$
1015 hcat [gtab, gpush src1 0,
1016 text " ; fdivp %st(0),", greg src2 1])
1017 | otherwise
1018 = pprG g (hcat [gtab, gpush src1 0,
1019 text " ; fdiv ", greg src2 1, text ",%st(0)",
1020 gsemi, gpop dst 1])
1021
1022
1023 pprInstr GFREE
1024 = vcat [ text "\tffree %st(0) ;ffree %st(1) ;ffree %st(2) ;ffree %st(3)",
1025 text "\tffree %st(4) ;ffree %st(5)"
1026 ]
1027
1028 -- Atomics
1029
1030 pprInstr (LOCK i) = text "\tlock" $$ pprInstr i
1031
1032 pprInstr MFENCE = text "\tmfence"
1033
1034 pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst
1035
1036 pprInstr (CMPXCHG format src dst)
1037 = pprFormatOpOp (sLit "cmpxchg") format src dst
1038
1039
1040 pprTrigOp :: String -> Bool -> CLabel -> CLabel
1041 -> Reg -> Reg -> Format -> SDoc
1042 pprTrigOp op -- fsin, fcos or fptan
1043 isTan -- we need a couple of extra steps if we're doing tan
1044 l1 l2 -- internal labels for us to use
1045 src dst fmt
1046 = -- We'll be needing %eax later on
1047 hcat [gtab, text "pushl %eax;"] $$
1048 -- tan is going to use an extra space on the FP stack
1049 (if isTan then hcat [gtab, text "ffree %st(6)"] else empty) $$
1050 -- First put the value in %st(0) and try to apply the op to it
1051 hcat [gpush src 0, text ("; " ++ op)] $$
1052 -- Now look to see if C2 was set (overflow, |value| >= 2^63)
1053 hcat [gtab, text "fnstsw %ax"] $$
1054 hcat [gtab, text "test $0x400,%eax"] $$
1055 -- If we were in bounds then jump to the end
1056 hcat [gtab, text "je " <> ppr l1] $$
1057 -- Otherwise we need to shrink the value. Start by
1058 -- loading pi, doubleing it (by adding it to itself),
1059 -- and then swapping pi with the value, so the value we
1060 -- want to apply op to is in %st(0) again
1061 hcat [gtab, text "ffree %st(7); fldpi"] $$
1062 hcat [gtab, text "fadd %st(0),%st"] $$
1063 hcat [gtab, text "fxch %st(1)"] $$
1064 -- Now we have a loop in which we make the value smaller,
1065 -- see if it's small enough, and loop if not
1066 (ppr l2 <> char ':') $$
1067 hcat [gtab, text "fprem1"] $$
1068 -- My Debian libc uses fstsw here for the tan code, but I can't
1069 -- see any reason why it should need to be different for tan.
1070 hcat [gtab, text "fnstsw %ax"] $$
1071 hcat [gtab, text "test $0x400,%eax"] $$
1072 hcat [gtab, text "jne " <> ppr l2] $$
1073 hcat [gtab, text "fstp %st(1)"] $$
1074 hcat [gtab, text op] $$
1075 (ppr l1 <> char ':') $$
1076 -- Pop the 1.0 tan gave us
1077 (if isTan then hcat [gtab, text "fstp %st(0)"] else empty) $$
1078 -- Restore %eax
1079 hcat [gtab, text "popl %eax;"] $$
1080 -- And finally make the result the right size
1081 hcat [gtab, gcoerceto fmt, gpop dst 1]
1082
1083 --------------------------
1084
1085 -- coerce %st(0) to the specified size
1086 gcoerceto :: Format -> SDoc
1087 gcoerceto FF64 = empty
1088 gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; "
1089 gcoerceto _ = panic "X86.Ppr.gcoerceto: no match"
1090
1091 gpush :: Reg -> RegNo -> SDoc
1092 gpush reg offset
1093 = hcat [text "fld ", greg reg offset]
1094
1095 gpop :: Reg -> RegNo -> SDoc
1096 gpop reg offset
1097 = hcat [text "fstp ", greg reg offset]
1098
1099 greg :: Reg -> RegNo -> SDoc
1100 greg reg offset = text "%st(" <> int (gregno reg - firstfake+offset) <> char ')'
1101
1102 gsemi :: SDoc
1103 gsemi = text " ; "
1104
1105 gtab :: SDoc
1106 gtab = char '\t'
1107
1108 gsp :: SDoc
1109 gsp = char ' '
1110
1111 gregno :: Reg -> RegNo
1112 gregno (RegReal (RealRegSingle i)) = i
1113 gregno _ = --pprPanic "gregno" (ppr other)
1114 999 -- bogus; only needed for debug printing
1115
1116 pprG :: Instr -> SDoc -> SDoc
1117 pprG fake actual
1118 = (char '#' <> pprGInstr fake) $$ actual
1119
1120
1121 pprGInstr :: Instr -> SDoc
1122 pprGInstr (GMOV src dst) = pprFormatRegReg (sLit "gmov") FF64 src dst
1123 pprGInstr (GLD fmt src dst) = pprFormatAddrReg (sLit "gld") fmt src dst
1124 pprGInstr (GST fmt src dst) = pprFormatRegAddr (sLit "gst") fmt src dst
1125
1126 pprGInstr (GLDZ dst) = pprFormatReg (sLit "gldz") FF64 dst
1127 pprGInstr (GLD1 dst) = pprFormatReg (sLit "gld1") FF64 dst
1128
1129 pprGInstr (GFTOI src dst) = pprFormatFormatRegReg (sLit "gftoi") FF32 II32 src dst
1130 pprGInstr (GDTOI src dst) = pprFormatFormatRegReg (sLit "gdtoi") FF64 II32 src dst
1131
1132 pprGInstr (GITOF src dst) = pprFormatFormatRegReg (sLit "gitof") II32 FF32 src dst
1133 pprGInstr (GITOD src dst) = pprFormatFormatRegReg (sLit "gitod") II32 FF64 src dst
1134 pprGInstr (GDTOF src dst) = pprFormatFormatRegReg (sLit "gdtof") FF64 FF32 src dst
1135
1136 pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst
1137 pprGInstr (GABS fmt src dst) = pprFormatRegReg (sLit "gabs") fmt src dst
1138 pprGInstr (GNEG fmt src dst) = pprFormatRegReg (sLit "gneg") fmt src dst
1139 pprGInstr (GSQRT fmt src dst) = pprFormatRegReg (sLit "gsqrt") fmt src dst
1140 pprGInstr (GSIN fmt _ _ src dst) = pprFormatRegReg (sLit "gsin") fmt src dst
1141 pprGInstr (GCOS fmt _ _ src dst) = pprFormatRegReg (sLit "gcos") fmt src dst
1142 pprGInstr (GTAN fmt _ _ src dst) = pprFormatRegReg (sLit "gtan") fmt src dst
1143
1144 pprGInstr (GADD fmt src1 src2 dst) = pprFormatRegRegReg (sLit "gadd") fmt src1 src2 dst
1145 pprGInstr (GSUB fmt src1 src2 dst) = pprFormatRegRegReg (sLit "gsub") fmt src1 src2 dst
1146 pprGInstr (GMUL fmt src1 src2 dst) = pprFormatRegRegReg (sLit "gmul") fmt src1 src2 dst
1147 pprGInstr (GDIV fmt src1 src2 dst) = pprFormatRegRegReg (sLit "gdiv") fmt src1 src2 dst
1148
1149 pprGInstr _ = panic "X86.Ppr.pprGInstr: no match"
1150
1151 pprDollImm :: Imm -> SDoc
1152 pprDollImm i = text "$" <> pprImm i
1153
1154
1155 pprOperand :: Format -> Operand -> SDoc
1156 pprOperand f (OpReg r) = pprReg f r
1157 pprOperand _ (OpImm i) = pprDollImm i
1158 pprOperand _ (OpAddr ea) = pprAddr ea
1159
1160
1161 pprMnemonic_ :: PtrString -> SDoc
1162 pprMnemonic_ name =
1163 char '\t' <> ptext name <> space
1164
1165
1166 pprMnemonic :: PtrString -> Format -> SDoc
1167 pprMnemonic name format =
1168 char '\t' <> ptext name <> pprFormat format <> space
1169
1170
1171 pprFormatImmOp :: PtrString -> Format -> Imm -> Operand -> SDoc
1172 pprFormatImmOp name format imm op1
1173 = hcat [
1174 pprMnemonic name format,
1175 char '$',
1176 pprImm imm,
1177 comma,
1178 pprOperand format op1
1179 ]
1180
1181
1182 pprFormatOp_ :: PtrString -> Format -> Operand -> SDoc
1183 pprFormatOp_ name format op1
1184 = hcat [
1185 pprMnemonic_ name ,
1186 pprOperand format op1
1187 ]
1188
1189 pprFormatOp :: PtrString -> Format -> Operand -> SDoc
1190 pprFormatOp name format op1
1191 = hcat [
1192 pprMnemonic name format,
1193 pprOperand format op1
1194 ]
1195
1196
1197 pprFormatOpOp :: PtrString -> Format -> Operand -> Operand -> SDoc
1198 pprFormatOpOp name format op1 op2
1199 = hcat [
1200 pprMnemonic name format,
1201 pprOperand format op1,
1202 comma,
1203 pprOperand format op2
1204 ]
1205
1206
1207 pprOpOp :: PtrString -> Format -> Operand -> Operand -> SDoc
1208 pprOpOp name format op1 op2
1209 = hcat [
1210 pprMnemonic_ name,
1211 pprOperand format op1,
1212 comma,
1213 pprOperand format op2
1214 ]
1215
1216
1217 pprFormatReg :: PtrString -> Format -> Reg -> SDoc
1218 pprFormatReg name format reg1
1219 = hcat [
1220 pprMnemonic name format,
1221 pprReg format reg1
1222 ]
1223
1224
1225 pprFormatRegReg :: PtrString -> Format -> Reg -> Reg -> SDoc
1226 pprFormatRegReg name format reg1 reg2
1227 = hcat [
1228 pprMnemonic name format,
1229 pprReg format reg1,
1230 comma,
1231 pprReg format reg2
1232 ]
1233
1234
1235 pprRegReg :: PtrString -> Reg -> Reg -> SDoc
1236 pprRegReg name reg1 reg2
1237 = sdocWithPlatform $ \platform ->
1238 hcat [
1239 pprMnemonic_ name,
1240 pprReg (archWordFormat (target32Bit platform)) reg1,
1241 comma,
1242 pprReg (archWordFormat (target32Bit platform)) reg2
1243 ]
1244
1245
1246 pprFormatOpReg :: PtrString -> Format -> Operand -> Reg -> SDoc
1247 pprFormatOpReg name format op1 reg2
1248 = sdocWithPlatform $ \platform ->
1249 hcat [
1250 pprMnemonic name format,
1251 pprOperand format op1,
1252 comma,
1253 pprReg (archWordFormat (target32Bit platform)) reg2
1254 ]
1255
1256 pprCondOpReg :: PtrString -> Format -> Cond -> Operand -> Reg -> SDoc
1257 pprCondOpReg name format cond op1 reg2
1258 = hcat [
1259 char '\t',
1260 ptext name,
1261 pprCond cond,
1262 space,
1263 pprOperand format op1,
1264 comma,
1265 pprReg format reg2
1266 ]
1267
1268 pprCondRegReg :: PtrString -> Format -> Cond -> Reg -> Reg -> SDoc
1269 pprCondRegReg name format cond reg1 reg2
1270 = hcat [
1271 char '\t',
1272 ptext name,
1273 pprCond cond,
1274 space,
1275 pprReg format reg1,
1276 comma,
1277 pprReg format reg2
1278 ]
1279
1280 pprFormatFormatRegReg :: PtrString -> Format -> Format -> Reg -> Reg -> SDoc
1281 pprFormatFormatRegReg name format1 format2 reg1 reg2
1282 = hcat [
1283 char '\t',
1284 ptext name,
1285 pprFormat format1,
1286 pprFormat format2,
1287 space,
1288 pprReg format1 reg1,
1289 comma,
1290 pprReg format2 reg2
1291 ]
1292
1293 pprFormatFormatOpReg :: PtrString -> Format -> Format -> Operand -> Reg -> SDoc
1294 pprFormatFormatOpReg name format1 format2 op1 reg2
1295 = hcat [
1296 pprMnemonic name format2,
1297 pprOperand format1 op1,
1298 comma,
1299 pprReg format2 reg2
1300 ]
1301
1302 pprFormatRegRegReg :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc
1303 pprFormatRegRegReg name format reg1 reg2 reg3
1304 = hcat [
1305 pprMnemonic name format,
1306 pprReg format reg1,
1307 comma,
1308 pprReg format reg2,
1309 comma,
1310 pprReg format reg3
1311 ]
1312
1313 pprFormatOpOpReg :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc
1314 pprFormatOpOpReg name format op1 op2 reg3
1315 = hcat [
1316 pprMnemonic name format,
1317 pprOperand format op1,
1318 comma,
1319 pprOperand format op2,
1320 comma,
1321 pprReg format reg3
1322 ]
1323
1324 pprFormatAddrReg :: PtrString -> Format -> AddrMode -> Reg -> SDoc
1325 pprFormatAddrReg name format op dst
1326 = hcat [
1327 pprMnemonic name format,
1328 pprAddr op,
1329 comma,
1330 pprReg format dst
1331 ]
1332
1333
1334 pprFormatRegAddr :: PtrString -> Format -> Reg -> AddrMode -> SDoc
1335 pprFormatRegAddr name format src op
1336 = hcat [
1337 pprMnemonic name format,
1338 pprReg format src,
1339 comma,
1340 pprAddr op
1341 ]
1342
1343
1344 pprShift :: PtrString -> Format -> Operand -> Operand -> SDoc
1345 pprShift name format src dest
1346 = hcat [
1347 pprMnemonic name format,
1348 pprOperand II8 src, -- src is 8-bit sized
1349 comma,
1350 pprOperand format dest
1351 ]
1352
1353
1354 pprFormatOpOpCoerce :: PtrString -> Format -> Format -> Operand -> Operand -> SDoc
1355 pprFormatOpOpCoerce name format1 format2 op1 op2
1356 = hcat [ char '\t', ptext name, pprFormat format1, pprFormat format2, space,
1357 pprOperand format1 op1,
1358 comma,
1359 pprOperand format2 op2
1360 ]
1361
1362
1363 pprCondInstr :: PtrString -> Cond -> SDoc -> SDoc
1364 pprCondInstr name cond arg
1365 = hcat [ char '\t', ptext name, pprCond cond, space, arg]