{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

-- The default iteration limit is a bit too low for the definitions
-- in this module.
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        extractUnwindPoints,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import GhcPrelude

import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Debug            ( DebugBlock(..), UnwindPoint(..), UnwindTable
                        , UnwindExpr(UwReg), toUnwindExpr )
import Instruction
import PIC
import NCGMonad
import Format
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primUnitId )
import PprCmm           ()
import CmmUtils
import CmmSwitch
import Cmm
import Hoopl.Block
import Hoopl.Graph
import CLabel
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import FastString
import DynFlags
import Util
import UniqSupply       ( getUniqueM )

import Control.Monad
import Data.Bits
import Data.Foldable    ( fold )
import Data.Int
import Data.Maybe
import Data.Word

import qualified Data.Map as M
is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
    dflags <- getDynFlags
    return (isSse2Enabled dflags)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
    dflags <- getDynFlags
    return (isSse4_2Enabled dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
    b <- sse2Enabled
    if b then sse2 else x87
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  instrs' <- fold <$> traverse addSpUnwindings instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)

-- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
-- in the @sp@ register. See Note [What is this unwinding business?] in Debug
-- for details.
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    dflags <- getDynFlags
    if debugLevel dflags >= 1
        then do lbl <- mkAsmTempLabel <$> getUniqueM
                let unwind = M.singleton MachSp (Just $ UwReg MachSp $ negate d)
                return $ toOL [ instr, UNWIND lbl unwind ]
        else return (unitOL instr)
addSpUnwindings instr = return $ unitOL instr

stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL

    CmmUnwind regs -> do
      let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
          to_unwind_entry (reg, expr) = M.singleton reg (fmap toUnwindExpr expr)
      case foldMap to_unwind_entry regs of
        tbl | M.null tbl -> return nilOL
            | otherwise  -> do
                lbl <- mkAsmTempLabel <$> getUniqueM
                return $ unitOL $ UNWIND lbl tbl

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode format reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code        reg src
      | otherwise              -> assignReg_IntCode format reg src
        where ty = cmmRegType dflags reg
              format = cmmTypeFormat ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code        addr src
      | otherwise              -> assignMem_IntCode format addr src
        where ty = cmmExprType dflags src
              format = cmmTypeFormat ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id          -> genBranch id

    -- We try to arrange blocks such that the likely branch is the fallthrough
    -- in CmmContFlowOpt.  So we can assume the condition is likely false here.
    CmmCondBranch arg true false _ -> do
      b1 <- genCondJump true arg
      b2 <- genBranch false
      return (b1 `appOL` b2)
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
        dflags <- getDynFlags
        genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified.
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)


swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let fmt = cmmTypeFormat pk in
    if isFloatFormat fmt && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u fmt)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...

-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}

-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
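--
-- For example, from the definition above:
--
--   is32BitInteger (-1)                 == True   -- truncates to -1 :: Int64
--   is32BitInteger 0xffffffffffffffff   == True   -- also truncates to -1
--   is32BitInteger 0x7fffffff           == True   -- largest signed imm32
--   is32BitInteger 0x80000000           == False  -- needs a 64-bit move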


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = blockLbl blockid


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)

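-- For example, a word-sized @CmmRegOff r 8@ expands to the equivalent
-- addition node, which the amode selector below can pattern-match on:
--
--   mangleIndexTree dflags r 8
--     == CmmMachOp (MO_Add w) [CmmReg r, CmmLit (CmmInt 8 w)]
--
-- where w is the width of r's type.
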
-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
          )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
                          CLTD II32 `snocOL`
                          MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
                          MOV II32 (OpReg edx) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


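-- For example, on i386 a 64-bit addition @x + y@ comes out of the
-- MO_Add case above as the usual add-with-carry sequence over the
-- (lo, hi) virtual register pairs:
--
--   MOV  r1lo, rlo
--   ADD  r2lo, rlo     -- may set the carry flag
--   MOV  r1hi, rhi
--   ADC  r2hi, rhi     -- consumes the carry from the low word
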
--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 format | not use_sse2 && isFloatFormat fmt = FF80
                        | otherwise                         = fmt
               --
               let platform = targetPlatform dflags
               return (Fixed format
                             (getRegisterReg platform use_sse2 reg)
                             nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
  = addAlignmentCheck align <$> getRegister' dflags is32Bit e

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                        [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
    ChildCode64 code rlo <- iselExpr64 x
    return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                        [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
    ChildCode64 code rlo <- iselExpr64 x
    return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
    ChildCode64 code rlo <- iselExpr64 x
    return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
    ChildCode64 code rlo <- iselExpr64 x
    return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
        let
          format = floatFormat w
          code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
          -- I don't know why there are xorpd, xorps, and pxor instructions.
          -- They all appear to do the same thing --SDM
        return (Any format code)

    | otherwise = do
        Amode addr code <- memConstant (widthInBytes w) lit
        loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
        Amode addr code <- memConstant (widthInBytes w) lit
        loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_format)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

      {- Shift ops on x86 have constraints on their source: it has to be
         an immediate, %cl, or 1
         => trivialCode is not restrictive enough (sigh.)
      -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op = instr (intFormat width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt = case rep of
                          W32 -> 31
                          W64 -> 63
                          _   -> panic "shift_amt"

             format = intFormat rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB format (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed format eax code)

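    -- A worked W32 example of the overflow test above: for
    -- 0x10000 * 0x10000 the widening multiply leaves %edx:%eax =
    -- 0x1:0x00000000.  Sign-extending the low word gives 0, and
    -- 0 - 0x1 /= 0, so %eax ends up non-zero: overflow.  For 2 * 3,
    -- %edx:%eax = 0x0:0x6, the sign extension is 0 and 0 - 0 == 0,
    -- so no overflow is reported.
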
    --------------------
    shift_code :: Width
               -> (Format -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               format = intFormat width
               code dst
                  = x_code dst `snocOL`
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let format = intFormat width
        tmp <- getNewRegNat format
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            format = intFormat width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA format
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any format code)

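    -- For example, @x + 4@ at W64 goes through add_int above and is
    -- selected as a single three-operand instruction rather than a
    -- MOV/ADD pair, roughly (with %x_reg and %dst standing for
    -- whatever registers get chosen):
    --
    --   leaq 4(%x_reg), %dst
    --
    -- As a bonus, LEA leaves the condition flags untouched.
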
    ----------------------
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr format y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed format result code)


getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any format code)
  where
    width = typeWidth pk
    format = intFormat width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV format
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
  | not is32Bit
  = do
    code <- intLoadCode (MOV format) mem
    return (Any format code)
  where format = intFormat $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        format = intFormat width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        format1 = if is32Bit then format
                  else case format of
                        II64 -> II32
                        _    -> format
        code dst
           = unitOL (XOR format1 (OpReg dst) (OpReg dst))
    in
        return (Any format code)

-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

getRegister' dflags _ (CmmLit lit)
  = do let format = cmmTypeFormat (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
       return (Any format code)

getRegister' _ _ other
  | isVecExpr other = needLlvm
  | otherwise       = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)

reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst
  | format == FF80 = GMOV src dst
  | otherwise      = MOV format (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                           CmmLit displacement])
    | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                           b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])

-- Matches: (x + offset) + (y << shift)
getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
                                   CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  -- note: the parentheses matter; && binds tighter than ||, so without
  -- them the offset check would only apply to the shift == 3 case.
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)

-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register). This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
getSimpleAmode dflags is32Bit addr
  | is32Bit = do
      addr_code <- getAnyReg addr
      addr_r <- getNewRegNat (intFormat (wordWidth dflags))
      let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
      return $! Amode amode (addr_code addr_r)
  | otherwise = getAmode addr

x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
               code)

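-- For example, a Cmm expression of the shape @(x + 12) + (y << 2)@
-- matches the "(x + offset) + (y << shift)" rule above and is selected
-- as a single scaled addressing mode, roughly:
--
--   12(%x_reg,%y_reg,4)
--
-- where the scale 4 comes from the shift amount 2 via the mapping in
-- x86_complex_amode, and %x_reg/%y_reg stand for whatever registers
-- x and y were computed into.
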
-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && (if is32Bit then not (isWord64 pk) else True)
    then do
      dflags <- getDynFlags
      let platform = targetPlatform dflags
      Amode src mem_code <- getAmode mem
      (src',save_code) <-
        if (amodeCouldBeClobbered platform src)
          then do
            tmp <- getNewRegNat (archWordFormat is32Bit)
            return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                    unitOL (LEA (archWordFormat is32Bit)
                                (OpAddr src)
                                (OpReg tmp)))
          else
            return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else do
      getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
regClobbered _ _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else
       getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: Bool -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _) = True
isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
                                 || isSuitableFloatingPointLit lit
isOperand _ _ = False

-- | Given a 'Register', produce a new 'Register' with an instruction block
-- which will check the value for alignment. Used for @-falignment-sanitisation@.
addAlignmentCheck :: Int -> Register -> Register
addAlignmentCheck align reg =
    case reg of
      Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt reg)
      Any fmt f          -> Any fmt (\reg -> f reg `appOL` check fmt reg)
  where
    check :: Format -> Reg -> InstrBlock
    check fmt reg =
        ASSERT(not $ isFloatFormat fmt)
        toOL [ TEST fmt (OpImm $ ImmInt $ align-1) (OpReg reg)
             , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
             ]

memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  let rosection = Section ReadOnlyData lbl
  dflags <- getDynFlags
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA rosection (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)


loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let format = floatFormat w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV format (OpAddr addr) (OpReg dst)
                    else GLD format addr dst
  return (Any (if use_sse2 then format else FF80) code)


-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else do
       (reg, code) <- getNonClobberedReg e
       return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: Bool -> CmmLit -> Bool
is32BitLit is32Bit (CmmInt i W64)
 | not is32Bit
    = -- assume that labels are in the range 0-2^31-1: this assumes the
      -- small memory model (see gcc docs, -mcmodel=small).
      is32BitInteger i
is32BitLit _ _ = True


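-- For example, on x86_64 @is32BitLit False (CmmInt 0x7fffffff W64)@ is
-- True (the constant can be used as a signed imm32), while
-- @is32BitLit False (CmmInt 0x80000000 W64)@ is False and the literal
-- has to be materialised with a full 64-bit move instead.
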
-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      _ -> condIntCode (machOpToCond mop) x y

getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)

machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
  MO_Eq _   -> EQQ
  MO_Ne _   -> NE
  MO_S_Gt _ -> GTT
  MO_S_Ge _ -> GE
  MO_S_Lt _ -> LTT
  MO_S_Le _ -> LE
  MO_U_Gt _ -> GU
  MO_U_Ge _ -> GEU
  MO_U_Lt _ -> LU
  MO_U_Le _ -> LEU
  _other -> pprPanic "machOpToCond" (pprMachOp mo)


-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do is32Bit <- is32BitPlatform
                          condIntCode' is32Bit cond x y

condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate
condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
 | is32BitLit is32Bit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
               CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
    | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)

-- anything vs zero
condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
               TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand
condIntCode' is32Bit cond x y
 | isOperand is32Bit y = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
               CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
-- operand vs. anything: invert the comparison so that we can use a
-- single comparison instruction.
 | isOperand is32Bit x
 , Just revcond <- maybeFlipCond cond = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
               CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)

-- anything vs anything
condIntCode' _ cond x y = do
  dflags <- getDynFlags
  (y_reg, y_code) <- getNonClobberedReg y
  (x_op,  x_code) <- getRegOrMem x
  let
        code = y_code `appOL`
               x_code `snocOL`
               CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op
  return (CondCode False cond code)


--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  = if_sse2 condFltCode_sse2 condFltCode_x87
  where

  condFltCode_x87
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
      (x_reg, x_code) <- getNonClobberedReg x
      (y_reg, y_code) <- getSomeReg y
      let
        code = x_code `appOL` y_code `snocOL`
               GCMP cond x_reg y_reg
      -- The GCMP insn does the test and sets the zero flag if comparable
      -- and true.  Hence we always supply EQQ as the condition to test.
      return (CondCode True EQQ code)

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
               CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)

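-- As a reminder of why the unsigned condition codes are the right ones
-- here: ucomiss/ucomisd set ZF, PF and CF the way an *unsigned* compare
-- would, with the parity flag signalling an unordered (NaN) result:
--
--   x > y      ->  ZF=0, PF=0, CF=0
--   x < y      ->  ZF=0, PF=0, CF=1
--   x == y     ->  ZF=1, PF=0, CF=0
--   unordered  ->  ZF=1, PF=1, CF=1
--
-- so e.g. GU ("unsigned >") tests CF=0 and ZF=0, which is exactly
-- "ordered and greater".
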
-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock


-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
   | addr == addr2, pk /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode pk addr src = do
    is32Bit <- is32BitPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI is32Bit src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV pk op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)


-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
  load_code <- intLoadCode (MOV pk) src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform False{-no sse2-} reg))

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  code <- getAnyReg src
  return (code (getRegisterReg platform False{-no sse2-} reg))


-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  use_sse2 <- sse2Enabled
  let
        code = src_code `appOL`
               addr_code `snocOL`
               if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
                           else GST pk src_reg addr
  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do
  use_sse2 <- sse2Enabled
  src_code <- getAnyReg src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (src_code (getRegisterReg platform use_sse2 reg))


genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock

genJump (CmmLoad mem _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

genJump (CmmLit lit) regs = do
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)


-- -----------------------------------------------------------------------------
--  Unconditional branches

genBranch :: BlockId -> NatM InstrBlock
genBranch = return . toOL . mkJumpInstr


1626 -- -----------------------------------------------------------------------------
1627 -- Conditional jumps
1628
1629 {-
1630 Conditional jumps are always to local labels, so we can use branch
1631 instructions. We peek at the arguments to decide what kind of
1632 comparison to do.
1633
1634 I386: First, we have to ensure that the condition
1635 codes are set according to the supplied comparison operation.
1636 -}
1637
1638 genCondJump
1639 :: BlockId -- the branch target
1640 -> CmmExpr -- the condition on which to branch
1641 -> NatM InstrBlock
1642
1643 genCondJump id expr = do
1644 is32Bit <- is32BitPlatform
1645 genCondJump' is32Bit id expr
1646
1647 genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock
1648
1649 -- 64-bit integer comparisons on 32-bit
1650 genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
1651 | is32Bit, Just W64 <- maybeIntComparison mop = do
1652 ChildCode64 code1 r1_lo <- iselExpr64 e1
1653 ChildCode64 code2 r2_lo <- iselExpr64 e2
1654 let r1_hi = getHiVRegFromLo r1_lo
1655 r2_hi = getHiVRegFromLo r2_lo
1656 cond = machOpToCond mop
1657 Just cond' = maybeFlipCond cond
1658 false <- getBlockIdNat
1659 return $ code1 `appOL` code2 `appOL` toOL [
1660 CMP II32 (OpReg r2_hi) (OpReg r1_hi),
1661 JXX cond true,
1662 JXX cond' false,
1663 CMP II32 (OpReg r2_lo) (OpReg r1_lo),
1664 JXX cond true,
1665 NEWBLOCK false ]
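-- Hedged sketch (AT&T syntax, virtual register names) of the branch
-- structure above, for an unsigned comparison  x >64 y  on 32-bit:
--
-- >      cmpl y_hi, x_hi
-- >      ja   true          -- high words decide it
-- >      jb   false         -- flipped condition
-- >      cmpl y_lo, x_lo    -- high words equal: low words decide
-- >      ja   true
-- >  false:                 -- fall through to the false block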
1666
1667 genCondJump' _ id bool = do
1668 CondCode is_float cond cond_code <- getCondCode bool
1669 use_sse2 <- sse2Enabled
1670 if not is_float || not use_sse2
1671 then
1672 return (cond_code `snocOL` JXX cond id)
1673 else do
1674 lbl <- getBlockIdNat
1675
1676 -- see comment with condFltReg
1677 let code = case cond of
1678 NE -> or_unordered
1679 GU -> plain_test
1680 GEU -> plain_test
1681 _ -> and_ordered
1682
1683 plain_test = unitOL (
1684 JXX cond id
1685 )
1686 or_unordered = toOL [
1687 JXX cond id,
1688 JXX PARITY id
1689 ]
1690 and_ordered = toOL [
1691 JXX PARITY lbl,
1692 JXX cond id,
1693 JXX ALWAYS lbl,
1694 NEWBLOCK lbl
1695 ]
1696 return (cond_code `appOL` code)
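-- Hedged illustration of the NaN handling above: SSE2 comparisons set
-- the parity flag on unordered operands (i.e. when either side is NaN).
-- For NE the result must then be True, so 'or_unordered' also branches
-- on parity:
--
-- >  jne  id
-- >  jp   id
--
-- whereas 'and_ordered' must yield False for unordered operands, so
-- parity skips past the conditional branch:
--
-- >       jp   lbl
-- >       j<cond> id
-- >       jmp  lbl
-- >  lbl: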
1697
1698 -- -----------------------------------------------------------------------------
1699 -- Generating C calls
1700
1701 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1702 -- @get_arg@, which moves the arguments to the correct registers/stack
1703 -- locations. Apart from that, the code is easy.
1704 --
1705 -- (If applicable) Do not fill the delay slots here; you will confuse the
1706 -- register allocator.
1707
1708 genCCall
1709 :: DynFlags
1710 -> Bool -- 32 bit platform?
1711 -> ForeignTarget -- function to call
1712 -> [CmmFormal] -- where to put the result
1713 -> [CmmActual] -- arguments (of mixed type)
1714 -> NatM InstrBlock
1715
1716 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1717
1718 -- Unroll memcpy calls if the source and destination pointers are at
1719 -- least DWORD aligned and the number of bytes to copy isn't too
1720 -- large. Otherwise, call C's memcpy.
1721 genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
1722 [dst, src, CmmLit (CmmInt n _)]
1723 | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
1724 code_dst <- getAnyReg dst
1725 dst_r <- getNewRegNat format
1726 code_src <- getAnyReg src
1727 src_r <- getNewRegNat format
1728 tmp_r <- getNewRegNat format
1729 return $ code_dst dst_r `appOL` code_src src_r `appOL`
1730 go dst_r src_r tmp_r (fromInteger n)
1731 where
1732 -- The number of instructions we will generate (approx). We need 2
1733 -- instructions per move.
1734 insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
1735
1736 format = if align .&. 4 /= 0 then II32 else (archWordFormat is32Bit)
1737
1738 -- The size of each move, in bytes.
1739 sizeBytes :: Integer
1740 sizeBytes = fromIntegral (formatInBytes format)
1741
1742 go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
1743 go dst src tmp i
1744 | i >= sizeBytes =
1745 unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
1746 unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
1747 go dst src tmp (i - sizeBytes)
1748 -- Deal with remaining bytes.
1749 | i >= 4 = -- Will never happen on 32-bit
1750 unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
1751 unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1752 go dst src tmp (i - 4)
1753 | i >= 2 =
1754 unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
1755 unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1756 go dst src tmp (i - 2)
1757 | i >= 1 =
1758 unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
1759 unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1760 go dst src tmp (i - 1)
1761 | otherwise = nilOL
1762 where
1763 src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
1764 (ImmInteger (n - i))
1765 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1766 (ImmInteger (n - i))
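-- A worked example of the unrolling above (assuming a 64-bit target
-- with 8-byte alignment, so sizeBytes = 8): for n = 10, 'go' emits
--
-- >  movq   0(src), tmp ; movq  tmp, 0(dst)   -- i = 10: 8-byte chunk
-- >  movzwl 8(src), tmp ; movw  tmp, 8(dst)   -- i = 2:  2-byte tail
--
-- four instructions in total, matching the 'insns' estimate of
-- 2 * ceiling (10 / 8) = 4.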
1767
1768 genCCall dflags _ (PrimTarget (MO_Memset align)) _
1769 [dst,
1770 CmmLit (CmmInt c _),
1771 CmmLit (CmmInt n _)]
1772 | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
1773 code_dst <- getAnyReg dst
1774 dst_r <- getNewRegNat format
1775 return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
1776 where
1777 (format, val) = case align .&. 3 of
1778 2 -> (II16, c2)
1779 0 -> (II32, c4)
1780 _ -> (II8, c)
1781 c2 = c `shiftL` 8 .|. c
1782 c4 = c2 `shiftL` 16 .|. c2
1783
1784 -- The number of instructions we will generate (approx). We need 1
1785 -- instruction per move.
1786 insns = (n + sizeBytes - 1) `div` sizeBytes
1787
1788 -- The size of each move, in bytes.
1789 sizeBytes :: Integer
1790 sizeBytes = fromIntegral (formatInBytes format)
1791
1792 go :: Reg -> Integer -> OrdList Instr
1793 go dst i
1794 -- TODO: Add movabs instruction and support 64-bit sets.
1795 | i >= sizeBytes = -- sizeBytes may be smaller than the fixed sizes below
1796 unitOL (MOV format (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1797 go dst (i - sizeBytes)
1798 | i >= 4 = -- Will never happen on 32-bit
1799 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1800 go dst (i - 4)
1801 | i >= 2 =
1802 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1803 go dst (i - 2)
1804 | i >= 1 =
1805 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1806 go dst (i - 1)
1807 | otherwise = nilOL
1808 where
1809 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1810 (ImmInteger (n - i))
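-- A minimal sketch of the byte-replication trick used for c2/c4 above
-- ('replicateByte' is illustrative only, not part of this module):
--
-- > replicateByte :: Integer -> (Integer, Integer)
-- > replicateByte c = (c2, c4)
-- >   where
-- >     c2 = c  `shiftL` 8  .|. c    -- 0xab   -> 0xabab
-- >     c4 = c2 `shiftL` 16 .|. c2   -- 0xabab -> 0xabababab
--
-- So memset(dst, 0xab, 6) at 4-byte alignment unrolls to one 4-byte
-- store of 0xabababab followed by one 2-byte store of 0xabab.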
1811
1812 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1813 -- A write barrier compiles to no code on x86/x86-64; we keep it this
1814 -- long only to stop earlier optimisations reordering operations across it.
1815
1816 genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
1817
1818 genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
1819 case n of
1820 0 -> genPrefetch src $ PREFETCH NTA format
1821 1 -> genPrefetch src $ PREFETCH Lvl2 format
1822 2 -> genPrefetch src $ PREFETCH Lvl1 format
1823 3 -> genPrefetch src $ PREFETCH Lvl0 format
1824 l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
1825 -- The C / LLVM prefetch convention numbers the levels 0, 1, 2, and 3;
1826 -- the corresponding x86 hints are NTA, T2, T1, and T0 respectively.
1827 where
1828 format = archWordFormat is32bit
1829 -- we need to know the register width for pointers!
1830 genPrefetch inRegSrc prefetchCTor =
1831 do
1832 code_src <- getAnyReg inRegSrc
1833 src_r <- getNewRegNat format
1834 return $ code_src src_r `appOL`
1835 (unitOL (prefetchCTor (OpAddr
1836 ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
1837 -- prefetch always takes an address
1838
1839 genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
1840 let platform = targetPlatform dflags
1841 let dst_r = getRegisterReg platform False (CmmLocal dst)
1842 case width of
1843 W64 | is32Bit -> do
1844 ChildCode64 vcode rlo <- iselExpr64 src
1845 let dst_rhi = getHiVRegFromLo dst_r
1846 rhi = getHiVRegFromLo rlo
1847 return $ vcode `appOL`
1848 toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
1849 MOV II32 (OpReg rhi) (OpReg dst_r),
1850 BSWAP II32 dst_rhi,
1851 BSWAP II32 dst_r ]
1852 W16 -> do code_src <- getAnyReg src
1853 return $ code_src dst_r `appOL`
1854 unitOL (BSWAP II32 dst_r) `appOL`
1855 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
1856 _ -> do code_src <- getAnyReg src
1857 return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)
1858 where
1859 format = intFormat width
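-- Worked example for the W16 case above: x86's BSWAP is only defined
-- for 32- and 64-bit registers, so we swap within 32 bits and shift
-- the result back down:
--
-- >  0x0000BEEF  --BSWAP-->  0xEFBE0000  --SHR 16-->  0x0000EFBE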
1860
1861 genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1862 args@[src] = do
1863 sse4_2 <- sse4_2Enabled
1864 let platform = targetPlatform dflags
1865 if sse4_2
1866 then do code_src <- getAnyReg src
1867 src_r <- getNewRegNat format
1868 let dst_r = getRegisterReg platform False (CmmLocal dst)
1869 return $ code_src src_r `appOL`
1870 (if width == W8 then
1871 -- The POPCNT instruction doesn't take an r/m8
1872 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1873 unitOL (POPCNT II16 (OpReg src_r) dst_r)
1874 else
1875 unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
1876 (if width == W8 || width == W16 then
1877 -- We used a 16-bit destination register above,
1878 -- so zero-extend
1879 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1880 else nilOL)
1881 else do
1882 targetExpr <- cmmMakeDynamicReference dflags
1883 CallReference lbl
1884 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1885 [NoHint] [NoHint]
1886 CmmMayReturn)
1887 genCCall dflags is32Bit target dest_regs args
1888 where
1889 format = intFormat width
1890 lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
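-- Hedged sketch of the W8 detour above (virtual register names):
-- POPCNT has no r/m8 form, so the byte is zero-extended and counted
-- with the 16-bit form, and the 16-bit result is then zero-extended:
--
-- >  movzbl  %src_b, %src     -- widen the byte operand
-- >  popcntw %src_w, %dst_w   -- count set bits, 16-bit form
-- >  movzwl  %dst_w, %dst     -- clear the upper bits of the result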
1891
1892 genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
1893 args@[src, mask] = do
1894 let platform = targetPlatform dflags
1895 if isBmi2Enabled dflags
1896 then do code_src <- getAnyReg src
1897 code_mask <- getAnyReg mask
1898 src_r <- getNewRegNat format
1899 mask_r <- getNewRegNat format
1900 let dst_r = getRegisterReg platform False (CmmLocal dst)
1901 return $ code_src src_r `appOL` code_mask mask_r `appOL`
1902 (if width == W8 then
1903 -- The PDEP instruction doesn't take an r/m8
1904 unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
1905 unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
1906 unitOL (PDEP II16 (OpReg mask_r) (OpReg src_r ) dst_r)
1907 else
1908 unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
1909 (if width == W8 || width == W16 then
1910 -- We used a 16-bit destination register above,
1911 -- so zero-extend
1912 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1913 else nilOL)
1914 else do
1915 targetExpr <- cmmMakeDynamicReference dflags
1916 CallReference lbl
1917 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1918 [NoHint] [NoHint]
1919 CmmMayReturn)
1920 genCCall dflags is32Bit target dest_regs args
1921 where
1922 format = intFormat width
1923 lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
1924
1925 genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
1926 args@[src, mask] = do
1927 let platform = targetPlatform dflags
1928 if isBmi2Enabled dflags
1929 then do code_src <- getAnyReg src
1930 code_mask <- getAnyReg mask
1931 src_r <- getNewRegNat format
1932 mask_r <- getNewRegNat format
1933 let dst_r = getRegisterReg platform False (CmmLocal dst)
1934 return $ code_src src_r `appOL` code_mask mask_r `appOL`
1935 (if width == W8 then
1936 -- The PEXT instruction doesn't take an r/m8
1937 unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
1938 unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
1939 unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r)
1940 else
1941 unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
1942 (if width == W8 || width == W16 then
1943 -- We used a 16-bit destination register above,
1944 -- so zero-extend
1945 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1946 else nilOL)
1947 else do
1948 targetExpr <- cmmMakeDynamicReference dflags
1949 CallReference lbl
1950 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1951 [NoHint] [NoHint]
1952 CmmMayReturn)
1953 genCCall dflags is32Bit target dest_regs args
1954 where
1955 format = intFormat width
1956 lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
1957
1958 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
1959 | is32Bit && width == W64 = do
1960 -- Fallback to `hs_clz64` on i386
1961 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
1962 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1963 [NoHint] [NoHint]
1964 CmmMayReturn)
1965 genCCall dflags is32Bit target dest_regs args
1966
1967 | otherwise = do
1968 code_src <- getAnyReg src
1969 src_r <- getNewRegNat format
1970 tmp_r <- getNewRegNat format
1971 let dst_r = getRegisterReg platform False (CmmLocal dst)
1972
1973 -- The following insn sequence makes sure 'clz 0' has a defined value.
1974 -- Starting with Haswell, one could use the LZCNT insn instead.
1975 return $ code_src src_r `appOL` toOL
1976 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1977 [ BSR format (OpReg src_r) tmp_r
1978 , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
1979 , CMOV NE format (OpReg tmp_r) dst_r
1980 , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
1981 ]) -- NB: We don't need to zero-extend the result for the
1982 -- W8/W16 cases because the 'MOV' insn already
1983 -- took care of implicitly clearing the upper bits
1984 where
1985 bw = widthInBits width
1986 platform = targetPlatform dflags
1987 format = if width == W8 then II16 else intFormat width
1988 lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width))
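-- Why the BSR/XOR dance above works (worked example for W32): for
-- non-zero x, BSR yields the index of the highest set bit, which is
-- 31 - clz x, and since 31 is all-ones in 5 bits, 31 - i == 31 `xor` i:
--
-- >  x = 1:  BSR -> 0;  CMOV keeps it;       0 `xor` 31 = 31 = clz 1
-- >  x = 0:  BSR sets ZF; dst stays at 63;  63 `xor` 31 = 32
--
-- so 'clz 0' comes out as the operand width, as required.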
1989
1990 genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
1991 | is32Bit, width == W64 = do
1992 ChildCode64 vcode rlo <- iselExpr64 src
1993 let rhi = getHiVRegFromLo rlo
1994 dst_r = getRegisterReg platform False (CmmLocal dst)
1995 lbl1 <- getBlockIdNat
1996 lbl2 <- getBlockIdNat
1997 tmp_r <- getNewRegNat format
1998
1999 -- The following instruction sequence corresponds to the pseudo-code
2000 --
2001 -- if (src) {
2002 -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
2003 -- } else {
2004 -- dst = 64;
2005 -- }
2006 return $ vcode `appOL` toOL
2007 ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
2008 , OR II32 (OpReg rlo) (OpReg tmp_r)
2009 , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
2010 , JXX EQQ lbl2
2011 , JXX ALWAYS lbl1
2012
2013 , NEWBLOCK lbl1
2014 , BSF II32 (OpReg rhi) dst_r
2015 , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r)
2016 , BSF II32 (OpReg rlo) tmp_r
2017 , CMOV NE II32 (OpReg tmp_r) dst_r
2018 , JXX ALWAYS lbl2
2019
2020 , NEWBLOCK lbl2
2021 ])
2022
2023 | otherwise = do
2024 code_src <- getAnyReg src
2025 src_r <- getNewRegNat format
2026 tmp_r <- getNewRegNat format
2027 let dst_r = getRegisterReg platform False (CmmLocal dst)
2028
2029 -- The following insn sequence makes sure 'ctz 0' has a defined value.
2030 -- Starting with Haswell, one could use the TZCNT insn instead.
2031 return $ code_src src_r `appOL` toOL
2032 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
2033 [ BSF format (OpReg src_r) tmp_r
2034 , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
2035 , CMOV NE format (OpReg tmp_r) dst_r
2036 ]) -- NB: We don't need to zero-extend the result for the
2037 -- W8/W16 cases because the 'MOV' insn already
2038 -- took care of implicitly clearing the upper bits
2039 where
2040 bw = widthInBits width
2041 platform = targetPlatform dflags
2042 format = if width == W8 then II16 else intFormat width
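-- The analogous worked example for the BSF/CMOV sequence above (W32):
--
-- >  x = 8:  BSF -> 3;  CMOV copies it;    dst = 3  = ctz 8
-- >  x = 0:  BSF sets ZF; CMOV not taken;  dst = 32
--
-- so 'ctz 0' is also defined to be the operand width.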
2043
2044 genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
2045 targetExpr <- cmmMakeDynamicReference dflags
2046 CallReference lbl
2047 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
2048 [NoHint] [NoHint]
2049 CmmMayReturn)
2050 genCCall dflags is32Bit target dest_regs args
2051 where
2052 lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width))
2053
2054 genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
2055 Amode amode addr_code <-
2056 if amop `elem` [AMO_Add, AMO_Sub]
2057 then getAmode addr
2058 else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
2059 arg <- getNewRegNat format
2060 arg_code <- getAnyReg n
2061 use_sse2 <- sse2Enabled
2062 let platform = targetPlatform dflags
2063 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
2064 code <- op_code dst_r arg amode
2065 return $ addr_code `appOL` arg_code arg `appOL` code
2066 where
2067 -- Code for the operation
2068 op_code :: Reg -- Destination reg
2069 -> Reg -- Register containing argument
2070 -> AddrMode -- Address of location to mutate
2071 -> NatM (OrdList Instr)
2072 op_code dst_r arg amode = case amop of
2073 -- In the common case where dst_r is a virtual register the
2074 -- final move should go away, because it's the last use of arg
2075 -- and the first use of dst_r.
2076 AMO_Add -> return $ toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
2077 , MOV format (OpReg arg) (OpReg dst_r)
2078 ]
2079 AMO_Sub -> return $ toOL [ NEGI format (OpReg arg)
2080 , LOCK (XADD format (OpReg arg) (OpAddr amode))
2081 , MOV format (OpReg arg) (OpReg dst_r)
2082 ]
2083 AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
2084 AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
2085 , NOT format dst
2086 ])
2087 AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
2088 AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
2089 where
2090 -- Simulate operation that lacks a dedicated instruction using
2091 -- cmpxchg.
2092 cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
2093 -> NatM (OrdList Instr)
2094 cmpxchg_code instrs = do
2095 lbl <- getBlockIdNat
2096 tmp <- getNewRegNat format
2097 return $ toOL
2098 [ MOV format (OpAddr amode) (OpReg eax)
2099 , JXX ALWAYS lbl
2100 , NEWBLOCK lbl
2101 -- Keep old value so we can return it:
2102 , MOV format (OpReg eax) (OpReg dst_r)
2103 , MOV format (OpReg eax) (OpReg tmp)
2104 ]
2105 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
2106 [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
2107 , JXX NE lbl
2108 ]
2109
2110 format = intFormat width
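-- Hedged pseudo-assembly sketch of the loop 'cmpxchg_code' builds,
-- e.g. for AMO_And (virtual register names, AT&T syntax):
--
-- >         mov    (addr), %eax
-- >  retry: mov    %eax, dst           -- remember the old value
-- >         mov    %eax, tmp
-- >         and    arg, tmp            -- compute the new value
-- >         lock cmpxchg tmp, (addr)   -- store iff (addr) still == %eax;
-- >                                    -- on failure %eax is reloaded
-- >         jne    retry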
2111
2112 genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
2113 load_code <- intLoadCode (MOV (intFormat width)) addr
2114 let platform = targetPlatform dflags
2115 use_sse2 <- sse2Enabled
2116 return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
2117
2118 genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
2119 code <- assignMem_IntCode (intFormat width) addr val
2120 return $ code `snocOL` MFENCE
2121
2122 genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
2123 -- On x86 we don't have enough registers to use cmpxchg with a
2124 -- complicated addressing mode, so on that architecture we
2125 -- pre-compute the address first.
2126 Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
2127 newval <- getNewRegNat format
2128 newval_code <- getAnyReg new
2129 oldval <- getNewRegNat format
2130 oldval_code <- getAnyReg old
2131 use_sse2 <- sse2Enabled
2132 let platform = targetPlatform dflags
2133 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
2134 code = toOL
2135 [ MOV format (OpReg oldval) (OpReg eax)
2136 , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
2137 , MOV format (OpReg eax) (OpReg dst_r)
2138 ]
2139 return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
2140 `appOL` code
2141 where
2142 format = intFormat width
2143
2144 genCCall _ is32Bit target dest_regs args = do
2145 dflags <- getDynFlags
2146 let platform = targetPlatform dflags
2147 sse2 = isSse2Enabled dflags
2148 case (target, dest_regs) of
2149 -- void return type prim op
2150 (PrimTarget op, []) ->
2151 outOfLineCmmOp op Nothing args
2152 -- we only cope with a single result for foreign calls
2153 (PrimTarget op, [r])
2154 | sse2 -> case op of
2155 MO_F32_Fabs -> case args of
2156 [x] -> sse2FabsCode W32 x
2157 _ -> panic "genCCall: Wrong number of arguments for fabs"
2158 MO_F64_Fabs -> case args of
2159 [x] -> sse2FabsCode W64 x
2160 _ -> panic "genCCall: Wrong number of arguments for fabs"
2161
2162 MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args
2163 MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args
2164 _other_op -> outOfLineCmmOp op (Just r) args
2165 | otherwise -> do
2166 l1 <- getNewLabelNat
2167 l2 <- getNewLabelNat
2168 if sse2
2169 then outOfLineCmmOp op (Just r) args
2170 else case op of
2171 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
2172 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
2173
2174 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
2175 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
2176
2177 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
2178 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
2179
2180 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
2181 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
2182
2183 _other_op -> outOfLineCmmOp op (Just r) args
2184
2185 where
2186 actuallyInlineFloatOp = actuallyInlineFloatOp' False
2187 actuallyInlineSSE2Op = actuallyInlineFloatOp' True
2188
2189 actuallyInlineFloatOp' usesSSE instr format [x]
2190 = do res <- trivialUFCode format (instr format) x
2191 any <- anyReg res
2192 return (any (getRegisterReg platform usesSSE (CmmLocal r)))
2193
2194 actuallyInlineFloatOp' _ _ _ args
2195 = panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! ("
2196 ++ show (length args) ++ ")"
2197
2198 sse2FabsCode :: Width -> CmmExpr -> NatM InstrBlock
2199 sse2FabsCode w x = do
2200 let fmt = floatFormat w
2201 x_code <- getAnyReg x
2202 let
2203 const | FF32 <- fmt = CmmInt 0x7fffffff W32
2204 | otherwise = CmmInt 0x7fffffffffffffff W64
2205 Amode amode amode_code <- memConstant (widthInBytes w) const
2206 tmp <- getNewRegNat fmt
2207 let
2208 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2209 MOV fmt (OpAddr amode) (OpReg tmp),
2210 AND fmt (OpReg tmp) (OpReg dst)
2211 ]
2212
2213 return $ code (getRegisterReg platform True (CmmLocal r))
2214
2215 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
2216 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
2217 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
2218 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
2219 case args of
2220 [arg_x, arg_y] ->
2221 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
2222 let format = intFormat width
2223 lCode <- anyReg =<< trivialCode width (ADD_CC format)
2224 (Just (ADD_CC format)) arg_x arg_y
2225 let reg_l = getRegisterReg platform True (CmmLocal res_l)
2226 reg_h = getRegisterReg platform True (CmmLocal res_h)
2227 code = hCode reg_h `appOL`
2228 lCode reg_l `snocOL`
2229 ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
2230 return code
2231 _ -> panic "genCCall: Wrong number of arguments/results for add2"
2232 (PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
2233 addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
2234 (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
2235 addSubIntC platform ADD_CC (Just . ADD_CC) OFLO width res_r res_c args
2236 (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
2237 addSubIntC platform SUB_CC (const Nothing) OFLO width res_r res_c args
2238 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
2239 case args of
2240 [arg_x, arg_y] ->
2241 do (y_reg, y_code) <- getRegOrMem arg_y
2242 x_code <- getAnyReg arg_x
2243 let format = intFormat width
2244 reg_h = getRegisterReg platform True (CmmLocal res_h)
2245 reg_l = getRegisterReg platform True (CmmLocal res_l)
2246 code = y_code `appOL`
2247 x_code rax `appOL`
2248 toOL [MUL2 format y_reg,
2249 MOV format (OpReg rdx) (OpReg reg_h),
2250 MOV format (OpReg rax) (OpReg reg_l)]
2251 return code
2252 _ -> panic "genCCall: Wrong number of arguments/results for mul2"
2253
2254 _ -> if is32Bit
2255 then genCCall32' dflags target dest_regs args
2256 else genCCall64' dflags target dest_regs args
2257
2258 where divOp1 platform signed width results [arg_x, arg_y]
2259 = divOp platform signed width results Nothing arg_x arg_y
2260 divOp1 _ _ _ _ _
2261 = panic "genCCall: Wrong number of arguments for divOp1"
2262 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
2263 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
2264 divOp2 _ _ _ _ _
2265 = panic "genCCall: Wrong number of arguments for divOp2"
2266 divOp platform signed width [res_q, res_r]
2267 m_arg_x_high arg_x_low arg_y
2268 = do let format = intFormat width
2269 reg_q = getRegisterReg platform True (CmmLocal res_q)
2270 reg_r = getRegisterReg platform True (CmmLocal res_r)
2271 widen | signed = CLTD format
2272 | otherwise = XOR format (OpReg rdx) (OpReg rdx)
2273 instr | signed = IDIV
2274 | otherwise = DIV
2275 (y_reg, y_code) <- getRegOrMem arg_y
2276 x_low_code <- getAnyReg arg_x_low
2277 x_high_code <- case m_arg_x_high of
2278 Just arg_x_high ->
2279 getAnyReg arg_x_high
2280 Nothing ->
2281 return $ const $ unitOL widen
2282 return $ y_code `appOL`
2283 x_low_code rax `appOL`
2284 x_high_code rdx `appOL`
2285 toOL [instr format y_reg,
2286 MOV format (OpReg rax) (OpReg reg_q),
2287 MOV format (OpReg rdx) (OpReg reg_r)]
2288 divOp _ _ _ _ _ _ _
2289 = panic "genCCall: Wrong number of results for divOp"
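-- Background for 'divOp' above: x86 DIV/IDIV divide the double-length
-- value rdx:rax by the operand, leaving the quotient in rax and the
-- remainder in rdx. Hence 'widen' sign-extends rax into rdx (CLTD) in
-- the signed case and zeroes rdx (XOR) in the unsigned case, unless a
-- high word was supplied explicitly (the quotRem2 variant).
-- Schematically, for an unsigned W64 quotRem:
--
-- >  xorq %rdx, %rdx
-- >  divq y
-- >  movq %rax, q
-- >  movq %rdx, r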
2290
2291 addSubIntC platform instr mrevinstr cond width
2292 res_r res_c [arg_x, arg_y]
2293 = do let format = intFormat width
2294 rCode <- anyReg =<< trivialCode width (instr format)
2295 (mrevinstr format) arg_x arg_y
2296 reg_tmp <- getNewRegNat II8
2297 let reg_c = getRegisterReg platform True (CmmLocal res_c)
2298 reg_r = getRegisterReg platform True (CmmLocal res_r)
2299 code = rCode reg_r `snocOL`
2300 SETCC cond (OpReg reg_tmp) `snocOL`
2301 MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
2302
2303 return code
2304 addSubIntC _ _ _ _ _ _ _ _
2305 = panic "genCCall: Wrong number of arguments/results for addSubIntC"
2306
2307 genCCall32' :: DynFlags
2308 -> ForeignTarget -- function to call
2309 -> [CmmFormal] -- where to put the result
2310 -> [CmmActual] -- arguments (of mixed type)
2311 -> NatM InstrBlock
2312 genCCall32' dflags target dest_regs args = do
2313 let
2314 prom_args = map (maybePromoteCArg dflags W32) args
2315
2316 -- Align the stack to 16n bytes for calls, assuming a starting stack
2317 -- alignment of 16n - word_size on procedure entry, which we
2318 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2319 sizes = map (arg_size . cmmExprType dflags) (reverse args)
2320 raw_arg_size = sum sizes + wORD_SIZE dflags
2321 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
2322 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
2323 delta0 <- getDeltaNat
2324 setDeltaNat (delta0 - arg_pad_size)
2325
2326 use_sse2 <- sse2Enabled
2327 push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
2328 delta <- getDeltaNat
2329 MASSERT(delta == delta0 - tot_arg_size)
2330
2331 -- deal with static vs dynamic call targets
2332 (callinsns,cconv) <-
2333 case target of
2334 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2335 -> -- ToDo: stdcall arg sizes
2336 return (unitOL (CALL (Left fn_imm) []), conv)
2337 where fn_imm = ImmCLbl lbl
2338 ForeignTarget expr conv
2339 -> do { (dyn_r, dyn_c) <- getSomeReg expr
2340 ; ASSERT( isWord32 (cmmExprType dflags expr) )
2341 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
2342 PrimTarget _
2343 -> panic $ "genCCall: Can't handle PrimTarget call type here, "
2344 ++ "probably because of too many return values."
2345
2346 let push_code
2347 | arg_pad_size /= 0
2348 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2349 DELTA (delta0 - arg_pad_size)]
2350 `appOL` concatOL push_codes
2351 | otherwise
2352 = concatOL push_codes
2353
2354 -- Deallocate parameters after call for ccall;
2355 -- but not for stdcall (callee does it)
2356 --
2357 -- We have to pop any stack padding we added
2358 -- even if we are doing stdcall, though (#5052)
2359 pop_size
2360 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
2361 | otherwise = tot_arg_size
2362
2363 call = callinsns `appOL`
2364 toOL (
2365 (if pop_size==0 then [] else
2366 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
2367 ++
2368 [DELTA delta0]
2369 )
2370 setDeltaNat delta0
2371
2372 dflags <- getDynFlags
2373 let platform = targetPlatform dflags
2374
2375 let
2376 -- assign the results, if necessary
2377 assign_code [] = nilOL
2378 assign_code [dest]
2379 | isFloatType ty =
2380 if use_sse2
2381 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
2382 EAIndexNone
2383 (ImmInt 0)
2384 fmt = floatFormat w
2385 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
2386 DELTA (delta0 - b),
2387 GST fmt fake0 tmp_amode,
2388 MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
2389 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
2390 DELTA delta0]
2391 else unitOL (GMOV fake0 r_dest)
2392 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
2393 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
2394 | otherwise = unitOL (MOV (intFormat w)
2395 (OpReg eax)
2396 (OpReg r_dest))
2397 where
2398 ty = localRegType dest
2399 w = typeWidth ty
2400 b = widthInBytes w
2401 r_dest_hi = getHiVRegFromLo r_dest
2402 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
2403 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
2404
2405 return (push_code `appOL`
2406 call `appOL`
2407 assign_code dest_regs)
2408
2409 where
2410 arg_size :: CmmType -> Int -- Width in bytes
2411 arg_size ty = widthInBytes (typeWidth ty)
2412
2413 roundTo a x | x `mod` a == 0 = x
2414 | otherwise = x + a - (x `mod` a)
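-- Sanity-check examples for 'roundTo' (round up to a multiple of a):
--
-- >  roundTo 16 20 == 32
-- >  roundTo 16 32 == 32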
2415
2416 push_arg :: Bool -> CmmActual {-current argument-}
2417 -> NatM InstrBlock -- code
2418
2419 push_arg use_sse2 arg -- we don't need the hints on x86
2420 | isWord64 arg_ty = do
2421 ChildCode64 code r_lo <- iselExpr64 arg
2422 delta <- getDeltaNat
2423 setDeltaNat (delta - 8)
2424 let r_hi = getHiVRegFromLo r_lo
2425 return ( code `appOL`
2426 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
2427 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
2428 DELTA (delta-8)]
2429 )
2430
2431 | isFloatType arg_ty = do
2432 (reg, code) <- getSomeReg arg
2433 delta <- getDeltaNat
2434 setDeltaNat (delta-size)
2435 return (code `appOL`
2436 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
2437 DELTA (delta-size),
2438 let addr = AddrBaseIndex (EABaseReg esp)
2439 EAIndexNone
2440 (ImmInt 0)
2441 format = floatFormat (typeWidth arg_ty)
2442 in
2443 if use_sse2
2444 then MOV format (OpReg reg) (OpAddr addr)
2445 else GST format reg addr
2446 ]
2447 )
2448
2449 | otherwise = do
2450 (operand, code) <- getOperand arg
2451 delta <- getDeltaNat
2452 setDeltaNat (delta-size)
2453 return (code `snocOL`
2454 PUSH II32 operand `snocOL`
2455 DELTA (delta-size))
2456
2457 where
2458 arg_ty = cmmExprType dflags arg
2459 size = arg_size arg_ty -- Byte size
2460
2461 genCCall64' :: DynFlags
2462 -> ForeignTarget -- function to call
2463 -> [CmmFormal] -- where to put the result
2464 -> [CmmActual] -- arguments (of mixed type)
2465 -> NatM InstrBlock
2466 genCCall64' dflags target dest_regs args = do
2467 -- load up the register arguments
2468 let prom_args = map (maybePromoteCArg dflags W32) args
2469
2470 (stack_args, int_regs_used, fp_regs_used, load_args_code, assign_args_code)
2471 <-
2472 if platformOS platform == OSMinGW32
2473 then load_args_win prom_args [] [] (allArgRegs platform) nilOL
2474 else do
2475 (stack_args, aregs, fregs, load_args_code, assign_args_code)
2476 <- load_args prom_args (allIntArgRegs platform)
2477 (allFPArgRegs platform)
2478 nilOL nilOL
2479 let used_regs rs as = reverse (drop (length rs) (reverse as))
2480 fregs_used = used_regs fregs (allFPArgRegs platform)
2481 aregs_used = used_regs aregs (allIntArgRegs platform)
2482 return (stack_args, aregs_used, fregs_used, load_args_code
2483 , assign_args_code)
2484
2485 let
2486 arg_regs_used = int_regs_used ++ fp_regs_used
2487 arg_regs = [eax] ++ arg_regs_used
2488 -- (used to annotate the call instruction)
2489 sse_regs = length fp_regs_used
2490 arg_stack_slots = if platformOS platform == OSMinGW32
2491 then length stack_args + length (allArgRegs platform)
2492 else length stack_args
2493 tot_arg_size = arg_size * arg_stack_slots
2494
2495
2496 -- Align the stack to 16n bytes for calls, assuming a starting stack
2497 -- alignment of 16n - word_size on procedure entry, which we
2498 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2499 (real_size, adjust_rsp) <-
2500 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2501 then return (tot_arg_size, nilOL)
2502 else do -- we need to adjust...
2503 delta <- getDeltaNat
2504 setDeltaNat (delta - wORD_SIZE dflags)
2505 return (tot_arg_size + wORD_SIZE dflags, toOL [
2506 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2507 DELTA (delta - wORD_SIZE dflags) ])
2508
2509 -- push the stack args, right to left
2510 push_code <- push_args (reverse stack_args) nilOL
2511 -- On Win64, we also have to leave stack space for the arguments
2512 -- that we are passing in registers
2513 lss_code <- if platformOS platform == OSMinGW32
2514 then leaveStackSpace (length (allArgRegs platform))
2515 else return nilOL
2516 delta <- getDeltaNat
2517
2518 -- deal with static vs dynamic call targets
2519 (callinsns,_cconv) <-
2520 case target of
2521 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2522 -> -- ToDo: stdcall arg sizes
2523 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2524 where fn_imm = ImmCLbl lbl
2525 ForeignTarget expr conv
2526 -> do (dyn_r, dyn_c) <- getSomeReg expr
2527 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2528 PrimTarget _
2529 -> panic $ "genCCall: Can't handle PrimTarget call type here, "
2530 ++ "probably because of too many return values."
2531
2532 let
2533 -- The x86_64 ABI requires us to set %al to the number of SSE2
2534 -- registers that contain arguments, if the called routine
2535 -- is a varargs function. We don't know whether it's a
2536 -- varargs function or not, so we have to assume it is.
2537 --
2538 -- It's not safe to omit this assignment, even if the number
2539 -- of SSE2 regs in use is zero. If %al is larger than 8
2540 -- on entry to a varargs function, seg faults ensue.
2541 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
2542
2543 let call = callinsns `appOL`
2544 toOL (
2545 -- Deallocate parameters after call for ccall;
2546 -- stdcall has callee do it, but is not supported on
2547 -- x86_64 target (see #3336)
2548 (if real_size==0 then [] else
2549 [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2550 ++
2551 [DELTA (delta + real_size)]
2552 )
2553 setDeltaNat (delta + real_size)
2554
2555 let
2556 -- assign the results, if necessary
2557 assign_code [] = nilOL
2558 assign_code [dest] =
2559 case typeWidth rep of
2560 W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
2561 (OpReg xmm0)
2562 (OpReg r_dest))
2563 W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
2564 (OpReg xmm0)
2565 (OpReg r_dest))
2566 _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
2567 where
2568 rep = localRegType dest
2569 r_dest = getRegisterReg platform True (CmmLocal dest)
2570 assign_code _many = panic "genCCall.assign_code many"
2571
2572 return (adjust_rsp `appOL`
2573 push_code `appOL`
2574 load_args_code `appOL`
2575 assign_args_code `appOL`
2576 lss_code `appOL`
2577 assign_eax sse_regs `appOL`
2578 call `appOL`
2579 assign_code dest_regs)
2580
2581 where platform = targetPlatform dflags
2582 arg_size = 8 -- always, at the moment
2583
2584
2585 load_args :: [CmmExpr]
2586 -> [Reg] -- int regs avail for args
2587 -> [Reg] -- FP regs avail for args
2588 -> InstrBlock -- code computing args
2589 -> InstrBlock -- code assigning args to ABI regs
2590 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
2591 -- no more regs to use
2592 load_args args [] [] code acode =
2593 return (args, [], [], code, acode)
2594
2595 -- no more args to push
2596 load_args [] aregs fregs code acode =
2597 return ([], aregs, fregs, code, acode)
2598
2599 load_args (arg : rest) aregs fregs code acode
2600 | isFloatType arg_rep = case fregs of
2601 [] -> push_this_arg
2602 (r:rs) -> do
2603 (code',acode') <- reg_this_arg r
2604 load_args rest aregs rs code' acode'
2605 | otherwise = case aregs of
2606 [] -> push_this_arg
2607 (r:rs) -> do
2608 (code',acode') <- reg_this_arg r
2609 load_args rest rs fregs code' acode'
2610 where
2611
2612 -- put arg into the list of stack pushed args
2613 push_this_arg = do
2614 (args',ars,frs,code',acode')
2615 <- load_args rest aregs fregs code acode
2616 return (arg:args', ars, frs, code', acode')
2617
2618 -- pass the arg into the given register
2619 reg_this_arg r
2620 -- "operand" args can be directly assigned into r
2621 | isOperand False arg = do
2622 arg_code <- getAnyReg arg
2623 return (code, (acode `appOL` arg_code r))
2624 -- The last non-operand arg can be directly assigned after its
2625 -- computation without going into a temporary register
2626 | all (isOperand False) rest = do
2627 arg_code <- getAnyReg arg
2628 return (code `appOL` arg_code r,acode)
2629
2630 -- other args need to be computed beforehand to avoid clobbering
2631 -- previously assigned registers used to pass parameters (see
2632 -- #11792, #12614). They are assigned into temporary registers
2633 -- and get assigned to proper call ABI registers after they all
2634 -- have been computed.
2635 | otherwise = do
2636 arg_code <- getAnyReg arg
2637 tmp <- getNewRegNat arg_fmt
2638 let
2639 code' = code `appOL` arg_code tmp
2640 acode' = acode `snocOL` reg2reg arg_fmt tmp r
2641 return (code',acode')
2642
2643 arg_rep = cmmExprType dflags arg
2644 arg_fmt = cmmTypeFormat arg_rep
2645
2646 load_args_win :: [CmmExpr]
2647 -> [Reg] -- used int regs
2648 -> [Reg] -- used FP regs
2649 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2650 -> InstrBlock
2651 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
2652 load_args_win args usedInt usedFP [] code
2653 = return (args, usedInt, usedFP, code, nilOL)
2654 -- no more regs to use
2655 load_args_win [] usedInt usedFP _ code
2656 = return ([], usedInt, usedFP, code, nilOL)
2657 -- no more args to push
2658 load_args_win (arg : rest) usedInt usedFP
2659 ((ireg, freg) : regs) code
2660 | isFloatType arg_rep = do
2661 arg_code <- getAnyReg arg
2662 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2663 (code `appOL`
2664 arg_code freg `snocOL`
2665 -- If we are calling a varargs function
2666 -- then we need to define ireg as well
2667 -- as freg
2668 MOV II64 (OpReg freg) (OpReg ireg))
2669 | otherwise = do
2670 arg_code <- getAnyReg arg
2671 load_args_win rest (ireg : usedInt) usedFP regs
2672 (code `appOL` arg_code ireg)
2673 where
2674 arg_rep = cmmExprType dflags arg
2675
2676 push_args [] code = return code
2677 push_args (arg:rest) code
2678 | isFloatType arg_rep = do
2679 (arg_reg, arg_code) <- getSomeReg arg
2680 delta <- getDeltaNat
2681 setDeltaNat (delta-arg_size)
2682 let code' = code `appOL` arg_code `appOL` toOL [
2683 SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp),
2684 DELTA (delta-arg_size),
2685 MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2686 push_args rest code'
2687
2688 | otherwise = do
2689 ASSERT(width == W64) return ()
2690 (arg_op, arg_code) <- getOperand arg
2691 delta <- getDeltaNat
2692 setDeltaNat (delta-arg_size)
2693 let code' = code `appOL` arg_code `appOL` toOL [
2694 PUSH II64 arg_op,
2695 DELTA (delta-arg_size)]
2696 push_args rest code'
2697 where
2698 arg_rep = cmmExprType dflags arg
2699 width = typeWidth arg_rep
2700
2701 leaveStackSpace n = do
2702 delta <- getDeltaNat
2703 setDeltaNat (delta - n * arg_size)
2704 return $ toOL [
2705 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2706 DELTA (delta - n * arg_size)]
2707
2708 maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
2709 maybePromoteCArg dflags wto arg
2710 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
2711 | otherwise = arg
2712 where
2713 wfrom = cmmExprWidth dflags arg
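-- Hedged illustration: a sub-word argument such as an 8-bit literal is
-- widened before the call, mirroring C's default argument promotions:
--
-- >  maybePromoteCArg dflags W32 (CmmLit (CmmInt 1 W8))
-- >    ==> CmmMachOp (MO_UU_Conv W8 W32) [CmmLit (CmmInt 1 W8)]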
2714
2715 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2716 outOfLineCmmOp mop res args
2717 = do
2718 dflags <- getDynFlags
2719 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
2720 let target = ForeignTarget targetExpr
2721 (ForeignConvention CCallConv [] [] CmmMayReturn)
2722
2723 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
2724 where
2725 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2726 -- TODO: Why is this OK? Under Linux this code will be in libm.so.
2727 -- Is it because they're really implemented as primitive instructions by the assembler? -- BL 2009/12/31
2728 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2729
2730 fn = case mop of
2731 MO_F32_Sqrt -> fsLit "sqrtf"
2732 MO_F32_Fabs -> fsLit "fabsf"
2733 MO_F32_Sin -> fsLit "sinf"
2734 MO_F32_Cos -> fsLit "cosf"
2735 MO_F32_Tan -> fsLit "tanf"
2736 MO_F32_Exp -> fsLit "expf"
2737 MO_F32_Log -> fsLit "logf"
2738
2739 MO_F32_Asin -> fsLit "asinf"
2740 MO_F32_Acos -> fsLit "acosf"
2741 MO_F32_Atan -> fsLit "atanf"
2742
2743 MO_F32_Sinh -> fsLit "sinhf"
2744 MO_F32_Cosh -> fsLit "coshf"
2745 MO_F32_Tanh -> fsLit "tanhf"
2746 MO_F32_Pwr -> fsLit "powf"
2747
2748 MO_F64_Sqrt -> fsLit "sqrt"
2749 MO_F64_Fabs -> fsLit "fabs"
2750 MO_F64_Sin -> fsLit "sin"
2751 MO_F64_Cos -> fsLit "cos"
2752 MO_F64_Tan -> fsLit "tan"
2753 MO_F64_Exp -> fsLit "exp"
2754 MO_F64_Log -> fsLit "log"
2755
2756 MO_F64_Asin -> fsLit "asin"
2757 MO_F64_Acos -> fsLit "acos"
2758 MO_F64_Atan -> fsLit "atan"
2759
2760 MO_F64_Sinh -> fsLit "sinh"
2761 MO_F64_Cosh -> fsLit "cosh"
2762 MO_F64_Tanh -> fsLit "tanh"
2763 MO_F64_Pwr -> fsLit "pow"
2764
2765 MO_Memcpy _ -> fsLit "memcpy"
2766 MO_Memset _ -> fsLit "memset"
2767 MO_Memmove _ -> fsLit "memmove"
2768 MO_Memcmp _ -> fsLit "memcmp"
2769
2770 MO_PopCnt _ -> fsLit "popcnt"
2771 MO_BSwap _ -> fsLit "bswap"
2772 MO_Clz w -> fsLit $ clzLabel w
2773 MO_Ctz _ -> unsupported
2774
2775 MO_Pdep w -> fsLit $ pdepLabel w
2776 MO_Pext w -> fsLit $ pextLabel w
2777
2778 MO_AtomicRMW _ _ -> fsLit "atomicrmw"
2779 MO_AtomicRead _ -> fsLit "atomicread"
2780 MO_AtomicWrite _ -> fsLit "atomicwrite"
2781 MO_Cmpxchg _ -> fsLit "cmpxchg"
2782
2783 MO_UF_Conv _ -> unsupported
2784
2785 MO_S_QuotRem {} -> unsupported
2786 MO_U_QuotRem {} -> unsupported
2787 MO_U_QuotRem2 {} -> unsupported
2788 MO_Add2 {} -> unsupported
2789 MO_AddIntC {} -> unsupported
2790 MO_SubIntC {} -> unsupported
2791 MO_SubWordC {} -> unsupported
2792 MO_U_Mul2 {} -> unsupported
2793 MO_WriteBarrier -> unsupported
2794 MO_Touch -> unsupported
2795 (MO_Prefetch_Data _ ) -> unsupported
2796 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2797 ++ " not supported here")
2798
2799 -- -----------------------------------------------------------------------------
2800 -- Generating a table-branch
2801
2802 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2803
2804 genSwitch dflags expr targets
2805 | positionIndependent dflags
2806 = do
2807 (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset)
2808 -- getNonClobberedReg because it needs to survive across t_code
2809 lbl <- getNewLabelNat
2810 dflags <- getDynFlags
2811 let is32bit = target32Bit (targetPlatform dflags)
2812 os = platformOS (targetPlatform dflags)
2813 -- Might want to use .rodata.<function we're in> instead, but as
2814 -- long as it's something unique it'll work out since the
2815 -- references to the jump table are in the appropriate section.
2816 rosection = case os of
2817 -- on Mac OS X/x86_64, put the jump table in the text section to
2818 -- work around a limitation of the linker.
2819 -- ld64 is unable to handle the relocations for
2820 -- .quad L1 - L0
2821 -- if L0 is not preceded by a non-anonymous label in its section.
2822 OSDarwin | not is32bit -> Section Text lbl
2823 _ -> Section ReadOnlyData lbl
2824 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2825 (tableReg,t_code) <- getSomeReg $ dynRef
2826 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2827 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2828
2829 offsetReg <- getNewRegNat (intFormat (wordWidth dflags))
2830 return $ if is32bit || os == OSDarwin
2831 then e_code `appOL` t_code `appOL` toOL [
2832 ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
2833 JMP_TBL (OpReg tableReg) ids rosection lbl
2834 ]
2835 else -- HACK: On x86_64 binutils<2.17 is only able to generate
2836 -- PC32 relocations, hence we only get 32-bit offsets in
2837 -- the jump table. As these offsets are always negative
2838 -- we need to properly sign extend them to 64-bit. This
2839 -- hack should be removed in conjunction with the hack in
2840 -- PprMach.hs/pprDataItem once binutils 2.17 is standard.
2841 e_code `appOL` t_code `appOL` toOL [
2842 MOVSxL II32 op (OpReg offsetReg),
2843 ADD (intFormat (wordWidth dflags))
2844 (OpReg offsetReg)
2845 (OpReg tableReg),
2846 JMP_TBL (OpReg tableReg) ids rosection lbl
2847 ]
2848 | otherwise
2849 = do
2850 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2851 lbl <- getNewLabelNat
2852 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2853 code = e_code `appOL` toOL [
2854 JMP_TBL op ids (Section ReadOnlyData lbl) lbl
2855 ]
2856 return code
2857 where (offset, ids) = switchTargetsToTable targets
2858
2859 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2860 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2861 = Just (createJumpTable dflags ids section lbl)
2862 generateJumpTableForInstr _ _ = Nothing
2863
2864 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2865 -> GenCmmDecl (Alignment, CmmStatics) h g
2866 createJumpTable dflags ids section lbl
2867 = let jumpTable
2868 | positionIndependent dflags =
2869 let jumpTableEntryRel Nothing
2870 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2871 jumpTableEntryRel (Just blockid)
2872 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2873 where blockLabel = blockLbl blockid
2874 in map jumpTableEntryRel ids
2875 | otherwise = map (jumpTableEntry dflags) ids
2876 in CmmData section (1, Statics lbl jumpTable)
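-- Illustrative table entries (GAS syntax; hedged, the exact output
-- depends on the platform): position-independent tables store label
-- differences, while absolute tables store plain addresses:
--
-- >  .quad .Lblk - .Ltbl    -- PIC entry (CmmLabelDiffOff)
-- >  .quad .Lblk            -- non-PIC entry
--
-- with a zero word standing in for missing ('Nothing') entries.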
2877
2878 extractUnwindPoints :: [Instr] -> [UnwindPoint]
2879 extractUnwindPoints instrs =
2880 [ UnwindPoint lbl unwinds | UNWIND lbl unwinds <- instrs]
2881
2882 -- -----------------------------------------------------------------------------
2883 -- 'condIntReg' and 'condFltReg': condition codes into registers
2884
2885 -- Turn those condition codes into integers now (when they appear on
2886 -- the right hand side of an assignment).
2887 --
2888 -- (If applicable) Do not fill the delay slots here; you will confuse the
2889 -- register allocator.
2890
2891 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2892
2893 condIntReg cond x y = do
2894 CondCode _ cond cond_code <- condIntCode cond x y
2895 tmp <- getNewRegNat II8
2896 let
2897 code dst = cond_code `appOL` toOL [
2898 SETCC cond (OpReg tmp),
2899 MOVZxL II8 (OpReg tmp) (OpReg dst)
2900 ]
2901 return (Any II32 code)
2902
2903
2904
2905 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2906 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2907 where
2908 condFltReg_x87 = do
2909 CondCode _ cond cond_code <- condFltCode cond x y
2910 tmp <- getNewRegNat II8
2911 let
2912 code dst = cond_code `appOL` toOL [
2913 SETCC cond (OpReg tmp),
2914 MOVZxL II8 (OpReg tmp) (OpReg dst)
2915 ]
2916 return (Any II32 code)
2917
2918 condFltReg_sse2 = do
2919 CondCode _ cond cond_code <- condFltCode cond x y
2920 tmp1 <- getNewRegNat (archWordFormat is32Bit)
2921 tmp2 <- getNewRegNat (archWordFormat is32Bit)
2922 let
2923 -- We have to worry about unordered operands (eg. comparisons
2924 -- against NaN). If the operands are unordered, the comparison
2925 -- sets the parity flag, carry flag and zero flag.
2926 -- All comparisons are supposed to return false for unordered
2927 -- operands except for !=, which returns true.
2928 --
2929 -- Optimisation: we don't have to test the parity flag if we
2930 -- know the test has already excluded the unordered case: eg >
2931 -- and >= test for a zero carry flag, which can only occur for
2932 -- ordered operands.
2933 --
2934 -- ToDo: by reversing comparisons we could avoid testing the
2935 -- parity flag in more cases.
2936
2937 code dst =
2938 cond_code `appOL`
2939 (case cond of
2940 NE -> or_unordered dst
2941 GU -> plain_test dst
2942 GEU -> plain_test dst
2943 _ -> and_ordered dst)
2944
2945 plain_test dst = toOL [
2946 SETCC cond (OpReg tmp1),
2947 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2948 ]
2949 or_unordered dst = toOL [
2950 SETCC cond (OpReg tmp1),
2951 SETCC PARITY (OpReg tmp2),
2952 OR II8 (OpReg tmp1) (OpReg tmp2),
2953 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2954 ]
2955 and_ordered dst = toOL [
2956 SETCC cond (OpReg tmp1),
2957 SETCC NOTPARITY (OpReg tmp2),
2958 AND II8 (OpReg tmp1) (OpReg tmp2),
2959 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2960 ]
2961 return (Any II32 code)
2962
2963
2964 -- -----------------------------------------------------------------------------
2965 -- 'trivial*Code': deal with trivial instructions
2966
2967 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2968 -- unary: 'trivialUCode', unary floating-point: 'trivialUFCode') instructions.
2969 -- Only look for constants on the right hand side, because that's
2970 -- where the generic optimizer will have put them.
2971
2972 -- Similarly, for unary instructions, we don't have to worry about
2973 -- matching an StInt as the argument, because genericOpt will already
2974 -- have handled the constant-folding.
2975
2976
2977 {-
2978 The Rules of the Game are:
2979
2980 * You cannot assume anything about the destination register dst;
2981 it may be anything, including a fixed reg.
2982
2983 * You may compute an operand into a fixed reg, but you may not
2984 subsequently change the contents of that fixed reg. If you
2985 want to do so, first copy the value either to a temporary
2986 or into dst. You are free to modify dst even if it happens
2987 to be a fixed reg -- that's not your problem.
2988
2989 * You cannot assume that a fixed reg will stay live over an
2990 arbitrary computation. The same applies to the dst reg.
2991
2992 * Temporary regs obtained from getNewRegNat are distinct from
2993 each other and from all other regs, and stay live over
2994 arbitrary computations.
2995
2996 --------------------
2997
2998 SDM's version of The Rules:
2999
3000 * If getRegister returns Any, that means it can generate correct
3001 code which places the result in any register, period. Even if that
3002 register happens to be read during the computation.
3003
3004 Corollary #1: this means that if you are generating code for an
3005 operation with two arbitrary operands, you cannot assign the result
3006 of the first operand into the destination register before computing
3007 the second operand. The second operand might require the old value
3008 of the destination register.
3009
3010 Corollary #2: A function might be able to generate more efficient
3011 code if it knows the destination register is a new temporary (and
3012 therefore not read by any of the sub-computations).
3013
3014 * If getRegister returns Any, then the code it generates may modify only:
3015 (a) fresh temporaries
3016 (b) the destination register
3017 (c) known registers (eg. %ecx is used by shifts)
3018 In particular, it may *not* modify global registers, unless the global
3019 register happens to be the destination register.
3020 -}
3021
3022 trivialCode :: Width -> (Operand -> Operand -> Instr)
3023 -> Maybe (Operand -> Operand -> Instr)
3024 -> CmmExpr -> CmmExpr -> NatM Register
3025 trivialCode width instr m a b
3026 = do is32Bit <- is32BitPlatform
3027 trivialCode' is32Bit width instr m a b
3028
3029 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
3030 -> Maybe (Operand -> Operand -> Instr)
3031 -> CmmExpr -> CmmExpr -> NatM Register
3032 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
3033 | is32BitLit is32Bit lit_a = do
3034 b_code <- getAnyReg b
3035 let
3036 code dst
3037 = b_code dst `snocOL`
3038 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
3039 return (Any (intFormat width) code)
3040
3041 trivialCode' _ width instr _ a b
3042 = genTrivialCode (intFormat width) instr a b
3043
3044 -- This is re-used for floating-point instructions too.
3045 genTrivialCode :: Format -> (Operand -> Operand -> Instr)
3046 -> CmmExpr -> CmmExpr -> NatM Register
3047 genTrivialCode rep instr a b = do
3048 (b_op, b_code) <- getNonClobberedOperand b
3049 a_code <- getAnyReg a
3050 tmp <- getNewRegNat rep
3051 let
3052 -- We want the value of b to stay alive across the computation of a.
3053 -- But, we want to calculate a straight into the destination register,
3054 -- because the instruction only has two operands (dst := dst `op` src).
3055 -- The troublesome case is when the result of b is in the same register
3056 -- as the destination reg. In this case, we have to save b in a
3057 -- new temporary across the computation of a.
3058 code dst
3059 | dst `regClashesWithOp` b_op =
3060 b_code `appOL`
3061 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
3062 a_code dst `snocOL`
3063 instr (OpReg tmp) (OpReg dst)
3064 | otherwise =
3065 b_code `appOL`
3066 a_code dst `snocOL`
3067 instr b_op (OpReg dst)
3068 return (Any rep code)
3069
3070 regClashesWithOp :: Reg -> Operand -> Bool
3071 reg `regClashesWithOp` OpReg reg2 = reg == reg2
3072 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
3073 _ `regClashesWithOp` _ = False
3074
3075 -----------
3076
3077 trivialUCode :: Format -> (Operand -> Instr)
3078 -> CmmExpr -> NatM Register
3079 trivialUCode rep instr x = do
3080 x_code <- getAnyReg x
3081 let
3082 code dst =
3083 x_code dst `snocOL`
3084 instr (OpReg dst)
3085 return (Any rep code)
3086
3087 -----------
3088
3089 trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr)
3090 -> CmmExpr -> CmmExpr -> NatM Register
3091 trivialFCode_x87 instr x y = do
3092 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
3093 (y_reg, y_code) <- getSomeReg y
3094 let
3095 format = FF80 -- always, on x87
3096 code dst =
3097 x_code `appOL`
3098 y_code `snocOL`
3099 instr format x_reg y_reg dst
3100 return (Any format code)
3101
3102 trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
3103 -> CmmExpr -> CmmExpr -> NatM Register
3104 trivialFCode_sse2 pk instr x y
3105 = genTrivialCode format (instr format) x y
3106 where format = floatFormat pk
3107
3108
3109 trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
3110 trivialUFCode format instr x = do
3111 (x_reg, x_code) <- getSomeReg x
3112 let
3113 code dst =
3114 x_code `snocOL`
3115 instr x_reg dst
3116 return (Any format code)
3117
3118
3119 --------------------------------------------------------------------------------
3120 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
3121 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
3122 where
3123 coerce_x87 = do
3124 (x_reg, x_code) <- getSomeReg x
3125 let
3126 opc = case to of W32 -> GITOF; W64 -> GITOD;
3127 n -> panic $ "coerceInt2FP.x87: unhandled width ("
3128 ++ show n ++ ")"
3129 code dst = x_code `snocOL` opc x_reg dst
3130 -- ToDo: works for non-II32 reps?
3131 return (Any FF80 code)
3132
3133 coerce_sse2 = do
3134 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
3135 let
3136 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
3137 n -> panic $ "coerceInt2FP.sse: unhandled width ("
3138 ++ show n ++ ")"
3139 code dst = x_code `snocOL` opc (intFormat from) x_op dst
3140 return (Any (floatFormat to) code)
3141 -- works even if the destination rep is <II32
3142
3143 --------------------------------------------------------------------------------
3144 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
3145 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
3146 where
3147 coerceFP2Int_x87 = do
3148 (x_reg, x_code) <- getSomeReg x
3149 let
3150 opc = case from of W32 -> GFTOI; W64 -> GDTOI
3151 n -> panic $ "coerceFP2Int.x87: unhandled width ("
3152 ++ show n ++ ")"
3153 code dst = x_code `snocOL` opc x_reg dst
3154 -- ToDo: works for non-II32 reps?
3155 return (Any (intFormat to) code)
3156
3157 coerceFP2Int_sse2 = do
3158 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
3159 let
3160 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
3161 n -> panic $ "coerceFP2Init.sse: unhandled width ("
3162 ++ show n ++ ")"
3163 code dst = x_code `snocOL` opc (intFormat to) x_op dst
3164 return (Any (intFormat to) code)
3165 -- works even if the destination rep is <II32
3166
3167
3168 --------------------------------------------------------------------------------
3169 coerceFP2FP :: Width -> CmmExpr -> NatM Register
3170 coerceFP2FP to x = do
3171 use_sse2 <- sse2Enabled
3172 (x_reg, x_code) <- getSomeReg x
3173 let
3174 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
3175 n -> panic $ "coerceFP2FP: unhandled width ("
3176 ++ show n ++ ")"
3177 | otherwise = GDTOF
3178 code dst = x_code `snocOL` opc x_reg dst
3179 return (Any (if use_sse2 then floatFormat to else FF80) code)
3180
3181 --------------------------------------------------------------------------------
3182
3183 sse2NegCode :: Width -> CmmExpr -> NatM Register
3184 sse2NegCode w x = do
3185 let fmt = floatFormat w
3186 x_code <- getAnyReg x
3187 -- This is how gcc does it, so it can't be that bad:
3188 let
3189 const = case fmt of
3190 FF32 -> CmmInt 0x80000000 W32
3191 FF64 -> CmmInt 0x8000000000000000 W64
3192 x@II8 -> wrongFmt x
3193 x@II16 -> wrongFmt x
3194 x@II32 -> wrongFmt x
3195 x@II64 -> wrongFmt x
3196 x@FF80 -> wrongFmt x
3197 where
3198 wrongFmt x = panic $ "sse2NegCode: " ++ show x
3199 Amode amode amode_code <- memConstant (widthInBytes w) const
3200 tmp <- getNewRegNat fmt
3201 let
3202 code dst = x_code dst `appOL` amode_code `appOL` toOL [
3203 MOV fmt (OpAddr amode) (OpReg tmp),
3204 XOR fmt (OpReg tmp) (OpReg dst)
3205 ]
3206 --
3207 return (Any fmt code)
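-- Worked example of the sign-bit XOR above for FF32: 0x80000000 is
-- exactly the IEEE-754 single-precision sign bit, so
--
-- >  1.0f = 0x3F800000
-- >  0x3F800000 `xor` 0x80000000 = 0xBF800000 = -1.0f
--
-- and likewise 0x8000000000000000 flips the sign of a double.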
3208
3209 isVecExpr :: CmmExpr -> Bool
3210 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
3211 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
3212 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
3213 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
3214 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
3215 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
3216 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
3217 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
3218 isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
3219 isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
3220 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
3221 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
3222 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
3223 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
3224 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
3225 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
3226 isVecExpr _ = False
3227
3228 needLlvm :: NatM a
3229 needLlvm =
3230 sorry $ unlines ["The native code generator does not support vector"
3231 ,"instructions. Please use -fllvm."]