{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primPackageKey )
import PprCmm           ()
import CmmUtils
import Cmm
import Hoopl
import CLabel

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import FastBool         ( isFastTrue )
import DynFlags
import Util

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
    dflags <- getDynFlags
    return (isSse2Enabled dflags)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
    dflags <- getDynFlags
    return (isSse4_2Enabled dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
    b <- sse2Enabled
    if b then sse2 else x87

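-- An illustrative sketch (not itself used in this module) of how the
-- selectors below employ if_sse2: both branches produce a Register for
-- the same expression, one via SSE2 and one via the x87 stack.
-- 'float_negate' here is a hypothetical name; the real unary MO_F_Neg
-- case later in this file has exactly this shape:
--
--   float_negate w x = if_sse2 (sse2NegCode w x)
--                              (trivialUFCode FF80 (GNEG FF80) x)
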
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing      -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (CmmEntry id, nodes, tail) = blockSplit block
      stmts = blockToList nodes
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = mid_instrs `appOL` tail_instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)


stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode size reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode size reg src
        where ty = cmmRegType dflags reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode size addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode size addr src
        where ty = cmmExprType dflags src
              size = cmmTypeSize ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id                 -> genBranch id
    CmmCondBranch arg true false -> do b1 <- genCondJump true arg
                                       b2 <- genBranch false
                                       return (b1 `appOL` b2)
    CmmSwitch arg ids            -> do dflags <- getDynFlags
                                       genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                       dflags <- getDynFlags
                                       genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr

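-- A rough illustration of why an OrdList is used here: appends are O(1),
-- so selectors can glue code fragments together freely, and the tree is
-- only flattened once at the end.  For instance (names below are purely
-- for illustration, not definitions in this module):
--
--   example :: InstrBlock
--   example = (unitOL i1 `appOL` unitOL i2) `snocOL` i3
--
-- A left-to-right traversal of 'example' yields i1, i2, i3 in that order.
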

-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      The rule of this simplified insn selection game is therefore that
--      the returned Reg may be modified.
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg

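-- Sketch of how a ChildCode64 is consumed (this mirrors the real uses in
-- assignMem_I64Code below): evaluate the expression, then recover the high
-- half from the low virtual register:
--
--   do ChildCode64 code rlo <- iselExpr64 e
--      let rhi = getHiVRegFromLo rlo
--      -- 'code' computes the value into rlo (low word) and rhi (high word)
--      ...
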
-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Size Reg InstrBlock
        | Any   Size (Reg -> InstrBlock)


swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let sz = cmmTypeSize pk in
    if isFloatSize sz && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u sz)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}
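
-- A hedged sketch of the "stash the address in a temporary" pattern the
-- comment above describes, using only helpers defined in this module:
--
--   do Amode addr addr_code <- getAmode e
--      tmp <- getNewRegNat (archWordSize is32Bit)
--      let code = addr_code `snocOL`
--                 LEA (archWordSize is32Bit) (OpAddr addr) (OpReg tmp)
--      -- other computation may now clobber the amode's registers;
--      -- subsequent instructions address memory through tmp instead.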


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64

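-- A worked example of why the truncation matters: the unsigned reading of
-- -1 at W64 is 18446744073709551615, far outside the Int32 range, but
-- (fromIntegral 18446744073709551615 :: Int64) is -1, so is32BitInteger
-- accepts it -- exactly what we want, since the literal denotes the same
-- bit pattern as -1.
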

-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing   = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)

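-- Typical use, as seen throughout this module (a sketch, not new code):
--
--   do (reg, code) <- getSomeReg e
--      return (code `snocOL` SOME_INSTR (OpReg reg) ...)
--
-- i.e. 'code' must be emitted before any instruction that reads 'reg';
-- SOME_INSTR above is a placeholder, not a real constructor.
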

assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
        r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
        r_dst_hi = getHiVRegFromLo r_dst_lo
        r_src_hi = getHiVRegFromLo r_src_lo
        mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
        mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code = code1 `appOL`
               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                      ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                      MOV II32 (OpReg r1hi) (OpReg rhi),
                      ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code = code1 `appOL`
               code2 `appOL`
               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                      ADD II32 (OpReg r2lo) (OpReg rlo),
                      MOV II32 (OpReg r1hi) (OpReg rhi),
                      ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordSize is32Bit)
               return (Fixed (archWordSize is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 sz = cmmTypeSize (cmmRegType dflags reg)
                 size | not use_sse2 && isFloatSize sz = FF80
                      | otherwise                      = sz
               --
               let platform = targetPlatform dflags
               return (Fixed size (getRegisterReg platform use_sse2 reg) nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                       [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
      ChildCode64 code rlo <- iselExpr64 x
      return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                       [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
      ChildCode64 code rlo <- iselExpr64 x
      return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
      ChildCode64 code rlo <- iselExpr64 x
      return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
      ChildCode64 code rlo <- iselExpr64 x
      return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
        let
          size = floatSize w
          code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
          -- I don't know why there are xorpd, xorps, and pxor instructions.
          -- They all appear to do the same thing --SDM
        return (Any size code)

    | otherwise = do
        Amode addr code <- memConstant (widthInBytes w) lit
        loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
          let code dst = unitOL (GLDZ dst)
          in  return (Any FF80 code)

      | f == 1.0 ->
          let code dst = unitOL (GLD1 dst)
          in  return (Any FF80 code)

    _otherwise -> do
        Amode addr code <- memConstant (widthInBytes w) lit
        loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
      code <- intLoadCode (MOVZxL II8) addr
      return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
      code <- intLoadCode (MOVSxL II8) addr
      return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
      code <- intLoadCode (MOVZxL II16) addr
      return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
      code <- intLoadCode (MOVSxL II16) addr
      return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
      code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
      return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
      code <- intLoadCode (MOVSxL II32) addr
      return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_size)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op = instr (intSize width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt = case rep of
                          W32 -> 31
                          W64 -> 63
                          _   -> panic "shift_amt"

             size = intSize rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed size eax code)

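    -- A worked example (W32, checked by hand): 0x40000000 * 4 gives
    -- %edx:%eax = 0x00000001:0x00000000.  Sign-extending the low part
    -- (SAR 31) yields 0, which differs from the high part (1), so eax
    -- ends up non-zero: possible overflow.  For 3 * 5, %edx:%eax = 0:15,
    -- SAR 31 of 15 is 0 = %edx, so eax ends up 0: no overflow.
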
    --------------------
    shift_code :: Width
               -> (Size -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
        x_code <- getAnyReg x
        let
            size = intSize width
            code dst
               = x_code dst `snocOL`
                 instr size (OpImm (litToImm lit)) (OpReg dst)
        return (Any size code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
        let
            code = x_code tmp `appOL`
                   y_code ecx `snocOL`
                   instr size (OpReg ecx) (OpReg tmp)
        return (Fixed size tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            size = intSize width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA size
                     (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                     (OpReg dst)
        --
        return (Any size code)

    ----------------------
    div_code width signed quotient x y = do
        (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
        x_code <- getAnyReg x
        let
            size = intSize width
            widen | signed    = CLTD size
                  | otherwise = XOR size (OpReg edx) (OpReg edx)

            instr | signed    = IDIV
                  | otherwise = DIV

            code = y_code `appOL`
                   x_code eax `appOL`
                   toOL [widen, instr size y_op]

            result | quotient  = eax
                   | otherwise = edx

        return (Fixed size result code)


getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any size code)
  where
    width = typeWidth pk
    size = intSize width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV size
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
  | not is32Bit
  = do
    code <- intLoadCode (MOV size) mem
    return (Any size code)
  where size = intSize $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        size = intSize width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        size1 = if is32Bit then size
                           else case size of
                                    II64 -> II32
                                    _    -> size
        code dst
           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
    in
        return (Any size code)

-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
    isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

getRegister' dflags _ (CmmLit lit)
  = do let size = cmmTypeSize (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
       return (Any size code)

getRegister' _ _ other
  | isVecExpr other = needLlvm
  | otherwise       = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
            tmp <- getNewRegNat rep
            return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
            return (reg, code)

reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg size src dst
   | size == FF80 = GMOV src dst
   | otherwise    = MOV size (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                           CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                           b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])

-- Matches: (x + offset) + (y << shift)
getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
                                   CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  -- NB: the parentheses around the shift tests matter: (&&) binds more
  -- tightly than (||), so without them the offset check would apply
  -- only to the shift == 3 case.
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)

-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register).  This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
getSimpleAmode dflags is32Bit addr
  | is32Bit = do
      addr_code <- getAnyReg addr
      addr_r <- getNewRegNat (intSize (wordWidth dflags))
      let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
      return $! Amode amode (addr_code addr_r)
  | otherwise = getAmode addr

x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                 n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg scale) (ImmInt (fromIntegral offset)))
               code)

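-- For example (a sketch of what the above produces): the Cmm expression
--   (x + 4) + (y << 2)
-- matches the "(x + offset) + (y << shift)" rule and becomes a single
-- addressing mode, roughly  4(x_reg, y_reg, 4)  in AT&T syntax, i.e.
--   AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg 4) (ImmInt 4)
-- with no extra arithmetic instructions.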


-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && (if is32Bit then not (isWord64 pk) else True)
    then do
      dflags <- getDynFlags
      let platform = targetPlatform dflags
      Amode src mem_code <- getAmode mem
      (src',save_code) <-
        if (amodeCouldBeClobbered platform src)
          then do
            tmp <- getNewRegNat (archWordSize is32Bit)
            return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                    unitOL (LEA (archWordSize is32Bit) (OpAddr src) (OpReg tmp)))
          else
            return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else do
      getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = isFastTrue (freeReg platform rr)
regClobbered _ _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
    then do
      Amode src mem_code <- getAmode mem
      return (OpAddr src, mem_code)
    else
      getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: Bool -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _)      = True
isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
                                 || isSuitableFloatingPointLit lit
isOperand _ _                  = False

memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  dflags <- getDynFlags
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA ReadOnlyData (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)


loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let size = floatSize w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV size (OpAddr addr) (OpReg dst)
                    else GLD size addr dst
  return (Any (if use_sse2 then size else FF80) code)


-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
    then do
      Amode src mem_code <- getAmode mem
      return (OpAddr src, mem_code)
    else do
      (reg, code) <- getNonClobberedReg e
      return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: Bool -> CmmLit -> Bool
is32BitLit is32Bit (CmmInt i W64)
 | not is32Bit
    = -- assume that labels are in the range 0-2^31-1: this assumes the
      -- small memory model (see gcc docs, -mcmodel=small).
      is32BitInteger i
is32BitLit _ _ = True




-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      MO_Eq _ -> condIntCode EQQ x y
      MO_Ne _ -> condIntCode NE  x y

      MO_S_Gt _ -> condIntCode GTT x y
      MO_S_Ge _ -> condIntCode GE  x y
      MO_S_Lt _ -> condIntCode LTT x y
      MO_S_Le _ -> condIntCode LE  x y

      MO_U_Gt _ -> condIntCode GU  x y
      MO_U_Ge _ -> condIntCode GEU x y
      MO_U_Lt _ -> condIntCode LU  x y
      MO_U_Le _ -> condIntCode LEU x y

      _other -> pprPanic "getCondCode(x86,x86_64)" (ppr (CmmMachOp mop [x,y]))

getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)




-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do is32Bit <- is32BitPlatform
                          condIntCode' is32Bit cond x y

condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate
condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
 | is32BitLit is32Bit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
               CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
    | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
          code = x_code `snocOL`
                 TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)

-- anything vs zero
condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
               TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand
condIntCode' is32Bit cond x y
 | isOperand is32Bit y = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
-- operand vs. anything: invert the comparison so that we can use a
-- single comparison instruction.
 | isOperand is32Bit x
 , Just revcond <- maybeFlipCond cond = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)

-- anything vs anything
condIntCode' _ cond x y = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op, x_code) <- getRegOrMem x
    let
        code = y_code `appOL`
               x_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) (OpReg y_reg) x_op
    return (CondCode False cond code)



--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  = if_sse2 condFltCode_sse2 condFltCode_x87
  where

  condFltCode_x87
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
      (x_reg, x_code) <- getNonClobberedReg x
      (y_reg, y_code) <- getSomeReg y
      let
          code = x_code `appOL` y_code `snocOL`
                 GCMP cond x_reg y_reg
      -- The GCMP insn does the test and sets the zero flag if comparable
      -- and true.  Hence we always supply EQQ as the condition to test.
      return (CondCode True EQQ code)

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
               CMP (floatSize $ cmmExprWidth dflags x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)

-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock


-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
   | addr == addr2, pk /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode pk addr src = do
    is32Bit <- is32BitPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI is32Bit src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV pk op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)


-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
  load_code <- intLoadCode (MOV pk) src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform False{-no sse2-} reg))

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  code <- getAnyReg src
  return (code (getRegisterReg platform False{-no sse2-} reg))


-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  use_sse2 <- sse2Enabled
  let
        code = src_code `appOL`
               addr_code `snocOL`
               if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
                           else GST pk src_reg addr
  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do
  use_sse2 <- sse2Enabled
  src_code <- getAnyReg src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (src_code (getRegisterReg platform use_sse2 reg))


genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock

genJump (CmmLoad mem _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

genJump (CmmLit lit) regs = do
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)


-- -----------------------------------------------------------------------------
--  Unconditional branches

genBranch :: BlockId -> NatM InstrBlock
genBranch = return . toOL . mkJumpInstr



-- -----------------------------------------------------------------------------
--  Conditional jumps

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.

I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}

genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump id bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  use_sse2 <- sse2Enabled
  if not is_float || not use_sse2
    then
        return (cond_code `snocOL` JXX cond id)
    else do
        lbl <- getBlockIdNat

        -- see comment with condFltReg
        let code = case cond of
                        NE  -> or_unordered
                        GU  -> plain_test
                        GEU -> plain_test
                        _   -> and_ordered

            plain_test = unitOL (
                  JXX cond id
                )
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ]
            and_ordered = toOL [
                  JXX PARITY lbl,
                  JXX cond id,
                  JXX ALWAYS lbl,
                  NEWBLOCK lbl
                ]
        return (cond_code `appOL` code)

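-- Why the parity games above, roughly: the SSE2 comparisons
-- (ucomiss/ucomisd) set ZF, PF and CF, and signal an unordered result
-- (a NaN operand) by setting all three.  GU and GEU happen to fail on
-- that flag pattern anyway, so a plain conditional jump suffices.  NE
-- must *also* branch when unordered (NaN /= x is True), hence the extra
-- JXX PARITY to the same target.  The remaining conditions would wrongly
-- succeed on unordered, so and_ordered first branches past the jump when
-- PF is set.
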

-- -----------------------------------------------------------------------------
--  Generating C calls

-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
-- @get_arg@, which moves the arguments to the correct registers/stack
-- locations.  Apart from that, the code is easy.
--
-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.

genCCall
    :: DynFlags
    -> Bool                     -- 32 bit platform?
    -> ForeignTarget            -- function to call
    -> [CmmFormal]              -- where to put the result
    -> [CmmActual]              -- arguments (of mixed type)
    -> NatM InstrBlock

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

-- Unroll memcpy calls if the source and destination pointers are at
-- least DWORD aligned and the number of bytes to copy isn't too
-- large.  Otherwise, call C's memcpy.
genCCall dflags is32Bit (PrimTarget MO_Memcpy) _
         [dst, src,
          (CmmLit (CmmInt n _)),
          (CmmLit (CmmInt align _))]
    | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat size
        code_src <- getAnyReg src
        src_r <- getNewRegNat size
        tmp_r <- getNewRegNat size
        return $ code_dst dst_r `appOL` code_src src_r `appOL`
                 go dst_r src_r tmp_r (fromInteger n)
  where
    -- The number of instructions we will generate (approx). We need 2
    -- instructions per move.
    insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)

    size = if align .&. 4 /= 0 then II32 else (archWordSize is32Bit)

    -- The size of each move, in bytes.
    sizeBytes :: Integer
    sizeBytes = fromIntegral (sizeInBytes size)

    go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
    go dst src tmp i
        | i >= sizeBytes =
            unitOL (MOV size (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV size (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - sizeBytes)
        -- Deal with remaining bytes.
        | i >= 4 =  -- Will never happen on 32-bit
            unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 4)
        | i >= 2 =
            unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 2)
        | i >= 1 =
            unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 1)
        | otherwise = nilOL
      where
        src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
                   (ImmInteger (n - i))
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
                   (ImmInteger (n - i))

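-- For instance (a worked example of the unrolling above): copying n = 10
-- bytes with 4-byte alignment on a 32-bit target uses II32 moves, so 'go'
-- emits two 4-byte load/store pairs and then one 2-byte pair for the
-- remainder: 6 instructions in total, matching the 'insns' estimate of
-- 2 * ((10 + 3) `div` 4) = 6.
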
1645 genCCall dflags _ (PrimTarget MO_Memset) _
1646 [dst,
1647 CmmLit (CmmInt c _),
1648 CmmLit (CmmInt n _),
1649 CmmLit (CmmInt align _)]
1650 | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
1651 code_dst <- getAnyReg dst
1652 dst_r <- getNewRegNat size
1653 return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
1654 where
1655 (size, val) = case align .&. 3 of
1656 2 -> (II16, c2)
1657 0 -> (II32, c4)
1658 _ -> (II8, c)
1659 c2 = c `shiftL` 8 .|. c
1660 c4 = c2 `shiftL` 16 .|. c2
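-- The fill byte is replicated across the operand width: e.g. for
-- c = 0xAB, c2 = 0xABAB and c4 = 0xABABABAB. Note that the
-- align .&. 3 == 0 guard above means only the II32 branch of the
-- case is actually reachable here.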
1661
1662 -- The number of instructions we will generate (approx). We need 1
1663 -- instruction per move.
1664 insns = (n + sizeBytes - 1) `div` sizeBytes
1665
1666 -- The size of each move, in bytes.
1667 sizeBytes :: Integer
1668 sizeBytes = fromIntegral (sizeInBytes size)
1669
1670 go :: Reg -> Integer -> OrdList Instr
1671 go dst i
1672 -- TODO: Add movabs instruction and support 64-bit sets.
1673 | i >= sizeBytes = -- This might be smaller than the below sizes
1674 unitOL (MOV size (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1675 go dst (i - sizeBytes)
1676 | i >= 4 = -- Will never happen on 32-bit
1677 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1678 go dst (i - 4)
1679 | i >= 2 =
1680 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1681 go dst (i - 2)
1682 | i >= 1 =
1683 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1684 go dst (i - 1)
1685 | otherwise = nilOL
1686 where
1687 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1688 (ImmInteger (n - i))
1689
1690 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1691 -- write barrier compiles to no code on x86/x86-64;
1692 -- we keep it this long in order to prevent earlier optimisations.
1693
1694 genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
1695
1696 genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
1697 case n of
1698 0 -> genPrefetch src $ PREFETCH NTA size
1699 1 -> genPrefetch src $ PREFETCH Lvl2 size
1700 2 -> genPrefetch src $ PREFETCH Lvl1 size
1701 3 -> genPrefetch src $ PREFETCH Lvl0 size
1702 l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
1703 -- the C / LLVM prefetch convention is 0, 1, 2, and 3;
1704 -- the corresponding x86 levels are NTA, 2, 1, and 0
1705 where
1706 size = archWordSize is32bit
1707 -- we need to know the register width used for pointers!
1708 genPrefetch inRegSrc prefetchCTor =
1709 do
1710 code_src <- getAnyReg inRegSrc
1711 src_r <- getNewRegNat size
1712 return $ code_src src_r `appOL`
1713 (unitOL (prefetchCTor (OpAddr
1714 (AddrBaseIndex (EABaseReg src_r) EAIndexNone (ImmInt 0))))
1715 -- prefetch always takes an address
1716
1717 genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
1718 let platform = targetPlatform dflags
1719 let dst_r = getRegisterReg platform False (CmmLocal dst)
1720 case width of
1721 W64 | is32Bit -> do
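-- 64-bit bswap on a 32-bit target: move each 32-bit half into the
-- opposite half of the destination pair, then byte-swap both halves.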
1722 ChildCode64 vcode rlo <- iselExpr64 src
1723 let dst_rhi = getHiVRegFromLo dst_r
1724 rhi = getHiVRegFromLo rlo
1725 return $ vcode `appOL`
1726 toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
1727 MOV II32 (OpReg rhi) (OpReg dst_r),
1728 BSWAP II32 dst_rhi,
1729 BSWAP II32 dst_r ]
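-- 16-bit bswap: byte-swap the full 32-bit register, then shift the
-- two interesting bytes back down into the low 16 bits.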
1730 W16 -> do code_src <- getAnyReg src
1731 return $ code_src dst_r `appOL`
1732 unitOL (BSWAP II32 dst_r) `appOL`
1733 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
1734 _ -> do code_src <- getAnyReg src
1735 return $ code_src dst_r `appOL` unitOL (BSWAP size dst_r)
1736 where
1737 size = intSize width
1738
1739 genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1740 args@[src] = do
1741 sse4_2 <- sse4_2Enabled
1742 let platform = targetPlatform dflags
1743 if sse4_2
1744 then do code_src <- getAnyReg src
1745 src_r <- getNewRegNat size
1746 return $ code_src src_r `appOL`
1747 (if width == W8 then
1748 -- The POPCNT instruction doesn't take a r/m8
1749 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1750 unitOL (POPCNT II16 (OpReg src_r)
1751 (getRegisterReg platform False (CmmLocal dst)))
1752 else
1753 unitOL (POPCNT size (OpReg src_r)
1754 (getRegisterReg platform False (CmmLocal dst))))
1755 else do
1756 targetExpr <- cmmMakeDynamicReference dflags
1757 CallReference lbl
1758 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1759 [NoHint] [NoHint]
1760 CmmMayReturn)
1761 genCCall dflags is32Bit target dest_regs args
1762 where
1763 size = intSize width
1764 lbl = mkCmmCodeLabel primPackageKey (fsLit (popCntLabel width))
1765
1766 genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
1767 targetExpr <- cmmMakeDynamicReference dflags
1768 CallReference lbl
1769 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1770 [NoHint] [NoHint]
1771 CmmMayReturn)
1772 genCCall dflags is32Bit target dest_regs args
1773 where
1774 lbl = mkCmmCodeLabel primPackageKey (fsLit (word2FloatLabel width))
1775
1776 genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
1777 Amode amode addr_code <-
1778 if amop `elem` [AMO_Add, AMO_Sub]
1779 then getAmode addr
1780 else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
1781 arg <- getNewRegNat size
1782 arg_code <- getAnyReg n
1783 use_sse2 <- sse2Enabled
1784 let platform = targetPlatform dflags
1785 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1786 code <- op_code dst_r arg amode
1787 return $ addr_code `appOL` arg_code arg `appOL` code
1788 where
1789 -- Code for the operation
1790 op_code :: Reg -- Destination reg
1791 -> Reg -- Register containing argument
1792 -> AddrMode -- Address of location to mutate
1793 -> NatM (OrdList Instr)
1794 op_code dst_r arg amode = case amop of
1795 -- In the common case where dst_r is a virtual register the
1796 -- final move should go away, because it's the last use of arg
1797 -- and the first use of dst_r.
1798 AMO_Add -> return $ toOL [ LOCK (XADD size (OpReg arg) (OpAddr amode))
1799 , MOV size (OpReg arg) (OpReg dst_r)
1800 ]
1801 AMO_Sub -> return $ toOL [ NEGI size (OpReg arg)
1802 , LOCK (XADD size (OpReg arg) (OpAddr amode))
1803 , MOV size (OpReg arg) (OpReg dst_r)
1804 ]
1805 AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND size src dst)
1806 AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND size src dst
1807 , NOT size dst
1808 ])
1809 AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR size src dst)
1810 AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR size src dst)
1811 where
1812 -- Simulate operation that lacks a dedicated instruction using
1813 -- cmpxchg.
1814 cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
1815 -> NatM (OrdList Instr)
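-- The generated loop: load the old value into eax, compute the new
-- value into tmp, then LOCK CMPXCHG tmp against memory. On failure
-- CMPXCHG reloads eax with the value it observed, so jumping back to
-- lbl retries without an explicit reload.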
1816 cmpxchg_code instrs = do
1817 lbl <- getBlockIdNat
1818 tmp <- getNewRegNat size
1819 return $ toOL
1820 [ MOV size (OpAddr amode) (OpReg eax)
1821 , JXX ALWAYS lbl
1822 , NEWBLOCK lbl
1823 -- Keep old value so we can return it:
1824 , MOV size (OpReg eax) (OpReg dst_r)
1825 , MOV size (OpReg eax) (OpReg tmp)
1826 ]
1827 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
1828 [ LOCK (CMPXCHG size (OpReg tmp) (OpAddr amode))
1829 , JXX NE lbl
1830 ]
1831
1832 size = intSize width
1833
1834 genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
1835 load_code <- intLoadCode (MOV (intSize width)) addr
1836 let platform = targetPlatform dflags
1837 use_sse2 <- sse2Enabled
1838 return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
1839
1840 genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1841 code <- assignMem_IntCode (intSize width) addr val
1842 return $ code `snocOL` MFENCE
1843
1844 genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
1845 -- On x86 we don't have enough registers to use cmpxchg with a
1846 -- complicated addressing mode, so on that architecture we
1847 -- pre-compute the address first.
1848 Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
1849 newval <- getNewRegNat size
1850 newval_code <- getAnyReg new
1851 oldval <- getNewRegNat size
1852 oldval_code <- getAnyReg old
1853 use_sse2 <- sse2Enabled
1854 let platform = targetPlatform dflags
1855 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1856 code = toOL
1857 [ MOV size (OpReg oldval) (OpReg eax)
1858 , LOCK (CMPXCHG size (OpReg newval) (OpAddr amode))
1859 , MOV size (OpReg eax) (OpReg dst_r)
1860 ]
1861 return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
1862 `appOL` code
1863 where
1864 size = intSize width
1865
1866 genCCall _ is32Bit target dest_regs args = do
1867 dflags <- getDynFlags
1868 let platform = targetPlatform dflags
1869 case (target, dest_regs) of
1870 -- void return type prim op
1871 (PrimTarget op, []) ->
1872 outOfLineCmmOp op Nothing args
1873 -- we only cope with a single result for foreign calls
1874 (PrimTarget op, [r])
1875 | not is32Bit -> outOfLineCmmOp op (Just r) args
1876 | otherwise -> do
1877 l1 <- getNewLabelNat
1878 l2 <- getNewLabelNat
1879 sse2 <- sse2Enabled
1880 if sse2
1881 then
1882 outOfLineCmmOp op (Just r) args
1883 else case op of
1884 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1885 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1886
1887 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1888 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1889
1890 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1891 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1892
1893 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1894 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1895
1896 _other_op -> outOfLineCmmOp op (Just r) args
1897
1898 where
1899 actuallyInlineFloatOp instr size [x]
1900 = do res <- trivialUFCode size (instr size) x
1901 any <- anyReg res
1902 return (any (getRegisterReg platform False (CmmLocal r)))
1903
1904 actuallyInlineFloatOp _ _ args
1905 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
1906 ++ show (length args) ++ ")"
1907
1908 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
1909 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
1910 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
1911 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
1912 case args of
1913 [arg_x, arg_y] ->
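-- Add-with-carry: the low word is computed with ADD_CC so that the
-- carry flag is set, and the trailing ADC 0 folds that carry into
-- the zero-initialised high word.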
1914 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
1915 let size = intSize width
1916 lCode <- anyReg =<< trivialCode width (ADD_CC size)
1917 (Just (ADD_CC size)) arg_x arg_y
1918 let reg_l = getRegisterReg platform True (CmmLocal res_l)
1919 reg_h = getRegisterReg platform True (CmmLocal res_h)
1920 code = hCode reg_h `appOL`
1921 lCode reg_l `snocOL`
1922 ADC size (OpImm (ImmInteger 0)) (OpReg reg_h)
1923 return code
1924 _ -> panic "genCCall: Wrong number of arguments/results for add2"
1925 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
1926 case args of
1927 [arg_x, arg_y] ->
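-- Widening multiply: MUL2 computes rdx:rax := rax * operand, so the
-- high word is read back from rdx and the low word from rax.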
1928 do (y_reg, y_code) <- getRegOrMem arg_y
1929 x_code <- getAnyReg arg_x
1930 let size = intSize width
1931 reg_h = getRegisterReg platform True (CmmLocal res_h)
1932 reg_l = getRegisterReg platform True (CmmLocal res_l)
1933 code = y_code `appOL`
1934 x_code rax `appOL`
1935 toOL [MUL2 size y_reg,
1936 MOV size (OpReg rdx) (OpReg reg_h),
1937 MOV size (OpReg rax) (OpReg reg_l)]
1938 return code
1939 _ -> panic "genCCall: Wrong number of arguments/results for mul2"
1940
1941 _ -> if is32Bit
1942 then genCCall32' dflags target dest_regs args
1943 else genCCall64' dflags target dest_regs args
1944
1945 where divOp1 platform signed width results [arg_x, arg_y]
1946 = divOp platform signed width results Nothing arg_x arg_y
1947 divOp1 _ _ _ _ _
1948 = panic "genCCall: Wrong number of arguments for divOp1"
1949 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
1950 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
1951 divOp2 _ _ _ _ _
1952 = panic "genCCall: Wrong number of arguments for divOp2"
1953 divOp platform signed width [res_q, res_r]
1954 m_arg_x_high arg_x_low arg_y
1955 = do let size = intSize width
1956 reg_q = getRegisterReg platform True (CmmLocal res_q)
1957 reg_r = getRegisterReg platform True (CmmLocal res_r)
1958 widen | signed = CLTD size
1959 | otherwise = XOR size (OpReg rdx) (OpReg rdx)
1960 instr | signed = IDIV
1961 | otherwise = DIV
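-- Signed division sign-extends rax into rdx (CLTD); unsigned division
-- zeroes rdx. IDIV/DIV then divide rdx:rax by the operand, leaving the
-- quotient in rax and the remainder in rdx.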
1962 (y_reg, y_code) <- getRegOrMem arg_y
1963 x_low_code <- getAnyReg arg_x_low
1964 x_high_code <- case m_arg_x_high of
1965 Just arg_x_high ->
1966 getAnyReg arg_x_high
1967 Nothing ->
1968 return $ const $ unitOL widen
1969 return $ y_code `appOL`
1970 x_low_code rax `appOL`
1971 x_high_code rdx `appOL`
1972 toOL [instr size y_reg,
1973 MOV size (OpReg rax) (OpReg reg_q),
1974 MOV size (OpReg rdx) (OpReg reg_r)]
1975 divOp _ _ _ _ _ _ _
1976 = panic "genCCall: Wrong number of results for divOp"
1977
1978 genCCall32' :: DynFlags
1979 -> ForeignTarget -- function to call
1980 -> [CmmFormal] -- where to put the result
1981 -> [CmmActual] -- arguments (of mixed type)
1982 -> NatM InstrBlock
1983 genCCall32' dflags target dest_regs args = do
1984 let
1985 prom_args = map (maybePromoteCArg dflags W32) args
1986
1987 -- Align stack to 16n for calls, assuming a starting stack
1988 -- alignment of 16n - word_size on procedure entry, which we
1989 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
1990 sizes = map (arg_size . cmmExprType dflags) (reverse args)
1991 raw_arg_size = sum sizes + wORD_SIZE dflags
1992 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
1993 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
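-- Worked example (wORD_SIZE = 4): two 4-byte args give raw_arg_size = 12,
-- hence arg_pad_size = 4 and tot_arg_size = 12; pushing the 4 pad bytes
-- plus 8 bytes of arguments takes the stack from its entry alignment of
-- 16n - 4 to a 16-byte boundary at the call.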
1994 delta0 <- getDeltaNat
1995 setDeltaNat (delta0 - arg_pad_size)
1996
1997 use_sse2 <- sse2Enabled
1998 push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
1999 delta <- getDeltaNat
2000 MASSERT(delta == delta0 - tot_arg_size)
2001
2002 -- deal with static vs dynamic call targets
2003 (callinsns,cconv) <-
2004 case target of
2005 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2006 -> -- ToDo: stdcall arg sizes
2007 return (unitOL (CALL (Left fn_imm) []), conv)
2008 where fn_imm = ImmCLbl lbl
2009 ForeignTarget expr conv
2010 -> do { (dyn_r, dyn_c) <- getSomeReg expr
2011 ; ASSERT( isWord32 (cmmExprType dflags expr) )
2012 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
2013 PrimTarget _
2014 -> panic $ "genCCall: Can't handle PrimTarget call type here, "
2015 ++ "probably because there are too many return values."
2016
2017 let push_code
2018 | arg_pad_size /= 0
2019 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2020 DELTA (delta0 - arg_pad_size)]
2021 `appOL` concatOL push_codes
2022 | otherwise
2023 = concatOL push_codes
2024
2025 -- Deallocate parameters after call for ccall;
2026 -- but not for stdcall (callee does it)
2027 --
2028 -- We have to pop any stack padding we added
2029 -- even if we are doing stdcall, though (#5052)
2030 pop_size
2031 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
2032 | otherwise = tot_arg_size
2033
2034 call = callinsns `appOL`
2035 toOL (
2036 (if pop_size==0 then [] else
2037 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
2038 ++
2039 [DELTA delta0]
2040 )
2041 setDeltaNat delta0
2042
2043 dflags <- getDynFlags
2044 let platform = targetPlatform dflags
2045
2046 let
2047 -- assign the results, if necessary
2048 assign_code [] = nilOL
2049 assign_code [dest]
2050 | isFloatType ty =
2051 if use_sse2
2052 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
2053 EAIndexNone
2054 (ImmInt 0)
2055 sz = floatSize w
2056 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
2057 DELTA (delta0 - b),
2058 GST sz fake0 tmp_amode,
2059 MOV sz (OpAddr tmp_amode) (OpReg r_dest),
2060 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
2061 DELTA delta0]
2062 else unitOL (GMOV fake0 r_dest)
2063 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
2064 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
2065 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
2066 where
2067 ty = localRegType dest
2068 w = typeWidth ty
2069 b = widthInBytes w
2070 r_dest_hi = getHiVRegFromLo r_dest
2071 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
2072 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
2073
2074 return (push_code `appOL`
2075 call `appOL`
2076 assign_code dest_regs)
2077
2078 where
2079 arg_size :: CmmType -> Int -- Width in bytes
2080 arg_size ty = widthInBytes (typeWidth ty)
2081
2082 roundTo a x | x `mod` a == 0 = x
2083 | otherwise = x + a - (x `mod` a)
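-- e.g. roundTo 16 12 == 16, roundTo 16 32 == 32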
2084
2085 push_arg :: Bool -> CmmActual {-current argument-}
2086 -> NatM InstrBlock -- code
2087
2088 push_arg use_sse2 arg -- we don't need the hints on x86
2089 | isWord64 arg_ty = do
2090 ChildCode64 code r_lo <- iselExpr64 arg
2091 delta <- getDeltaNat
2092 setDeltaNat (delta - 8)
2093 let
2094 r_hi = getHiVRegFromLo r_lo
2095 return ( code `appOL`
2096 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
2097 PUSH II32 (OpReg r_lo), DELTA (delta - 8)]
2099 )
2100
2101 | isFloatType arg_ty = do
2102 (reg, code) <- getSomeReg arg
2103 delta <- getDeltaNat
2104 setDeltaNat (delta-size)
2105 return (code `appOL`
2106 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
2107 DELTA (delta-size),
2108 let addr = AddrBaseIndex (EABaseReg esp)
2109 EAIndexNone
2110 (ImmInt 0)
2111 size = floatSize (typeWidth arg_ty)
2112 in
2113 if use_sse2
2114 then MOV size (OpReg reg) (OpAddr addr)
2115 else GST size reg addr
2116 ]
2117 )
2118
2119 | otherwise = do
2120 (operand, code) <- getOperand arg
2121 delta <- getDeltaNat
2122 setDeltaNat (delta-size)
2123 return (code `snocOL`
2124 PUSH II32 operand `snocOL`
2125 DELTA (delta-size))
2126
2127 where
2128 arg_ty = cmmExprType dflags arg
2129 size = arg_size arg_ty -- Byte size
2130
2131 genCCall64' :: DynFlags
2132 -> ForeignTarget -- function to call
2133 -> [CmmFormal] -- where to put the result
2134 -> [CmmActual] -- arguments (of mixed type)
2135 -> NatM InstrBlock
2136 genCCall64' dflags target dest_regs args = do
2137 -- load up the register arguments
2138 let prom_args = map (maybePromoteCArg dflags W32) args
2139
2140 (stack_args, int_regs_used, fp_regs_used, load_args_code)
2141 <-
2142 if platformOS platform == OSMinGW32
2143 then load_args_win prom_args [] [] (allArgRegs platform) nilOL
2144 else do (stack_args, aregs, fregs, load_args_code)
2145 <- load_args prom_args (allIntArgRegs platform) (allFPArgRegs platform) nilOL
2146 let fp_regs_used = reverse (drop (length fregs) (reverse (allFPArgRegs platform)))
2147 int_regs_used = reverse (drop (length aregs) (reverse (allIntArgRegs platform)))
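-- i.e. the registers actually consumed are the prefix of each
-- availability list that load_args did not hand back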
2148 return (stack_args, int_regs_used, fp_regs_used, load_args_code)
2149
2150 let
2151 arg_regs_used = int_regs_used ++ fp_regs_used
2152 arg_regs = [eax] ++ arg_regs_used
2153 -- for annotating the call instruction
2154 sse_regs = length fp_regs_used
2155 arg_stack_slots = if platformOS platform == OSMinGW32
2156 then length stack_args + length (allArgRegs platform)
2157 else length stack_args
2158 tot_arg_size = arg_size * arg_stack_slots
2159
2160
2161 -- Align stack to 16n for calls, assuming a starting stack
2162 -- alignment of 16n - word_size on procedure entry. Which we
2163 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2164 (real_size, adjust_rsp) <-
2165 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2166 then return (tot_arg_size, nilOL)
2167 else do -- we need to adjust...
2168 delta <- getDeltaNat
2169 setDeltaNat (delta - wORD_SIZE dflags)
2170 return (tot_arg_size + wORD_SIZE dflags, toOL [
2171 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2172 DELTA (delta - wORD_SIZE dflags) ])
2173
2174 -- push the stack args, right to left
2175 push_code <- push_args (reverse stack_args) nilOL
2176 -- On Win64, we also have to leave stack space for the arguments
2177 -- that we are passing in registers
2178 lss_code <- if platformOS platform == OSMinGW32
2179 then leaveStackSpace (length (allArgRegs platform))
2180 else return nilOL
2181 delta <- getDeltaNat
2182
2183 -- deal with static vs dynamic call targets
2184 (callinsns,_cconv) <-
2185 case target of
2186 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2187 -> -- ToDo: stdcall arg sizes
2188 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2189 where fn_imm = ImmCLbl lbl
2190 ForeignTarget expr conv
2191 -> do (dyn_r, dyn_c) <- getSomeReg expr
2192 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2193 PrimTarget _
2194 -> panic $ "genCCall: Can't handle PrimTarget call type here, "
2195 ++ "probably because there are too many return values."
2196
2197 let
2198 -- The x86_64 ABI requires us to set %al to the number of SSE2
2199 -- registers that contain arguments, if the called routine
2200 -- is a varargs function. We don't know whether it's a
2201 -- varargs function or not, so we have to assume it is.
2202 --
2203 -- It's not safe to omit this assignment, even if the number
2204 -- of SSE2 regs in use is zero. If %al is larger than 8
2205 -- on entry to a varargs function, seg faults ensue.
2206 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
2207
2208 let call = callinsns `appOL`
2209 toOL (
2210 -- Deallocate parameters after call for ccall;
2211 -- stdcall has callee do it, but is not supported on
2212 -- x86_64 target (see #3336)
2213 (if real_size==0 then [] else
2214 [ADD (intSize (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2215 ++
2216 [DELTA (delta + real_size)]
2217 )
2218 setDeltaNat (delta + real_size)
2219
2220 let
2221 -- assign the results, if necessary
2222 assign_code [] = nilOL
2223 assign_code [dest] =
2224 case typeWidth rep of
2225 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
2226 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
2227 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
2228 where
2229 rep = localRegType dest
2230 r_dest = getRegisterReg platform True (CmmLocal dest)
2231 assign_code _many = panic "genCCall.assign_code: too many return values"
2232
2233 return (load_args_code `appOL`
2234 adjust_rsp `appOL`
2235 push_code `appOL`
2236 lss_code `appOL`
2237 assign_eax sse_regs `appOL`
2238 call `appOL`
2239 assign_code dest_regs)
2240
2241 where platform = targetPlatform dflags
2242 arg_size = 8 -- always, at the mo
2243
2244 load_args :: [CmmExpr]
2245 -> [Reg] -- int regs avail for args
2246 -> [Reg] -- FP regs avail for args
2247 -> InstrBlock
2248 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2249 load_args args [] [] code = return (args, [], [], code)
2250 -- no more regs to use
2251 load_args [] aregs fregs code = return ([], aregs, fregs, code)
2252 -- no more args to push
2253 load_args (arg : rest) aregs fregs code
2254 | isFloatType arg_rep =
2255 case fregs of
2256 [] -> push_this_arg
2257 (r:rs) -> do
2258 arg_code <- getAnyReg arg
2259 load_args rest aregs rs (code `appOL` arg_code r)
2260 | otherwise =
2261 case aregs of
2262 [] -> push_this_arg
2263 (r:rs) -> do
2264 arg_code <- getAnyReg arg
2265 load_args rest rs fregs (code `appOL` arg_code r)
2266 where
2267 arg_rep = cmmExprType dflags arg
2268
2269 push_this_arg = do
2270 (args',ars,frs,code') <- load_args rest aregs fregs code
2271 return (arg:args', ars, frs, code')
2272
2273 load_args_win :: [CmmExpr]
2274 -> [Reg] -- used int regs
2275 -> [Reg] -- used FP regs
2276 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2277 -> InstrBlock
2278 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2279 load_args_win args usedInt usedFP [] code
2280 = return (args, usedInt, usedFP, code)
2281 -- no more regs to use
2282 load_args_win [] usedInt usedFP _ code
2283 = return ([], usedInt, usedFP, code)
2284 -- no more args to push
2285 load_args_win (arg : rest) usedInt usedFP
2286 ((ireg, freg) : regs) code
2287 | isFloatType arg_rep = do
2288 arg_code <- getAnyReg arg
2289 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2290 (code `appOL`
2291 arg_code freg `snocOL`
2292 -- If we are calling a varargs function
2293 -- then we need to define ireg as well
2294 -- as freg
2295 MOV II64 (OpReg freg) (OpReg ireg))
2296 | otherwise = do
2297 arg_code <- getAnyReg arg
2298 load_args_win rest (ireg : usedInt) usedFP regs
2299 (code `appOL` arg_code ireg)
2300 where
2301 arg_rep = cmmExprType dflags arg
2302
2303 push_args [] code = return code
2304 push_args (arg:rest) code
2305 | isFloatType arg_rep = do
2306 (arg_reg, arg_code) <- getSomeReg arg
2307 delta <- getDeltaNat
2308 setDeltaNat (delta-arg_size)
2309 let code' = code `appOL` arg_code `appOL` toOL [
2310 SUB (intSize (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
2311 DELTA (delta-arg_size),
2312 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2313 push_args rest code'
2314
2315 | otherwise = do
2316 ASSERT(width == W64) return ()
2317 (arg_op, arg_code) <- getOperand arg
2318 delta <- getDeltaNat
2319 setDeltaNat (delta-arg_size)
2320 let code' = code `appOL` arg_code `appOL` toOL [
2321 PUSH II64 arg_op,
2322 DELTA (delta-arg_size)]
2323 push_args rest code'
2324 where
2325 arg_rep = cmmExprType dflags arg
2326 width = typeWidth arg_rep
2327
2328 leaveStackSpace n = do
2329 delta <- getDeltaNat
2330 setDeltaNat (delta - n * arg_size)
2331 return $ toOL [
2332 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2333 DELTA (delta - n * arg_size)]
2334
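-- C calling conventions promote sub-word integer arguments; mirror that
-- here by zero-extending (MO_UU_Conv) anything narrower than the
-- requested width.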
2335 maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
2336 maybePromoteCArg dflags wto arg
2337 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
2338 | otherwise = arg
2339 where
2340 wfrom = cmmExprWidth dflags arg
2341
2342 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2343 outOfLineCmmOp mop res args
2344 = do
2345 dflags <- getDynFlags
2346 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
2347 let target = ForeignTarget targetExpr
2348 (ForeignConvention CCallConv [] [] CmmMayReturn)
2349
2350 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args')
2351 where
2352 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2353 -- TODO: Why is this ok? Under Linux this code will be in libm.so
2354 -- Is it because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
2355 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2356
2357 args' = case mop of
2358 MO_Memcpy -> init args
2359 MO_Memset -> init args
2360 MO_Memmove -> init args
2361 _ -> args
2362
2363 fn = case mop of
2364 MO_F32_Sqrt -> fsLit "sqrtf"
2365 MO_F32_Sin -> fsLit "sinf"
2366 MO_F32_Cos -> fsLit "cosf"
2367 MO_F32_Tan -> fsLit "tanf"
2368 MO_F32_Exp -> fsLit "expf"
2369 MO_F32_Log -> fsLit "logf"
2370
2371 MO_F32_Asin -> fsLit "asinf"
2372 MO_F32_Acos -> fsLit "acosf"
2373 MO_F32_Atan -> fsLit "atanf"
2374
2375 MO_F32_Sinh -> fsLit "sinhf"
2376 MO_F32_Cosh -> fsLit "coshf"
2377 MO_F32_Tanh -> fsLit "tanhf"
2378 MO_F32_Pwr -> fsLit "powf"
2379
2380 MO_F64_Sqrt -> fsLit "sqrt"
2381 MO_F64_Sin -> fsLit "sin"
2382 MO_F64_Cos -> fsLit "cos"
2383 MO_F64_Tan -> fsLit "tan"
2384 MO_F64_Exp -> fsLit "exp"
2385 MO_F64_Log -> fsLit "log"
2386
2387 MO_F64_Asin -> fsLit "asin"
2388 MO_F64_Acos -> fsLit "acos"
2389 MO_F64_Atan -> fsLit "atan"
2390
2391 MO_F64_Sinh -> fsLit "sinh"
2392 MO_F64_Cosh -> fsLit "cosh"
2393 MO_F64_Tanh -> fsLit "tanh"
2394 MO_F64_Pwr -> fsLit "pow"
2395
2396 MO_Memcpy -> fsLit "memcpy"
2397 MO_Memset -> fsLit "memset"
2398 MO_Memmove -> fsLit "memmove"
2399
2400 MO_PopCnt _ -> fsLit "popcnt"
2401 MO_BSwap _ -> fsLit "bswap"
2402
2403 MO_AtomicRMW _ _ -> fsLit "atomicrmw"
2404 MO_AtomicRead _ -> fsLit "atomicread"
2405 MO_AtomicWrite _ -> fsLit "atomicwrite"
2406 MO_Cmpxchg _ -> fsLit "cmpxchg"
2407
2408 MO_UF_Conv _ -> unsupported
2409
2410 MO_S_QuotRem {} -> unsupported
2411 MO_U_QuotRem {} -> unsupported
2412 MO_U_QuotRem2 {} -> unsupported
2413 MO_Add2 {} -> unsupported
2414 MO_U_Mul2 {} -> unsupported
2415 MO_WriteBarrier -> unsupported
2416 MO_Touch -> unsupported
2417 (MO_Prefetch_Data _ ) -> unsupported
2418 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2419 ++ " not supported here")
2420
2421 -- -----------------------------------------------------------------------------
2422 -- Generating a table-branch
2423
2424 genSwitch :: DynFlags -> CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
2425
2426 genSwitch dflags expr ids
2427 | gopt Opt_PIC dflags
2428 = do
2429 (reg,e_code) <- getSomeReg expr
2430 lbl <- getNewLabelNat
2431 dflags <- getDynFlags
2432 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2433 (tableReg,t_code) <- getSomeReg $ dynRef
2434 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2435 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2436
2437 return $ if target32Bit (targetPlatform dflags)
2438 then e_code `appOL` t_code `appOL` toOL [
2439 ADD (intSize (wordWidth dflags)) op (OpReg tableReg),
2440 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2441 ]
2442 else case platformOS (targetPlatform dflags) of
2443 OSDarwin ->
2444 -- on Mac OS X/x86_64, put the jump table
2445 -- in the text section to work around a
2446 -- limitation of the linker.
2447 -- ld64 is unable to handle the relocations for
2448 -- .quad L1 - L0
2449 -- if L0 is not preceded by a non-anonymous
2450 -- label in its section.
2451 e_code `appOL` t_code `appOL` toOL [
2452 ADD (intSize (wordWidth dflags)) op (OpReg tableReg),
2453 JMP_TBL (OpReg tableReg) ids Text lbl
2454 ]
2455 _ ->
2456 -- HACK: On x86_64 binutils<2.17 is only able
2457 -- to generate PC32 relocations, hence we only
2458 -- get 32-bit offsets in the jump table. As
2459 -- these offsets are always negative we need
2460 -- to properly sign extend them to 64-bit.
2461 -- This hack should be removed in conjunction
2462 -- with the hack in PprMach.hs/pprDataItem
2463 -- once binutils 2.17 is standard.
2464 e_code `appOL` t_code `appOL` toOL [
2465 MOVSxL II32 op (OpReg reg),
2466 ADD (intSize (wordWidth dflags)) (OpReg reg) (OpReg tableReg),
2467 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2468 ]
2469 | otherwise
2470 = do
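-- Non-PIC case: the jump table holds absolute addresses, so it can be
-- indexed directly (roughly jmp *lbl(,reg,wORD_SIZE) in AT&T syntax).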
2471 (reg,e_code) <- getSomeReg expr
2472 lbl <- getNewLabelNat
2473 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2474 code = e_code `appOL` toOL [
2475 JMP_TBL op ids ReadOnlyData lbl
2476 ]
2477 return code
2478
2479 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2480 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2481 = Just (createJumpTable dflags ids section lbl)
2482 generateJumpTableForInstr _ _ = Nothing
2483
2484 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2485 -> GenCmmDecl (Alignment, CmmStatics) h g
2486 createJumpTable dflags ids section lbl
2487 = let jumpTable
2488 | gopt Opt_PIC dflags =
2489 let jumpTableEntryRel Nothing
2490 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2491 jumpTableEntryRel (Just blockid)
2492 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2493 where blockLabel = mkAsmTempLabel (getUnique blockid)
2494 in map jumpTableEntryRel ids
2495 | otherwise = map (jumpTableEntry dflags) ids
2496 in CmmData section (1, Statics lbl jumpTable)
2497
2498 -- -----------------------------------------------------------------------------
2499 -- 'condIntReg' and 'condFltReg': condition codes into registers
2500
2501 -- Turn those condition codes into integers now (when they appear on
2502 -- the right hand side of an assignment).
2503 --
2504 -- (If applicable) Do not fill the delay slots here; you will confuse the
2505 -- register allocator.
2506
2507 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2508
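-- SETCC writes only the low byte of its operand, so the flag is
-- materialised in an 8-bit temporary and then zero-extended into the
-- destination with MOVZxL.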
2509 condIntReg cond x y = do
2510 CondCode _ cond cond_code <- condIntCode cond x y
2511 tmp <- getNewRegNat II8
2512 let
2513 code dst = cond_code `appOL` toOL [
2514 SETCC cond (OpReg tmp),
2515 MOVZxL II8 (OpReg tmp) (OpReg dst)
2516 ]
2517 return (Any II32 code)
2518
2519
2520
2521 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2522 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2523 where
2524 condFltReg_x87 = do
2525 CondCode _ cond cond_code <- condFltCode cond x y
2526 tmp <- getNewRegNat II8
2527 let
2528 code dst = cond_code `appOL` toOL [
2529 SETCC cond (OpReg tmp),
2530 MOVZxL II8 (OpReg tmp) (OpReg dst)
2531 ]
2532 return (Any II32 code)
2533
2534 condFltReg_sse2 = do
2535 CondCode _ cond cond_code <- condFltCode cond x y
2536 tmp1 <- getNewRegNat (archWordSize is32Bit)
2537 tmp2 <- getNewRegNat (archWordSize is32Bit)
2538 let
2539 -- We have to worry about unordered operands (eg. comparisons
2540 -- against NaN). If the operands are unordered, the comparison
2541 -- sets the parity flag, carry flag and zero flag.
2542 -- All comparisons are supposed to return false for unordered
2543 -- operands except for !=, which returns true.
2544 --
2545 -- Optimisation: we don't have to test the parity flag if we
2546 -- know the test has already excluded the unordered case: eg >
2547 -- and >= test for a zero carry flag, which can only occur for
2548 -- ordered operands.
2549 --
2550 -- ToDo: by reversing comparisons we could avoid testing the
2551 -- parity flag in more cases.
2552
2553 code dst =
2554 cond_code `appOL`
2555 (case cond of
2556 NE -> or_unordered dst
2557 GU -> plain_test dst
2558 GEU -> plain_test dst
2559 _ -> and_ordered dst)
2560
2561 plain_test dst = toOL [
2562 SETCC cond (OpReg tmp1),
2563 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2564 ]
2565 or_unordered dst = toOL [
2566 SETCC cond (OpReg tmp1),
2567 SETCC PARITY (OpReg tmp2),
2568 OR II8 (OpReg tmp1) (OpReg tmp2),
2569 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2570 ]
2571 and_ordered dst = toOL [
2572 SETCC cond (OpReg tmp1),
2573 SETCC NOTPARITY (OpReg tmp2),
2574 AND II8 (OpReg tmp1) (OpReg tmp2),
2575 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2576 ]
2577 return (Any II32 code)
2578
2579
2580 -- -----------------------------------------------------------------------------
2581 -- 'trivial*Code': deal with trivial instructions
2582
2583 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2584 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2585 -- Only look for constants on the right hand side, because that's
2586 -- where the generic optimizer will have put them.
2587
2588 -- Similarly, for unary instructions, we don't have to worry about
2589 -- matching an StInt as the argument, because genericOpt will already
2590 -- have handled the constant-folding.
2591
2592
2593 {-
2594 The Rules of the Game are:
2595
2596 * You cannot assume anything about the destination register dst;
2597 it may be anything, including a fixed reg.
2598
2599 * You may compute an operand into a fixed reg, but you may not
2600 subsequently change the contents of that fixed reg. If you
2601 want to do so, first copy the value either to a temporary
2602 or into dst. You are free to modify dst even if it happens
2603 to be a fixed reg -- that's not your problem.
2604
2605 * You cannot assume that a fixed reg will stay live over an
2606 arbitrary computation. The same applies to the dst reg.
2607
2608 * Temporary regs obtained from getNewRegNat are distinct from
2609 each other and from all other regs, and stay live over
2610 arbitrary computations.
2611
2612 --------------------
2613
2614 SDM's version of The Rules:
2615
2616 * If getRegister returns Any, that means it can generate correct
2617 code which places the result in any register, period. Even if that
2618 register happens to be read during the computation.
2619
2620 Corollary #1: this means that if you are generating code for an
2621 operation with two arbitrary operands, you cannot assign the result
2622 of the first operand into the destination register before computing
2623 the second operand. The second operand might require the old value
2624 of the destination register.
2625
2626 Corollary #2: A function might be able to generate more efficient
2627 code if it knows the destination register is a new temporary (and
2628 therefore not read by any of the sub-computations).
2629
2630 * If getRegister returns Any, then the code it generates may modify only:
2631 (a) fresh temporaries
2632 (b) the destination register
2633 (c) known registers (eg. %ecx is used by shifts)
2634 In particular, it may *not* modify global registers, unless the global
2635 register happens to be the destination register.
2636 -}
2637
2638 trivialCode :: Width -> (Operand -> Operand -> Instr)
2639 -> Maybe (Operand -> Operand -> Instr)
2640 -> CmmExpr -> CmmExpr -> NatM Register
2641 trivialCode width instr m a b
2642 = do is32Bit <- is32BitPlatform
2643 trivialCode' is32Bit width instr m a b
2644
2645 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
2646 -> Maybe (Operand -> Operand -> Instr)
2647 -> CmmExpr -> CmmExpr -> NatM Register
2648 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
2649 | is32BitLit is32Bit lit_a = do
2650 b_code <- getAnyReg b
2651 let
2652 code dst
2653 = b_code dst `snocOL`
2654 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2655 return (Any (intSize width) code)
2656
2657 trivialCode' _ width instr _ a b
2658 = genTrivialCode (intSize width) instr a b
2659
2660 -- This is re-used for floating pt instructions too.
2661 genTrivialCode :: Size -> (Operand -> Operand -> Instr)
2662 -> CmmExpr -> CmmExpr -> NatM Register
2663 genTrivialCode rep instr a b = do
2664 (b_op, b_code) <- getNonClobberedOperand b
2665 a_code <- getAnyReg a
2666 tmp <- getNewRegNat rep
2667 let
2668 -- We want the value of b to stay alive across the computation of a.
2669 -- But, we want to calculate a straight into the destination register,
2670 -- because the instruction only has two operands (dst := dst `op` src).
2671 -- The troublesome case is when the result of b is in the same register
2672 -- as the destination reg. In this case, we have to save b in a
2673 -- new temporary across the computation of a.
2674 code dst
2675 | dst `regClashesWithOp` b_op =
2676 b_code `appOL`
2677 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2678 a_code dst `snocOL`
2679 instr (OpReg tmp) (OpReg dst)
2680 | otherwise =
2681 b_code `appOL`
2682 a_code dst `snocOL`
2683 instr b_op (OpReg dst)
2684 return (Any rep code)
2685
2686 regClashesWithOp :: Reg -> Operand -> Bool
2687 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2688 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2689 _ `regClashesWithOp` _ = False
2690
2691 -----------
2692
2693 trivialUCode :: Size -> (Operand -> Instr)
2694 -> CmmExpr -> NatM Register
2695 trivialUCode rep instr x = do
2696 x_code <- getAnyReg x
2697 let
2698 code dst =
2699 x_code dst `snocOL`
2700 instr (OpReg dst)
2701 return (Any rep code)
2702
2703 -----------
2704
2705 trivialFCode_x87 :: (Size -> Reg -> Reg -> Reg -> Instr)
2706 -> CmmExpr -> CmmExpr -> NatM Register
2707 trivialFCode_x87 instr x y = do
2708 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2709 (y_reg, y_code) <- getSomeReg y
2710 let
2711 size = FF80 -- always, on x87
2712 code dst =
2713 x_code `appOL`
2714 y_code `snocOL`
2715 instr size x_reg y_reg dst
2716 return (Any size code)
2717
2718 trivialFCode_sse2 :: Width -> (Size -> Operand -> Operand -> Instr)
2719 -> CmmExpr -> CmmExpr -> NatM Register
2720 trivialFCode_sse2 pk instr x y
2721 = genTrivialCode size (instr size) x y
2722 where size = floatSize pk
2723
2724
2725 trivialUFCode :: Size -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2726 trivialUFCode size instr x = do
2727 (x_reg, x_code) <- getSomeReg x
2728 let
2729 code dst =
2730 x_code `snocOL`
2731 instr x_reg dst
2732 return (Any size code)
2733
2734
2735 --------------------------------------------------------------------------------
2736 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2737 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2738 where
2739 coerce_x87 = do
2740 (x_reg, x_code) <- getSomeReg x
2741 let
2742 opc = case to of W32 -> GITOF; W64 -> GITOD;
2743 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2744 ++ show n ++ ")"
2745 code dst = x_code `snocOL` opc x_reg dst
2746 -- ToDo: works for non-II32 reps?
2747 return (Any FF80 code)
2748
2749 coerce_sse2 = do
2750 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2751 let
2752 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2753 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2754 ++ show n ++ ")"
2755 code dst = x_code `snocOL` opc (intSize from) x_op dst
2756 return (Any (floatSize to) code)
2757 -- works even if the destination rep is <II32
2758
2759 --------------------------------------------------------------------------------
2760 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2761 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2762 where
2763 coerceFP2Int_x87 = do
2764 (x_reg, x_code) <- getSomeReg x
2765 let
2766 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2767 n -> panic $ "coerceFP2Int.x87: unhandled width ("
2768 ++ show n ++ ")"
2769 code dst = x_code `snocOL` opc x_reg dst
2770 -- ToDo: works for non-II32 reps?
2771 return (Any (intSize to) code)
2772
2773 coerceFP2Int_sse2 = do
2774 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2775 let
2776 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
2777 n -> panic $ "coerceFP2Int.sse: unhandled width ("
2778 ++ show n ++ ")"
2779 code dst = x_code `snocOL` opc (intSize to) x_op dst
2780 return (Any (intSize to) code)
2781 -- works even if the destination rep is <II32
2782
2783
2784 --------------------------------------------------------------------------------
2785 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2786 coerceFP2FP to x = do
2787 use_sse2 <- sse2Enabled
2788 (x_reg, x_code) <- getSomeReg x
2789 let
2790 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2791 n -> panic $ "coerceFP2FP: unhandled width ("
2792 ++ show n ++ ")"
2793 | otherwise = GDTOF
2794 code dst = x_code `snocOL` opc x_reg dst
2795 return (Any (if use_sse2 then floatSize to else FF80) code)
2796
2797 --------------------------------------------------------------------------------
2798
2799 sse2NegCode :: Width -> CmmExpr -> NatM Register
2800 sse2NegCode w x = do
2801 let sz = floatSize w
2802 x_code <- getAnyReg x
2803 -- This is how gcc does it, so it can't be that bad:
2804 let
2805 const | FF32 <- sz = CmmInt 0x80000000 W32
2806 | otherwise = CmmInt 0x8000000000000000 W64
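-- XORing with a mask that has only the sign bit set flips the sign
-- and leaves every other bit of the float untouched.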
2807 Amode amode amode_code <- memConstant (widthInBytes w) const
2808 tmp <- getNewRegNat sz
2809 let
2810 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2811 MOV sz (OpAddr amode) (OpReg tmp),
2812 XOR sz (OpReg tmp) (OpReg dst)
2813 ]
2814 --
2815 return (Any sz code)
2816
2817 isVecExpr :: CmmExpr -> Bool
2818 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
2819 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
2820 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
2821 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
2822 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
2823 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
2824 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
2825 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
2826 isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
2827 isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
2828 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
2829 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
2830 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
2831 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
2832 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
2833 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
2834 isVecExpr _ = False
2835
2836 needLlvm :: NatM a
2837 needLlvm =
2838 sorry $ unlines ["The native code generator does not support vector"
2839 ,"instructions. Please use -fllvm."]