-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

{-# LANGUAGE GADTs #-}
module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primPackageId )
import PprCmm           ()
import CmmUtils
import Cmm
import Hoopl
import CLabel

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import FastBool         ( isFastTrue )
import DynFlags
import Util

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
    dflags <- getDynFlags
    return (isSse2Enabled dflags)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
    dflags <- getDynFlags
    return (isSse4_2Enabled dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
    b <- sse2Enabled
    if b then sse2 else x87

cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
    let blocks = toBlockListEntryFirst graph
    (nat_blocks, statics) <- mapAndUnzipM basicBlockCodeGen blocks
    picBaseMb <- getPicBaseMaybeNat
    dflags <- getDynFlags
    let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
        tops = proc : concat statics
        os   = platformOS $ targetPlatform dflags

    case picBaseMb of
        Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
        Nothing      -> return tops

cmmTopCodeGen (CmmData sec dat) = do
    return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
    let (CmmEntry id, nodes, tail) = blockSplit block
        stmts = blockToList nodes
    mid_instrs <- stmtsToInstrs stmts
    tail_instrs <- stmtToInstrs tail
    let instrs = mid_instrs `appOL` tail_instrs
    -- code generation may introduce new basic block boundaries, which
    -- are indicated by the NEWBLOCK instruction.  We must split up the
    -- instruction stream into basic blocks again.  Also, we extract
    -- LDATAs here too.
    let
        (top, other_blocks, statics) = foldrOL mkBlocks ([], [], []) instrs

        mkBlocks (NEWBLOCK id) (instrs, blocks, statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs, blocks, statics)
          = (instrs, blocks, CmmData sec dat : statics)
        mkBlocks instr (instrs, blocks, statics)
          = (instr : instrs, blocks, statics)
    return (BasicBlock id top : other_blocks, statics)


stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s -> return (unitOL (COMMENT s))

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode size reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode size reg src
        where ty = cmmRegType dflags reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode size addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode size addr src
        where ty = cmmExprType dflags src
              size = cmmTypeSize ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall is32Bit target result_regs args

    CmmBranch id                 -> genBranch id
    CmmCondBranch arg true false -> do b1 <- genCondJump true arg
                                       b2 <- genBranch false
                                       return (b1 `appOL` b2)
    CmmSwitch arg ids            -> do dflags <- getDynFlags
                                       genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                       dflags <- getDynFlags
                                       genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--   They are really trees of insns to facilitate fast appending, where a
--   left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      The rule of this simplified insn selection game is therefore
--      that the returned Reg may be modified.
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Size Reg InstrBlock
        | Any   Size (Reg -> InstrBlock)


swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let sz = cmmTypeSize pk in
    if isFloatSize sz && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u sz)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...

-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
    where i64 = fromIntegral i :: Int64
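-- Illustrative examples (not in the original source) of the truncation
-- behaviour described above:
--   is32BitInteger 0x7fffffff           == True
--   is32BitInteger 0x80000000           == False
--   is32BitInteger 18446744073709551615 == True   -- truncates to (-1) :: Int64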


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)
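-- For example (illustrative, not in the original source):
--   mangleIndexTree dflags (CmmLocal r) 4
-- turns into
--   CmmMachOp (MO_Add w) [CmmReg (CmmLocal r), CmmLit (CmmInt 4 w)]
-- where w is the width of r's type.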

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
      )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo, rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
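-- Illustration (not in the original source): for i = 0x0000000100000002,
-- the low word r is 2 and the high word q is 1, so the 64-bit literal is
-- materialised with two 32-bit MOVs into the virtual register pair.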

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo, rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
          )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo, rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code = code1 `appOL`
               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                      ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                      MOV II32 (OpReg r1hi) (OpReg rhi),
                      ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1, e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo, rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code = code1 `appOL`
               code2 `appOL`
               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                      ADD II32 (OpReg r2lo) (OpReg rlo),
                      MOV II32 (OpReg r1hi) (OpReg rhi),
                      ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                         r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordSize is32Bit)
               return (Fixed (archWordSize is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 sz = cmmTypeSize (cmmRegType dflags reg)
                 size | not use_sse2 && isFloatSize sz = FF80
                      | otherwise                      = sz
               --
               let platform = targetPlatform dflags
               return (Fixed size (getRegisterReg platform use_sse2 reg) nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                        [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]])
 | is32Bit = do
    ChildCode64 code rlo <- iselExpr64 x
    return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                        [CmmMachOp (MO_U_Shr W64) [x, CmmLit (CmmInt 32 _)]])
 | is32Bit = do
    ChildCode64 code rlo <- iselExpr64 x
    return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
    ChildCode64 code rlo <- iselExpr64 x
    return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
    ChildCode64 code rlo <- iselExpr64 x
    return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
  where
    float_const_sse2
      | f == 0.0 = do
          let
            size = floatSize w
            code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
            -- I don't know why there are xorpd, xorps, and pxor instructions.
            -- They all appear to do the same thing --SDM
          return (Any size code)

      | otherwise = do
          Amode addr code <- memConstant (widthInBytes w) lit
          loadFloatAmode True w addr code

    float_const_x87 = case w of
      W64
        | f == 0.0 ->
            let code dst = unitOL (GLDZ dst)
            in  return (Any FF80 code)

        | f == 1.0 ->
            let code dst = unitOL (GLD1 dst)
            in  return (Any FF80 code)

      _otherwise -> do
          Amode addr code <- memConstant (widthInBytes w) lit
          loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr  -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}  -> needLlvm
      MO_V_Extract {} -> needLlvm
      MO_V_Add {}     -> needLlvm
      MO_V_Sub {}     -> needLlvm
      MO_V_Mul {}     -> needLlvm
      MO_VS_Quot {}   -> needLlvm
      MO_VS_Rem {}    -> needLlvm
      MO_VS_Neg {}    -> needLlvm
      MO_VF_Add {}    -> needLlvm
      MO_VF_Sub {}    -> needLlvm
      MO_VF_Mul {}    -> needLlvm
      MO_VF_Quot {}   -> needLlvm
      MO_VF_Neg {}    -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
  where
    triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
    triv_ucode instr size = trivialUCode size (instr size) x

    -- signed or unsigned extension.
    integerExtend :: Width -> Width
                  -> (Size -> Operand -> Operand -> Instr)
                  -> CmmExpr -> NatM Register
    integerExtend from to instr expr = do
        (reg, e_code) <- if from == W8 then getByteReg expr
                                       else getSomeReg expr
        let
            code dst =
              e_code `snocOL`
              instr (intSize from) (OpReg reg) (OpReg dst)
        return (Any (intSize to) code)

    toI8Reg :: Width -> CmmExpr -> NatM Register
    toI8Reg new_rep expr
        = do codefn <- getAnyReg expr
             return (Any (intSize new_rep) codefn)
        -- HACK: use getAnyReg to get a byte-addressable register.
        -- If the source was a Fixed register, this will add the
        -- mov instruction to put it into the desired destination.
        -- We're assuming that the destination won't be a fixed
        -- non-byte-addressable register; it won't be, because all
        -- fixed registers are word-sized.

    toI16Reg = toI8Reg -- for now

    conversionNop :: Size -> CmmExpr -> NatM Register
    conversionNop new_size expr
        = do e_code <- getRegister' dflags is32Bit expr
             return (swizzleRegisterRep e_code new_size)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

      {- Shift ops on x86s have constraints on their source, it
         either has to be Imm, CL or 1
         => trivialCode is not restrictive enough (sigh.)
      -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_V_Insert {}  -> needLlvm
      MO_V_Extract {} -> needLlvm
      MO_V_Add {}     -> needLlvm
      MO_V_Sub {}     -> needLlvm
      MO_V_Mul {}     -> needLlvm
      MO_VS_Quot {}   -> needLlvm
      MO_VS_Rem {}    -> needLlvm
      MO_VS_Neg {}    -> needLlvm
      MO_VF_Add {}    -> needLlvm
      MO_VF_Sub {}    -> needLlvm
      MO_VF_Mul {}    -> needLlvm
      MO_VF_Quot {}   -> needLlvm
      MO_VF_Neg {}    -> needLlvm

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
        where op = instr (intSize width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
        (a_reg, a_code) <- getNonClobberedReg a
        b_code <- getAnyReg b
        let
            shift_amt = case rep of
                          W32 -> 31
                          W64 -> 63
                          _   -> panic "shift_amt"

            size = intSize rep
            code = a_code `appOL` b_code eax `appOL`
                   toOL [
                      IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                      SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                           -- sign extend lower part
                      SUB size (OpReg edx) (OpReg eax)
                           -- compare against upper
                           -- eax==0 if high part == sign extended low part
                   ]
        return (Fixed size eax code)

    --------------------
    shift_code :: Width
               -> (Size -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
        x_code <- getAnyReg x
        let
            size = intSize width
            code dst
               = x_code dst `snocOL`
                 instr size (OpImm (litToImm lit)) (OpReg dst)
        return (Any size code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
        let
            code = x_code tmp `appOL`
                   y_code ecx `snocOL`
                   instr size (OpReg ecx) (OpReg tmp)
        return (Fixed size tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            size = intSize width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA size
                     (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                     (OpReg dst)
        --
        return (Any size code)
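        -- Illustrative note (not in the original source): for @x + 16@ this
        -- emits @LEA 16(x_reg), dst@, computing the sum in a single
        -- instruction without clobbering the flags, instead of a MOV
        -- followed by an ADD.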

    ----------------------
    div_code width signed quotient x y = do
        (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
        x_code <- getAnyReg x
        let
            size = intSize width
            widen | signed    = CLTD size
                  | otherwise = XOR size (OpReg edx) (OpReg edx)

            instr | signed    = IDIV
                  | otherwise = DIV

            code = y_code `appOL`
                   x_code eax `appOL`
                   toOL [widen, instr size y_op]

            result | quotient  = eax
                   | otherwise = edx

        return (Fixed size result code)
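    -- Illustrative note (not in the original source): CLTD sign-extends
    -- %eax into %edx for signed division, while XOR zeroes %edx for the
    -- unsigned case; IDIV/DIV then leave the quotient in %eax and the
    -- remainder in %edx, which is why @result@ picks one or the other.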


getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
      Amode addr mem_code <- getAmode mem
      use_sse2 <- sse2Enabled
      loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
      code <- intLoadCode instr mem
      return (Any size code)
  where
    width = typeWidth pk
    size = intSize width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV size
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
  | not is32Bit
  = do
      code <- intLoadCode (MOV size) mem
      return (Any size code)
  where size = intSize $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        size = intSize width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        size1 = if is32Bit then size
                           else case size of
                                    II64 -> II32
                                    _    -> size
        code dst
           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
    in
        return (Any size code)

-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
    isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

getRegister' dflags _ (CmmLit lit)
  = do let size = cmmTypeSize (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
       return (Any size code)

getRegister' _ _ other
  | isVecExpr other = needLlvm
  | otherwise       = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg, code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
            tmp <- getNewRegNat rep
            return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
            return (reg, code)

reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg size src dst
   | size == FF80 = GMOV src dst
   | otherwise    = MOV size (OpReg src) (OpReg dst)

--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                           CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                           b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b, a])

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                      [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                       [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                        CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x, y])
  = x86_complex_amode x y 0 0

getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg, code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)


x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           scale = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                 n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg scale) (ImmInt (fromIntegral offset)))
               code)
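
-- For example (illustrative, not in the original source): an address of the
-- form @base + index<<2 + 12@ comes out as the x86 addressing mode
-- @12(x_reg, y_reg, 4)@, with the shift mapped to a scale of 4.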


-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && (if is32Bit then not (isWord64 pk) else True)
    then do
      dflags <- getDynFlags
      let platform = targetPlatform dflags
      Amode src mem_code <- getAmode mem
      (src', save_code) <-
        if (amodeCouldBeClobbered platform src)
          then do
            tmp <- getNewRegNat (archWordSize is32Bit)
            return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                    unitOL (LEA (archWordSize is32Bit) (OpAddr src) (OpReg tmp)))
          else
            return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else do
      getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = isFastTrue (freeReg platform rr)
regClobbered _ _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
    then do
      Amode src mem_code <- getAmode mem
      return (OpAddr src, mem_code)
    else
      getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: Bool -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _) = True
isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
                                 || isSuitableFloatingPointLit lit
isOperand _ _ = False

memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  dflags <- getDynFlags
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             addImportNat
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA ReadOnlyData (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)


loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let size = floatSize w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV size (OpAddr addr) (OpReg dst)
                    else GLD size addr dst
  return (Any (if use_sse2 then size else FF80) code)


-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
    then do
      Amode src mem_code <- getAmode mem
      return (OpAddr src, mem_code)
    else do
      (reg, code) <- getNonClobberedReg e
      return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: Bool -> CmmLit -> Bool
is32BitLit is32Bit (CmmInt i W64)
 | not is32Bit
    = -- assume that labels are in the range 0-2^31-1: this assumes the
      -- small memory model (see gcc docs, -mcmodel=small).
      is32BitInteger i
is32BitLit _ _ = True
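
-- For example (illustrative, not in the original source): on x86_64,
-- is32BitLit False (CmmInt 0x100000000 W64) is False, so such a literal
-- cannot be used as an immediate operand and must be loaded via a register.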




-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      MO_Eq _ -> condIntCode EQQ x y
      MO_Ne _ -> condIntCode NE  x y

      MO_S_Gt _ -> condIntCode GTT x y
      MO_S_Ge _ -> condIntCode GE  x y
      MO_S_Lt _ -> condIntCode LTT x y
      MO_S_Le _ -> condIntCode LE  x y

      MO_U_Gt _ -> condIntCode GU  x y
      MO_U_Ge _ -> condIntCode GEU x y
      MO_U_Lt _ -> condIntCode LU  x y
      MO_U_Le _ -> condIntCode LEU x y

      _other -> pprPanic "getCondCode(x86,x86_64)" (ppr (CmmMachOp mop [x, y]))

getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)




-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do is32Bit <- is32BitPlatform
                          condIntCode' is32Bit cond x y

condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate
condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
 | is32BitLit is32Bit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
               CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x, o2]) (CmmLit (CmmInt 0 pk))
 | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)

-- anything vs zero
condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
               TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand
condIntCode' is32Bit cond x y
 | isOperand is32Bit y = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
 -- operand vs. anything: invert the comparison so that we can use a
 -- single comparison instruction.
 | isOperand is32Bit x
 , Just revcond <- maybeFlipCond cond = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)

-- anything vs anything
condIntCode' _ cond x y = do
  dflags <- getDynFlags
  (y_reg, y_code) <- getNonClobberedReg y
  (x_op,  x_code) <- getRegOrMem x
  let
        code = y_code `appOL`
               x_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) (OpReg y_reg) x_op
  return (CondCode False cond code)



--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  = if_sse2 condFltCode_sse2 condFltCode_x87
  where

  condFltCode_x87
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
      (x_reg, x_code) <- getNonClobberedReg x
      (y_reg, y_code) <- getSomeReg y
      let
        code = x_code `appOL` y_code `snocOL`
               GCMP cond x_reg y_reg
      -- The GCMP insn does the test and sets the zero flag if comparable
      -- and true.  Hence we always supply EQQ as the condition to test.
      return (CondCode True EQQ code)

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
      code = x_code `appOL`
             y_code `snocOL`
             CMP (floatSize $ cmmExprWidth dflags x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)

-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock


-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
   | addr == addr2, pk /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode pk addr src = do
    is32Bit <- is32BitPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI is32Bit src
    let
        code = code_src `appOL`
               code_addr `snocOL`
               MOV pk op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock, Operand)  -- code, operator
    get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg, code) <- getNonClobberedReg op
           return (code, OpReg reg)


-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
  load_code <- intLoadCode (MOV pk) src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform False{-no sse2-} reg))

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  code <- getAnyReg src
  return (code (getRegisterReg platform False{-no sse2-} reg))


-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  use_sse2 <- sse2Enabled
  let
        code = src_code `appOL`
               addr_code `snocOL`
               if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
                           else GST pk src_reg addr
  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do
  use_sse2 <- sse2Enabled
  src_code <- getAnyReg src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (src_code (getRegisterReg platform use_sse2 reg))


genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock

genJump (CmmLoad mem _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

genJump (CmmLit lit) regs = do
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

genJump expr regs = do
  (reg, code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)


-- -----------------------------------------------------------------------------
--  Unconditional branches

genBranch :: BlockId -> NatM InstrBlock
genBranch = return . toOL . mkJumpInstr



-- -----------------------------------------------------------------------------
--  Conditional jumps

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.

I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}

genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump id bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  use_sse2 <- sse2Enabled
  if not is_float || not use_sse2
    then
        return (cond_code `snocOL` JXX cond id)
    else do
        lbl <- getBlockIdNat

        -- see comment with condFltReg
        let code = case cond of
                        NE  -> or_unordered
                        GU  -> plain_test
                        GEU -> plain_test
                        _   -> and_ordered

            plain_test = unitOL (
                  JXX cond id
                )
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ]
            and_ordered = toOL [
                  JXX PARITY lbl,
                  JXX cond id,
                  JXX ALWAYS lbl,
                  NEWBLOCK lbl
                ]
        return (cond_code `appOL` code)


-- -----------------------------------------------------------------------------
--  Generating C calls

-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
-- @get_arg@, which moves the arguments to the correct registers/stack
-- locations.  Apart from that, the code is easy.
--
-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.

genCCall
    :: Bool                     -- 32 bit platform?
    -> ForeignTarget            -- function to call
    -> [CmmFormal]              -- where to put the result
    -> [CmmActual]              -- arguments (of mixed type)
    -> NatM InstrBlock

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

-- Unroll memcpy calls if the source and destination pointers are at
-- least DWORD aligned and the number of bytes to copy isn't too
-- large.  Otherwise, call C's memcpy.
genCCall is32Bit (PrimTarget MO_Memcpy) _
         [dst, src,
          (CmmLit (CmmInt n _)),
          (CmmLit (CmmInt align _))]
    | n <= maxInlineSizeThreshold && align .&. 3 == 0 = do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat size
        code_src <- getAnyReg src
        src_r <- getNewRegNat size
        tmp_r <- getNewRegNat size
        return $ code_dst dst_r `appOL` code_src src_r `appOL`
                 go dst_r src_r tmp_r n
  where
    size = if align .&. 4 /= 0 then II32 else (archWordSize is32Bit)

    sizeBytes = fromIntegral (sizeInBytes size)

    go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
    go dst src tmp i
        | i >= sizeBytes =
            unitOL (MOV size (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV size (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - sizeBytes)
        -- Deal with remaining bytes.
        | i >= 4 =  -- Will never happen on 32-bit
            unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 4)
        | i >= 2 =
            unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 2)
        | i >= 1 =
            unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 1)
        | otherwise = nilOL
      where
        src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
                   (ImmInteger (n - i))
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
                   (ImmInteger (n - i))
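
    -- Worked example (illustrative, not in the original source): copying
    -- n = 10 bytes with a 4-byte word size unrolls to two 32-bit load/store
    -- pairs followed by one 16-bit pair (10 = 4 + 4 + 2).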

genCCall _ (PrimTarget MO_Memset) _
         [dst,
          CmmLit (CmmInt c _),
          CmmLit (CmmInt n _),
          CmmLit (CmmInt align _)]
    | n <= maxInlineSizeThreshold && align .&. 3 == 0 = do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat size
        return $ code_dst dst_r `appOL` go dst_r n
  where
    (size, val) = case align .&. 3 of
        2 -> (II16, c2)
        0 -> (II32, c4)
        _ -> (II8, c)
    c2 = c `shiftL` 8 .|. c
    c4 = c2 `shiftL` 16 .|. c2
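    -- e.g. (illustrative, not in the original source): c = 0xAB gives
    -- c2 = 0xABAB and c4 = 0xABABABAB, replicating the fill byte so that
    -- the wider stores write the same pattern as repeated byte stores.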
1627
1628 sizeBytes = fromIntegral (sizeInBytes size)
1629
1630 go :: Reg -> Integer -> OrdList Instr
1631 go dst i
1632 -- TODO: Add movabs instruction and support 64-bit sets.
1633 | i >= sizeBytes = -- This might be smaller than the below sizes
1634 unitOL (MOV size (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1635 go dst (i - sizeBytes)
1636 | i >= 4 = -- Will never happen on 32-bit
1637 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1638 go dst (i - 4)
1639 | i >= 2 =
1640 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1641 go dst (i - 2)
1642 | i >= 1 =
1643 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1644 go dst (i - 1)
1645 | otherwise = nilOL
1646 where
1647 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1648 (ImmInteger (n - i))
1649
1650 genCCall _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1651 -- write barrier compiles to no code on x86/x86-64;
1652 -- we keep it this long in order to prevent earlier optimisations.
1653
1654 genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL
1655
1656 genCCall is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1657 args@[src] = do
1658 sse4_2 <- sse4_2Enabled
1659 dflags <- getDynFlags
1660 let platform = targetPlatform dflags
1661 if sse4_2
1662 then do code_src <- getAnyReg src
1663 src_r <- getNewRegNat size
1664 return $ code_src src_r `appOL`
1665 (if width == W8 then
1666 -- The POPCNT instruction doesn't take an r/m8 operand
1667 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1668 unitOL (POPCNT II16 (OpReg src_r)
1669 (getRegisterReg platform False (CmmLocal dst)))
1670 else
1671 unitOL (POPCNT size (OpReg src_r)
1672 (getRegisterReg platform False (CmmLocal dst))))
1673 else do
1674 targetExpr <- cmmMakeDynamicReference dflags addImportNat
1675 CallReference lbl
1676 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1677 [NoHint] [NoHint]
1678 CmmMayReturn)
1679 genCCall is32Bit target dest_regs args
1680 where
1681 size = intSize width
1682 lbl = mkCmmCodeLabel primPackageId (fsLit (popCntLabel width))
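-- Schematically (a sketch, AT&T-style mnemonics), the W8 case emits
--
-- > movzbl %src, %src -- zero-extend first: POPCNT has no r/m8 form
-- > popcntw %src16, %dst16
--
-- while W16/W32/W64 use POPCNT at their own width directly.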
1683
1684 genCCall is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
1685 dflags <- getDynFlags
1686 targetExpr <- cmmMakeDynamicReference dflags addImportNat
1687 CallReference lbl
1688 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1689 [NoHint] [NoHint]
1690 CmmMayReturn)
1691 genCCall is32Bit target dest_regs args
1692 where
1693 lbl = mkCmmCodeLabel primPackageId (fsLit (word2FloatLabel width))
1694
1695 genCCall is32Bit target dest_regs args
1696 | is32Bit = genCCall32 target dest_regs args
1697 | otherwise = genCCall64 target dest_regs args
1698
1699 genCCall32 :: ForeignTarget -- function to call
1700 -> [CmmFormal] -- where to put the result
1701 -> [CmmActual] -- arguments (of mixed type)
1702 -> NatM InstrBlock
1703 genCCall32 target dest_regs args = do
1704 dflags <- getDynFlags
1705 let platform = targetPlatform dflags
1706 case (target, dest_regs) of
1707 -- void return type prim op
1708 (PrimTarget op, []) ->
1709 outOfLineCmmOp op Nothing args
1710 -- we only cope with a single result for foreign calls
1711 (PrimTarget op, [r]) -> do
1712 l1 <- getNewLabelNat
1713 l2 <- getNewLabelNat
1714 sse2 <- sse2Enabled
1715 if sse2
1716 then
1717 outOfLineCmmOp op (Just r) args
1718 else case op of
1719 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
1720 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
1721
1722 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
1723 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
1724
1725 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
1726 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
1727
1728 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
1729 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
1730
1731 _other_op -> outOfLineCmmOp op (Just r) args
1732
1733 where
1734 actuallyInlineFloatOp instr size [x]
1735 = do res <- trivialUFCode size (instr size) x
1736 any <- anyReg res
1737 return (any (getRegisterReg platform False (CmmLocal r)))
1738
1739 actuallyInlineFloatOp _ _ args
1740 = panic $ "genCCall32.actuallyInlineFloatOp: bad number of arguments! ("
1741 ++ show (length args) ++ ")"
1742
1743 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
1744 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
1745 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
1746 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
1747 case args of
1748 [arg_x, arg_y] ->
1749 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
1750 lCode <- getAnyReg (CmmMachOp (MO_Add width) [arg_x, arg_y])
1751 let size = intSize width
1752 reg_l = getRegisterReg platform True (CmmLocal res_l)
1753 reg_h = getRegisterReg platform True (CmmLocal res_h)
1754 code = hCode reg_h `appOL`
1755 lCode reg_l `snocOL`
1756 ADC size (OpImm (ImmInteger 0)) (OpReg reg_h)
1757 return code
1758 _ -> panic "genCCall32: Wrong number of arguments/results for add2"
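-- A worked example (illustrative): at W32, adding 0xFFFFFFFF and 1
-- leaves res_l = 0 with the carry flag set; the trailing
-- ADC $0, res_h then turns the zero loaded into res_h into a 1,
-- which is exactly the high word of the 33-bit sum.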
1759 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
1760 case args of
1761 [arg_x, arg_y] ->
1762 do (y_reg, y_code) <- getRegOrMem arg_y
1763 x_code <- getAnyReg arg_x
1764 let size = intSize width
1765 reg_h = getRegisterReg platform True (CmmLocal res_h)
1766 reg_l = getRegisterReg platform True (CmmLocal res_l)
1767 code = y_code `appOL`
1768 x_code rax `appOL`
1769 toOL [MUL2 size y_reg,
1770 MOV size (OpReg rdx) (OpReg reg_h),
1771 MOV size (OpReg rax) (OpReg reg_l)]
1772 return code
1773 _ -> panic "genCCall32: Wrong number of arguments/results for mul2"
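-- A worked example (illustrative): for W32, MUL2 multiplies eax by the
-- operand and splits the double-width product across edx:eax, so
-- multiplying 0xFFFFFFFF by 2 yields reg_h = 1 (from edx) and
-- reg_l = 0xFFFFFFFE (from eax).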
1774
1775 _ -> genCCall32' dflags target dest_regs args
1776
1777 where divOp1 platform signed width results [arg_x, arg_y]
1778 = divOp platform signed width results Nothing arg_x arg_y
1779 divOp1 _ _ _ _ _
1780 = panic "genCCall32: Wrong number of arguments for divOp1"
1781 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
1782 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
1783 divOp2 _ _ _ _ _
1784 = panic "genCCall32: Wrong number of arguments for divOp2"
1785 divOp platform signed width [res_q, res_r]
1786 m_arg_x_high arg_x_low arg_y
1787 = do let size = intSize width
1788 reg_q = getRegisterReg platform True (CmmLocal res_q)
1789 reg_r = getRegisterReg platform True (CmmLocal res_r)
1790 widen | signed = CLTD size
1791 | otherwise = XOR size (OpReg rdx) (OpReg rdx)
1792 instr | signed = IDIV
1793 | otherwise = DIV
1794 (y_reg, y_code) <- getRegOrMem arg_y
1795 x_low_code <- getAnyReg arg_x_low
1796 x_high_code <- case m_arg_x_high of
1797 Just arg_x_high ->
1798 getAnyReg arg_x_high
1799 Nothing ->
1800 return $ const $ unitOL widen
1801 return $ y_code `appOL`
1802 x_low_code rax `appOL`
1803 x_high_code rdx `appOL`
1804 toOL [instr size y_reg,
1805 MOV size (OpReg rax) (OpReg reg_q),
1806 MOV size (OpReg rdx) (OpReg reg_r)]
1807 divOp _ _ _ _ _ _ _
1808 = panic "genCCall32: Wrong number of results for divOp"
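-- For the division idiom above: x86 divides the double-width value in
-- edx:eax (rdx:rax at W64) by the operand, leaving the quotient in eax
-- and the remainder in edx. `widen` fills the high half first, by
-- sign-extension (CLTD) for signed division or by zeroing (XOR) for
-- unsigned, unless QuotRem2 supplies an explicit high word instead.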
1809
1810 genCCall32' :: DynFlags
1811 -> ForeignTarget -- function to call
1812 -> [CmmFormal] -- where to put the result
1813 -> [CmmActual] -- arguments (of mixed type)
1814 -> NatM InstrBlock
1815 genCCall32' dflags target dest_regs args = do
1816 let
1817 -- Align stack to 16n for calls, assuming a starting stack
1818 -- alignment of 16n - word_size on procedure entry, which we
1819 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
1820 sizes = map (arg_size . cmmExprType dflags) (reverse args)
1821 raw_arg_size = sum sizes + wORD_SIZE dflags
1822 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
1823 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
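-- A worked example of the padding arithmetic (illustrative): with
-- word size 4 and two word-sized args, sizes = [4,4], so
-- raw_arg_size = 8 + 4 = 12, arg_pad_size = 16 - 12 = 4 and
-- tot_arg_size = 12; the padding, the pushed args and the return
-- address pushed by CALL together keep the 16-byte alignment.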
1824 delta0 <- getDeltaNat
1825 setDeltaNat (delta0 - arg_pad_size)
1826
1827 use_sse2 <- sse2Enabled
1828 push_codes <- mapM (push_arg use_sse2) (reverse args)
1829 delta <- getDeltaNat
1830 MASSERT (delta == delta0 - tot_arg_size)
1831
1832 -- deal with static vs dynamic call targets
1833 (callinsns,cconv) <-
1834 case target of
1835 ForeignTarget (CmmLit (CmmLabel lbl)) conv
1836 -> -- ToDo: stdcall arg sizes
1837 return (unitOL (CALL (Left fn_imm) []), conv)
1838 where fn_imm = ImmCLbl lbl
1839 ForeignTarget expr conv
1840 -> do { (dyn_r, dyn_c) <- getSomeReg expr
1841 ; ASSERT( isWord32 (cmmExprType dflags expr) )
1842 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
1843 PrimTarget _
1844 -> panic $ "genCCall: Can't handle PrimTarget call type here; this is "
1845 ++ "probably because there are too many return values."
1846
1847 let push_code
1848 | arg_pad_size /= 0
1849 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
1850 DELTA (delta0 - arg_pad_size)]
1851 `appOL` concatOL push_codes
1852 | otherwise
1853 = concatOL push_codes
1854
1855 -- Deallocate parameters after call for ccall;
1856 -- but not for stdcall (callee does it)
1857 --
1858 -- We have to pop any stack padding we added
1859 -- even if we are doing stdcall, though (#5052)
1860 pop_size
1861 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
1862 | otherwise = tot_arg_size
1863
1864 call = callinsns `appOL`
1865 toOL (
1866 (if pop_size==0 then [] else
1867 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
1868 ++
1869 [DELTA delta0]
1870 )
1871 setDeltaNat delta0
1872
1873 dflags <- getDynFlags
1874 let platform = targetPlatform dflags
1875
1876 let
1877 -- assign the results, if necessary
1878 assign_code [] = nilOL
1879 assign_code [dest]
1880 | isFloatType ty =
1881 if use_sse2
1882 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
1883 EAIndexNone
1884 (ImmInt 0)
1885 sz = floatSize w
1886 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
1887 DELTA (delta0 - b),
1888 GST sz fake0 tmp_amode,
1889 MOV sz (OpAddr tmp_amode) (OpReg r_dest),
1890 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
1891 DELTA delta0]
1892 else unitOL (GMOV fake0 r_dest)
1893 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
1894 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
1895 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
1896 where
1897 ty = localRegType dest
1898 w = typeWidth ty
1899 b = widthInBytes w
1900 r_dest_hi = getHiVRegFromLo r_dest
1901 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
1902 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
1903
1904 return (push_code `appOL`
1905 call `appOL`
1906 assign_code dest_regs)
1907
1908 where
1909 arg_size :: CmmType -> Int -- Width in bytes
1910 arg_size ty = widthInBytes (typeWidth ty)
1911
1912 roundTo a x | x `mod` a == 0 = x
1913 | otherwise = x + a - (x `mod` a)
1914
1915 push_arg :: Bool -> CmmActual {-current argument-}
1916 -> NatM InstrBlock -- code
1917
1918 push_arg use_sse2 arg -- we don't need the hints on x86
1919 | isWord64 arg_ty = do
1920 ChildCode64 code r_lo <- iselExpr64 arg
1921 delta <- getDeltaNat
1922 setDeltaNat (delta - 8)
1923 let
1924 r_hi = getHiVRegFromLo r_lo
1925 return ( code `appOL`
1926 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
1927 PUSH II32 (OpReg r_lo), DELTA (delta - 8)]
1929 )
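-- Note: pushing r_hi first and r_lo second leaves the low word at
-- the lower address, the little-endian layout a callee expects for
-- a 64-bit argument passed on the stack.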
1930
1931 | isFloatType arg_ty = do
1932 (reg, code) <- getSomeReg arg
1933 delta <- getDeltaNat
1934 setDeltaNat (delta-size)
1935 return (code `appOL`
1936 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
1937 DELTA (delta-size),
1938 let addr = AddrBaseIndex (EABaseReg esp)
1939 EAIndexNone
1940 (ImmInt 0)
1941 size = floatSize (typeWidth arg_ty)
1942 in
1943 if use_sse2
1944 then MOV size (OpReg reg) (OpAddr addr)
1945 else GST size reg addr
1946 ]
1947 )
1948
1949 | otherwise = do
1950 (operand, code) <- getOperand arg
1951 delta <- getDeltaNat
1952 setDeltaNat (delta-size)
1953 return (code `snocOL`
1954 PUSH II32 operand `snocOL`
1955 DELTA (delta-size))
1956
1957 where
1958 arg_ty = cmmExprType dflags arg
1959 size = arg_size arg_ty -- Byte size
1960
1961 genCCall64 :: ForeignTarget -- function to call
1962 -> [CmmFormal] -- where to put the result
1963 -> [CmmActual] -- arguments (of mixed type)
1964 -> NatM InstrBlock
1965 genCCall64 target dest_regs args = do
1966 dflags <- getDynFlags
1967 let platform = targetPlatform dflags
1968 case (target, dest_regs) of
1969
1970 (PrimTarget op, []) ->
1971 -- void return type prim op
1972 outOfLineCmmOp op Nothing args
1973
1974 (PrimTarget op, [res]) ->
1975 -- we only cope with a single result for foreign calls
1976 outOfLineCmmOp op (Just res) args
1977
1978 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
1979 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
1980 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
1981 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
1982 case args of
1983 [arg_x, arg_y] ->
1984 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
1985 lCode <- getAnyReg (CmmMachOp (MO_Add width) [arg_x, arg_y])
1986 let size = intSize width
1987 reg_l = getRegisterReg platform True (CmmLocal res_l)
1988 reg_h = getRegisterReg platform True (CmmLocal res_h)
1989 code = hCode reg_h `appOL`
1990 lCode reg_l `snocOL`
1991 ADC size (OpImm (ImmInteger 0)) (OpReg reg_h)
1992 return code
1993 _ -> panic "genCCall64: Wrong number of arguments/results for add2"
1994 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
1995 case args of
1996 [arg_x, arg_y] ->
1997 do (y_reg, y_code) <- getRegOrMem arg_y
1998 x_code <- getAnyReg arg_x
1999 let size = intSize width
2000 reg_h = getRegisterReg platform True (CmmLocal res_h)
2001 reg_l = getRegisterReg platform True (CmmLocal res_l)
2002 code = y_code `appOL`
2003 x_code rax `appOL`
2004 toOL [MUL2 size y_reg,
2005 MOV size (OpReg rdx) (OpReg reg_h),
2006 MOV size (OpReg rax) (OpReg reg_l)]
2007 return code
2008 _ -> panic "genCCall64: Wrong number of arguments/results for mul2"
2009
2010 _ ->
2011 do dflags <- getDynFlags
2012 genCCall64' dflags target dest_regs args
2013
2014 where divOp1 platform signed width results [arg_x, arg_y]
2015 = divOp platform signed width results Nothing arg_x arg_y
2016 divOp1 _ _ _ _ _
2017 = panic "genCCall64: Wrong number of arguments for divOp1"
2018 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
2019 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
2020 divOp2 _ _ _ _ _
2021 = panic "genCCall64: Wrong number of arguments for divOp2"
2022 divOp platform signed width [res_q, res_r]
2023 m_arg_x_high arg_x_low arg_y
2024 = do let size = intSize width
2025 reg_q = getRegisterReg platform True (CmmLocal res_q)
2026 reg_r = getRegisterReg platform True (CmmLocal res_r)
2027 widen | signed = CLTD size
2028 | otherwise = XOR size (OpReg rdx) (OpReg rdx)
2029 instr | signed = IDIV
2030 | otherwise = DIV
2031 (y_reg, y_code) <- getRegOrMem arg_y
2032 x_low_code <- getAnyReg arg_x_low
2033 x_high_code <- case m_arg_x_high of
2034 Just arg_x_high -> getAnyReg arg_x_high
2035 Nothing -> return $ const $ unitOL widen
2036 return $ y_code `appOL`
2037 x_low_code rax `appOL`
2038 x_high_code rdx `appOL`
2039 toOL [instr size y_reg,
2040 MOV size (OpReg rax) (OpReg reg_q),
2041 MOV size (OpReg rdx) (OpReg reg_r)]
2042 divOp _ _ _ _ _ _ _
2043 = panic "genCCall64: Wrong number of results for divOp"
2044
2045 genCCall64' :: DynFlags
2046 -> ForeignTarget -- function to call
2047 -> [CmmFormal] -- where to put the result
2048 -> [CmmActual] -- arguments (of mixed type)
2049 -> NatM InstrBlock
2050 genCCall64' dflags target dest_regs args = do
2051 -- load up the register arguments
2052 (stack_args, int_regs_used, fp_regs_used, load_args_code)
2053 <-
2054 if platformOS platform == OSMinGW32
2055 then load_args_win args [] [] (allArgRegs platform) nilOL
2056 else do (stack_args, aregs, fregs, load_args_code)
2057 <- load_args args (allIntArgRegs platform) (allFPArgRegs platform) nilOL
2058 let fp_regs_used = reverse (drop (length fregs) (reverse (allFPArgRegs platform)))
2059 int_regs_used = reverse (drop (length aregs) (reverse (allIntArgRegs platform)))
2060 return (stack_args, int_regs_used, fp_regs_used, load_args_code)
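-- For illustration: on ELF x86_64 allIntArgRegs is the six registers
-- rdi, rsi, rdx, rcx, r8, r9 (an assumption of this example; see
-- X86.Regs), so if two of them come back unused, the
-- reverse/drop/reverse dance above yields the first four as
-- int_regs_used.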
2061
2062 let
2063 arg_regs_used = int_regs_used ++ fp_regs_used
2064 arg_regs = [eax] ++ arg_regs_used
2065 -- for annotating the call instruction
2066 sse_regs = length fp_regs_used
2067 arg_stack_slots = if platformOS platform == OSMinGW32
2068 then length stack_args + length (allArgRegs platform)
2069 else length stack_args
2070 tot_arg_size = arg_size * arg_stack_slots
2071
2072
2073 -- Align stack to 16n for calls, assuming a starting stack
2074 -- alignment of 16n - word_size on procedure entry. Which we
2075 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2076 (real_size, adjust_rsp) <-
2077 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2078 then return (tot_arg_size, nilOL)
2079 else do -- we need to adjust...
2080 delta <- getDeltaNat
2081 setDeltaNat (delta - wORD_SIZE dflags)
2082 return (tot_arg_size + wORD_SIZE dflags, toOL [
2083 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2084 DELTA (delta - wORD_SIZE dflags) ])
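-- A worked example (illustrative): with 8-byte words and one stack
-- argument, tot_arg_size + wORD_SIZE = 16 and no adjustment is made;
-- with no stack arguments, 0 + 8 = 8 is misaligned, so we pre-drop
-- rsp by one word and the CALL below again leaves the callee's frame
-- 16-byte aligned.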
2085
2086 -- push the stack args, right to left
2087 push_code <- push_args (reverse stack_args) nilOL
2088 -- On Win64, we also have to leave stack space for the arguments
2089 -- that we are passing in registers
2090 lss_code <- if platformOS platform == OSMinGW32
2091 then leaveStackSpace (length (allArgRegs platform))
2092 else return nilOL
2093 delta <- getDeltaNat
2094
2095 -- deal with static vs dynamic call targets
2096 (callinsns,_cconv) <-
2097 case target of
2098 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2099 -> -- ToDo: stdcall arg sizes
2100 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2101 where fn_imm = ImmCLbl lbl
2102 ForeignTarget expr conv
2103 -> do (dyn_r, dyn_c) <- getSomeReg expr
2104 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2105 PrimTarget _
2106 -> panic $ "genCCall: Can't handle PrimTarget call type here; this is "
2107 ++ "probably because there are too many return values."
2108
2109 let
2110 -- The x86_64 ABI requires us to set %al to the number of SSE2
2111 -- registers that contain arguments, if the called routine
2112 -- is a varargs function. We don't know whether it's a
2113 -- varargs function or not, so we have to assume it is.
2114 --
2115 -- It's not safe to omit this assignment, even if the number
2116 -- of SSE2 regs in use is zero. If %al is larger than 8
2117 -- on entry to a varargs function, seg faults ensue.
2118 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
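-- For example (illustrative): a call passing two doubles in
-- xmm0/xmm1 is preceded by `movl $2, %eax`, telling a potential
-- varargs callee that two vector registers carry arguments.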
2119
2120 let call = callinsns `appOL`
2121 toOL (
2122 -- Deallocate parameters after call for ccall;
2123 -- stdcall has callee do it, but is not supported on
2124 -- x86_64 target (see #3336)
2125 (if real_size==0 then [] else
2126 [ADD (intSize (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2127 ++
2128 [DELTA (delta + real_size)]
2129 )
2130 setDeltaNat (delta + real_size)
2131
2132 let
2133 -- assign the results, if necessary
2134 assign_code [] = nilOL
2135 assign_code [dest] =
2136 case typeWidth rep of
2137 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
2138 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
2139 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
2140 where
2141 rep = localRegType dest
2142 r_dest = getRegisterReg platform True (CmmLocal dest)
2143 assign_code _many = panic "genCCall.assign_code many"
2144
2145 return (load_args_code `appOL`
2146 adjust_rsp `appOL`
2147 push_code `appOL`
2148 lss_code `appOL`
2149 assign_eax sse_regs `appOL`
2150 call `appOL`
2151 assign_code dest_regs)
2152
2153 where platform = targetPlatform dflags
2154 arg_size = 8 -- always, at the moment
2155
2156 load_args :: [CmmExpr]
2157 -> [Reg] -- int regs avail for args
2158 -> [Reg] -- FP regs avail for args
2159 -> InstrBlock
2160 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2161 load_args args [] [] code = return (args, [], [], code)
2162 -- no more regs to use
2163 load_args [] aregs fregs code = return ([], aregs, fregs, code)
2164 -- no more args to push
2165 load_args (arg : rest) aregs fregs code
2166 | isFloatType arg_rep =
2167 case fregs of
2168 [] -> push_this_arg
2169 (r:rs) -> do
2170 arg_code <- getAnyReg arg
2171 load_args rest aregs rs (code `appOL` arg_code r)
2172 | otherwise =
2173 case aregs of
2174 [] -> push_this_arg
2175 (r:rs) -> do
2176 arg_code <- getAnyReg arg
2177 load_args rest rs fregs (code `appOL` arg_code r)
2178 where
2179 arg_rep = cmmExprType dflags arg
2180
2181 push_this_arg = do
2182 (args',ars,frs,code') <- load_args rest aregs fregs code
2183 return (arg:args', ars, frs, code')
2184
2185 load_args_win :: [CmmExpr]
2186 -> [Reg] -- used int regs
2187 -> [Reg] -- used FP regs
2188 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2189 -> InstrBlock
2190 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2191 load_args_win args usedInt usedFP [] code
2192 = return (args, usedInt, usedFP, code)
2193 -- no more regs to use
2194 load_args_win [] usedInt usedFP _ code
2195 = return ([], usedInt, usedFP, code)
2196 -- no more args to push
2197 load_args_win (arg : rest) usedInt usedFP
2198 ((ireg, freg) : regs) code
2199 | isFloatType arg_rep = do
2200 arg_code <- getAnyReg arg
2201 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2202 (code `appOL`
2203 arg_code freg `snocOL`
2204 -- If we are calling a varargs function
2205 -- then we need to define ireg as well
2206 -- as freg
2207 MOV II64 (OpReg freg) (OpReg ireg))
2208 | otherwise = do
2209 arg_code <- getAnyReg arg
2210 load_args_win rest (ireg : usedInt) usedFP regs
2211 (code `appOL` arg_code ireg)
2212 where
2213 arg_rep = cmmExprType dflags arg
2214
2215 push_args [] code = return code
2216 push_args (arg:rest) code
2217 | isFloatType arg_rep = do
2218 (arg_reg, arg_code) <- getSomeReg arg
2219 delta <- getDeltaNat
2220 setDeltaNat (delta-arg_size)
2221 let code' = code `appOL` arg_code `appOL` toOL [
2222 SUB (intSize (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
2223 DELTA (delta-arg_size),
2224 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2225 push_args rest code'
2226
2227 | otherwise = do
2228 -- we only ever generate word-sized function arguments. Promotion
2229 -- has already happened: our Int8# type is kept sign-extended
2230 -- in an Int#, for example.
2231 ASSERT(width == W64) return ()
2232 (arg_op, arg_code) <- getOperand arg
2233 delta <- getDeltaNat
2234 setDeltaNat (delta-arg_size)
2235 let code' = code `appOL` arg_code `appOL` toOL [
2236 PUSH II64 arg_op,
2237 DELTA (delta-arg_size)]
2238 push_args rest code'
2239 where
2240 arg_rep = cmmExprType dflags arg
2241 width = typeWidth arg_rep
2242
2243 leaveStackSpace n = do
2244 delta <- getDeltaNat
2245 setDeltaNat (delta - n * arg_size)
2246 return $ toOL [
2247 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2248 DELTA (delta - n * arg_size)]
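-- In the Win64 case this reserves the ABI's "shadow space": four
-- 8-byte slots, one per register argument, that the callee may use
-- to spill the arguments it received in registers.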
2249
2250 -- | We're willing to inline and unroll memcpy/memset calls that touch
2251 -- at most this many bytes. This threshold is the same as the one
2252 -- used by GCC and LLVM.
2253 maxInlineSizeThreshold :: Integer
2254 maxInlineSizeThreshold = 128
2255
2256 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2257 outOfLineCmmOp mop res args
2258 = do
2259 dflags <- getDynFlags
2260 targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
2261 let target = ForeignTarget targetExpr
2262 (ForeignConvention CCallConv [] [] CmmMayReturn)
2263
2264 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args')
2265 where
2266 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2267 -- TODO: Why is this ok? Under Linux this code will be in libm.so
2268 -- Is it because they're really implemented as a primitive instruction by the assembler? -- BL 2009/12/31
2269 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2270
2271 args' = case mop of
2272 MO_Memcpy -> init args
2273 MO_Memset -> init args
2274 MO_Memmove -> init args
2275 _ -> args
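-- (For memcpy/memset/memmove, `init` drops the trailing alignment
-- argument that the Cmm call site carries, cf. the inline cases
-- above; the C prototypes do not take one.)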
2276
2277 fn = case mop of
2278 MO_F32_Sqrt -> fsLit "sqrtf"
2279 MO_F32_Sin -> fsLit "sinf"
2280 MO_F32_Cos -> fsLit "cosf"
2281 MO_F32_Tan -> fsLit "tanf"
2282 MO_F32_Exp -> fsLit "expf"
2283 MO_F32_Log -> fsLit "logf"
2284
2285 MO_F32_Asin -> fsLit "asinf"
2286 MO_F32_Acos -> fsLit "acosf"
2287 MO_F32_Atan -> fsLit "atanf"
2288
2289 MO_F32_Sinh -> fsLit "sinhf"
2290 MO_F32_Cosh -> fsLit "coshf"
2291 MO_F32_Tanh -> fsLit "tanhf"
2292 MO_F32_Pwr -> fsLit "powf"
2293
2294 MO_F64_Sqrt -> fsLit "sqrt"
2295 MO_F64_Sin -> fsLit "sin"
2296 MO_F64_Cos -> fsLit "cos"
2297 MO_F64_Tan -> fsLit "tan"
2298 MO_F64_Exp -> fsLit "exp"
2299 MO_F64_Log -> fsLit "log"
2300
2301 MO_F64_Asin -> fsLit "asin"
2302 MO_F64_Acos -> fsLit "acos"
2303 MO_F64_Atan -> fsLit "atan"
2304
2305 MO_F64_Sinh -> fsLit "sinh"
2306 MO_F64_Cosh -> fsLit "cosh"
2307 MO_F64_Tanh -> fsLit "tanh"
2308 MO_F64_Pwr -> fsLit "pow"
2309
2310 MO_Memcpy -> fsLit "memcpy"
2311 MO_Memset -> fsLit "memset"
2312 MO_Memmove -> fsLit "memmove"
2313
2314 MO_PopCnt _ -> fsLit "popcnt"
2315
2316 MO_UF_Conv _ -> unsupported
2317
2318 MO_S_QuotRem {} -> unsupported
2319 MO_U_QuotRem {} -> unsupported
2320 MO_U_QuotRem2 {} -> unsupported
2321 MO_Add2 {} -> unsupported
2322 MO_U_Mul2 {} -> unsupported
2323 MO_WriteBarrier -> unsupported
2324 MO_Touch -> unsupported
2325 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2326 ++ " not supported here")
2327
2328 -- -----------------------------------------------------------------------------
2329 -- Generating a table-branch
2330
2331 genSwitch :: DynFlags -> CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
2332
2333 genSwitch dflags expr ids
2334 | gopt Opt_PIC dflags
2335 = do
2336 (reg,e_code) <- getSomeReg expr
2337 lbl <- getNewLabelNat
2338 dflags <- getDynFlags
2339 dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl
2340 (tableReg,t_code) <- getSomeReg $ dynRef
2341 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2342 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2343
2344 return $ if target32Bit (targetPlatform dflags)
2345 then e_code `appOL` t_code `appOL` toOL [
2346 ADD (intSize (wordWidth dflags)) op (OpReg tableReg),
2347 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2348 ]
2349 else case platformOS (targetPlatform dflags) of
2350 OSDarwin ->
2351 -- on Mac OS X/x86_64, put the jump table
2352 -- in the text section to work around a
2353 -- limitation of the linker.
2354 -- ld64 is unable to handle the relocations for
2355 -- .quad L1 - L0
2356 -- if L0 is not preceded by a non-anonymous
2357 -- label in its section.
2358 e_code `appOL` t_code `appOL` toOL [
2359 ADD (intSize (wordWidth dflags)) op (OpReg tableReg),
2360 JMP_TBL (OpReg tableReg) ids Text lbl
2361 ]
2362 _ ->
2363 -- HACK: On x86_64 binutils<2.17 is only able
2364 -- to generate PC32 relocations, hence we only
2365 -- get 32-bit offsets in the jump table. As
2366 -- these offsets are always negative we need
2367 -- to properly sign extend them to 64-bit.
2368 -- This hack should be removed in conjunction
2369 -- with the hack in PprMach.hs/pprDataItem
2370 -- once binutils 2.17 is standard.
2371 e_code `appOL` t_code `appOL` toOL [
2372 MOVSxL II32 op (OpReg reg),
2373 ADD (intSize (wordWidth dflags)) (OpReg reg) (OpReg tableReg),
2374 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2375 ]
2376 | otherwise
2377 = do
2378 (reg,e_code) <- getSomeReg expr
2379 lbl <- getNewLabelNat
2380 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2381 code = e_code `appOL` toOL [
2382 JMP_TBL op ids ReadOnlyData lbl
2383 ]
2384 return code
2385
2386 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2387 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2388 = Just (createJumpTable dflags ids section lbl)
2389 generateJumpTableForInstr _ _ = Nothing
2390
2391 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2392 -> GenCmmDecl (Alignment, CmmStatics) h g
2393 createJumpTable dflags ids section lbl
2394 = let jumpTable
2395 | gopt Opt_PIC dflags =
2396 let jumpTableEntryRel Nothing
2397 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2398 jumpTableEntryRel (Just blockid)
2399 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2400 where blockLabel = mkAsmTempLabel (getUnique blockid)
2401 in map jumpTableEntryRel ids
2402 | otherwise = map (jumpTableEntry dflags) ids
2403 in CmmData section (1, Statics lbl jumpTable)
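-- To illustrate the two layouts: a PIC table stores label
-- differences, roughly
--
-- > .quad .Lblock1 - .Ltable
-- > .quad .Lblock2 - .Ltable
--
-- (a sketch only; offsets are relative to the table's own label),
-- whereas the non-PIC table holds absolute block addresses.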
2404
2405 -- -----------------------------------------------------------------------------
2406 -- 'condIntReg' and 'condFltReg': condition codes into registers
2407
2408 -- Turn those condition codes into integers now (when they appear on
2409 -- the right hand side of an assignment).
2410 --
2411 -- (If applicable) Do not fill the delay slots here; you will confuse the
2412 -- register allocator.
2413
2414 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2415
2416 condIntReg cond x y = do
2417 CondCode _ cond cond_code <- condIntCode cond x y
2418 tmp <- getNewRegNat II8
2419 let
2420 code dst = cond_code `appOL` toOL [
2421 SETCC cond (OpReg tmp),
2422 MOVZxL II8 (OpReg tmp) (OpReg dst)
2423 ]
2424 return (Any II32 code)
2425
2426
2427
2428 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2429 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2430 where
2431 condFltReg_x87 = do
2432 CondCode _ cond cond_code <- condFltCode cond x y
2433 tmp <- getNewRegNat II8
2434 let
2435 code dst = cond_code `appOL` toOL [
2436 SETCC cond (OpReg tmp),
2437 MOVZxL II8 (OpReg tmp) (OpReg dst)
2438 ]
2439 return (Any II32 code)
2440
2441 condFltReg_sse2 = do
2442 CondCode _ cond cond_code <- condFltCode cond x y
2443 tmp1 <- getNewRegNat (archWordSize is32Bit)
2444 tmp2 <- getNewRegNat (archWordSize is32Bit)
2445 let
2446 -- We have to worry about unordered operands (eg. comparisons
2447 -- against NaN). If the operands are unordered, the comparison
2448 -- sets the parity flag, carry flag and zero flag.
2449 -- All comparisons are supposed to return false for unordered
2450 -- operands except for !=, which returns true.
2451 --
2452 -- Optimisation: we don't have to test the parity flag if we
2453 -- know the test has already excluded the unordered case: eg >
2454 -- and >= test for a zero carry flag, which can only occur for
2455 -- ordered operands.
2456 --
2457 -- ToDo: by reversing comparisons we could avoid testing the
2458 -- parity flag in more cases.
2459
2460 code dst =
2461 cond_code `appOL`
2462 (case cond of
2463 NE -> or_unordered dst
2464 GU -> plain_test dst
2465 GEU -> plain_test dst
2466 _ -> and_ordered dst)
2467
2468 plain_test dst = toOL [
2469 SETCC cond (OpReg tmp1),
2470 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2471 ]
2472 or_unordered dst = toOL [
2473 SETCC cond (OpReg tmp1),
2474 SETCC PARITY (OpReg tmp2),
2475 OR II8 (OpReg tmp1) (OpReg tmp2),
2476 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2477 ]
2478 and_ordered dst = toOL [
2479 SETCC cond (OpReg tmp1),
2480 SETCC NOTPARITY (OpReg tmp2),
2481 AND II8 (OpReg tmp1) (OpReg tmp2),
2482 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2483 ]
2484 return (Any II32 code)
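-- A concrete case (illustrative): for x /= y with x a NaN, the
-- comparison sets ZF, PF and CF, so SETNE alone would produce 0;
-- OR-ing in SETP (parity flag = unordered) corrects the result to
-- the required 1.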
2485
2486
2487 -- -----------------------------------------------------------------------------
2488 -- 'trivial*Code': deal with trivial instructions
2489
2490 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2491 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2492 -- Only look for constants on the right hand side, because that's
2493 -- where the generic optimizer will have put them.
2494
2495 -- Similarly, for unary instructions, we don't have to worry about
2496 -- matching an StInt as the argument, because genericOpt will already
2497 -- have handled the constant-folding.
2498
2499
2500 {-
2501 The Rules of the Game are:
2502
2503 * You cannot assume anything about the destination register dst;
2504 it may be anything, including a fixed reg.
2505
2506 * You may compute an operand into a fixed reg, but you may not
2507 subsequently change the contents of that fixed reg. If you
2508 want to do so, first copy the value either to a temporary
2509 or into dst. You are free to modify dst even if it happens
2510 to be a fixed reg -- that's not your problem.
2511
2512 * You cannot assume that a fixed reg will stay live over an
2513 arbitrary computation. The same applies to the dst reg.
2514
2515 * Temporary regs obtained from getNewRegNat are distinct from
2516 each other and from all other regs, and stay live over
2517 arbitrary computations.
2518
2519 --------------------
2520
2521 SDM's version of The Rules:
2522
2523 * If getRegister returns Any, that means it can generate correct
2524 code which places the result in any register, period. Even if that
2525 register happens to be read during the computation.
2526
2527 Corollary #1: this means that if you are generating code for an
2528 operation with two arbitrary operands, you cannot assign the result
2529 of the first operand into the destination register before computing
2530 the second operand. The second operand might require the old value
2531 of the destination register.
2532
2533 Corollary #2: A function might be able to generate more efficient
2534 code if it knows the destination register is a new temporary (and
2535 therefore not read by any of the sub-computations).
2536
2537 * If getRegister returns Any, then the code it generates may modify only:
2538 (a) fresh temporaries
2539 (b) the destination register
2540 (c) known registers (eg. %ecx is used by shifts)
2541 In particular, it may *not* modify global registers, unless the global
2542 register happens to be the destination register.
2543 -}
2544
2545 trivialCode :: Width -> (Operand -> Operand -> Instr)
2546 -> Maybe (Operand -> Operand -> Instr)
2547 -> CmmExpr -> CmmExpr -> NatM Register
2548 trivialCode width instr m a b
2549 = do is32Bit <- is32BitPlatform
2550 trivialCode' is32Bit width instr m a b
2551
2552 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
2553 -> Maybe (Operand -> Operand -> Instr)
2554 -> CmmExpr -> CmmExpr -> NatM Register
2555 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
2556 | is32BitLit is32Bit lit_a = do
2557 b_code <- getAnyReg b
2558 let
2559 code dst
2560 = b_code dst `snocOL`
2561 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2562 return (Any (intSize width) code)
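-- For example (illustrative): an add whose left operand is a small
-- literal, schematically MO_Add [CmmLit 5, x], lands in this clause
-- with ADD passed as revinstr: x is computed straight into dst and a
-- single `ADD $5, dst` follows. Callers supply Just only for
-- operations where the operands may be swapped like this.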
2563
2564 trivialCode' _ width instr _ a b
2565 = genTrivialCode (intSize width) instr a b
2566
2567 -- This is re-used for floating pt instructions too.
2568 genTrivialCode :: Size -> (Operand -> Operand -> Instr)
2569 -> CmmExpr -> CmmExpr -> NatM Register
2570 genTrivialCode rep instr a b = do
2571 (b_op, b_code) <- getNonClobberedOperand b
2572 a_code <- getAnyReg a
2573 tmp <- getNewRegNat rep
2574 let
2575 -- We want the value of b to stay alive across the computation of a.
2576 -- But, we want to calculate a straight into the destination register,
2577 -- because the instruction only has two operands (dst := dst `op` src).
2578 -- The troublesome case is when the result of b is in the same register
2579 -- as the destination reg. In this case, we have to save b in a
2580 -- new temporary across the computation of a.
2581 code dst
2582 | dst `regClashesWithOp` b_op =
2583 b_code `appOL`
2584 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2585 a_code dst `snocOL`
2586 instr (OpReg tmp) (OpReg dst)
2587 | otherwise =
2588 b_code `appOL`
2589 a_code dst `snocOL`
2590 instr b_op (OpReg dst)
2591 return (Any rep code)
2592
2593 regClashesWithOp :: Reg -> Operand -> Bool
2594 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2595 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2596 _ `regClashesWithOp` _ = False
2597
2598 -----------
2599
2600 trivialUCode :: Size -> (Operand -> Instr)
2601 -> CmmExpr -> NatM Register
2602 trivialUCode rep instr x = do
2603 x_code <- getAnyReg x
2604 let
2605 code dst =
2606 x_code dst `snocOL`
2607 instr (OpReg dst)
2608 return (Any rep code)
2609
2610 -----------
2611
2612 trivialFCode_x87 :: (Size -> Reg -> Reg -> Reg -> Instr)
2613 -> CmmExpr -> CmmExpr -> NatM Register
2614 trivialFCode_x87 instr x y = do
2615 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2616 (y_reg, y_code) <- getSomeReg y
2617 let
2618 size = FF80 -- always, on x87
2619 code dst =
2620 x_code `appOL`
2621 y_code `snocOL`
2622 instr size x_reg y_reg dst
2623 return (Any size code)
2624
2625 trivialFCode_sse2 :: Width -> (Size -> Operand -> Operand -> Instr)
2626 -> CmmExpr -> CmmExpr -> NatM Register
2627 trivialFCode_sse2 pk instr x y
2628 = genTrivialCode size (instr size) x y
2629 where size = floatSize pk
2630
2631
2632 trivialUFCode :: Size -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2633 trivialUFCode size instr x = do
2634 (x_reg, x_code) <- getSomeReg x
2635 let
2636 code dst =
2637 x_code `snocOL`
2638 instr x_reg dst
2639 return (Any size code)
2640
2641
2642 --------------------------------------------------------------------------------
2643 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2644 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2645 where
2646 coerce_x87 = do
2647 (x_reg, x_code) <- getSomeReg x
2648 let
2649 opc = case to of W32 -> GITOF; W64 -> GITOD;
2650 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2651 ++ show n ++ ")"
2652 code dst = x_code `snocOL` opc x_reg dst
2653 -- ToDo: works for non-II32 reps?
2654 return (Any FF80 code)
2655
2656 coerce_sse2 = do
2657 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2658 let
2659 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2660 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2661 ++ show n ++ ")"
2662 code dst = x_code `snocOL` opc (intSize from) x_op dst
2663 return (Any (floatSize to) code)
2664 -- works even if the destination rep is <II32
2665
2666 --------------------------------------------------------------------------------
2667 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2668 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2669 where
2670 coerceFP2Int_x87 = do
2671 (x_reg, x_code) <- getSomeReg x
2672 let
2673 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2674 n -> panic $ "coerceFP2Int.x87: unhandled width ("
2675 ++ show n ++ ")"
2676 code dst = x_code `snocOL` opc x_reg dst
2677 -- ToDo: works for non-II32 reps?
2678 return (Any (intSize to) code)
2679
2680 coerceFP2Int_sse2 = do
2681 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2682 let
2683 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
2684 n -> panic $ "coerceFP2Int.sse: unhandled width ("
2685 ++ show n ++ ")"
2686 code dst = x_code `snocOL` opc (intSize to) x_op dst
2687 return (Any (intSize to) code)
2688 -- works even if the destination rep is <II32
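-- (Illustrative: CVTTSS2SI/CVTTSD2SI round toward zero, so e.g.
-- -1.5 converts to -1, which is the truncation behaviour the Cmm
-- float-to-int conversion wants.)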
2689
2690
2691 --------------------------------------------------------------------------------
2692 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2693 coerceFP2FP to x = do
2694 use_sse2 <- sse2Enabled
2695 (x_reg, x_code) <- getSomeReg x
2696 let
2697 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2698 n -> panic $ "coerceFP2FP: unhandled width ("
2699 ++ show n ++ ")"
2700 | otherwise = GDTOF
2701 code dst = x_code `snocOL` opc x_reg dst
2702 return (Any (if use_sse2 then floatSize to else FF80) code)
2703
2704 --------------------------------------------------------------------------------
2705
2706 sse2NegCode :: Width -> CmmExpr -> NatM Register
2707 sse2NegCode w x = do
2708 let sz = floatSize w
2709 x_code <- getAnyReg x
2710 -- This is how gcc does it, so it can't be that bad:
2711 let
2712 const | FF32 <- sz = CmmInt 0x80000000 W32
2713 | otherwise = CmmInt 0x8000000000000000 W64
2714 Amode amode amode_code <- memConstant (widthInBytes w) const
2715 tmp <- getNewRegNat sz
2716 let
2717 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2718 MOV sz (OpAddr amode) (OpReg tmp),
2719 XOR sz (OpReg tmp) (OpReg dst)
2720 ]
2721 --
2722 return (Any sz code)
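-- A worked example (illustrative): negating the double 1.0 XORs in
-- the sign-bit mask:
--
-- > 0x3FF0000000000000 `xor` 0x8000000000000000 == 0xBFF0000000000000
--
-- which is exactly the bit pattern of -1.0; no other bits change.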
2723
2724 isVecExpr :: CmmExpr -> Bool
2725 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
2726 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
2727 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
2728 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
2729 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
2730 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
2731 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
2732 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
2733 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
2734 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
2735 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
2736 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
2737 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
2738 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
2739 isVecExpr _ = False
2740
2741 needLlvm :: NatM a
2742 needLlvm =
2743 sorry $ unlines ["The native code generator does not support vector"
2744 ,"instructions. Please use -fllvm."]