{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Debug            ( DebugBlock(..) )
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primPackageKey )
import PprCmm           ()
import CmmUtils
import CmmSwitch
import Cmm
import Hoopl
import CLabel
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import FastBool         ( isFastTrue )
import DynFlags
import Util

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
    dflags <- getDynFlags
    return (isSse2Enabled dflags)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
    dflags <- getDynFlags
    return (isSse4_2Enabled dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
  b <- sse2Enabled
  if b then sse2 else x87

cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)


stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL
    CmmUnwind {}   -> return nilOL

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode size reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode size reg src
        where ty = cmmRegType dflags reg
              size = cmmTypeSize ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode size addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode size addr src
        where ty = cmmExprType dflags src
              size = cmmTypeSize ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg true false -> do b1 <- genCondJump true arg
                                       b2 <- genBranch false
                                       return (b1 `appOL` b2)
    CmmSwitch arg ids     -> do dflags <- getDynFlags
                                genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                dflags <- getDynFlags
                                genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      Rules of this simplified insn selection game are therefore that
--      the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg

-- | Registers passed up the tree.  If the Cmm code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Size Reg InstrBlock
        | Any   Size (Reg -> InstrBlock)


swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn)     size = Any   size codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let sz = cmmTypeSize pk in
    if isFloatSize sz && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u sz)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64

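-- An illustrative worked example (not from the original source): both
-- Integer encodings of -1 agree once truncated through Int64, so the
-- predicate gives the same answer for either form a CmmInt may take:
--
--      fromIntegral (-1 :: Integer)                   :: Int64  ==  -1
--      fromIntegral (18446744073709551615 :: Integer) :: Int64  ==  -1
--
-- whereas 0x80000000 (== 2147483648) is rejected, since it exceeds the
-- largest signed 32-bit value 0x7fffffff.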

-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordSize is32Bit)
               return (Fixed (archWordSize is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 sz = cmmTypeSize (cmmRegType dflags reg)
                 size | not use_sse2 && isFloatSize sz = FF80
                      | otherwise                      = sz
               --
               let platform = targetPlatform dflags
               return (Fixed size (getRegisterReg platform use_sse2 reg) nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
--   TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                       [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                       [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
        let
          size = floatSize w
          code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
            -- I don't know why there are xorpd, xorps, and pxor instructions.
            -- They all appear to do the same thing --SDM
        return (Any size code)

    | otherwise = do
        Amode addr code <- memConstant (widthInBytes w) lit
        loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
          let code dst = unitOL (GLDZ dst)
          in  return (Any FF80 code)

      | f == 1.0 ->
          let code dst = unitOL (GLD1 dst)
          in  return (Any FF80 code)

    _otherwise -> do
        Amode addr code <- memConstant (widthInBytes w) lit
        loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intSize w)
      MO_Not w   -> triv_ucode NOT  (intSize w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
        triv_ucode instr size = trivialUCode size (instr size) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Size -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intSize from) (OpReg reg) (OpReg dst)
            return (Any (intSize to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intSize new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Size -> CmmExpr -> NatM Register
        conversionNop new_size expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_size)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

      {- Shift ops on x86 constrain the shift amount: it must be an
         immediate, the %cl register, or the constant 1
         => trivialCode is not restrictive enough (sigh.)
      -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op = instr (intSize width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt = case rep of
                          W32 -> 31
                          W64 -> 63
                          _ -> panic "shift_amt"

             size = intSize rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 size (OpReg a_reg),   -- result in %edx:%eax
                           SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB size (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed size eax code)

    --------------------
    shift_code :: Width
               -> (Size -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               size = intSize width
               code dst
                  = x_code dst `snocOL`
                    instr size (OpImm (litToImm lit)) (OpReg dst)
          return (Any size code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let size = intSize width
        tmp <- getNewRegNat size
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr size (OpReg ecx) (OpReg tmp)
        return (Fixed size tmp code)

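    -- An illustrative sketch (not in the original source) of what Case2
    -- produces for a W32 shift by a non-immediate amount, before register
    -- allocation (AT&T syntax, virtual register names are made up):
    --
    --      <x_code>            -- x computed into the fresh tmp
    --      <y_code>            -- y computed into %ecx
    --      shll %cl, tmp       -- the only register form shl accepts
    --
    -- The reg->reg move hidden inside `y_code ecx` is the one the
    -- register allocator can usually coalesce away.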
    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
      where size = intSize rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            size = intSize width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA size
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any size code)

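    -- A hypothetical example (not from the original source): for `x + 16`
    -- at W64, add_int emits a single
    --
    --      leaq 16(%x_reg), %dst
    --
    -- which, unlike addq, leaves %x_reg and the condition flags intact,
    -- so x remains available for other uses.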
    ----------------------
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             size = intSize width
             widen | signed    = CLTD size
                   | otherwise = XOR size (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr size y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed size result code)

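    -- An illustrative sketch (not in the original source): signed
    -- `x quot y` at W32 comes out as
    --
    --      <y_code>            -- y into a register or memory operand
    --      <x_code>            -- x into %eax
    --      cltd                -- sign-extend %eax into %edx:%eax
    --      idivl <y_op>        -- quotient -> %eax, remainder -> %edx
    --
    -- while the unsigned variant zeroes %edx with an xor instead of cltd.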

getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any size code)
  where
    width = typeWidth pk
    size = intSize width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV size
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV size) mem
    return (Any size code)
  where size = intSize $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        size = intSize width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        size1 = if is32Bit then size
                           else case size of
                                II64 -> II32
                                _    -> size
        code dst
           = unitOL (XOR size1 (OpReg dst) (OpReg dst))
    in
        return (Any size code)

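-- An illustrative note (not in the original source): on x86_64 the size1
-- trick means a 64-bit zero is materialised as
--
--      xorl %eax, %eax     -- 2 bytes; implicitly zeroes the upper 32 bits
--
-- rather than `xorq %rax, %rax`, which needs an extra REX prefix byte.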
-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

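-- A hypothetical example (not from the original source): the Word64
-- literal 42 is loaded with the short form
--
--      movl $42, %eax      -- upper 32 bits of %rax are cleared for free
--
-- whereas a literal above 0xffffffff (or any negative CmmInt) falls
-- through to the general case below, which uses the full-width move.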
getRegister' dflags _ (CmmLit lit)
  = do let size = cmmTypeSize (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
       return (Any size code)

getRegister' _ _ other
  | isVecExpr other = needLlvm
  | otherwise       = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

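-- A clarifying note (not in the original source): on i386 only
-- %eax/%ebx/%ecx/%edx expose 8-bit subregisters (%al, %bl, %cl, %dl);
-- %esi and %edi do not, which is why getByteReg prefers a fresh virtual
-- register that the allocator can place appropriately.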
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)

reg2reg :: Size -> Reg -> Reg -> Instr
reg2reg size src dst
  | size == FF80 = GMOV src dst
  | otherwise    = MOV size (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                           CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                           b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])

-- Matches: (x + offset) + (y << shift)
getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
                                   CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)

-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register).  This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
getSimpleAmode dflags is32Bit addr
  | is32Bit = do
      addr_code <- getAnyReg addr
      addr_r <- getNewRegNat (intSize (wordWidth dflags))
      let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
      return $! Amode amode (addr_code addr_r)
  | otherwise = getAmode addr

x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
               code)

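-- An illustrative sketch (not in the original source): for
-- `(x + 8) + (y << 2)` this builds the single addressing mode
--
--      8(%x_reg,%y_reg,4)      -- base + index*scale + displacement
--
-- so the whole expression costs no arithmetic instructions at all.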


-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
  then do
    let CmmFloat _ w = lit
    Amode addr code <- memConstant (widthInBytes w) lit
    return (OpAddr addr, code)
  else do

  is32Bit <- is32BitPlatform
  dflags <- getDynFlags
  if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
  then return (OpImm (litToImm lit), nilOL)
  else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && (if is32Bit then not (isWord64 pk) else True)
  then do
    dflags <- getDynFlags
    let platform = targetPlatform dflags
    Amode src mem_code <- getAmode mem
    (src',save_code) <-
      if (amodeCouldBeClobbered platform src)
      then do
        tmp <- getNewRegNat (archWordSize is32Bit)
        return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                unitOL (LEA (archWordSize is32Bit) (OpAddr src) (OpReg tmp)))
      else
        return (src, nilOL)
    return (OpAddr src', mem_code `appOL` save_code)
  else do
    getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = isFastTrue (freeReg platform rr)
regClobbered _ _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
  then do
    let CmmFloat _ w = lit
    Amode addr code <- memConstant (widthInBytes w) lit
    return (OpAddr addr, code)
  else do

  is32Bit <- is32BitPlatform
  dflags <- getDynFlags
  if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
  then return (OpImm (litToImm lit), nilOL)
  else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
  then do
    Amode src mem_code <- getAmode mem
    return (OpAddr src, mem_code)
  else
    getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: Bool -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _) = True
isOperand is32Bit (CmmLit lit)  = is32BitLit is32Bit lit
                               || isSuitableFloatingPointLit lit
isOperand _ _ = False

memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  dflags <- getDynFlags
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA ReadOnlyData (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)


loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let size = floatSize w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV size (OpAddr addr) (OpReg dst)
                    else GLD size addr dst
  return (Any (if use_sse2 then size else FF80) code)


-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else do
       (reg, code) <- getNonClobberedReg e
       return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: Bool -> CmmLit -> Bool
is32BitLit is32Bit (CmmInt i W64)
 | not is32Bit
    = -- assume that labels are in the range 0-2^31-1: this assumes the
      -- small memory model (see gcc docs, -mcmodel=small).
      is32BitInteger i
is32BitLit _ _ = True




-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      _ -> condIntCode (machOpToCond mop) x y

getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)

machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
  MO_Eq _   -> EQQ
  MO_Ne _   -> NE
  MO_S_Gt _ -> GTT
  MO_S_Ge _ -> GE
  MO_S_Lt _ -> LTT
  MO_S_Le _ -> LE
  MO_U_Gt _ -> GU
  MO_U_Ge _ -> GEU
  MO_U_Lt _ -> LU
  MO_U_Le _ -> LEU
  _other -> pprPanic "machOpToCond" (pprMachOp mo)


-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do is32Bit <- is32BitPlatform
                          condIntCode' is32Bit cond x y

condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate
condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
 | is32BitLit is32Bit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
               CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
    | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)

-- anything vs zero
condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
               TEST (intSize pk) (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand
condIntCode' is32Bit cond x y
 | isOperand is32Bit y = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
-- operand vs. anything: invert the comparison so that we can use a
-- single comparison instruction.
 | isOperand is32Bit x
 , Just revcond <- maybeFlipCond cond = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)

-- anything vs anything
condIntCode' _ cond x y = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op, x_code) <- getRegOrMem x
    let
        code = y_code `appOL`
               x_code `snocOL`
               CMP (cmmTypeSize (cmmExprType dflags x)) (OpReg y_reg) x_op
    return (CondCode False cond code)



--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  = if_sse2 condFltCode_sse2 condFltCode_x87
  where

  condFltCode_x87
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
    (x_reg, x_code) <- getNonClobberedReg x
    (y_reg, y_code) <- getSomeReg y
    let
        code = x_code `appOL` y_code `snocOL`
               GCMP cond x_reg y_reg
    -- The GCMP insn does the test and sets the zero flag if comparable
    -- and true.  Hence we always supply EQQ as the condition to test.
    return (CondCode True EQQ code)

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
               CMP (floatSize $ cmmExprWidth dflags x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)

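-- An explanatory note (not in the original source): ucomiss/ucomisd
-- report their result through CF/ZF/PF rather than the signed OF/SF
-- flags, so an ordered `x < y` must be tested with the *unsigned*
-- condition (jb rather than jl); condToUnsigned performs exactly that
-- mapping.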
-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Size -> CmmReg  -> CmmExpr -> NatM InstrBlock


-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
   | addr == addr2, pk /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode pk addr src = do
    is32Bit <- is32BitPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI is32Bit src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV pk op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)


-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
  load_code <- intLoadCode (MOV pk) src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform False{-no sse2-} reg))

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  code <- getAnyReg src
  return (code (getRegisterReg platform False{-no sse2-} reg))


-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  use_sse2 <- sse2Enabled
  let
        code = src_code `appOL`
               addr_code `snocOL`
               if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
                           else GST pk src_reg addr
  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do
  use_sse2 <- sse2Enabled
  src_code <- getAnyReg src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (src_code (getRegisterReg platform use_sse2 reg))


genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock

genJump (CmmLoad mem _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

genJump (CmmLit lit) regs = do
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)


-- -----------------------------------------------------------------------------
--  Unconditional branches

genBranch :: BlockId -> NatM InstrBlock
genBranch = return . toOL . mkJumpInstr



-- -----------------------------------------------------------------------------
--  Conditional jumps

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.

I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}

genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump id expr = do
  is32Bit <- is32BitPlatform
  genCondJump' is32Bit id expr

genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock

-- 64-bit integer comparisons on 32-bit
genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
  | is32Bit, Just W64 <- maybeIntComparison mop = do
  ChildCode64 code1 r1_lo <- iselExpr64 e1
  ChildCode64 code2 r2_lo <- iselExpr64 e2
  let r1_hi = getHiVRegFromLo r1_lo
      r2_hi = getHiVRegFromLo r2_lo
      cond = machOpToCond mop
      Just cond' = maybeFlipCond cond
  false <- getBlockIdNat
  return $ code1 `appOL` code2 `appOL` toOL [
    CMP II32 (OpReg r2_hi) (OpReg r1_hi),
    JXX cond true,
    JXX cond' false,
    CMP II32 (OpReg r2_lo) (OpReg r1_lo),
    JXX cond true,
    NEWBLOCK false ]

genCondJump' _ id bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  use_sse2 <- sse2Enabled
  if not is_float || not use_sse2
    then
        return (cond_code `snocOL` JXX cond id)
    else do
        lbl <- getBlockIdNat

        -- see comment with condFltReg
        let code = case cond of
                        NE  -> or_unordered
                        GU  -> plain_test
                        GEU -> plain_test
                        _   -> and_ordered

            plain_test = unitOL (
                  JXX cond id
                )
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ]
            and_ordered = toOL [
                  JXX PARITY lbl,
                  JXX cond id,
                  JXX ALWAYS lbl,
                  NEWBLOCK lbl
                ]
        return (cond_code `appOL` code)

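-- A worked illustration (not in the original source): after ucomisd an
-- unordered result (either operand NaN) sets ZF, PF and CF all at once.
-- NE must also branch when the operands are unordered, since NaN /= x
-- is True, hence `or_unordered` adds a JXX PARITY to the same target.
-- GU and GEU are already false on unordered inputs (CF=1), so a plain
-- test suffices.  The rest (EQQ, LU, LEU) would spuriously succeed, so
-- `and_ordered` first diverts unordered results past the branch via the
-- fresh PARITY label.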
1625 -- -----------------------------------------------------------------------------
1626 -- Generating C calls
1627
1628 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1629 -- @get_arg@, which moves the arguments to the correct registers/stack
1630 -- locations. Apart from that, the code is easy.
1631 --
1632 -- (If applicable) Do not fill the delay slots here; you will confuse the
1633 -- register allocator.
1634
1635 genCCall
1636 :: DynFlags
1637 -> Bool -- 32 bit platform?
1638 -> ForeignTarget -- function to call
1639 -> [CmmFormal] -- where to put the result
1640 -> [CmmActual] -- arguments (of mixed type)
1641 -> NatM InstrBlock
1642
1643 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1644
1645 -- Unroll memcpy calls if the source and destination pointers are at
1646 -- least DWORD aligned and the number of bytes to copy isn't too
1647 -- large. Otherwise, call C's memcpy.
1648 genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
1649 [dst, src, CmmLit (CmmInt n _)]
1650 | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
1651 code_dst <- getAnyReg dst
1652 dst_r <- getNewRegNat size
1653 code_src <- getAnyReg src
1654 src_r <- getNewRegNat size
1655 tmp_r <- getNewRegNat size
1656 return $ code_dst dst_r `appOL` code_src src_r `appOL`
1657 go dst_r src_r tmp_r (fromInteger n)
1658 where
1659 -- The number of instructions we will generate (approx). We need 2
1660 -- instructions per move.
1661 insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
1662
1663 size = if align .&. 4 /= 0 then II32 else (archWordSize is32Bit)
1664
1665 -- The size of each move, in bytes.
1666 sizeBytes :: Integer
1667 sizeBytes = fromIntegral (sizeInBytes size)
1668
1669 go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
1670 go dst src tmp i
1671 | i >= sizeBytes =
1672 unitOL (MOV size (OpAddr src_addr) (OpReg tmp)) `appOL`
1673 unitOL (MOV size (OpReg tmp) (OpAddr dst_addr)) `appOL`
1674 go dst src tmp (i - sizeBytes)
1675 -- Deal with remaining bytes.
1676 | i >= 4 = -- Will never happen on 32-bit
1677 unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
1678 unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1679 go dst src tmp (i - 4)
1680 | i >= 2 =
1681 unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
1682 unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1683 go dst src tmp (i - 2)
1684 | i >= 1 =
1685 unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
1686 unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1687 go dst src tmp (i - 1)
1688 | otherwise = nilOL
1689 where
1690 src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
1691 (ImmInteger (n - i))
1692 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1693 (ImmInteger (n - i))
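
-- For illustration: with align = 4 and n = 10 on a 32-bit target,
-- 'go' above unrolls to roughly
--
--       movl    0(src), tmp   ;  movl  tmp, 0(dst)
--       movl    4(src), tmp   ;  movl  tmp, 4(dst)
--       movzwl  8(src), tmp   ;  movw  tmp, 8(dst)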
1694
1695 genCCall dflags _ (PrimTarget (MO_Memset align)) _
1696 [dst,
1697 CmmLit (CmmInt c _),
1698 CmmLit (CmmInt n _)]
1699 | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
1700 code_dst <- getAnyReg dst
1701 dst_r <- getNewRegNat size
1702 return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
1703 where
1704 (size, val) = case align .&. 3 of
1705 2 -> (II16, c2)
1706 0 -> (II32, c4)
1707 _ -> (II8, c)
1708 c2 = c `shiftL` 8 .|. c
1709 c4 = c2 `shiftL` 16 .|. c2
1710
1711 -- The number of instructions we will generate (approx). We need 1
1712 -- instruction per move.
1713 insns = (n + sizeBytes - 1) `div` sizeBytes
1714
1715 -- The size of each move, in bytes.
1716 sizeBytes :: Integer
1717 sizeBytes = fromIntegral (sizeInBytes size)
1718
1719 go :: Reg -> Integer -> OrdList Instr
1720 go dst i
1721 -- TODO: Add movabs instruction and support 64-bit sets.
1722 | i >= sizeBytes = -- This might be smaller than the sizes below
1723 unitOL (MOV size (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1724 go dst (i - sizeBytes)
1725 | i >= 4 = -- Will never happen on 32-bit
1726 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1727 go dst (i - 4)
1728 | i >= 2 =
1729 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1730 go dst (i - 2)
1731 | i >= 1 =
1732 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1733 go dst (i - 1)
1734 | otherwise = nilOL
1735 where
1736 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1737 (ImmInteger (n - i))
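
-- For illustration: with align = 4, c = 0xAB and n = 6 we get
-- c2 = 0xABAB and c4 = 0xABABABAB, and 'go' above unrolls to roughly
--
--       movl  $0xABABABAB, 0(dst)
--       movw  $0xABAB,     4(dst)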
1738
1739 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1740 -- write barrier compiles to no code on x86/x86-64;
1741 -- we keep it around this long in order to stop earlier optimisations from reordering memory operations across it.
1742
1743 genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
1744
1745 genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
1746 case n of
1747 0 -> genPrefetch src $ PREFETCH NTA size
1748 1 -> genPrefetch src $ PREFETCH Lvl2 size
1749 2 -> genPrefetch src $ PREFETCH Lvl1 size
1750 3 -> genPrefetch src $ PREFETCH Lvl0 size
1751 l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
1752 -- the C / LLVM prefetch convention numbers locality levels 0, 1, 2, and 3;
1753 -- the corresponding x86 hints are NTA, T2, T1, and T0, respectively
1754 where
1755 size = archWordSize is32bit
1756 -- need to know the register width for pointers!
1757 genPrefetch inRegSrc prefetchCTor =
1758 do
1759 code_src <- getAnyReg inRegSrc
1760 src_r <- getNewRegNat size
1761 return $ code_src src_r `appOL`
1762 (unitOL (prefetchCTor (OpAddr
1763 ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
1764 -- prefetch always takes an address
1765
1766 genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
1767 let platform = targetPlatform dflags
1768 let dst_r = getRegisterReg platform False (CmmLocal dst)
1769 case width of
1770 W64 | is32Bit -> do
1771 ChildCode64 vcode rlo <- iselExpr64 src
1772 let dst_rhi = getHiVRegFromLo dst_r
1773 rhi = getHiVRegFromLo rlo
1774 return $ vcode `appOL`
1775 toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
1776 MOV II32 (OpReg rhi) (OpReg dst_r),
1777 BSWAP II32 dst_rhi,
1778 BSWAP II32 dst_r ]
1779 W16 -> do code_src <- getAnyReg src
1780 return $ code_src dst_r `appOL`
1781 unitOL (BSWAP II32 dst_r) `appOL`
1782 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
1783 _ -> do code_src <- getAnyReg src
1784 return $ code_src dst_r `appOL` unitOL (BSWAP size dst_r)
1785 where
1786 size = intSize width
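
-- Worked example for the W16 case above: with dst_r holding
-- 0x00001234, BSWAP II32 gives 0x34120000 and the SHR by 16 yields
-- 0x00003412, i.e. the 16-bit value with its bytes swapped.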
1787
1788 genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1789 args@[src] = do
1790 sse4_2 <- sse4_2Enabled
1791 let platform = targetPlatform dflags
1792 if sse4_2
1793 then do code_src <- getAnyReg src
1794 src_r <- getNewRegNat size
1795 let dst_r = getRegisterReg platform False (CmmLocal dst)
1796 return $ code_src src_r `appOL`
1797 (if width == W8 then
1798 -- The POPCNT instruction doesn't take a r/m8
1799 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1800 unitOL (POPCNT II16 (OpReg src_r) dst_r)
1801 else
1802 unitOL (POPCNT size (OpReg src_r) dst_r)) `appOL`
1803 (if width == W8 || width == W16 then
1804 -- We used a 16-bit destination register above,
1805 -- so zero-extend
1806 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1807 else nilOL)
1808 else do
1809 targetExpr <- cmmMakeDynamicReference dflags
1810 CallReference lbl
1811 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1812 [NoHint] [NoHint]
1813 CmmMayReturn)
1814 genCCall dflags is32Bit target dest_regs args
1815 where
1816 size = intSize width
1817 lbl = mkCmmCodeLabel primPackageKey (fsLit (popCntLabel width))
1818
1819 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
1820 | is32Bit && width == W64 = do
1821 -- Fallback to `hs_clz64` on i386
1822 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
1823 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1824 [NoHint] [NoHint]
1825 CmmMayReturn)
1826 genCCall dflags is32Bit target dest_regs args
1827
1828 | otherwise = do
1829 code_src <- getAnyReg src
1830 src_r <- getNewRegNat size
1831 tmp_r <- getNewRegNat size
1832 let dst_r = getRegisterReg platform False (CmmLocal dst)
1833
1834 -- The following insn sequence makes sure 'clz 0' has a defined value.
1835 -- Starting with Haswell, one could use the LZCNT insn instead.
1836 return $ code_src src_r `appOL` toOL
1837 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1838 [ BSR size (OpReg src_r) tmp_r
1839 , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
1840 , CMOV NE size (OpReg tmp_r) dst_r
1841 , XOR size (OpImm (ImmInt (bw-1))) (OpReg dst_r)
1842 ]) -- NB: We don't need to zero-extend the result for the
1843 -- W8/W16 cases because the 'MOV' insn already
1844 -- took care of implicitly clearing the upper bits
1845 where
1846 bw = widthInBits width
1847 platform = targetPlatform dflags
1848 size = if width == W8 then II16 else intSize width
1849 lbl = mkCmmCodeLabel primPackageKey (fsLit (clzLabel width))
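
-- Worked example for the inline sequence above, at W32 (bw = 32):
--   src = 1: BSR clears ZF and puts 0 in tmp_r, so the CMOV NE writes
--            0 into dst_r; 0 `xor` 31 = 31 = clz 1.
--   src = 0: BSR sets ZF, so dst_r keeps 2*32-1 = 63; 63 `xor` 31 = 32,
--            giving 'clz 0' the defined value bw.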
1850
1851 genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
1852 | is32Bit, width == W64 = do
1853 ChildCode64 vcode rlo <- iselExpr64 src
1854 let rhi = getHiVRegFromLo rlo
1855 dst_r = getRegisterReg platform False (CmmLocal dst)
1856 lbl1 <- getBlockIdNat
1857 lbl2 <- getBlockIdNat
1858 tmp_r <- getNewRegNat size
1859
1860 -- The following instruction sequence corresponds to the pseudo-code
1861 --
1862 -- if (src) {
1863 -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
1864 -- } else {
1865 -- dst = 64;
1866 -- }
1867 return $ vcode `appOL` toOL
1868 ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
1869 , OR II32 (OpReg rlo) (OpReg tmp_r)
1870 , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
1871 , JXX EQQ lbl2
1872 , JXX ALWAYS lbl1
1873
1874 , NEWBLOCK lbl1
1875 , BSF II32 (OpReg rhi) dst_r
1876 , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r)
1877 , BSF II32 (OpReg rlo) tmp_r
1878 , CMOV NE II32 (OpReg tmp_r) dst_r
1879 , JXX ALWAYS lbl2
1880
1881 , NEWBLOCK lbl2
1882 ])
1883
1884 | otherwise = do
1885 code_src <- getAnyReg src
1886 src_r <- getNewRegNat size
1887 tmp_r <- getNewRegNat size
1888 let dst_r = getRegisterReg platform False (CmmLocal dst)
1889
1890 -- The following insn sequence makes sure 'ctz 0' has a defined value.
1891 -- Starting with Haswell, one could use the TZCNT insn instead.
1892 return $ code_src src_r `appOL` toOL
1893 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1894 [ BSF size (OpReg src_r) tmp_r
1895 , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
1896 , CMOV NE size (OpReg tmp_r) dst_r
1897 ]) -- NB: We don't need to zero-extend the result for the
1898 -- W8/W16 cases because the 'MOV' insn already
1899 -- took care of implicitly clearing the upper bits
1900 where
1901 bw = widthInBits width
1902 platform = targetPlatform dflags
1903 size = if width == W8 then II16 else intSize width
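
-- Worked example for the inline sequence above, at W32 (bw = 32):
--   src = 8: BSF clears ZF and puts 3 in tmp_r, so the CMOV NE writes
--            3 into dst_r: ctz 8 = 3.
--   src = 0: BSF sets ZF, so dst_r keeps the preloaded 32, giving
--            'ctz 0' the defined value bw.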
1904
1905 genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
1906 targetExpr <- cmmMakeDynamicReference dflags
1907 CallReference lbl
1908 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1909 [NoHint] [NoHint]
1910 CmmMayReturn)
1911 genCCall dflags is32Bit target dest_regs args
1912 where
1913 lbl = mkCmmCodeLabel primPackageKey (fsLit (word2FloatLabel width))
1914
1915 genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
1916 Amode amode addr_code <-
1917 if amop `elem` [AMO_Add, AMO_Sub]
1918 then getAmode addr
1919 else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
1920 arg <- getNewRegNat size
1921 arg_code <- getAnyReg n
1922 use_sse2 <- sse2Enabled
1923 let platform = targetPlatform dflags
1924 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1925 code <- op_code dst_r arg amode
1926 return $ addr_code `appOL` arg_code arg `appOL` code
1927 where
1928 -- Code for the operation
1929 op_code :: Reg -- Destination reg
1930 -> Reg -- Register containing argument
1931 -> AddrMode -- Address of location to mutate
1932 -> NatM (OrdList Instr)
1933 op_code dst_r arg amode = case amop of
1934 -- In the common case where dst_r is a virtual register the
1935 -- final move should go away, because it's the last use of arg
1936 -- and the first use of dst_r.
1937 AMO_Add -> return $ toOL [ LOCK (XADD size (OpReg arg) (OpAddr amode))
1938 , MOV size (OpReg arg) (OpReg dst_r)
1939 ]
1940 AMO_Sub -> return $ toOL [ NEGI size (OpReg arg)
1941 , LOCK (XADD size (OpReg arg) (OpAddr amode))
1942 , MOV size (OpReg arg) (OpReg dst_r)
1943 ]
1944 AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND size src dst)
1945 AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND size src dst
1946 , NOT size dst
1947 ])
1948 AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR size src dst)
1949 AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR size src dst)
1950 where
1951 -- Simulate operation that lacks a dedicated instruction using
1952 -- cmpxchg.
1953 cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
1954 -> NatM (OrdList Instr)
1955 cmpxchg_code instrs = do
1956 lbl <- getBlockIdNat
1957 tmp <- getNewRegNat size
1958 return $ toOL
1959 [ MOV size (OpAddr amode) (OpReg eax)
1960 , JXX ALWAYS lbl
1961 , NEWBLOCK lbl
1962 -- Keep old value so we can return it:
1963 , MOV size (OpReg eax) (OpReg dst_r)
1964 , MOV size (OpReg eax) (OpReg tmp)
1965 ]
1966 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
1967 [ LOCK (CMPXCHG size (OpReg tmp) (OpAddr amode))
1968 , JXX NE lbl
1969 ]
1970
1971 size = intSize width
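
-- For illustration, cmpxchg_code above simulates (e.g. for AMO_And)
-- the loop
--
--   do { old = *addr; new = old & arg; } while (!CAS(addr, old, new));
--   return old;
--
-- keeping the old value in eax for the LOCK CMPXCHG and copying it to
-- dst_r so it can be returned.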
1972
1973 genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
1974 load_code <- intLoadCode (MOV (intSize width)) addr
1975 let platform = targetPlatform dflags
1976 use_sse2 <- sse2Enabled
1977 return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
1978
1979 genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1980 code <- assignMem_IntCode (intSize width) addr val
1981 return $ code `snocOL` MFENCE
1982
1983 genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
1984 -- On x86 we don't have enough registers to use cmpxchg with a
1985 -- complicated addressing mode, so on that architecture we
1986 -- pre-compute the address first.
1987 Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
1988 newval <- getNewRegNat size
1989 newval_code <- getAnyReg new
1990 oldval <- getNewRegNat size
1991 oldval_code <- getAnyReg old
1992 use_sse2 <- sse2Enabled
1993 let platform = targetPlatform dflags
1994 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1995 code = toOL
1996 [ MOV size (OpReg oldval) (OpReg eax)
1997 , LOCK (CMPXCHG size (OpReg newval) (OpAddr amode))
1998 , MOV size (OpReg eax) (OpReg dst_r)
1999 ]
2000 return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
2001 `appOL` code
2002 where
2003 size = intSize width
2004
2005 genCCall _ is32Bit target dest_regs args = do
2006 dflags <- getDynFlags
2007 let platform = targetPlatform dflags
2008 case (target, dest_regs) of
2009 -- void return type prim op
2010 (PrimTarget op, []) ->
2011 outOfLineCmmOp op Nothing args
2012 -- we only cope with a single result for foreign calls
2013 (PrimTarget op, [r])
2014 | not is32Bit -> outOfLineCmmOp op (Just r) args
2015 | otherwise -> do
2016 l1 <- getNewLabelNat
2017 l2 <- getNewLabelNat
2018 sse2 <- sse2Enabled
2019 if sse2
2020 then
2021 outOfLineCmmOp op (Just r) args
2022 else case op of
2023 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
2024 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
2025
2026 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
2027 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
2028
2029 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
2030 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
2031
2032 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
2033 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
2034
2035 _other_op -> outOfLineCmmOp op (Just r) args
2036
2037 where
2038 actuallyInlineFloatOp instr size [x]
2039 = do res <- trivialUFCode size (instr size) x
2040 any <- anyReg res
2041 return (any (getRegisterReg platform False (CmmLocal r)))
2042
2043 actuallyInlineFloatOp _ _ args
2044 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
2045 ++ show (length args) ++ ")"
2046
2047 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
2048 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
2049 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
2050 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
2051 case args of
2052 [arg_x, arg_y] ->
2053 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
2054 let size = intSize width
2055 lCode <- anyReg =<< trivialCode width (ADD_CC size)
2056 (Just (ADD_CC size)) arg_x arg_y
2057 let reg_l = getRegisterReg platform True (CmmLocal res_l)
2058 reg_h = getRegisterReg platform True (CmmLocal res_h)
2059 code = hCode reg_h `appOL`
2060 lCode reg_l `snocOL`
2061 ADC size (OpImm (ImmInteger 0)) (OpReg reg_h)
2062 return code
2063 _ -> panic "genCCall: Wrong number of arguments/results for add2"
2064 (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
2065 addSubIntC platform ADD_CC (Just . ADD_CC) width res_r res_c args
2066 (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
2067 addSubIntC platform SUB_CC (const Nothing) width res_r res_c args
2068 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
2069 case args of
2070 [arg_x, arg_y] ->
2071 do (y_reg, y_code) <- getRegOrMem arg_y
2072 x_code <- getAnyReg arg_x
2073 let size = intSize width
2074 reg_h = getRegisterReg platform True (CmmLocal res_h)
2075 reg_l = getRegisterReg platform True (CmmLocal res_l)
2076 code = y_code `appOL`
2077 x_code rax `appOL`
2078 toOL [MUL2 size y_reg,
2079 MOV size (OpReg rdx) (OpReg reg_h),
2080 MOV size (OpReg rax) (OpReg reg_l)]
2081 return code
2082 _ -> panic "genCCall: Wrong number of arguments/results for mul2"
2083
2084 _ -> if is32Bit
2085 then genCCall32' dflags target dest_regs args
2086 else genCCall64' dflags target dest_regs args
2087
2088 where divOp1 platform signed width results [arg_x, arg_y]
2089 = divOp platform signed width results Nothing arg_x arg_y
2090 divOp1 _ _ _ _ _
2091 = panic "genCCall: Wrong number of arguments for divOp1"
2092 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
2093 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
2094 divOp2 _ _ _ _ _
2095 = panic "genCCall: Wrong number of arguments for divOp2"
2096 divOp platform signed width [res_q, res_r]
2097 m_arg_x_high arg_x_low arg_y
2098 = do let size = intSize width
2099 reg_q = getRegisterReg platform True (CmmLocal res_q)
2100 reg_r = getRegisterReg platform True (CmmLocal res_r)
2101 widen | signed = CLTD size
2102 | otherwise = XOR size (OpReg rdx) (OpReg rdx)
2103 instr | signed = IDIV
2104 | otherwise = DIV
2105 (y_reg, y_code) <- getRegOrMem arg_y
2106 x_low_code <- getAnyReg arg_x_low
2107 x_high_code <- case m_arg_x_high of
2108 Just arg_x_high ->
2109 getAnyReg arg_x_high
2110 Nothing ->
2111 return $ const $ unitOL widen
2112 return $ y_code `appOL`
2113 x_low_code rax `appOL`
2114 x_high_code rdx `appOL`
2115 toOL [instr size y_reg,
2116 MOV size (OpReg rax) (OpReg reg_q),
2117 MOV size (OpReg rdx) (OpReg reg_r)]
2118 divOp _ _ _ _ _ _ _
2119 = panic "genCCall: Wrong number of results for divOp"
2120
2121 addSubIntC platform instr mrevinstr width res_r res_c [arg_x, arg_y]
2122 = do let size = intSize width
2123 rCode <- anyReg =<< trivialCode width (instr size)
2124 (mrevinstr size) arg_x arg_y
2125 reg_tmp <- getNewRegNat II8
2126 let reg_c = getRegisterReg platform True (CmmLocal res_c)
2127 reg_r = getRegisterReg platform True (CmmLocal res_r)
2128 code = rCode reg_r `snocOL`
2129 SETCC OFLO (OpReg reg_tmp) `snocOL`
2130 MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
2131 return code
2132 addSubIntC _ _ _ _ _ _ _
2133 = panic "genCCall: Wrong number of arguments/results for addSubIntC"
2134
2135 genCCall32' :: DynFlags
2136 -> ForeignTarget -- function to call
2137 -> [CmmFormal] -- where to put the result
2138 -> [CmmActual] -- arguments (of mixed type)
2139 -> NatM InstrBlock
2140 genCCall32' dflags target dest_regs args = do
2141 let
2142 prom_args = map (maybePromoteCArg dflags W32) args
2143
2144 -- Align stack to 16n for calls, assuming a starting stack
2145 -- alignment of 16n - word_size on procedure entry, which we
2146 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2147 sizes = map (arg_size . cmmExprType dflags) (reverse args)
2148 raw_arg_size = sum sizes + wORD_SIZE dflags
2149 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
2150 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
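-- Worked example (32-bit, word size 4): two 4-byte arguments give
-- raw_arg_size = 8 + 4 = 12, hence arg_pad_size = 4 and
-- tot_arg_size = 12; padding + arguments + return address = 16,
-- so the stack stays 16-byte aligned at the call.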
2151 delta0 <- getDeltaNat
2152 setDeltaNat (delta0 - arg_pad_size)
2153
2154 use_sse2 <- sse2Enabled
2155 push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
2156 delta <- getDeltaNat
2157 MASSERT(delta == delta0 - tot_arg_size)
2158
2159 -- deal with static vs dynamic call targets
2160 (callinsns,cconv) <-
2161 case target of
2162 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2163 -> -- ToDo: stdcall arg sizes
2164 return (unitOL (CALL (Left fn_imm) []), conv)
2165 where fn_imm = ImmCLbl lbl
2166 ForeignTarget expr conv
2167 -> do { (dyn_r, dyn_c) <- getSomeReg expr
2168 ; ASSERT( isWord32 (cmmExprType dflags expr) )
2169 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
2170 PrimTarget _
2171 -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
2172 ++ "probably because too many return values."
2173
2174 let push_code
2175 | arg_pad_size /= 0
2176 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2177 DELTA (delta0 - arg_pad_size)]
2178 `appOL` concatOL push_codes
2179 | otherwise
2180 = concatOL push_codes
2181
2182 -- Deallocate parameters after call for ccall;
2183 -- but not for stdcall (callee does it)
2184 --
2185 -- We have to pop any stack padding we added
2186 -- even if we are doing stdcall, though (#5052)
2187 pop_size
2188 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
2189 | otherwise = tot_arg_size
2190
2191 call = callinsns `appOL`
2192 toOL (
2193 (if pop_size==0 then [] else
2194 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
2195 ++
2196 [DELTA delta0]
2197 )
2198 setDeltaNat delta0
2199
2200 dflags <- getDynFlags
2201 let platform = targetPlatform dflags
2202
2203 let
2204 -- assign the results, if necessary
2205 assign_code [] = nilOL
2206 assign_code [dest]
2207 | isFloatType ty =
2208 if use_sse2
2209 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
2210 EAIndexNone
2211 (ImmInt 0)
2212 sz = floatSize w
2213 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
2214 DELTA (delta0 - b),
2215 GST sz fake0 tmp_amode,
2216 MOV sz (OpAddr tmp_amode) (OpReg r_dest),
2217 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
2218 DELTA delta0]
2219 else unitOL (GMOV fake0 r_dest)
2220 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
2221 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
2222 | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest))
2223 where
2224 ty = localRegType dest
2225 w = typeWidth ty
2226 b = widthInBytes w
2227 r_dest_hi = getHiVRegFromLo r_dest
2228 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
2229 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
2230
2231 return (push_code `appOL`
2232 call `appOL`
2233 assign_code dest_regs)
2234
2235 where
2236 arg_size :: CmmType -> Int -- Width in bytes
2237 arg_size ty = widthInBytes (typeWidth ty)
2238
2239 roundTo a x | x `mod` a == 0 = x
2240 | otherwise = x + a - (x `mod` a)
2241
2242 push_arg :: Bool -> CmmActual {-current argument-}
2243 -> NatM InstrBlock -- code
2244
2245 push_arg use_sse2 arg -- we don't need the hints on x86
2246 | isWord64 arg_ty = do
2247 ChildCode64 code r_lo <- iselExpr64 arg
2248 delta <- getDeltaNat
2249 setDeltaNat (delta - 8)
2250 let
2251 r_hi = getHiVRegFromLo r_lo
2252 return ( code `appOL`
2253 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
2254 PUSH II32 (OpReg r_lo), DELTA (delta - 8)]
2256 )
2257
2258 | isFloatType arg_ty = do
2259 (reg, code) <- getSomeReg arg
2260 delta <- getDeltaNat
2261 setDeltaNat (delta-size)
2262 return (code `appOL`
2263 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
2264 DELTA (delta-size),
2265 let addr = AddrBaseIndex (EABaseReg esp)
2266 EAIndexNone
2267 (ImmInt 0)
2268 size = floatSize (typeWidth arg_ty)
2269 in
2270 if use_sse2
2271 then MOV size (OpReg reg) (OpAddr addr)
2272 else GST size reg addr
2273 ]
2274 )
2275
2276 | otherwise = do
2277 (operand, code) <- getOperand arg
2278 delta <- getDeltaNat
2279 setDeltaNat (delta-size)
2280 return (code `snocOL`
2281 PUSH II32 operand `snocOL`
2282 DELTA (delta-size))
2283
2284 where
2285 arg_ty = cmmExprType dflags arg
2286 size = arg_size arg_ty -- Byte size
2287
2288 genCCall64' :: DynFlags
2289 -> ForeignTarget -- function to call
2290 -> [CmmFormal] -- where to put the result
2291 -> [CmmActual] -- arguments (of mixed type)
2292 -> NatM InstrBlock
2293 genCCall64' dflags target dest_regs args = do
2294 -- load up the register arguments
2295 let prom_args = map (maybePromoteCArg dflags W32) args
2296
2297 (stack_args, int_regs_used, fp_regs_used, load_args_code)
2298 <-
2299 if platformOS platform == OSMinGW32
2300 then load_args_win prom_args [] [] (allArgRegs platform) nilOL
2301 else do (stack_args, aregs, fregs, load_args_code)
2302 <- load_args prom_args (allIntArgRegs platform) (allFPArgRegs platform) nilOL
2303 let fp_regs_used = reverse (drop (length fregs) (reverse (allFPArgRegs platform)))
2304 int_regs_used = reverse (drop (length aregs) (reverse (allIntArgRegs platform)))
2305 return (stack_args, int_regs_used, fp_regs_used, load_args_code)
2306
2307 let
2308 arg_regs_used = int_regs_used ++ fp_regs_used
2309 arg_regs = [eax] ++ arg_regs_used
2310 -- used for annotating the call instruction
2311 sse_regs = length fp_regs_used
2312 arg_stack_slots = if platformOS platform == OSMinGW32
2313 then length stack_args + length (allArgRegs platform)
2314 else length stack_args
2315 tot_arg_size = arg_size * arg_stack_slots
2316
2317
2318 -- Align stack to 16n for calls, assuming a starting stack
2319 -- alignment of 16n - word_size on procedure entry. Which we
2320 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2321 (real_size, adjust_rsp) <-
2322 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2323 then return (tot_arg_size, nilOL)
2324 else do -- we need to adjust...
2325 delta <- getDeltaNat
2326 setDeltaNat (delta - wORD_SIZE dflags)
2327 return (tot_arg_size + wORD_SIZE dflags, toOL [
2328 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2329 DELTA (delta - wORD_SIZE dflags) ])
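
-- Worked example (word size 8): tot_arg_size = 16 gives
-- (16 + 8) `rem` 16 /= 0, so we push one extra word and
-- real_size = 24; tot_arg_size = 8 already satisfies the test
-- and needs no adjustment.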
2330
2331 -- push the stack args, right to left
2332 push_code <- push_args (reverse stack_args) nilOL
2333 -- On Win64, we also have to leave stack space for the arguments
2334 -- that we are passing in registers
2335 lss_code <- if platformOS platform == OSMinGW32
2336 then leaveStackSpace (length (allArgRegs platform))
2337 else return nilOL
2338 delta <- getDeltaNat
2339
2340 -- deal with static vs dynamic call targets
2341 (callinsns,_cconv) <-
2342 case target of
2343 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2344 -> -- ToDo: stdcall arg sizes
2345 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2346 where fn_imm = ImmCLbl lbl
2347 ForeignTarget expr conv
2348 -> do (dyn_r, dyn_c) <- getSomeReg expr
2349 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2350 PrimTarget _
2351 -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
2352 ++ "probably because too many return values."
2353
2354 let
2355 -- The x86_64 ABI requires us to set %al to the number of SSE2
2356 -- registers that contain arguments, if the called routine
2357 -- is a varargs function. We don't know whether it's a
2358 -- varargs function or not, so we have to assume it is.
2359 --
2360 -- It's not safe to omit this assignment, even if the number
2361 -- of SSE2 regs in use is zero. If %al is larger than 8
2362 -- on entry to a varargs function, seg faults ensue.
2363 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
2364
2365 let call = callinsns `appOL`
2366 toOL (
2367 -- Deallocate parameters after call for ccall;
2368 -- stdcall has callee do it, but is not supported on
2369 -- x86_64 target (see #3336)
2370 (if real_size==0 then [] else
2371 [ADD (intSize (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2372 ++
2373 [DELTA (delta + real_size)]
2374 )
2375 setDeltaNat (delta + real_size)
2376
2377 let
2378 -- assign the results, if necessary
2379 assign_code [] = nilOL
2380 assign_code [dest] =
2381 case typeWidth rep of
2382 W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest))
2383 W64 | isFloatType rep -> unitOL (MOV (floatSize W64) (OpReg xmm0) (OpReg r_dest))
2384 _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest))
2385 where
2386 rep = localRegType dest
2387 r_dest = getRegisterReg platform True (CmmLocal dest)
2388 assign_code _many = panic "genCCall.assign_code many"
2389
2390 return (load_args_code `appOL`
2391 adjust_rsp `appOL`
2392 push_code `appOL`
2393 lss_code `appOL`
2394 assign_eax sse_regs `appOL`
2395 call `appOL`
2396 assign_code dest_regs)
2397
2398 where platform = targetPlatform dflags
2399 arg_size = 8 -- always, at the moment
2400
2401 load_args :: [CmmExpr]
2402 -> [Reg] -- int regs avail for args
2403 -> [Reg] -- FP regs avail for args
2404 -> InstrBlock
2405 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2406 load_args args [] [] code = return (args, [], [], code)
2407 -- no more regs to use
2408 load_args [] aregs fregs code = return ([], aregs, fregs, code)
2409 -- no more args to push
2410 load_args (arg : rest) aregs fregs code
2411 | isFloatType arg_rep =
2412 case fregs of
2413 [] -> push_this_arg
2414 (r:rs) -> do
2415 arg_code <- getAnyReg arg
2416 load_args rest aregs rs (code `appOL` arg_code r)
2417 | otherwise =
2418 case aregs of
2419 [] -> push_this_arg
2420 (r:rs) -> do
2421 arg_code <- getAnyReg arg
2422 load_args rest rs fregs (code `appOL` arg_code r)
2423 where
2424 arg_rep = cmmExprType dflags arg
2425
2426 push_this_arg = do
2427 (args',ars,frs,code') <- load_args rest aregs fregs code
2428 return (arg:args', ars, frs, code')
2429
2430 load_args_win :: [CmmExpr]
2431 -> [Reg] -- used int regs
2432 -> [Reg] -- used FP regs
2433 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2434 -> InstrBlock
2435 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2436 load_args_win args usedInt usedFP [] code
2437 = return (args, usedInt, usedFP, code)
2438 -- no more regs to use
2439 load_args_win [] usedInt usedFP _ code
2440 = return ([], usedInt, usedFP, code)
2441 -- no more args to push
2442 load_args_win (arg : rest) usedInt usedFP
2443 ((ireg, freg) : regs) code
2444 | isFloatType arg_rep = do
2445 arg_code <- getAnyReg arg
2446 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2447 (code `appOL`
2448 arg_code freg `snocOL`
2449 -- If we are calling a varargs function
2450 -- then we need to define ireg as well
2451 -- as freg
2452 MOV II64 (OpReg freg) (OpReg ireg))
2453 | otherwise = do
2454 arg_code <- getAnyReg arg
2455 load_args_win rest (ireg : usedInt) usedFP regs
2456 (code `appOL` arg_code ireg)
2457 where
2458 arg_rep = cmmExprType dflags arg
2459
2460 push_args [] code = return code
2461 push_args (arg:rest) code
2462 | isFloatType arg_rep = do
2463 (arg_reg, arg_code) <- getSomeReg arg
2464 delta <- getDeltaNat
2465 setDeltaNat (delta-arg_size)
2466 let code' = code `appOL` arg_code `appOL` toOL [
2467 SUB (intSize (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp) ,
2468 DELTA (delta-arg_size),
2469 MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2470 push_args rest code'
2471
2472 | otherwise = do
2473 ASSERT(width == W64) return ()
2474 (arg_op, arg_code) <- getOperand arg
2475 delta <- getDeltaNat
2476 setDeltaNat (delta-arg_size)
2477 let code' = code `appOL` arg_code `appOL` toOL [
2478 PUSH II64 arg_op,
2479 DELTA (delta-arg_size)]
2480 push_args rest code'
2481 where
2482 arg_rep = cmmExprType dflags arg
2483 width = typeWidth arg_rep
2484
2485 leaveStackSpace n = do
2486 delta <- getDeltaNat
2487 setDeltaNat (delta - n * arg_size)
2488 return $ toOL [
2489 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2490 DELTA (delta - n * arg_size)]
2491
2492 maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
2493 maybePromoteCArg dflags wto arg
2494 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
2495 | otherwise = arg
2496 where
2497 wfrom = cmmExprWidth dflags arg
2498
2499 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2500 outOfLineCmmOp mop res args
2501 = do
2502 dflags <- getDynFlags
2503 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
2504 let target = ForeignTarget targetExpr
2505 (ForeignConvention CCallConv [] [] CmmMayReturn)
2506
2507 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
2508 where
2509 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2510 -- TODO: Why is this ok? Under Linux this code will be in libm.so
2511 --       Is it because they're really implemented as primitive instructions by the assembler?? -- BL 2009/12/31
2512 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2513
2514 fn = case mop of
2515 MO_F32_Sqrt -> fsLit "sqrtf"
2516 MO_F32_Sin -> fsLit "sinf"
2517 MO_F32_Cos -> fsLit "cosf"
2518 MO_F32_Tan -> fsLit "tanf"
2519 MO_F32_Exp -> fsLit "expf"
2520 MO_F32_Log -> fsLit "logf"
2521
2522 MO_F32_Asin -> fsLit "asinf"
2523 MO_F32_Acos -> fsLit "acosf"
2524 MO_F32_Atan -> fsLit "atanf"
2525
2526 MO_F32_Sinh -> fsLit "sinhf"
2527 MO_F32_Cosh -> fsLit "coshf"
2528 MO_F32_Tanh -> fsLit "tanhf"
2529 MO_F32_Pwr -> fsLit "powf"
2530
2531 MO_F64_Sqrt -> fsLit "sqrt"
2532 MO_F64_Sin -> fsLit "sin"
2533 MO_F64_Cos -> fsLit "cos"
2534 MO_F64_Tan -> fsLit "tan"
2535 MO_F64_Exp -> fsLit "exp"
2536 MO_F64_Log -> fsLit "log"
2537
2538 MO_F64_Asin -> fsLit "asin"
2539 MO_F64_Acos -> fsLit "acos"
2540 MO_F64_Atan -> fsLit "atan"
2541
2542 MO_F64_Sinh -> fsLit "sinh"
2543 MO_F64_Cosh -> fsLit "cosh"
2544 MO_F64_Tanh -> fsLit "tanh"
2545 MO_F64_Pwr -> fsLit "pow"
2546
2547 MO_Memcpy _ -> fsLit "memcpy"
2548 MO_Memset _ -> fsLit "memset"
2549 MO_Memmove _ -> fsLit "memmove"
2550
2551 MO_PopCnt _ -> fsLit "popcnt"
2552 MO_BSwap _ -> fsLit "bswap"
2553 MO_Clz w -> fsLit $ clzLabel w
2554 MO_Ctz _ -> unsupported
2555
2556 MO_AtomicRMW _ _ -> fsLit "atomicrmw"
2557 MO_AtomicRead _ -> fsLit "atomicread"
2558 MO_AtomicWrite _ -> fsLit "atomicwrite"
2559 MO_Cmpxchg _ -> fsLit "cmpxchg"
2560
2561 MO_UF_Conv _ -> unsupported
2562
2563 MO_S_QuotRem {} -> unsupported
2564 MO_U_QuotRem {} -> unsupported
2565 MO_U_QuotRem2 {} -> unsupported
2566 MO_Add2 {} -> unsupported
2567 MO_AddIntC {} -> unsupported
2568 MO_SubIntC {} -> unsupported
2569 MO_U_Mul2 {} -> unsupported
2570 MO_WriteBarrier -> unsupported
2571 MO_Touch -> unsupported
2572 (MO_Prefetch_Data _ ) -> unsupported
2573 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2574 ++ " not supported here")
2575
2576 -- -----------------------------------------------------------------------------
2577 -- Generating a table-branch
2578
2579 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2580
2581 genSwitch dflags expr targets
2582 | gopt Opt_PIC dflags
2583 = do
2584 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2585 lbl <- getNewLabelNat
2586 dflags <- getDynFlags
2587 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2588 (tableReg,t_code) <- getSomeReg $ dynRef
2589 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2590 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2591
2592 return $ if target32Bit (targetPlatform dflags)
2593 then e_code `appOL` t_code `appOL` toOL [
2594 ADD (intSize (wordWidth dflags)) op (OpReg tableReg),
2595 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2596 ]
2597 else case platformOS (targetPlatform dflags) of
2598 OSDarwin ->
2599 -- on Mac OS X/x86_64, put the jump table
2600 -- in the text section to work around a
2601 -- limitation of the linker.
2602 -- ld64 is unable to handle the relocations for
2603 -- .quad L1 - L0
2604 -- if L0 is not preceded by a non-anonymous
2605 -- label in its section.
2606 e_code `appOL` t_code `appOL` toOL [
2607 ADD (intSize (wordWidth dflags)) op (OpReg tableReg),
2608 JMP_TBL (OpReg tableReg) ids Text lbl
2609 ]
2610 _ ->
2611 -- HACK: On x86_64 binutils<2.17 is only able
2612 -- to generate PC32 relocations, hence we only
2613 -- get 32-bit offsets in the jump table. As
2614 -- these offsets are always negative we need
2615 -- to properly sign extend them to 64-bit.
2616 -- This hack should be removed in conjunction
2617 -- with the hack in PprMach.hs/pprDataItem
2618 -- once binutils 2.17 is standard.
2619 e_code `appOL` t_code `appOL` toOL [
2620 MOVSxL II32 op (OpReg reg),
2621 ADD (intSize (wordWidth dflags)) (OpReg reg) (OpReg tableReg),
2622 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2623 ]
2624 | otherwise
2625 = do
2626 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2627 lbl <- getNewLabelNat
2628 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2629 code = e_code `appOL` toOL [
2630 JMP_TBL op ids ReadOnlyData lbl
2631 ]
2632 return code
2633 where (offset, ids) = switchTargetsToTable targets
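
-- For illustration, the non-PIC case above amounts to a single
-- indexed indirect jump through the table, roughly (with '.Ltable'
-- standing for the generated table label):
--
--       jmp  *.Ltable(,%reg,WORD_SIZE)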
2634
2635 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2636 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2637 = Just (createJumpTable dflags ids section lbl)
2638 generateJumpTableForInstr _ _ = Nothing
2639
2640 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2641 -> GenCmmDecl (Alignment, CmmStatics) h g
2642 createJumpTable dflags ids section lbl
2643 = let jumpTable
2644 | gopt Opt_PIC dflags =
2645 let jumpTableEntryRel Nothing
2646 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2647 jumpTableEntryRel (Just blockid)
2648 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2649 where blockLabel = mkAsmTempLabel (getUnique blockid)
2650 in map jumpTableEntryRel ids
2651 | otherwise = map (jumpTableEntry dflags) ids
2652 in CmmData section (1, Statics lbl jumpTable)
2653
2654 -- -----------------------------------------------------------------------------
2655 -- 'condIntReg' and 'condFltReg': condition codes into registers
2656
2657 -- Turn those condition codes into integers now (when they appear on
2658 -- the right hand side of an assignment).
2659 --
2660 -- (If applicable) Do not fill the delay slots here; you will confuse the
2661 -- register allocator.
2662
2663 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2664
2665 condIntReg cond x y = do
2666 CondCode _ cond cond_code <- condIntCode cond x y
2667 tmp <- getNewRegNat II8
2668 let
2669 code dst = cond_code `appOL` toOL [
2670 SETCC cond (OpReg tmp),
2671 MOVZxL II8 (OpReg tmp) (OpReg dst)
2672 ]
2673 return (Any II32 code)
2674
2675
2676
2677 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2678 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2679 where
2680 condFltReg_x87 = do
2681 CondCode _ cond cond_code <- condFltCode cond x y
2682 tmp <- getNewRegNat II8
2683 let
2684 code dst = cond_code `appOL` toOL [
2685 SETCC cond (OpReg tmp),
2686 MOVZxL II8 (OpReg tmp) (OpReg dst)
2687 ]
2688 return (Any II32 code)
2689
2690 condFltReg_sse2 = do
2691 CondCode _ cond cond_code <- condFltCode cond x y
2692 tmp1 <- getNewRegNat (archWordSize is32Bit)
2693 tmp2 <- getNewRegNat (archWordSize is32Bit)
2694 let
2695 -- We have to worry about unordered operands (eg. comparisons
2696 -- against NaN). If the operands are unordered, the comparison
2697 -- sets the parity flag, carry flag and zero flag.
2698 -- All comparisons are supposed to return false for unordered
2699 -- operands except for !=, which returns true.
2700 --
2701 -- Optimisation: we don't have to test the parity flag if we
2702 -- know the test has already excluded the unordered case: eg >
2703 -- and >= test for a zero carry flag, which can only occur for
2704 -- ordered operands.
2705 --
2706 -- ToDo: by reversing comparisons we could avoid testing the
2707 -- parity flag in more cases.
2708
2709 code dst =
2710 cond_code `appOL`
2711 (case cond of
2712 NE -> or_unordered dst
2713 GU -> plain_test dst
2714 GEU -> plain_test dst
2715 _ -> and_ordered dst)
2716
2717 plain_test dst = toOL [
2718 SETCC cond (OpReg tmp1),
2719 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2720 ]
2721 or_unordered dst = toOL [
2722 SETCC cond (OpReg tmp1),
2723 SETCC PARITY (OpReg tmp2),
2724 OR II8 (OpReg tmp1) (OpReg tmp2),
2725 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2726 ]
2727 and_ordered dst = toOL [
2728 SETCC cond (OpReg tmp1),
2729 SETCC NOTPARITY (OpReg tmp2),
2730 AND II8 (OpReg tmp1) (OpReg tmp2),
2731 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2732 ]
2733 return (Any II32 code)
2734
2735
2736 -- -----------------------------------------------------------------------------
2737 -- 'trivial*Code': deal with trivial instructions
2738
2739 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2740 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2741 -- Only look for constants on the right hand side, because that's
2742 -- where the generic optimizer will have put them.
2743
2744 -- Similarly, for unary instructions, we don't have to worry about
2745 -- matching an StInt as the argument, because genericOpt will already
2746 -- have handled the constant-folding.
2747
2748
2749 {-
2750 The Rules of the Game are:
2751
2752 * You cannot assume anything about the destination register dst;
2753 it may be anything, including a fixed reg.
2754
2755 * You may compute an operand into a fixed reg, but you may not
2756 subsequently change the contents of that fixed reg. If you
2757 want to do so, first copy the value either to a temporary
2758 or into dst. You are free to modify dst even if it happens
2759 to be a fixed reg -- that's not your problem.
2760
2761 * You cannot assume that a fixed reg will stay live over an
2762 arbitrary computation. The same applies to the dst reg.
2763
2764 * Temporary regs obtained from getNewRegNat are distinct from
2765 each other and from all other regs, and stay live over
2766 arbitrary computations.
2767
2768 --------------------
2769
2770 SDM's version of The Rules:
2771
2772 * If getRegister returns Any, that means it can generate correct
2773 code which places the result in any register, period. Even if that
2774 register happens to be read during the computation.
2775
2776 Corollary #1: this means that if you are generating code for an
2777 operation with two arbitrary operands, you cannot assign the result
2778 of the first operand into the destination register before computing
2779 the second operand. The second operand might require the old value
2780 of the destination register.
2781
2782 Corollary #2: A function might be able to generate more efficient
2783 code if it knows the destination register is a new temporary (and
2784 therefore not read by any of the sub-computations).
2785
2786 * If getRegister returns Any, then the code it generates may modify only:
2787 (a) fresh temporaries
2788 (b) the destination register
2789 (c) known registers (eg. %ecx is used by shifts)
2790 In particular, it may *not* modify global registers, unless the global
2791 register happens to be the destination register.
2792 -}
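
-- For example (Corollary #1 above): when generating 'dst := x - y'
-- with the two-operand SUB instruction, computing x straight into dst
-- before evaluating y would go wrong if y's code reads dst;
-- genTrivialCode below saves the clashing operand in a fresh
-- temporary for exactly this reason.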
2793
2794 trivialCode :: Width -> (Operand -> Operand -> Instr)
2795 -> Maybe (Operand -> Operand -> Instr)
2796 -> CmmExpr -> CmmExpr -> NatM Register
2797 trivialCode width instr m a b
2798 = do is32Bit <- is32BitPlatform
2799 trivialCode' is32Bit width instr m a b
2800
2801 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
2802 -> Maybe (Operand -> Operand -> Instr)
2803 -> CmmExpr -> CmmExpr -> NatM Register
2804 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
2805 | is32BitLit is32Bit lit_a = do
2806 b_code <- getAnyReg b
2807 let
2808 code dst
2809 = b_code dst `snocOL`
2810 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2811 return (Any (intSize width) code)
2812
2813 trivialCode' _ width instr _ a b
2814 = genTrivialCode (intSize width) instr a b
2815
2816 -- This is re-used for floating pt instructions too.
2817 genTrivialCode :: Size -> (Operand -> Operand -> Instr)
2818 -> CmmExpr -> CmmExpr -> NatM Register
2819 genTrivialCode rep instr a b = do
2820 (b_op, b_code) <- getNonClobberedOperand b
2821 a_code <- getAnyReg a
2822 tmp <- getNewRegNat rep
2823 let
2824 -- We want the value of b to stay alive across the computation of a.
2825 -- But, we want to calculate a straight into the destination register,
2826 -- because the instruction only has two operands (dst := dst `op` src).
2827 -- The troublesome case is when the result of b is in the same register
2828 -- as the destination reg. In this case, we have to save b in a
2829 -- new temporary across the computation of a.
2830 code dst
2831 | dst `regClashesWithOp` b_op =
2832 b_code `appOL`
2833 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2834 a_code dst `snocOL`
2835 instr (OpReg tmp) (OpReg dst)
2836 | otherwise =
2837 b_code `appOL`
2838 a_code dst `snocOL`
2839 instr b_op (OpReg dst)
2840 return (Any rep code)
2841
2842 regClashesWithOp :: Reg -> Operand -> Bool
2843 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2844 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2845 _ `regClashesWithOp` _ = False
2846
2847 -----------
2848
2849 trivialUCode :: Size -> (Operand -> Instr)
2850 -> CmmExpr -> NatM Register
2851 trivialUCode rep instr x = do
2852 x_code <- getAnyReg x
2853 let
2854 code dst =
2855 x_code dst `snocOL`
2856 instr (OpReg dst)
2857 return (Any rep code)
2858
2859 -----------
2860
2861 trivialFCode_x87 :: (Size -> Reg -> Reg -> Reg -> Instr)
2862 -> CmmExpr -> CmmExpr -> NatM Register
2863 trivialFCode_x87 instr x y = do
2864 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2865 (y_reg, y_code) <- getSomeReg y
2866 let
2867 size = FF80 -- always, on x87
2868 code dst =
2869 x_code `appOL`
2870 y_code `snocOL`
2871 instr size x_reg y_reg dst
2872 return (Any size code)
2873
2874 trivialFCode_sse2 :: Width -> (Size -> Operand -> Operand -> Instr)
2875 -> CmmExpr -> CmmExpr -> NatM Register
2876 trivialFCode_sse2 pk instr x y
2877 = genTrivialCode size (instr size) x y
2878 where size = floatSize pk
2879
2880
2881 trivialUFCode :: Size -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2882 trivialUFCode size instr x = do
2883 (x_reg, x_code) <- getSomeReg x
2884 let
2885 code dst =
2886 x_code `snocOL`
2887 instr x_reg dst
2888 return (Any size code)
2889
2890
2891 --------------------------------------------------------------------------------
2892 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2893 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2894 where
2895 coerce_x87 = do
2896 (x_reg, x_code) <- getSomeReg x
2897 let
2898 opc = case to of W32 -> GITOF; W64 -> GITOD;
2899 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2900 ++ show n ++ ")"
2901 code dst = x_code `snocOL` opc x_reg dst
2902 -- ToDo: works for non-II32 reps?
2903 return (Any FF80 code)
2904
2905 coerce_sse2 = do
2906 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2907 let
2908 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2909 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2910 ++ show n ++ ")"
2911 code dst = x_code `snocOL` opc (intSize from) x_op dst
2912 return (Any (floatSize to) code)
2913 -- works even if the destination rep is <II32
2914
2915 --------------------------------------------------------------------------------
2916 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2917 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2918 where
2919 coerceFP2Int_x87 = do
2920 (x_reg, x_code) <- getSomeReg x
2921 let
2922 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2923 n -> panic $ "coerceFP2Int.x87: unhandled width ("
2924 ++ show n ++ ")"
2925 code dst = x_code `snocOL` opc x_reg dst
2926 -- ToDo: works for non-II32 reps?
2927 return (Any (intSize to) code)
2928
2929 coerceFP2Int_sse2 = do
2930 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2931 let
2932 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
2933 n -> panic $ "coerceFP2Init.sse: unhandled width ("
2934 ++ show n ++ ")"
2935 code dst = x_code `snocOL` opc (intSize to) x_op dst
2936 return (Any (intSize to) code)
2937 -- works even if the destination rep is <II32
2938
2939
2940 --------------------------------------------------------------------------------
2941 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2942 coerceFP2FP to x = do
2943 use_sse2 <- sse2Enabled
2944 (x_reg, x_code) <- getSomeReg x
2945 let
2946 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2947 n -> panic $ "coerceFP2FP: unhandled width ("
2948 ++ show n ++ ")"
2949 | otherwise = GDTOF
2950 code dst = x_code `snocOL` opc x_reg dst
2951 return (Any (if use_sse2 then floatSize to else FF80) code)
2952
2953 --------------------------------------------------------------------------------
2954
2955 sse2NegCode :: Width -> CmmExpr -> NatM Register
2956 sse2NegCode w x = do
2957 let sz = floatSize w
2958 x_code <- getAnyReg x
2959 -- This is how gcc does it, so it can't be that bad:
2960 let
2961 const | FF32 <- sz = CmmInt 0x80000000 W32
2962 | otherwise = CmmInt 0x8000000000000000 W64
2963 Amode amode amode_code <- memConstant (widthInBytes w) const
2964 tmp <- getNewRegNat sz
2965 let
2966 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2967 MOV sz (OpAddr amode) (OpReg tmp),
2968 XOR sz (OpReg tmp) (OpReg dst)
2969 ]
2970 --
2971 return (Any sz code)
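
-- Worked example: this XORs only the IEEE-754 sign bit, e.g. for FF32
-- 0x3F800000 (1.0) `xor` 0x80000000 = 0xBF800000 (-1.0).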
2972
2973 isVecExpr :: CmmExpr -> Bool
2974 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
2975 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
2976 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
2977 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
2978 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
2979 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
2980 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
2981 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
2982 isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
2983 isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
2984 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
2985 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
2986 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
2987 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
2988 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
2989 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
2990 isVecExpr _ = False
2991
2992 needLlvm :: NatM a
2993 needLlvm =
2994 sorry $ unlines ["The native code generator does not support vector"
2995 ,"instructions. Please use -fllvm."]