{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

-- The default iteration limit is a bit too low for the definitions
-- in this module.
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
#endif

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Debug            ( DebugBlock(..) )
import Instruction
import PIC
import NCGMonad
import Format
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primUnitId )
import PprCmm           ()
import CmmUtils
import CmmSwitch
import Cmm
import Hoopl
import CLabel
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import DynFlags
import Util

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
    dflags <- getDynFlags
    return (isSse2Enabled dflags)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
    dflags <- getDynFlags
    return (isSse4_2Enabled dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
    b <- sse2Enabled
    if b then sse2 else x87

cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
    let blocks = toBlockListEntryFirst graph
    (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
    picBaseMb <- getPicBaseMaybeNat
    dflags <- getDynFlags
    let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
        tops = proc : concat statics
        os   = platformOS $ targetPlatform dflags

    case picBaseMb of
        Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
        Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
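  -- For example (illustrative): the stream [i1, NEWBLOCK b2, i2, LDATA s d, i3]
  -- splits into top = [i1] and other_blocks = [BasicBlock b2 [i2,i3]],
  -- while the LDATA becomes a CmmData in statics.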
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)


stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL
    CmmUnwind {}   -> return nilOL

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode format reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode format reg src
        where ty = cmmRegType dflags reg
              format = cmmTypeFormat ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode format addr src
        where ty = cmmExprType dflags src
              format = cmmTypeFormat ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg true false _ -> do
      b1 <- genCondJump true arg
      b2 <- genBranch false
      return (b1 `appOL` b2)
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
        dflags <- getDynFlags
        genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--      They are really trees of insns to facilitate fast appending, where a
--      left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      The rule of this simplified insn selection game is therefore that
--      the returned Reg may be modified.
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)


swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let fmt = cmmTypeFormat pk in
    if isFloatFormat fmt && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u fmt)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
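--
-- For illustration (not compiled into anything), all of these hold because
-- the argument is truncated to Int64 before the range check:
--   is32BitInteger (-1)                  == True
--   is32BitInteger 18446744073709551615  == True   -- truncates to -1
--   is32BitInteger 0x80000000            == False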


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)
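-- For example (illustrative): mangleIndexTree dflags r 8 yields
--   CmmMachOp (MO_Add w) [CmmReg r, CmmLit (CmmInt 8 w)]
-- where w is the width of r.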

-- | The dual to getAnyReg: compute an expression into a register, but
--      we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)
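-- For example (illustrative): the literal 0x0000000100000002 splits into
--   rlo := 0x00000002   (r, the low word)
--   rhi := 0x00000001   (q, the high word)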

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 format | not use_sse2 && isFloatFormat fmt = FF80
                        | otherwise                         = fmt
               --
               let platform = targetPlatform dflags
               return (Fixed format
                             (getRegisterReg platform use_sse2 reg)
                             nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
          format = floatFormat w
          code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any format code)

    | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_format)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

      {- Shift ops on x86 have constraints on their shift amount: it
         has to be an Imm, CL, or 1
            => trivialCode is not restrictive enough (sigh.)
      -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op = instr (intFormat width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt = case rep of
                          W32 -> 31
                          W64 -> 63
                          _   -> panic "shift_amt"

             format = intFormat rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB format (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed format eax code)
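         -- For example (illustrative, W32): 3 * 5 leaves %edx:%eax = 0:15;
         -- SAR turns %eax into 0 and SUB leaves 0, i.e. no overflow.
         -- 0x10000 * 0x10000 leaves %edx:%eax = 1:0; SUB leaves %eax = -1,
         -- signalling possible overflow.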

    --------------------
    shift_code :: Width
               -> (Format -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               format = intFormat width
               code dst
                  = x_code dst `snocOL`
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let format = intFormat width
        tmp <- getNewRegNat format
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            format = intFormat width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA format
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any format code)
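        -- e.g. (illustrative): add_int W64 x 16 produces, for some dst,
        --     leaq 16(%x_reg), %dst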

    ----------------------
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr format y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed format result code)


getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any format code)
  where
    width = typeWidth pk
    format = intFormat width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV format
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV format) mem
    return (Any format code)
  where format = intFormat $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        format = intFormat width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        format1 = if is32Bit then format
                  else case format of
                        II64 -> II32
                        _    -> format
        code dst
           = unitOL (XOR format1 (OpReg dst) (OpReg dst))
    in
        return (Any format code)

-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).
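        -- For example (illustrative): 'movl $0x12345678, %eax' leaves all
        -- of %rax equal to 0x0000000012345678, so the shorter 32-bit move
        -- has the same effect as a 64-bit one for these literals.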

getRegister' dflags _ (CmmLit lit)
  = do let format = cmmTypeFormat (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
       return (Any format code)

getRegister' _ _ other
  | isVecExpr other = needLlvm
  | otherwise       = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)

reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst
  | format == FF80 = GMOV src dst
  | otherwise      = MOV format (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                           CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                           b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])

-- Matches: (x + offset) + (y << shift)
getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
                                   CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)

-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register). This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
getSimpleAmode dflags is32Bit addr
  | is32Bit = do
      addr_code <- getAnyReg addr
      addr_r <- getNewRegNat (intFormat (wordWidth dflags))
      let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
      return $! Amode amode (addr_code addr_r)
  | otherwise = getAmode addr

x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
               code)
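-- For example (illustrative): x86_complex_amode x y 2 12 addresses
-- x + (y << 2) + 12, i.e. the operand 12(%x_reg,%y_reg,4) in AT&T syntax.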



-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && (if is32Bit then not (isWord64 pk) else True)
    then do
      dflags <- getDynFlags
      let platform = targetPlatform dflags
      Amode src mem_code <- getAmode mem
      (src',save_code) <-
        if (amodeCouldBeClobbered platform src)
          then do
            tmp <- getNewRegNat (archWordFormat is32Bit)
            return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                    unitOL (LEA (archWordFormat is32Bit)
                                (OpAddr src)
                                (OpReg tmp)))
          else
            return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else do
      getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
regClobbered _ _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else
       getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: Bool -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _) = True
isOperand is32Bit (CmmLit lit)  = is32BitLit is32Bit lit
                               || isSuitableFloatingPointLit lit
isOperand _ _            = False

memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  let rosection = Section ReadOnlyData lbl
  dflags <- getDynFlags
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA rosection (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)


loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let format = floatFormat w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV format (OpAddr addr) (OpReg dst)
                    else GLD format addr dst
  return (Any (if use_sse2 then format else FF80) code)


-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else do
       (reg, code) <- getNonClobberedReg e
       return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: Bool -> CmmLit -> Bool
is32BitLit is32Bit (CmmInt i W64)
 | not is32Bit
    = -- assume that labels are in the range 0-2^31-1: this assumes the
      -- small memory model (see gcc docs, -mcmodel=small).
      is32BitInteger i
is32BitLit _ _ = True




-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      _ -> condIntCode (machOpToCond mop) x y

getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)

machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
  MO_Eq _   -> EQQ
  MO_Ne _   -> NE
  MO_S_Gt _ -> GTT
  MO_S_Ge _ -> GE
  MO_S_Lt _ -> LTT
  MO_S_Le _ -> LE
  MO_U_Gt _ -> GU
  MO_U_Ge _ -> GEU
  MO_U_Lt _ -> LU
  MO_U_Le _ -> LEU
  _other -> pprPanic "machOpToCond" (pprMachOp mo)


-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do is32Bit <- is32BitPlatform
                          condIntCode' is32Bit cond x y

condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate
condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
 | is32BitLit is32Bit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
                  CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
    | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)

-- anything vs zero
condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
                  TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand
condIntCode' is32Bit cond x y
 | isOperand is32Bit y = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
 -- operand vs. anything: invert the comparison so that we can use a
 -- single comparison instruction.
 | isOperand is32Bit x
 , Just revcond <- maybeFlipCond cond = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)
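 -- e.g. (illustrative): for 'mem < y', mem stays as the memory operand and
 -- y is forced into a register, so the test is emitted with the flipped
 -- condition, as 'y > mem'.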

-- anything vs anything
condIntCode' _ cond x y = do
  dflags <- getDynFlags
  (y_reg, y_code) <- getNonClobberedReg y
  (x_op, x_code) <- getRegOrMem x
  let
        code = y_code `appOL`
               x_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op
  return (CondCode False cond code)



--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  = if_sse2 condFltCode_sse2 condFltCode_x87
  where

  condFltCode_x87
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
    (x_reg, x_code) <- getNonClobberedReg x
    (y_reg, y_code) <- getSomeReg y
    let
        code = x_code `appOL` y_code `snocOL`
                GCMP cond x_reg y_reg
    -- The GCMP insn does the test and sets the zero flag if comparable
    -- and true.  Hence we always supply EQQ as the condition to test.
    return (CondCode True EQQ code)

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
                  CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)

-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock


-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
   | addr == addr2, pk /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode pk addr src = do
    is32Bit <- is32BitPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI is32Bit src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV pk op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)


-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
  load_code <- intLoadCode (MOV pk) src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform False{-no sse2-} reg))

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  code <- getAnyReg src
  return (code (getRegisterReg platform False{-no sse2-} reg))


-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  use_sse2 <- sse2Enabled
  let
        code = src_code `appOL`
               addr_code `snocOL`
                if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
                            else GST pk src_reg addr
  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do
  use_sse2 <- sse2Enabled
  src_code <- getAnyReg src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (src_code (getRegisterReg platform use_sse2 reg))


genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock

genJump (CmmLoad mem _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

genJump (CmmLit lit) regs = do
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)


-- -----------------------------------------------------------------------------
--  Unconditional branches

genBranch :: BlockId -> NatM InstrBlock
genBranch = return . toOL . mkJumpInstr



-- -----------------------------------------------------------------------------
--  Conditional jumps

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.

I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}

genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump id expr = do
  is32Bit <- is32BitPlatform
  genCondJump' is32Bit id expr

genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock

-- 64-bit integer comparisons on 32-bit
genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
  | is32Bit, Just W64 <- maybeIntComparison mop = do
  ChildCode64 code1 r1_lo <- iselExpr64 e1
  ChildCode64 code2 r2_lo <- iselExpr64 e2
  let r1_hi = getHiVRegFromLo r1_lo
      r2_hi = getHiVRegFromLo r2_lo
      cond = machOpToCond mop
      Just cond' = maybeFlipCond cond
  false <- getBlockIdNat
  return $ code1 `appOL` code2 `appOL` toOL [
    CMP II32 (OpReg r2_hi) (OpReg r1_hi),
    JXX cond true,
    JXX cond' false,
    CMP II32 (OpReg r2_lo) (OpReg r1_lo),
    JXX cond true,
    NEWBLOCK false ]
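  -- Illustrative reading (e.g. a signed 'x < y': cond = LTT, cond' = GTT):
  -- branch on the high-word comparison first; only when the high words are
  -- equal do we fall through to the comparison of the low words.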
1604
1605 genCondJump' _ id bool = do
1606 CondCode is_float cond cond_code <- getCondCode bool
1607 use_sse2 <- sse2Enabled
1608 if not is_float || not use_sse2
1609 then
1610 return (cond_code `snocOL` JXX cond id)
1611 else do
1612 lbl <- getBlockIdNat
1613
1614 -- see comment with condFltReg
1615 let code = case cond of
1616 NE -> or_unordered
1617 GU -> plain_test
1618 GEU -> plain_test
1619 _ -> and_ordered
1620
1621 plain_test = unitOL (
1622 JXX cond id
1623 )
1624 or_unordered = toOL [
1625 JXX cond id,
1626 JXX PARITY id
1627 ]
1628 and_ordered = toOL [
1629 JXX PARITY lbl,
1630 JXX cond id,
1631 JXX ALWAYS lbl,
1632 NEWBLOCK lbl
1633 ]
1634 return (cond_code `appOL` code)
1635
1636 -- -----------------------------------------------------------------------------
1637 -- Generating C calls
1638
1639 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1640 -- @get_arg@, which moves the arguments to the correct registers/stack
1641 -- locations. Apart from that, the code is easy.
1642 --
1643 -- (If applicable) Do not fill the delay slots here; you will confuse the
1644 -- register allocator.
1645
1646 genCCall
1647 :: DynFlags
1648 -> Bool -- 32 bit platform?
1649 -> ForeignTarget -- function to call
1650 -> [CmmFormal] -- where to put the result
1651 -> [CmmActual] -- arguments (of mixed type)
1652 -> NatM InstrBlock
1653
1654 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1655
1656 -- Unroll memcpy calls if the source and destination pointers are at
1657 -- least DWORD aligned and the number of bytes to copy isn't too
1658 -- large. Otherwise, call C's memcpy.
1659 genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
1660 [dst, src, CmmLit (CmmInt n _)]
1661 | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
1662 code_dst <- getAnyReg dst
1663 dst_r <- getNewRegNat format
1664 code_src <- getAnyReg src
1665 src_r <- getNewRegNat format
1666 tmp_r <- getNewRegNat format
1667 return $ code_dst dst_r `appOL` code_src src_r `appOL`
1668 go dst_r src_r tmp_r (fromInteger n)
1669 where
1670 -- The number of instructions we will generate (approx). We need 2
1671 -- instructions per move.
1672 insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
1673
1674 format = if align .&. 4 /= 0 then II32 else (archWordFormat is32Bit)
1675
1676 -- The size of each move, in bytes.
1677 sizeBytes :: Integer
1678 sizeBytes = fromIntegral (formatInBytes format)
1679
1680 go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
1681 go dst src tmp i
1682 | i >= sizeBytes =
1683 unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
1684 unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
1685 go dst src tmp (i - sizeBytes)
1686 -- Deal with remaining bytes.
1687 | i >= 4 = -- Will never happen on 32-bit
1688 unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
1689 unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1690 go dst src tmp (i - 4)
1691 | i >= 2 =
1692 unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
1693 unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1694 go dst src tmp (i - 2)
1695 | i >= 1 =
1696 unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
1697 unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1698 go dst src tmp (i - 1)
1699 | otherwise = nilOL
1700 where
1701 src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
1702 (ImmInteger (n - i))
1703 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1704 (ImmInteger (n - i))
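-- An illustrative sketch (assuming a 4-byte-aligned, 10-byte copy on a
-- 32-bit target, so format = II32 and sizeBytes = 4): 'go' unrolls to
-- roughly
--
--     mov    0(src), tmp ; mov  tmp, 0(dst)    -- bytes 0..3
--     mov    4(src), tmp ; mov  tmp, 4(dst)    -- bytes 4..7
--     movzwl 8(src), tmp ; movw tmp, 8(dst)    -- bytes 8..9
--
-- i.e. word-sized moves first, then progressively narrower moves for
-- the remainder.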
1705
1706 genCCall dflags _ (PrimTarget (MO_Memset align)) _
1707 [dst,
1708 CmmLit (CmmInt c _),
1709 CmmLit (CmmInt n _)]
1710 | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
1711 code_dst <- getAnyReg dst
1712 dst_r <- getNewRegNat format
1713 return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
1714 where
1715 (format, val) = case align .&. 3 of
1716 2 -> (II16, c2)
1717 0 -> (II32, c4)
1718 _ -> (II8, c)
1719 c2 = c `shiftL` 8 .|. c
1720 c4 = c2 `shiftL` 16 .|. c2
1721
1722 -- The number of instructions we will generate (approx). We need 1
1723 -- instruction per move.
1724 insns = (n + sizeBytes - 1) `div` sizeBytes
1725
1726 -- The size of each move, in bytes.
1727 sizeBytes :: Integer
1728 sizeBytes = fromIntegral (formatInBytes format)
1729
1730 go :: Reg -> Integer -> OrdList Instr
1731 go dst i
1732 -- TODO: Add movabs instruction and support 64-bit sets.
1733 | i >= sizeBytes = -- This might be smaller than the below sizes
1734 unitOL (MOV format (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1735 go dst (i - sizeBytes)
1736 | i >= 4 = -- Will never happen on 32-bit
1737 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1738 go dst (i - 4)
1739 | i >= 2 =
1740 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1741 go dst (i - 2)
1742 | i >= 1 =
1743 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1744 go dst (i - 1)
1745 | otherwise = nilOL
1746 where
1747 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1748 (ImmInteger (n - i))
1749
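-- A small worked example (assumed values): with c = 0xAB and align = 4,
-- we get c2 = 0xABAB and c4 = 0xABABABAB, so a 6-byte memset unrolls to
-- roughly
--
--     movl $0xABABABAB, 0(dst)    -- bytes 0..3
--     movw $0xABAB,     4(dst)    -- bytes 4..5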
1750 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1751 -- write barrier compiles to no code on x86/x86-64;
1752 -- we keep it around this long in order to prevent earlier optimisations.
1753
1754 genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
1755
1756 genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
1757 case n of
1758 0 -> genPrefetch src $ PREFETCH NTA format
1759 1 -> genPrefetch src $ PREFETCH Lvl2 format
1760 2 -> genPrefetch src $ PREFETCH Lvl1 format
1761 3 -> genPrefetch src $ PREFETCH Lvl0 format
1762 l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
1763 -- the C / LLVM prefetch convention uses levels 0, 1, 2, and 3;
1764 -- the corresponding x86 hints are NTA, Lvl2, Lvl1, and Lvl0.
1765 where
1766 format = archWordFormat is32bit
1767 -- we need to know the register width for pointers!
1768 genPrefetch inRegSrc prefetchCTor =
1769 do
1770 code_src <- getAnyReg inRegSrc
1771 src_r <- getNewRegNat format
1772 return $ code_src src_r `appOL`
1773 (unitOL (prefetchCTor (OpAddr
1774 ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
1775 -- prefetch always takes an address
1776
1777 genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
1778 let platform = targetPlatform dflags
1779 let dst_r = getRegisterReg platform False (CmmLocal dst)
1780 case width of
1781 W64 | is32Bit -> do
1782 ChildCode64 vcode rlo <- iselExpr64 src
1783 let dst_rhi = getHiVRegFromLo dst_r
1784 rhi = getHiVRegFromLo rlo
1785 return $ vcode `appOL`
1786 toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
1787 MOV II32 (OpReg rhi) (OpReg dst_r),
1788 BSWAP II32 dst_rhi,
1789 BSWAP II32 dst_r ]
1790 W16 -> do code_src <- getAnyReg src
1791 return $ code_src dst_r `appOL`
1792 unitOL (BSWAP II32 dst_r) `appOL`
1793 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
1794 _ -> do code_src <- getAnyReg src
1795 return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)
1796 where
1797 format = intFormat width
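-- A worked example for the W16 case: byte-swapping 0x1234 held in a
-- 32-bit register gives 0x34120000 after BSWAP, and shifting right by
-- 16 yields the desired 0x3412.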
1798
1799 genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1800 args@[src] = do
1801 sse4_2 <- sse4_2Enabled
1802 let platform = targetPlatform dflags
1803 if sse4_2
1804 then do code_src <- getAnyReg src
1805 src_r <- getNewRegNat format
1806 let dst_r = getRegisterReg platform False (CmmLocal dst)
1807 return $ code_src src_r `appOL`
1808 (if width == W8 then
1809 -- The POPCNT instruction doesn't take a r/m8
1810 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1811 unitOL (POPCNT II16 (OpReg src_r) dst_r)
1812 else
1813 unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
1814 (if width == W8 || width == W16 then
1815 -- We used a 16-bit destination register above,
1816 -- so zero-extend
1817 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1818 else nilOL)
1819 else do
1820 targetExpr <- cmmMakeDynamicReference dflags
1821 CallReference lbl
1822 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1823 [NoHint] [NoHint]
1824 CmmMayReturn)
1825 genCCall dflags is32Bit target dest_regs args
1826 where
1827 format = intFormat width
1828 lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
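-- A sketch of the W8 case above (SSE4.2 available): POPCNT has no
-- r/m8 form, so we zero-extend first and count a 16-bit value:
--
--     movzbl  src_b, src_r
--     popcntw src_r, dst_r
--     movzwl  dst_r, dst_r    -- clear the upper bits of the result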
1829
1830 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
1831 | is32Bit && width == W64 = do
1832 -- Fallback to `hs_clz64` on i386
1833 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
1834 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1835 [NoHint] [NoHint]
1836 CmmMayReturn)
1837 genCCall dflags is32Bit target dest_regs args
1838
1839 | otherwise = do
1840 code_src <- getAnyReg src
1841 src_r <- getNewRegNat format
1842 tmp_r <- getNewRegNat format
1843 let dst_r = getRegisterReg platform False (CmmLocal dst)
1844
1845 -- The following insn sequence makes sure 'clz 0' has a defined value.
1846 -- Starting with Haswell, one could use the LZCNT insn instead.
1847 return $ code_src src_r `appOL` toOL
1848 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1849 [ BSR format (OpReg src_r) tmp_r
1850 , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
1851 , CMOV NE format (OpReg tmp_r) dst_r
1852 , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
1853 ]) -- NB: We don't need to zero-extend the result for the
1854 -- W8/W16 cases because the 'MOV' insn already
1855 -- took care of implicitly clearing the upper bits
1856 where
1857 bw = widthInBits width
1858 platform = targetPlatform dflags
1859 format = if width == W8 then II16 else intFormat width
1860 lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width))
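-- A worked example for W32: for src = 1, BSR finds bit index 0 and
-- clears ZF, so CMOV overwrites dst with 0, and 0 `xor` 31 = 31 = clz 1.
-- For src = 0, BSR sets ZF, dst keeps 2*32-1 = 63, and 63 `xor` 31
-- = 32 = clz 0, as required.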
1861
1862 genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
1863 | is32Bit, width == W64 = do
1864 ChildCode64 vcode rlo <- iselExpr64 src
1865 let rhi = getHiVRegFromLo rlo
1866 dst_r = getRegisterReg platform False (CmmLocal dst)
1867 lbl1 <- getBlockIdNat
1868 lbl2 <- getBlockIdNat
1869 tmp_r <- getNewRegNat format
1870
1871 -- The following instruction sequence corresponds to the pseudo-code
1872 --
1873 -- if (src) {
1874 -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
1875 -- } else {
1876 -- dst = 64;
1877 -- }
1878 return $ vcode `appOL` toOL
1879 ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
1880 , OR II32 (OpReg rlo) (OpReg tmp_r)
1881 , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
1882 , JXX EQQ lbl2
1883 , JXX ALWAYS lbl1
1884
1885 , NEWBLOCK lbl1
1886 , BSF II32 (OpReg rhi) dst_r
1887 , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r)
1888 , BSF II32 (OpReg rlo) tmp_r
1889 , CMOV NE II32 (OpReg tmp_r) dst_r
1890 , JXX ALWAYS lbl2
1891
1892 , NEWBLOCK lbl2
1893 ])
1894
1895 | otherwise = do
1896 code_src <- getAnyReg src
1897 src_r <- getNewRegNat format
1898 tmp_r <- getNewRegNat format
1899 let dst_r = getRegisterReg platform False (CmmLocal dst)
1900
1901 -- The following insn sequence makes sure 'ctz 0' has a defined value.
1902 -- Starting with Haswell, one could use the TZCNT insn instead.
1903 return $ code_src src_r `appOL` toOL
1904 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1905 [ BSF format (OpReg src_r) tmp_r
1906 , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
1907 , CMOV NE format (OpReg tmp_r) dst_r
1908 ]) -- NB: We don't need to zero-extend the result for the
1909 -- W8/W16 cases because the 'MOV' insn already
1910 -- took care of implicitly clearing the upper bits
1911 where
1912 bw = widthInBits width
1913 platform = targetPlatform dflags
1914 format = if width == W8 then II16 else intFormat width
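-- A worked example for W32: for src = 8, BSF finds bit index 3 and
-- clears ZF, so CMOV overwrites dst with 3 = ctz 8. For src = 0, BSF
-- sets ZF, CMOV is skipped, and dst keeps bw = 32 = ctz 0.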
1915
1916 genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
1917 targetExpr <- cmmMakeDynamicReference dflags
1918 CallReference lbl
1919 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1920 [NoHint] [NoHint]
1921 CmmMayReturn)
1922 genCCall dflags is32Bit target dest_regs args
1923 where
1924 lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width))
1925
1926 genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
1927 Amode amode addr_code <-
1928 if amop `elem` [AMO_Add, AMO_Sub]
1929 then getAmode addr
1930 else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
1931 arg <- getNewRegNat format
1932 arg_code <- getAnyReg n
1933 use_sse2 <- sse2Enabled
1934 let platform = targetPlatform dflags
1935 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1936 code <- op_code dst_r arg amode
1937 return $ addr_code `appOL` arg_code arg `appOL` code
1938 where
1939 -- Code for the operation
1940 op_code :: Reg -- Destination reg
1941 -> Reg -- Register containing argument
1942 -> AddrMode -- Address of location to mutate
1943 -> NatM (OrdList Instr)
1944 op_code dst_r arg amode = case amop of
1945 -- In the common case where dst_r is a virtual register the
1946 -- final move should go away, because it's the last use of arg
1947 -- and the first use of dst_r.
1948 AMO_Add -> return $ toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
1949 , MOV format (OpReg arg) (OpReg dst_r)
1950 ]
1951 AMO_Sub -> return $ toOL [ NEGI format (OpReg arg)
1952 , LOCK (XADD format (OpReg arg) (OpAddr amode))
1953 , MOV format (OpReg arg) (OpReg dst_r)
1954 ]
1955 AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
1956 AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
1957 , NOT format dst
1958 ])
1959 AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
1960 AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
1961 where
1962 -- Simulate operation that lacks a dedicated instruction using
1963 -- cmpxchg.
1964 cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
1965 -> NatM (OrdList Instr)
1966 cmpxchg_code instrs = do
1967 lbl <- getBlockIdNat
1968 tmp <- getNewRegNat format
1969 return $ toOL
1970 [ MOV format (OpAddr amode) (OpReg eax)
1971 , JXX ALWAYS lbl
1972 , NEWBLOCK lbl
1973 -- Keep old value so we can return it:
1974 , MOV format (OpReg eax) (OpReg dst_r)
1975 , MOV format (OpReg eax) (OpReg tmp)
1976 ]
1977 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
1978 [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
1979 , JXX NE lbl
1980 ]
1981
1982 format = intFormat width
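-- The cmpxchg_code loop above corresponds roughly to (AMO_And shown):
--
--         mov  (addr), %eax
--     retry:
--         mov  %eax, dst           -- remember the old value: it's the result
--         mov  %eax, tmp
--         and  arg, tmp            -- compute the new value
--         lock cmpxchg tmp, (addr) -- store iff (addr) still holds %eax;
--         jne  retry               -- on failure %eax is reloaded from (addr)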
1983
1984 genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
1985 load_code <- intLoadCode (MOV (intFormat width)) addr
1986 let platform = targetPlatform dflags
1987 use_sse2 <- sse2Enabled
1988 return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
1989
1990 genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1991 code <- assignMem_IntCode (intFormat width) addr val
1992 return $ code `snocOL` MFENCE
1993
1994 genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
1995 -- On x86 we don't have enough registers to use cmpxchg with a
1996 -- complicated addressing mode, so on that architecture we
1997 -- pre-compute the address first.
1998 Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
1999 newval <- getNewRegNat format
2000 newval_code <- getAnyReg new
2001 oldval <- getNewRegNat format
2002 oldval_code <- getAnyReg old
2003 use_sse2 <- sse2Enabled
2004 let platform = targetPlatform dflags
2005 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
2006 code = toOL
2007 [ MOV format (OpReg oldval) (OpReg eax)
2008 , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
2009 , MOV format (OpReg eax) (OpReg dst_r)
2010 ]
2011 return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
2012 `appOL` code
2013 where
2014 format = intFormat width
2015
2016 genCCall _ is32Bit target dest_regs args = do
2017 dflags <- getDynFlags
2018 let platform = targetPlatform dflags
2019 case (target, dest_regs) of
2020 -- void return type prim op
2021 (PrimTarget op, []) ->
2022 outOfLineCmmOp op Nothing args
2023 -- we only cope with a single result for foreign calls
2024 (PrimTarget op, [r])
2025 | not is32Bit -> outOfLineCmmOp op (Just r) args
2026 | otherwise -> do
2027 l1 <- getNewLabelNat
2028 l2 <- getNewLabelNat
2029 sse2 <- sse2Enabled
2030 if sse2
2031 then
2032 outOfLineCmmOp op (Just r) args
2033 else case op of
2034 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
2035 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
2036
2037 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
2038 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
2039
2040 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
2041 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
2042
2043 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
2044 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
2045
2046 _other_op -> outOfLineCmmOp op (Just r) args
2047
2048 where
2049 actuallyInlineFloatOp instr format [x]
2050 = do res <- trivialUFCode format (instr format) x
2051 any <- anyReg res
2052 return (any (getRegisterReg platform False (CmmLocal r)))
2053
2054 actuallyInlineFloatOp _ _ args
2055 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
2056 ++ show (length args) ++ ")"
2057
2058 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
2059 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
2060 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
2061 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
2062 case args of
2063 [arg_x, arg_y] ->
2064 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
2065 let format = intFormat width
2066 lCode <- anyReg =<< trivialCode width (ADD_CC format)
2067 (Just (ADD_CC format)) arg_x arg_y
2068 let reg_l = getRegisterReg platform True (CmmLocal res_l)
2069 reg_h = getRegisterReg platform True (CmmLocal res_h)
2070 code = hCode reg_h `appOL`
2071 lCode reg_l `snocOL`
2072 ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
2073 return code
2074 _ -> panic "genCCall: Wrong number of arguments/results for add2"
2075 (PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
2076 addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
2077 (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
2078 addSubIntC platform ADD_CC (Just . ADD_CC) OFLO width res_r res_c args
2079 (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
2080 addSubIntC platform SUB_CC (const Nothing) OFLO width res_r res_c args
2081 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
2082 case args of
2083 [arg_x, arg_y] ->
2084 do (y_reg, y_code) <- getRegOrMem arg_y
2085 x_code <- getAnyReg arg_x
2086 let format = intFormat width
2087 reg_h = getRegisterReg platform True (CmmLocal res_h)
2088 reg_l = getRegisterReg platform True (CmmLocal res_l)
2089 code = y_code `appOL`
2090 x_code rax `appOL`
2091 toOL [MUL2 format y_reg,
2092 MOV format (OpReg rdx) (OpReg reg_h),
2093 MOV format (OpReg rax) (OpReg reg_l)]
2094 return code
2095 _ -> panic "genCCall: Wrong number of arguments/results for mul2"
2096
2097 _ -> if is32Bit
2098 then genCCall32' dflags target dest_regs args
2099 else genCCall64' dflags target dest_regs args
2100
2101 where divOp1 platform signed width results [arg_x, arg_y]
2102 = divOp platform signed width results Nothing arg_x arg_y
2103 divOp1 _ _ _ _ _
2104 = panic "genCCall: Wrong number of arguments for divOp1"
2105 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
2106 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
2107 divOp2 _ _ _ _ _
2108 = panic "genCCall: Wrong number of arguments for divOp2"
2109 divOp platform signed width [res_q, res_r]
2110 m_arg_x_high arg_x_low arg_y
2111 = do let format = intFormat width
2112 reg_q = getRegisterReg platform True (CmmLocal res_q)
2113 reg_r = getRegisterReg platform True (CmmLocal res_r)
2114 widen | signed = CLTD format
2115 | otherwise = XOR format (OpReg rdx) (OpReg rdx)
2116 instr | signed = IDIV
2117 | otherwise = DIV
2118 (y_reg, y_code) <- getRegOrMem arg_y
2119 x_low_code <- getAnyReg arg_x_low
2120 x_high_code <- case m_arg_x_high of
2121 Just arg_x_high ->
2122 getAnyReg arg_x_high
2123 Nothing ->
2124 return $ const $ unitOL widen
2125 return $ y_code `appOL`
2126 x_low_code rax `appOL`
2127 x_high_code rdx `appOL`
2128 toOL [instr format y_reg,
2129 MOV format (OpReg rax) (OpReg reg_q),
2130 MOV format (OpReg rdx) (OpReg reg_r)]
2131 divOp _ _ _ _ _ _ _
2132 = panic "genCCall: Wrong number of results for divOp"
2133
2134 addSubIntC platform instr mrevinstr cond width
2135 res_r res_c [arg_x, arg_y]
2136 = do let format = intFormat width
2137 rCode <- anyReg =<< trivialCode width (instr format)
2138 (mrevinstr format) arg_x arg_y
2139 reg_tmp <- getNewRegNat II8
2140 let reg_c = getRegisterReg platform True (CmmLocal res_c)
2141 reg_r = getRegisterReg platform True (CmmLocal res_r)
2142 code = rCode reg_r `snocOL`
2143 SETCC cond (OpReg reg_tmp) `snocOL`
2144 MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
2145
2146 return code
2147 addSubIntC _ _ _ _ _ _ _ _
2148 = panic "genCCall: Wrong number of arguments/results for addSubIntC"
2149
2150 genCCall32' :: DynFlags
2151 -> ForeignTarget -- function to call
2152 -> [CmmFormal] -- where to put the result
2153 -> [CmmActual] -- arguments (of mixed type)
2154 -> NatM InstrBlock
2155 genCCall32' dflags target dest_regs args = do
2156 let
2157 prom_args = map (maybePromoteCArg dflags W32) args
2158
2159 -- Align stack to 16n for calls, assuming a starting stack
2160 -- alignment of 16n - word_size on procedure entry, which we
2161 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2162 sizes = map (arg_size . cmmExprType dflags) (reverse args)
2163 raw_arg_size = sum sizes + wORD_SIZE dflags
2164 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
2165 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
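-- A worked example (assuming wORD_SIZE = 4): for two 4-byte arguments,
-- raw_arg_size = 8 + 4 = 12 (the extra word is the return address that
-- CALL will push), so arg_pad_size = 4 and tot_arg_size = 12; padding
-- plus arguments plus return address together keep esp 16-byte aligned.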
2166 delta0 <- getDeltaNat
2167 setDeltaNat (delta0 - arg_pad_size)
2168
2169 use_sse2 <- sse2Enabled
2170 push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
2171 delta <- getDeltaNat
2172 MASSERT(delta == delta0 - tot_arg_size)
2173
2174 -- deal with static vs dynamic call targets
2175 (callinsns,cconv) <-
2176 case target of
2177 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2178 -> -- ToDo: stdcall arg sizes
2179 return (unitOL (CALL (Left fn_imm) []), conv)
2180 where fn_imm = ImmCLbl lbl
2181 ForeignTarget expr conv
2182 -> do { (dyn_r, dyn_c) <- getSomeReg expr
2183 ; ASSERT( isWord32 (cmmExprType dflags expr) )
2184 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
2185 PrimTarget _
2186 -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
2187 ++ "probably because too many return values."
2188
2189 let push_code
2190 | arg_pad_size /= 0
2191 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2192 DELTA (delta0 - arg_pad_size)]
2193 `appOL` concatOL push_codes
2194 | otherwise
2195 = concatOL push_codes
2196
2197 -- Deallocate parameters after call for ccall;
2198 -- but not for stdcall (callee does it)
2199 --
2200 -- We have to pop any stack padding we added
2201 -- even if we are doing stdcall, though (#5052)
2202 pop_size
2203 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
2204 | otherwise = tot_arg_size
2205
2206 call = callinsns `appOL`
2207 toOL (
2208 (if pop_size==0 then [] else
2209 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
2210 ++
2211 [DELTA delta0]
2212 )
2213 setDeltaNat delta0
2214
2215 dflags <- getDynFlags
2216 let platform = targetPlatform dflags
2217
2218 let
2219 -- assign the results, if necessary
2220 assign_code [] = nilOL
2221 assign_code [dest]
2222 | isFloatType ty =
2223 if use_sse2
2224 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
2225 EAIndexNone
2226 (ImmInt 0)
2227 fmt = floatFormat w
2228 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
2229 DELTA (delta0 - b),
2230 GST fmt fake0 tmp_amode,
2231 MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
2232 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
2233 DELTA delta0]
2234 else unitOL (GMOV fake0 r_dest)
2235 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
2236 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
2237 | otherwise = unitOL (MOV (intFormat w)
2238 (OpReg eax)
2239 (OpReg r_dest))
2240 where
2241 ty = localRegType dest
2242 w = typeWidth ty
2243 b = widthInBytes w
2244 r_dest_hi = getHiVRegFromLo r_dest
2245 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
2246 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
2247
2248 return (push_code `appOL`
2249 call `appOL`
2250 assign_code dest_regs)
2251
2252 where
2253 arg_size :: CmmType -> Int -- Width in bytes
2254 arg_size ty = widthInBytes (typeWidth ty)
2255
2256 roundTo a x | x `mod` a == 0 = x
2257 | otherwise = x + a - (x `mod` a)
2258
2259 push_arg :: Bool -> CmmActual {-current argument-}
2260 -> NatM InstrBlock -- code
2261
2262 push_arg use_sse2 arg -- we don't need the hints on x86
2263 | isWord64 arg_ty = do
2264 ChildCode64 code r_lo <- iselExpr64 arg
2265 delta <- getDeltaNat
2266 setDeltaNat (delta - 8)
2267 let
2268 r_hi = getHiVRegFromLo r_lo
2269 return ( code `appOL`
2270 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
2271 PUSH II32 (OpReg r_lo),
2272 DELTA (delta - 8)]
2273 )
2274
2275 | isFloatType arg_ty = do
2276 (reg, code) <- getSomeReg arg
2277 delta <- getDeltaNat
2278 setDeltaNat (delta-size)
2279 return (code `appOL`
2280 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
2281 DELTA (delta-size),
2282 let addr = AddrBaseIndex (EABaseReg esp)
2283 EAIndexNone
2284 (ImmInt 0)
2285 format = floatFormat (typeWidth arg_ty)
2286 in
2287 if use_sse2
2288 then MOV format (OpReg reg) (OpAddr addr)
2289 else GST format reg addr
2290 ]
2291 )
2292
2293 | otherwise = do
2294 (operand, code) <- getOperand arg
2295 delta <- getDeltaNat
2296 setDeltaNat (delta-size)
2297 return (code `snocOL`
2298 PUSH II32 operand `snocOL`
2299 DELTA (delta-size))
2300
2301 where
2302 arg_ty = cmmExprType dflags arg
2303 size = arg_size arg_ty -- Byte size
2304
2305 genCCall64' :: DynFlags
2306 -> ForeignTarget -- function to call
2307 -> [CmmFormal] -- where to put the result
2308 -> [CmmActual] -- arguments (of mixed type)
2309 -> NatM InstrBlock
2310 genCCall64' dflags target dest_regs args = do
2311 -- load up the register arguments
2312 let prom_args = map (maybePromoteCArg dflags W32) args
2313
2314 (stack_args, int_regs_used, fp_regs_used, load_args_code, assign_args_code)
2315 <-
2316 if platformOS platform == OSMinGW32
2317 then load_args_win prom_args [] [] (allArgRegs platform) nilOL
2318 else do
2319 (stack_args, aregs, fregs, load_args_code, assign_args_code)
2320 <- load_args prom_args (allIntArgRegs platform)
2321 (allFPArgRegs platform)
2322 nilOL nilOL
2323 let used_regs rs as = reverse (drop (length rs) (reverse as))
2324 fregs_used = used_regs fregs (allFPArgRegs platform)
2325 aregs_used = used_regs aregs (allIntArgRegs platform)
2326 return (stack_args, aregs_used, fregs_used, load_args_code
2327 , assign_args_code)
2328
2329 let
2330 arg_regs_used = int_regs_used ++ fp_regs_used
2331 arg_regs = [eax] ++ arg_regs_used
2332 -- used for annotating the call instruction
2333 sse_regs = length fp_regs_used
2334 arg_stack_slots = if platformOS platform == OSMinGW32
2335 then length stack_args + length (allArgRegs platform)
2336 else length stack_args
2337 tot_arg_size = arg_size * arg_stack_slots
2338
2339
2340 -- Align stack to 16n for calls, assuming a starting stack
2341 -- alignment of 16n - word_size on procedure entry. Which we
2342 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2343 (real_size, adjust_rsp) <-
2344 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2345 then return (tot_arg_size, nilOL)
2346 else do -- we need to adjust...
2347 delta <- getDeltaNat
2348 setDeltaNat (delta - wORD_SIZE dflags)
2349 return (tot_arg_size + wORD_SIZE dflags, toOL [
2350 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2351 DELTA (delta - wORD_SIZE dflags) ])
2352
2353 -- push the stack args, right to left
2354 push_code <- push_args (reverse stack_args) nilOL
2355 -- On Win64, we also have to leave stack space for the arguments
2356 -- that we are passing in registers
2357 lss_code <- if platformOS platform == OSMinGW32
2358 then leaveStackSpace (length (allArgRegs platform))
2359 else return nilOL
2360 delta <- getDeltaNat
2361
2362 -- deal with static vs dynamic call targets
2363 (callinsns,_cconv) <-
2364 case target of
2365 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2366 -> -- ToDo: stdcall arg sizes
2367 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2368 where fn_imm = ImmCLbl lbl
2369 ForeignTarget expr conv
2370 -> do (dyn_r, dyn_c) <- getSomeReg expr
2371 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2372 PrimTarget _
2373 -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
2374 ++ "probably because too many return values."
2375
2376 let
2377 -- The x86_64 ABI requires us to set %al to the number of SSE2
2378 -- registers that contain arguments, if the called routine
2379 -- is a varargs function. We don't know whether it's a
2380 -- varargs function or not, so we have to assume it is.
2381 --
2382 -- It's not safe to omit this assignment, even if the number
2383 -- of SSE2 regs in use is zero. If %al is larger than 8
2384 -- on entry to a varargs function, seg faults ensue.
2385 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
2386
2387 let call = callinsns `appOL`
2388 toOL (
2389 -- Deallocate parameters after call for ccall;
2390 -- stdcall has callee do it, but is not supported on
2391 -- x86_64 target (see #3336)
2392 (if real_size==0 then [] else
2393 [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2394 ++
2395 [DELTA (delta + real_size)]
2396 )
2397 setDeltaNat (delta + real_size)
2398
2399 let
2400 -- assign the results, if necessary
2401 assign_code [] = nilOL
2402 assign_code [dest] =
2403 case typeWidth rep of
2404 W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
2405 (OpReg xmm0)
2406 (OpReg r_dest))
2407 W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
2408 (OpReg xmm0)
2409 (OpReg r_dest))
2410 _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
2411 where
2412 rep = localRegType dest
2413 r_dest = getRegisterReg platform True (CmmLocal dest)
2414 assign_code _many = panic "genCCall.assign_code many"
2415
2416 return (adjust_rsp `appOL`
2417 push_code `appOL`
2418 load_args_code `appOL`
2419 assign_args_code `appOL`
2420 lss_code `appOL`
2421 assign_eax sse_regs `appOL`
2422 call `appOL`
2423 assign_code dest_regs)
2424
2425 where platform = targetPlatform dflags
2426 arg_size = 8 -- always, at the moment
2427
2428
2429 load_args :: [CmmExpr]
2430 -> [Reg] -- int regs avail for args
2431 -> [Reg] -- FP regs avail for args
2432 -> InstrBlock -- code computing args
2433 -> InstrBlock -- code assigning args to ABI regs
2434 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
2435 -- no more regs to use
2436 load_args args [] [] code acode =
2437 return (args, [], [], code, acode)
2438
2439 -- no more args to push
2440 load_args [] aregs fregs code acode =
2441 return ([], aregs, fregs, code, acode)
2442
2443 load_args (arg : rest) aregs fregs code acode
2444 | isFloatType arg_rep = case fregs of
2445 [] -> push_this_arg
2446 (r:rs) -> do
2447 (code',acode') <- reg_this_arg r
2448 load_args rest aregs rs code' acode'
2449 | otherwise = case aregs of
2450 [] -> push_this_arg
2451 (r:rs) -> do
2452 (code',acode') <- reg_this_arg r
2453 load_args rest rs fregs code' acode'
2454 where
2455
2456 -- put arg into the list of stack pushed args
2457 push_this_arg = do
2458 (args',ars,frs,code',acode')
2459 <- load_args rest aregs fregs code acode
2460 return (arg:args', ars, frs, code', acode')
2461
2462 -- pass the arg into the given register
2463 reg_this_arg r
2464 -- "operand" args can be directly assigned into r
2465 | isOperand False arg = do
2466 arg_code <- getAnyReg arg
2467 return (code, (acode `appOL` arg_code r))
2468 -- The last non-operand arg can be directly assigned after its
2469 -- computation without going into a temporary register
2470 | all (isOperand False) rest = do
2471 arg_code <- getAnyReg arg
2472 return (code `appOL` arg_code r,acode)
2473
2474 -- other args need to be computed beforehand to avoid clobbering
2475 -- previously assigned registers used to pass parameters (see
2476 -- #11792, #12614). They are assigned into temporary registers
2477 -- and get assigned to proper call ABI registers after they all
2478 -- have been computed.
2479 | otherwise = do
2480 arg_code <- getAnyReg arg
2481 tmp <- getNewRegNat arg_fmt
2482 let
2483 code' = code `appOL` arg_code tmp
2484 acode' = acode `snocOL` reg2reg arg_fmt tmp r
2485 return (code',acode')
2486
2487 arg_rep = cmmExprType dflags arg
2488 arg_fmt = cmmTypeFormat arg_rep
2489
2490 load_args_win :: [CmmExpr]
2491 -> [Reg] -- used int regs
2492 -> [Reg] -- used FP regs
2493 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2494 -> InstrBlock
2495 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
2496 load_args_win args usedInt usedFP [] code
2497 = return (args, usedInt, usedFP, code, nilOL)
2498 -- no more regs to use
2499 load_args_win [] usedInt usedFP _ code
2500 = return ([], usedInt, usedFP, code, nilOL)
2501 -- no more args to push
2502 load_args_win (arg : rest) usedInt usedFP
2503 ((ireg, freg) : regs) code
2504 | isFloatType arg_rep = do
2505 arg_code <- getAnyReg arg
2506 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2507 (code `appOL`
2508 arg_code freg `snocOL`
2509 -- If we are calling a varargs function
2510 -- then we need to define ireg as well
2511 -- as freg
2512 MOV II64 (OpReg freg) (OpReg ireg))
2513 | otherwise = do
2514 arg_code <- getAnyReg arg
2515 load_args_win rest (ireg : usedInt) usedFP regs
2516 (code `appOL` arg_code ireg)
2517 where
2518 arg_rep = cmmExprType dflags arg
2519
2520 push_args [] code = return code
2521 push_args (arg:rest) code
2522 | isFloatType arg_rep = do
2523 (arg_reg, arg_code) <- getSomeReg arg
2524 delta <- getDeltaNat
2525 setDeltaNat (delta-arg_size)
2526 let code' = code `appOL` arg_code `appOL` toOL [
2527 SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp),
2528 DELTA (delta-arg_size),
2529 MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2530 push_args rest code'
2531
2532 | otherwise = do
2533 ASSERT(width == W64) return ()
2534 (arg_op, arg_code) <- getOperand arg
2535 delta <- getDeltaNat
2536 setDeltaNat (delta-arg_size)
2537 let code' = code `appOL` arg_code `appOL` toOL [
2538 PUSH II64 arg_op,
2539 DELTA (delta-arg_size)]
2540 push_args rest code'
2541 where
2542 arg_rep = cmmExprType dflags arg
2543 width = typeWidth arg_rep
2544
2545 leaveStackSpace n = do
2546 delta <- getDeltaNat
2547 setDeltaNat (delta - n * arg_size)
2548 return $ toOL [
2549 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2550 DELTA (delta - n * arg_size)]
2551
2552 maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
2553 maybePromoteCArg dflags wto arg
2554 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
2555 | otherwise = arg
2556 where
2557 wfrom = cmmExprWidth dflags arg
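-- For example, an 8-bit argument x promoted to W32 becomes
-- CmmMachOp (MO_UU_Conv W8 W32) [x], i.e. it is zero-extended to the
-- full C int width expected by the calling convention.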
2558
2559 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2560 outOfLineCmmOp mop res args
2561 = do
2562 dflags <- getDynFlags
2563 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
2564 let target = ForeignTarget targetExpr
2565 (ForeignConvention CCallConv [] [] CmmMayReturn)
2566
2567 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
2568 where
2569 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2570 -- TODO: Why is this ok? Under Linux this code will be in libm.so.
2571 -- Is it because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
2572 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2573
2574 fn = case mop of
2575 MO_F32_Sqrt -> fsLit "sqrtf"
2576 MO_F32_Sin -> fsLit "sinf"
2577 MO_F32_Cos -> fsLit "cosf"
2578 MO_F32_Tan -> fsLit "tanf"
2579 MO_F32_Exp -> fsLit "expf"
2580 MO_F32_Log -> fsLit "logf"
2581
2582 MO_F32_Asin -> fsLit "asinf"
2583 MO_F32_Acos -> fsLit "acosf"
2584 MO_F32_Atan -> fsLit "atanf"
2585
2586 MO_F32_Sinh -> fsLit "sinhf"
2587 MO_F32_Cosh -> fsLit "coshf"
2588 MO_F32_Tanh -> fsLit "tanhf"
2589 MO_F32_Pwr -> fsLit "powf"
2590
2591 MO_F64_Sqrt -> fsLit "sqrt"
2592 MO_F64_Sin -> fsLit "sin"
2593 MO_F64_Cos -> fsLit "cos"
2594 MO_F64_Tan -> fsLit "tan"
2595 MO_F64_Exp -> fsLit "exp"
2596 MO_F64_Log -> fsLit "log"
2597
2598 MO_F64_Asin -> fsLit "asin"
2599 MO_F64_Acos -> fsLit "acos"
2600 MO_F64_Atan -> fsLit "atan"
2601
2602 MO_F64_Sinh -> fsLit "sinh"
2603 MO_F64_Cosh -> fsLit "cosh"
2604 MO_F64_Tanh -> fsLit "tanh"
2605 MO_F64_Pwr -> fsLit "pow"
2606
2607 MO_Memcpy _ -> fsLit "memcpy"
2608 MO_Memset _ -> fsLit "memset"
2609 MO_Memmove _ -> fsLit "memmove"
2610
2611 MO_PopCnt _ -> fsLit "popcnt"
2612 MO_BSwap _ -> fsLit "bswap"
2613 MO_Clz w -> fsLit $ clzLabel w
2614 MO_Ctz _ -> unsupported
2615
2616 MO_AtomicRMW _ _ -> fsLit "atomicrmw"
2617 MO_AtomicRead _ -> fsLit "atomicread"
2618 MO_AtomicWrite _ -> fsLit "atomicwrite"
2619 MO_Cmpxchg _ -> fsLit "cmpxchg"
2620
2621 MO_UF_Conv _ -> unsupported
2622
2623 MO_S_QuotRem {} -> unsupported
2624 MO_U_QuotRem {} -> unsupported
2625 MO_U_QuotRem2 {} -> unsupported
2626 MO_Add2 {} -> unsupported
2627 MO_AddIntC {} -> unsupported
2628 MO_SubIntC {} -> unsupported
2629 MO_SubWordC {} -> unsupported
2630 MO_U_Mul2 {} -> unsupported
2631 MO_WriteBarrier -> unsupported
2632 MO_Touch -> unsupported
2633 (MO_Prefetch_Data _ ) -> unsupported
2634 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2635 ++ " not supported here")
2636
2637 -- -----------------------------------------------------------------------------
2638 -- Generating a table-branch
2639
2640 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2641
2642 genSwitch dflags expr targets
2643 | gopt Opt_PIC dflags
2644 = do
2645 (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset)
2646 -- getNonClobberedReg because it needs to survive across t_code
2647 lbl <- getNewLabelNat
2648 dflags <- getDynFlags
2649 let is32bit = target32Bit (targetPlatform dflags)
2650 os = platformOS (targetPlatform dflags)
2651 -- Might want to use .rodata.<function we're in> instead, but as
2652 -- long as it's something unique it'll work out since the
2653 -- references to the jump table are in the appropriate section.
2654 rosection = case os of
2655 -- on Mac OS X/x86_64, put the jump table in the text section to
2656 -- work around a limitation of the linker.
2657 -- ld64 is unable to handle the relocations for
2658 -- .quad L1 - L0
2659 -- if L0 is not preceded by a non-anonymous label in its section.
2660 OSDarwin | not is32bit -> Section Text lbl
2661 _ -> Section ReadOnlyData lbl
2662 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2663 (tableReg,t_code) <- getSomeReg $ dynRef
2664 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2665 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2666
2667 offsetReg <- getNewRegNat (intFormat (wordWidth dflags))
2668 return $ if is32bit || os == OSDarwin
2669 then e_code `appOL` t_code `appOL` toOL [
2670 ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
2671 JMP_TBL (OpReg tableReg) ids rosection lbl
2672 ]
2673 else -- HACK: On x86_64 binutils<2.17 is only able to generate
2674 -- PC32 relocations, hence we only get 32-bit offsets in
2675 -- the jump table. As these offsets are always negative
2676 -- we need to properly sign extend them to 64-bit. This
2677 -- hack should be removed in conjunction with the hack in
2678 -- PprMach.hs/pprDataItem once binutils 2.17 is standard.
2679 e_code `appOL` t_code `appOL` toOL [
2680 MOVSxL II32 op (OpReg offsetReg),
2681 ADD (intFormat (wordWidth dflags))
2682 (OpReg offsetReg)
2683 (OpReg tableReg),
2684 JMP_TBL (OpReg tableReg) ids rosection lbl
2685 ]
2686 | otherwise
2687 = do
2688 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2689 lbl <- getNewLabelNat
2690 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2691 code = e_code `appOL` toOL [
2692 JMP_TBL op ids (Section ReadOnlyData lbl) lbl
2693 ]
2694 return code
2695 where (offset, ids) = switchTargetsToTable targets
2696
2697 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2698 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2699 = Just (createJumpTable dflags ids section lbl)
2700 generateJumpTableForInstr _ _ = Nothing
2701
2702 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2703 -> GenCmmDecl (Alignment, CmmStatics) h g
2704 createJumpTable dflags ids section lbl
2705 = let jumpTable
2706 | gopt Opt_PIC dflags =
2707 let jumpTableEntryRel Nothing
2708 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2709 jumpTableEntryRel (Just blockid)
2710 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2711 where blockLabel = mkAsmTempLabel (getUnique blockid)
2712 in map jumpTableEntryRel ids
2713 | otherwise = map (jumpTableEntry dflags) ids
2714 in CmmData section (1, Statics lbl jumpTable)
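-- As a sketch (word-sized entries, shown schematically), the table for
-- blocks L1 and L2 with one missing entry looks roughly like:
--
--     Ltable:
--       non-PIC (absolute):      PIC (table-relative differences):
--         <word> L1                <word> L1 - Ltable
--         <word> 0                 <word> 0
--         <word> L2                <word> L2 - Ltable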
2715
2716 -- -----------------------------------------------------------------------------
2717 -- 'condIntReg' and 'condFltReg': condition codes into registers
2718
2719 -- Turn those condition codes into integers now (when they appear on
2720 -- the right hand side of an assignment).
2721 --
2722 -- (If applicable) Do not fill the delay slots here; you will confuse the
2723 -- register allocator.
2724
2725 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2726
2727 condIntReg cond x y = do
2728 CondCode _ cond cond_code <- condIntCode cond x y
2729 tmp <- getNewRegNat II8
2730 let
2731 code dst = cond_code `appOL` toOL [
2732 SETCC cond (OpReg tmp),
2733 MOVZxL II8 (OpReg tmp) (OpReg dst)
2734 ]
2735 return (Any II32 code)
2736
2737
2738
2739 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2740 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2741 where
2742 condFltReg_x87 = do
2743 CondCode _ cond cond_code <- condFltCode cond x y
2744 tmp <- getNewRegNat II8
2745 let
2746 code dst = cond_code `appOL` toOL [
2747 SETCC cond (OpReg tmp),
2748 MOVZxL II8 (OpReg tmp) (OpReg dst)
2749 ]
2750 return (Any II32 code)
2751
2752 condFltReg_sse2 = do
2753 CondCode _ cond cond_code <- condFltCode cond x y
2754 tmp1 <- getNewRegNat (archWordFormat is32Bit)
2755 tmp2 <- getNewRegNat (archWordFormat is32Bit)
2756 let
2757 -- We have to worry about unordered operands (eg. comparisons
2758 -- against NaN). If the operands are unordered, the comparison
2759 -- sets the parity flag, carry flag and zero flag.
2760 -- All comparisons are supposed to return false for unordered
2761 -- operands except for !=, which returns true.
2762 --
2763 -- Optimisation: we don't have to test the parity flag if we
2764 -- know the test has already excluded the unordered case: eg >
2765 -- and >= test for a zero carry flag, which can only occur for
2766 -- ordered operands.
2767 --
2768 -- ToDo: by reversing comparisons we could avoid testing the
2769 -- parity flag in more cases.
2770
2771 code dst =
2772 cond_code `appOL`
2773 (case cond of
2774 NE -> or_unordered dst
2775 GU -> plain_test dst
2776 GEU -> plain_test dst
2777 _ -> and_ordered dst)
2778
2779 plain_test dst = toOL [
2780 SETCC cond (OpReg tmp1),
2781 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2782 ]
2783 or_unordered dst = toOL [
2784 SETCC cond (OpReg tmp1),
2785 SETCC PARITY (OpReg tmp2),
2786 OR II8 (OpReg tmp1) (OpReg tmp2),
2787 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2788 ]
2789 and_ordered dst = toOL [
2790 SETCC cond (OpReg tmp1),
2791 SETCC NOTPARITY (OpReg tmp2),
2792 AND II8 (OpReg tmp1) (OpReg tmp2),
2793 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2794 ]
2795 return (Any II32 code)
2796
2797
2798 -- -----------------------------------------------------------------------------
2799 -- 'trivial*Code': deal with trivial instructions
2800
2801 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2802 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2803 -- Only look for constants on the right hand side, because that's
2804 -- where the generic optimizer will have put them.
2805
2806 -- Similarly, for unary instructions, we don't have to worry about
2807 -- matching an StInt as the argument, because genericOpt will already
2808 -- have handled the constant-folding.
2809
2810
2811 {-
2812 The Rules of the Game are:
2813
2814 * You cannot assume anything about the destination register dst;
2815 it may be anything, including a fixed reg.
2816
2817 * You may compute an operand into a fixed reg, but you may not
2818 subsequently change the contents of that fixed reg. If you
2819 want to do so, first copy the value either to a temporary
2820 or into dst. You are free to modify dst even if it happens
2821 to be a fixed reg -- that's not your problem.
2822
2823 * You cannot assume that a fixed reg will stay live over an
2824 arbitrary computation. The same applies to the dst reg.
2825
2826 * Temporary regs obtained from getNewRegNat are distinct from
2827 each other and from all other regs, and stay live over
2828 arbitrary computations.
2829
2830 --------------------
2831
2832 SDM's version of The Rules:
2833
2834 * If getRegister returns Any, that means it can generate correct
2835 code which places the result in any register, period. Even if that
2836 register happens to be read during the computation.
2837
2838 Corollary #1: this means that if you are generating code for an
2839 operation with two arbitrary operands, you cannot assign the result
2840 of the first operand into the destination register before computing
2841 the second operand. The second operand might require the old value
2842 of the destination register.
2843
2844 Corollary #2: A function might be able to generate more efficient
2845 code if it knows the destination register is a new temporary (and
2846 therefore not read by any of the sub-computations).
2847
2848 * If getRegister returns Any, then the code it generates may modify only:
2849 (a) fresh temporaries
2850 (b) the destination register
2851 (c) known registers (eg. %ecx is used by shifts)
2852 In particular, it may *not* modify global registers, unless the global
2853 register happens to be the destination register.
2854 -}
2855
2856 trivialCode :: Width -> (Operand -> Operand -> Instr)
2857 -> Maybe (Operand -> Operand -> Instr)
2858 -> CmmExpr -> CmmExpr -> NatM Register
2859 trivialCode width instr m a b
2860 = do is32Bit <- is32BitPlatform
2861 trivialCode' is32Bit width instr m a b
2862
2863 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
2864 -> Maybe (Operand -> Operand -> Instr)
2865 -> CmmExpr -> CmmExpr -> NatM Register
2866 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
2867 | is32BitLit is32Bit lit_a = do
2868 b_code <- getAnyReg b
2869 let
2870 code dst
2871 = b_code dst `snocOL`
2872 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2873 return (Any (intFormat width) code)
2874
2875 trivialCode' _ width instr _ a b
2876 = genTrivialCode (intFormat width) instr a b
2877
2878 -- This is re-used for floating pt instructions too.
2879 genTrivialCode :: Format -> (Operand -> Operand -> Instr)
2880 -> CmmExpr -> CmmExpr -> NatM Register
2881 genTrivialCode rep instr a b = do
2882 (b_op, b_code) <- getNonClobberedOperand b
2883 a_code <- getAnyReg a
2884 tmp <- getNewRegNat rep
2885 let
2886 -- We want the value of b to stay alive across the computation of a.
2887 -- But, we want to calculate a straight into the destination register,
2888 -- because the instruction only has two operands (dst := dst `op` src).
2889 -- The troublesome case is when the result of b is in the same register
2890 -- as the destination reg. In this case, we have to save b in a
2891 -- new temporary across the computation of a.
2892 code dst
2893 | dst `regClashesWithOp` b_op =
2894 b_code `appOL`
2895 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2896 a_code dst `snocOL`
2897 instr (OpReg tmp) (OpReg dst)
2898 | otherwise =
2899 b_code `appOL`
2900 a_code dst `snocOL`
2901 instr b_op (OpReg dst)
2902 return (Any rep code)
2903
2904 regClashesWithOp :: Reg -> Operand -> Bool
2905 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2906 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2907 _ `regClashesWithOp` _ = False
2908
2909 -----------
2910
2911 trivialUCode :: Format -> (Operand -> Instr)
2912 -> CmmExpr -> NatM Register
2913 trivialUCode rep instr x = do
2914 x_code <- getAnyReg x
2915 let
2916 code dst =
2917 x_code dst `snocOL`
2918 instr (OpReg dst)
2919 return (Any rep code)
2920
2921 -----------
2922
2923 trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr)
2924 -> CmmExpr -> CmmExpr -> NatM Register
2925 trivialFCode_x87 instr x y = do
2926 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2927 (y_reg, y_code) <- getSomeReg y
2928 let
2929 format = FF80 -- always, on x87
2930 code dst =
2931 x_code `appOL`
2932 y_code `snocOL`
2933 instr format x_reg y_reg dst
2934 return (Any format code)
2935
2936 trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
2937 -> CmmExpr -> CmmExpr -> NatM Register
2938 trivialFCode_sse2 pk instr x y
2939 = genTrivialCode format (instr format) x y
2940 where format = floatFormat pk
2941
2942
2943 trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2944 trivialUFCode format instr x = do
2945 (x_reg, x_code) <- getSomeReg x
2946 let
2947 code dst =
2948 x_code `snocOL`
2949 instr x_reg dst
2950 return (Any format code)
2951
2952
2953 --------------------------------------------------------------------------------
2954 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2955 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2956 where
2957 coerce_x87 = do
2958 (x_reg, x_code) <- getSomeReg x
2959 let
2960 opc = case to of W32 -> GITOF; W64 -> GITOD;
2961 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2962 ++ show n ++ ")"
2963 code dst = x_code `snocOL` opc x_reg dst
2964 -- ToDo: works for non-II32 reps?
2965 return (Any FF80 code)
2966
2967 coerce_sse2 = do
2968 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2969 let
2970 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2971 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2972 ++ show n ++ ")"
2973 code dst = x_code `snocOL` opc (intFormat from) x_op dst
2974 return (Any (floatFormat to) code)
2975 -- works even if the destination rep is <II32
2976
2977 --------------------------------------------------------------------------------
2978 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2979 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2980 where
2981 coerceFP2Int_x87 = do
2982 (x_reg, x_code) <- getSomeReg x
2983 let
2984 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2985 n -> panic $ "coerceFP2Int.x87: unhandled width ("
2986 ++ show n ++ ")"
2987 code dst = x_code `snocOL` opc x_reg dst
2988 -- ToDo: works for non-II32 reps?
2989 return (Any (intFormat to) code)
2990
2991 coerceFP2Int_sse2 = do
2992 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2993 let
2994 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
2995 n -> panic $ "coerceFP2Int.sse: unhandled width ("
2996 ++ show n ++ ")"
2997 code dst = x_code `snocOL` opc (intFormat to) x_op dst
2998 return (Any (intFormat to) code)
2999 -- works even if the destination rep is <II32
3000
3001
3002 --------------------------------------------------------------------------------
3003 coerceFP2FP :: Width -> CmmExpr -> NatM Register
3004 coerceFP2FP to x = do
3005 use_sse2 <- sse2Enabled
3006 (x_reg, x_code) <- getSomeReg x
3007 let
3008 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
3009 n -> panic $ "coerceFP2FP: unhandled width ("
3010 ++ show n ++ ")"
3011 | otherwise = GDTOF
3012 code dst = x_code `snocOL` opc x_reg dst
3013 return (Any (if use_sse2 then floatFormat to else FF80) code)
3014
3015 --------------------------------------------------------------------------------
3016
3017 sse2NegCode :: Width -> CmmExpr -> NatM Register
3018 sse2NegCode w x = do
3019 let fmt = floatFormat w
3020 x_code <- getAnyReg x
3021 -- This is how gcc does it, so it can't be that bad:
3022 let
3023 const | FF32 <- fmt = CmmInt 0x80000000 W32
3024 | otherwise = CmmInt 0x8000000000000000 W64
3025 Amode amode amode_code <- memConstant (widthInBytes w) const
3026 tmp <- getNewRegNat fmt
3027 let
3028 code dst = x_code dst `appOL` amode_code `appOL` toOL [
3029 MOV fmt (OpAddr amode) (OpReg tmp),
3030 XOR fmt (OpReg tmp) (OpReg dst)
3031 ]
3032 --
3033 return (Any fmt code)
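-- A worked example: for FF64 the mask is 0x8000000000000000, so the XOR
-- flips only the sign bit, e.g. 1.0 (0x3FF0000000000000) becomes -1.0
-- (0xBFF0000000000000). Unlike computing 0.0 - x, this also maps +0.0
-- to -0.0 and leaves NaN payloads untouched.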
3034
3035 isVecExpr :: CmmExpr -> Bool
3036 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
3037 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
3038 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
3039 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
3040 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
3041 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
3042 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
3043 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
3044 isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
3045 isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
3046 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
3047 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
3048 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
3049 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
3050 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
3051 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
3052 isVecExpr _ = False
3053
3054 needLlvm :: NatM a
3055 needLlvm =
3056 sorry $ unlines ["The native code generator does not support vector"
3057 ,"instructions. Please use -fllvm."]