1 {-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2004
8 --
9 -----------------------------------------------------------------------------
10
11 -- This is a big module, but, if you pay attention to
12 -- (a) the sectioning, and (b) the type signatures, the
13 -- structure should not be too overwhelming.
14
15 module X86.CodeGen (
16 cmmTopCodeGen,
17 generateJumpTableForInstr,
18 InstrBlock
19 )
20
21 where
22
23 #include "HsVersions.h"
24 #include "nativeGen/NCG.h"
25 #include "../includes/MachDeps.h"
26
27 -- NCG stuff:
28 import X86.Instr
29 import X86.Cond
30 import X86.Regs
31 import X86.RegInfo
32 import CodeGen.Platform
33 import CPrim
34 import Debug ( DebugBlock(..) )
35 import Instruction
36 import PIC
37 import NCGMonad
38 import Format
39 import Reg
40 import Platform
41
42 -- Our intermediate code:
43 import BasicTypes
44 import BlockId
45 import Module ( primPackageKey )
46 import PprCmm ()
47 import CmmUtils
48 import CmmSwitch
49 import Cmm
50 import Hoopl
51 import CLabel
52 import CoreSyn ( Tickish(..) )
53 import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
54
55 -- The rest:
56 import ForeignCall ( CCallConv(..) )
57 import OrdList
58 import Outputable
59 import Unique
60 import FastString
61 import DynFlags
62 import Util
63
64 import Control.Monad
65 import Data.Bits
66 import Data.Int
67 import Data.Maybe
68 import Data.Word
69
70 is32BitPlatform :: NatM Bool
71 is32BitPlatform = do
72 dflags <- getDynFlags
73 return $ target32Bit (targetPlatform dflags)
74
75 sse2Enabled :: NatM Bool
76 sse2Enabled = do
77 dflags <- getDynFlags
78 return (isSse2Enabled dflags)
79
80 sse4_2Enabled :: NatM Bool
81 sse4_2Enabled = do
82 dflags <- getDynFlags
83 return (isSse4_2Enabled dflags)
84
85 if_sse2 :: NatM a -> NatM a -> NatM a
86 if_sse2 sse2 x87 = do
87 b <- sse2Enabled
88 if b then sse2 else x87
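-- A usage sketch: the float-literal case of getRegister' below picks its
-- code path with exactly this call:
--
-- if_sse2 float_const_sse2 float_const_x87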
89
90 cmmTopCodeGen
91 :: RawCmmDecl
92 -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]
93
94 cmmTopCodeGen (CmmProc info lab live graph) = do
95 let blocks = toBlockListEntryFirst graph
96 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
97 picBaseMb <- getPicBaseMaybeNat
98 dflags <- getDynFlags
99 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
100 tops = proc : concat statics
101 os = platformOS $ targetPlatform dflags
102
103 case picBaseMb of
104 Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
105 Nothing -> return tops
106
107 cmmTopCodeGen (CmmData sec dat) = do
108 return [CmmData sec (1, dat)] -- no translation, we just use CmmStatic
109
110
111 basicBlockCodeGen
112 :: CmmBlock
113 -> NatM ( [NatBasicBlock Instr]
114 , [NatCmmDecl (Alignment, CmmStatics) Instr])
115
116 basicBlockCodeGen block = do
117 let (_, nodes, tail) = blockSplit block
118 id = entryLabel block
119 stmts = blockToList nodes
120 -- Generate location directive
121 dbg <- getDebugBlock (entryLabel block)
122 loc_instrs <- case dblSourceTick =<< dbg of
123 Just (SourceNote span name)
124 -> do fileId <- getFileId (srcSpanFile span)
125 let line = srcSpanStartLine span; col = srcSpanStartCol span
126 return $ unitOL $ LOCATION fileId line col name
127 _ -> return nilOL
128 mid_instrs <- stmtsToInstrs stmts
129 tail_instrs <- stmtToInstrs tail
130 let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
131 -- code generation may introduce new basic block boundaries, which
132 -- are indicated by the NEWBLOCK instruction. We must split up the
133 -- instruction stream into basic blocks again. Also, we extract
134 -- LDATAs here too.
135 let
136 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
137
138 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
139 = ([], BasicBlock id instrs : blocks, statics)
140 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
141 = (instrs, blocks, CmmData sec dat:statics)
142 mkBlocks instr (instrs,blocks,statics)
143 = (instr:instrs, blocks, statics)
144 return (BasicBlock id top : other_blocks, statics)
145
146
147 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
148 stmtsToInstrs stmts
149 = do instrss <- mapM stmtToInstrs stmts
150 return (concatOL instrss)
151
152
153 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
154 stmtToInstrs stmt = do
155 dflags <- getDynFlags
156 is32Bit <- is32BitPlatform
157 case stmt of
158 CmmComment s -> return (unitOL (COMMENT s))
159 CmmTick {} -> return nilOL
160 CmmUnwind {} -> return nilOL
161
162 CmmAssign reg src
163 | isFloatType ty -> assignReg_FltCode format reg src
164 | is32Bit && isWord64 ty -> assignReg_I64Code reg src
165 | otherwise -> assignReg_IntCode format reg src
166 where ty = cmmRegType dflags reg
167 format = cmmTypeFormat ty
168
169 CmmStore addr src
170 | isFloatType ty -> assignMem_FltCode format addr src
171 | is32Bit && isWord64 ty -> assignMem_I64Code addr src
172 | otherwise -> assignMem_IntCode format addr src
173 where ty = cmmExprType dflags src
174 format = cmmTypeFormat ty
175
176 CmmUnsafeForeignCall target result_regs args
177 -> genCCall dflags is32Bit target result_regs args
178
179 CmmBranch id -> genBranch id
180 CmmCondBranch arg true false -> do b1 <- genCondJump true arg
181 b2 <- genBranch false
182 return (b1 `appOL` b2)
183 CmmSwitch arg ids -> do dflags <- getDynFlags
184 genSwitch dflags arg ids
185 CmmCall { cml_target = arg
186 , cml_args_regs = gregs } -> do
187 dflags <- getDynFlags
188 genJump arg (jumpRegs dflags gregs)
189 _ ->
190 panic "stmtToInstrs: statement should have been cps'd away"
191
192
193 jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
194 jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
195 where platform = targetPlatform dflags
196
197 --------------------------------------------------------------------------------
198 -- | 'InstrBlock's are the insn sequences generated by the insn selectors.
199 -- They are really trees of insns to facilitate fast appending, where a
200 -- left-to-right traversal yields the insns in the correct order.
201 --
202 type InstrBlock
203 = OrdList Instr
204
205
206 -- | Condition codes passed up the tree.
207 --
208 data CondCode
209 = CondCode Bool Cond InstrBlock
210
211
212 -- | a.k.a "Register64"
213 -- Reg is the lower 32-bit temporary which contains the result.
214 -- Use getHiVRegFromLo to find the other VRegUnique.
215 --
216 -- The rule of this simplified insn selection game is therefore
217 -- that the returned Reg may be modified.
218 --
219 data ChildCode64
220 = ChildCode64
221 InstrBlock
222 Reg
223
224
225 -- | Registers passed up the tree. If the stix code forces the register
226 -- to live in a pre-decided machine register, it comes out as @Fixed@;
227 -- otherwise, it comes out as @Any@, and the parent can decide which
228 -- register to put it in.
229 --
230 data Register
231 = Fixed Format Reg InstrBlock
232 | Any Format (Reg -> InstrBlock)
233
234
235 swizzleRegisterRep :: Register -> Format -> Register
236 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
237 swizzleRegisterRep (Any _ codefn) format = Any format codefn
238
239
240 -- | Grab the Reg for a CmmReg
241 getRegisterReg :: Platform -> Bool -> CmmReg -> Reg
242
243 getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
244 = let fmt = cmmTypeFormat pk in
245 if isFloatFormat fmt && not use_sse2
246 then RegVirtual (mkVirtualReg u FF80)
247 else RegVirtual (mkVirtualReg u fmt)
248
249 getRegisterReg platform _ (CmmGlobal mid)
250 = case globalRegMaybe platform mid of
251 Just reg -> RegReal $ reg
252 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
253 -- By this stage, the only MagicIds remaining should be the
254 -- ones which map to a real machine register on this
255 -- platform. Hence ...
256
257
258 -- | Memory addressing modes passed up the tree.
259 data Amode
260 = Amode AddrMode InstrBlock
261
262 {-
263 Now, given a tree (the argument to an CmmLoad) that references memory,
264 produce a suitable addressing mode.
265
266 A Rule of the Game (tm) for Amodes: use of the addr bit must
267 immediately follow use of the code part, since the code part puts
268 values in registers which the addr then refers to. So you can't put
269 anything in between, lest it overwrite some of those registers. If
270 you need to do some other computation between the code part and use of
271 the addr bit, first store the effective address from the amode in a
272 temporary, then do the other computation, and then use the temporary:
273
274 code
275 LEA amode, tmp
276 ... other computation ...
277 ... (tmp) ...
278 -}
279
280
281 -- | Check whether an integer will fit in 32 bits.
282 -- A CmmInt is intended to be truncated to the appropriate
283 -- number of bits, so here we truncate it to Int64. This is
284 -- important because e.g. -1 as a CmmInt might be either
285 -- -1 or 18446744073709551615.
286 --
287 is32BitInteger :: Integer -> Bool
288 is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
289 where i64 = fromIntegral i :: Int64
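-- A quick check of the truncation behaviour (hypothetical GHCi session):
--
-- >>> (fromIntegral (18446744073709551615 :: Integer) :: Int64)
-- -1
-- >>> is32BitInteger 18446744073709551615 -- i.e. -1 at W64
-- True
-- >>> is32BitInteger 0x80000000
-- False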
290
291
292 -- | Convert a BlockId to some CmmStatic data
293 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
294 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
295 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
296 where blockLabel = mkAsmTempLabel (getUnique blockid)
297
298
299 -- -----------------------------------------------------------------------------
300 -- General things for putting together code sequences
301
302 -- Expand CmmRegOff. ToDo: should we do it this way around, or convert
303 -- CmmExprs into CmmRegOff?
304 mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
305 mangleIndexTree dflags reg off
306 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
307 where width = typeWidth (cmmRegType dflags reg)
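-- For example, @CmmRegOff r 8@ on a W32 register expands to
-- @CmmMachOp (MO_Add W32) [CmmReg r, CmmLit (CmmInt 8 W32)]@, which the
-- addressing-mode rules in getAmode' can fold straight back into an Amode.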
308
309 -- | The dual to getAnyReg: compute an expression into a register, but
310 -- we don't mind which one it is.
311 getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
312 getSomeReg expr = do
313 r <- getRegister expr
314 case r of
315 Any rep code -> do
316 tmp <- getNewRegNat rep
317 return (tmp, code tmp)
318 Fixed _ reg code ->
319 return (reg, code)
320
321
322 assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
323 assignMem_I64Code addrTree valueTree = do
324 Amode addr addr_code <- getAmode addrTree
325 ChildCode64 vcode rlo <- iselExpr64 valueTree
326 let
327 rhi = getHiVRegFromLo rlo
328
329 -- Little-endian store
330 mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
331 mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
332 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
333
334
335 assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
336 assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
337 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
338 let
339 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
340 r_dst_hi = getHiVRegFromLo r_dst_lo
341 r_src_hi = getHiVRegFromLo r_src_lo
342 mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
343 mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
344 return (
345 vcode `snocOL` mov_lo `snocOL` mov_hi
346 )
347
348 assignReg_I64Code _ _
349 = panic "assignReg_I64Code(i386): invalid lvalue"
350
351
352 iselExpr64 :: CmmExpr -> NatM ChildCode64
353 iselExpr64 (CmmLit (CmmInt i _)) = do
354 (rlo,rhi) <- getNewRegPairNat II32
355 let
356 r = fromIntegral (fromIntegral i :: Word32)
357 q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
358 code = toOL [
359 MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
360 MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
361 ]
362 return (ChildCode64 code rlo)
363
364 iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
365 Amode addr addr_code <- getAmode addrTree
366 (rlo,rhi) <- getNewRegPairNat II32
367 let
368 mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
369 mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
370 return (
371 ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
372 rlo
373 )
374
375 iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
376 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
377
378 -- we handle addition, but rather badly
379 iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
380 ChildCode64 code1 r1lo <- iselExpr64 e1
381 (rlo,rhi) <- getNewRegPairNat II32
382 let
383 r = fromIntegral (fromIntegral i :: Word32)
384 q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
385 r1hi = getHiVRegFromLo r1lo
386 code = code1 `appOL`
387 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
388 ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
389 MOV II32 (OpReg r1hi) (OpReg rhi),
390 ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
391 return (ChildCode64 code rlo)
392
393 iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
394 ChildCode64 code1 r1lo <- iselExpr64 e1
395 ChildCode64 code2 r2lo <- iselExpr64 e2
396 (rlo,rhi) <- getNewRegPairNat II32
397 let
398 r1hi = getHiVRegFromLo r1lo
399 r2hi = getHiVRegFromLo r2lo
400 code = code1 `appOL`
401 code2 `appOL`
402 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
403 ADD II32 (OpReg r2lo) (OpReg rlo),
404 MOV II32 (OpReg r1hi) (OpReg rhi),
405 ADC II32 (OpReg r2hi) (OpReg rhi) ]
406 return (ChildCode64 code rlo)
407
408 iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
409 ChildCode64 code1 r1lo <- iselExpr64 e1
410 ChildCode64 code2 r2lo <- iselExpr64 e2
411 (rlo,rhi) <- getNewRegPairNat II32
412 let
413 r1hi = getHiVRegFromLo r1lo
414 r2hi = getHiVRegFromLo r2lo
415 code = code1 `appOL`
416 code2 `appOL`
417 toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
418 SUB II32 (OpReg r2lo) (OpReg rlo),
419 MOV II32 (OpReg r1hi) (OpReg rhi),
420 SBB II32 (OpReg r2hi) (OpReg rhi) ]
421 return (ChildCode64 code rlo)
422
423 iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
424 fn <- getAnyReg expr
425 r_dst_lo <- getNewRegNat II32
426 let r_dst_hi = getHiVRegFromLo r_dst_lo
427 code = fn r_dst_lo
428 return (
429 ChildCode64 (code `snocOL`
430 MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
431 r_dst_lo
432 )
433
434 iselExpr64 expr
435 = pprPanic "iselExpr64(i386)" (ppr expr)
436
437
438 --------------------------------------------------------------------------------
439 getRegister :: CmmExpr -> NatM Register
440 getRegister e = do dflags <- getDynFlags
441 is32Bit <- is32BitPlatform
442 getRegister' dflags is32Bit e
443
444 getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register
445
446 getRegister' dflags is32Bit (CmmReg reg)
447 = case reg of
448 CmmGlobal PicBaseReg
449 | is32Bit ->
450 -- on x86_64, we have %rip for PicBaseReg, but it's not
451 -- a full-featured register, it can only be used for
452 -- rip-relative addressing.
453 do reg' <- getPicBaseNat (archWordFormat is32Bit)
454 return (Fixed (archWordFormat is32Bit) reg' nilOL)
455 _ ->
456 do use_sse2 <- sse2Enabled
457 let
458 fmt = cmmTypeFormat (cmmRegType dflags reg)
459 format | not use_sse2 && isFloatFormat fmt = FF80
460 | otherwise = fmt
461 --
462 let platform = targetPlatform dflags
463 return (Fixed format
464 (getRegisterReg platform use_sse2 reg)
465 nilOL)
466
467
468 getRegister' dflags is32Bit (CmmRegOff r n)
469 = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
470
471 -- for 32-bit architectures, support some 64 -> 32 bit conversions:
472 -- TO_W_(x), TO_W_(x >> 32)
473
474 getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
475 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
476 | is32Bit = do
477 ChildCode64 code rlo <- iselExpr64 x
478 return $ Fixed II32 (getHiVRegFromLo rlo) code
479
480 getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
481 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
482 | is32Bit = do
483 ChildCode64 code rlo <- iselExpr64 x
484 return $ Fixed II32 (getHiVRegFromLo rlo) code
485
486 getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
487 | is32Bit = do
488 ChildCode64 code rlo <- iselExpr64 x
489 return $ Fixed II32 rlo code
490
491 getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
492 | is32Bit = do
493 ChildCode64 code rlo <- iselExpr64 x
494 return $ Fixed II32 rlo code
495
496 getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
497 if_sse2 float_const_sse2 float_const_x87
498 where
499 float_const_sse2
500 | f == 0.0 = do
501 let
502 format = floatFormat w
503 code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
504 -- I don't know why there are xorpd, xorps, and pxor instructions.
505 -- They all appear to do the same thing --SDM
506 return (Any format code)
507
508 | otherwise = do
509 Amode addr code <- memConstant (widthInBytes w) lit
510 loadFloatAmode True w addr code
511
512 float_const_x87 = case w of
513 W64
514 | f == 0.0 ->
515 let code dst = unitOL (GLDZ dst)
516 in return (Any FF80 code)
517
518 | f == 1.0 ->
519 let code dst = unitOL (GLD1 dst)
520 in return (Any FF80 code)
521
522 _otherwise -> do
523 Amode addr code <- memConstant (widthInBytes w) lit
524 loadFloatAmode False w addr code
525
526 -- catch simple cases of zero- or sign-extended load
527 getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
528 code <- intLoadCode (MOVZxL II8) addr
529 return (Any II32 code)
530
531 getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
532 code <- intLoadCode (MOVSxL II8) addr
533 return (Any II32 code)
534
535 getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
536 code <- intLoadCode (MOVZxL II16) addr
537 return (Any II32 code)
538
539 getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
540 code <- intLoadCode (MOVSxL II16) addr
541 return (Any II32 code)
542
543 -- catch simple cases of zero- or sign-extended load
544 getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
545 | not is32Bit = do
546 code <- intLoadCode (MOVZxL II8) addr
547 return (Any II64 code)
548
549 getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
550 | not is32Bit = do
551 code <- intLoadCode (MOVSxL II8) addr
552 return (Any II64 code)
553
554 getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
555 | not is32Bit = do
556 code <- intLoadCode (MOVZxL II16) addr
557 return (Any II64 code)
558
559 getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
560 | not is32Bit = do
561 code <- intLoadCode (MOVSxL II16) addr
562 return (Any II64 code)
563
564 getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
565 | not is32Bit = do
566 code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
567 return (Any II64 code)
568
569 getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
570 | not is32Bit = do
571 code <- intLoadCode (MOVSxL II32) addr
572 return (Any II64 code)
573
574 getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
575 CmmLit displacement])
576 | not is32Bit = do
577 return $ Any II64 (\dst -> unitOL $
578 LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
579
580 getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
581 sse2 <- sse2Enabled
582 case mop of
583 MO_F_Neg w
584 | sse2 -> sse2NegCode w x
585 | otherwise -> trivialUFCode FF80 (GNEG FF80) x
586
587 MO_S_Neg w -> triv_ucode NEGI (intFormat w)
588 MO_Not w -> triv_ucode NOT (intFormat w)
589
590 -- Nop conversions
591 MO_UU_Conv W32 W8 -> toI8Reg W32 x
592 MO_SS_Conv W32 W8 -> toI8Reg W32 x
593 MO_UU_Conv W16 W8 -> toI8Reg W16 x
594 MO_SS_Conv W16 W8 -> toI8Reg W16 x
595 MO_UU_Conv W32 W16 -> toI16Reg W32 x
596 MO_SS_Conv W32 W16 -> toI16Reg W32 x
597
598 MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
599 MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
600 MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
601 MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
602 MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
603 MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
604
605 MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
606 MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
607
608 -- widenings
609 MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
610 MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
611 MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
612
613 MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
614 MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
615 MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
616
617 MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x
618 MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
619 MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
620 MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x
621 MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
622 MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
623 -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
624 -- However, we don't want the register allocator to throw it
625 -- away as an unnecessary reg-to-reg move, so we keep it in
626 -- the form of a movzl and print it as a movl later.
627
628 MO_FF_Conv W32 W64
629 | sse2 -> coerceFP2FP W64 x
630 | otherwise -> conversionNop FF80 x
631
632 MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
633
634 MO_FS_Conv from to -> coerceFP2Int from to x
635 MO_SF_Conv from to -> coerceInt2FP from to x
636
637 MO_V_Insert {} -> needLlvm
638 MO_V_Extract {} -> needLlvm
639 MO_V_Add {} -> needLlvm
640 MO_V_Sub {} -> needLlvm
641 MO_V_Mul {} -> needLlvm
642 MO_VS_Quot {} -> needLlvm
643 MO_VS_Rem {} -> needLlvm
644 MO_VS_Neg {} -> needLlvm
645 MO_VU_Quot {} -> needLlvm
646 MO_VU_Rem {} -> needLlvm
647 MO_VF_Insert {} -> needLlvm
648 MO_VF_Extract {} -> needLlvm
649 MO_VF_Add {} -> needLlvm
650 MO_VF_Sub {} -> needLlvm
651 MO_VF_Mul {} -> needLlvm
652 MO_VF_Quot {} -> needLlvm
653 MO_VF_Neg {} -> needLlvm
654
655 _other -> pprPanic "getRegister" (pprMachOp mop)
656 where
657 triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
658 triv_ucode instr format = trivialUCode format (instr format) x
659
660 -- signed or unsigned extension.
661 integerExtend :: Width -> Width
662 -> (Format -> Operand -> Operand -> Instr)
663 -> CmmExpr -> NatM Register
664 integerExtend from to instr expr = do
665 (reg,e_code) <- if from == W8 then getByteReg expr
666 else getSomeReg expr
667 let
668 code dst =
669 e_code `snocOL`
670 instr (intFormat from) (OpReg reg) (OpReg dst)
671 return (Any (intFormat to) code)
672
673 toI8Reg :: Width -> CmmExpr -> NatM Register
674 toI8Reg new_rep expr
675 = do codefn <- getAnyReg expr
676 return (Any (intFormat new_rep) codefn)
677 -- HACK: use getAnyReg to get a byte-addressable register.
678 -- If the source was a Fixed register, this will add the
679 -- mov instruction to put it into the desired destination.
680 -- We're assuming that the destination won't be a fixed
681 -- non-byte-addressable register; it won't be, because all
682 -- fixed registers are word-sized.
683
684 toI16Reg = toI8Reg -- for now
685
686 conversionNop :: Format -> CmmExpr -> NatM Register
687 conversionNop new_format expr
688 = do e_code <- getRegister' dflags is32Bit expr
689 return (swizzleRegisterRep e_code new_format)
690
691
692 getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
693 sse2 <- sse2Enabled
694 case mop of
695 MO_F_Eq _ -> condFltReg is32Bit EQQ x y
696 MO_F_Ne _ -> condFltReg is32Bit NE x y
697 MO_F_Gt _ -> condFltReg is32Bit GTT x y
698 MO_F_Ge _ -> condFltReg is32Bit GE x y
699 MO_F_Lt _ -> condFltReg is32Bit LTT x y
700 MO_F_Le _ -> condFltReg is32Bit LE x y
701
702 MO_Eq _ -> condIntReg EQQ x y
703 MO_Ne _ -> condIntReg NE x y
704
705 MO_S_Gt _ -> condIntReg GTT x y
706 MO_S_Ge _ -> condIntReg GE x y
707 MO_S_Lt _ -> condIntReg LTT x y
708 MO_S_Le _ -> condIntReg LE x y
709
710 MO_U_Gt _ -> condIntReg GU x y
711 MO_U_Ge _ -> condIntReg GEU x y
712 MO_U_Lt _ -> condIntReg LU x y
713 MO_U_Le _ -> condIntReg LEU x y
714
715 MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y
716 | otherwise -> trivialFCode_x87 GADD x y
717 MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y
718 | otherwise -> trivialFCode_x87 GSUB x y
719 MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y
720 | otherwise -> trivialFCode_x87 GDIV x y
721 MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y
722 | otherwise -> trivialFCode_x87 GMUL x y
723
724 MO_Add rep -> add_code rep x y
725 MO_Sub rep -> sub_code rep x y
726
727 MO_S_Quot rep -> div_code rep True True x y
728 MO_S_Rem rep -> div_code rep True False x y
729 MO_U_Quot rep -> div_code rep False True x y
730 MO_U_Rem rep -> div_code rep False False x y
731
732 MO_S_MulMayOflo rep -> imulMayOflo rep x y
733
734 MO_Mul rep -> triv_op rep IMUL
735 MO_And rep -> triv_op rep AND
736 MO_Or rep -> triv_op rep OR
737 MO_Xor rep -> triv_op rep XOR
738
739 {- Shift ops on x86 constrain their shift-amount operand: it must
740 be an immediate, %cl, or the constant 1
741 => trivialCode is not restrictive enough (sigh.)
742 -}
743 MO_Shl rep -> shift_code rep SHL x y {-False-}
744 MO_U_Shr rep -> shift_code rep SHR x y {-False-}
745 MO_S_Shr rep -> shift_code rep SAR x y {-False-}
746
747 MO_V_Insert {} -> needLlvm
748 MO_V_Extract {} -> needLlvm
749 MO_V_Add {} -> needLlvm
750 MO_V_Sub {} -> needLlvm
751 MO_V_Mul {} -> needLlvm
752 MO_VS_Quot {} -> needLlvm
753 MO_VS_Rem {} -> needLlvm
754 MO_VS_Neg {} -> needLlvm
755 MO_VF_Insert {} -> needLlvm
756 MO_VF_Extract {} -> needLlvm
757 MO_VF_Add {} -> needLlvm
758 MO_VF_Sub {} -> needLlvm
759 MO_VF_Mul {} -> needLlvm
760 MO_VF_Quot {} -> needLlvm
761 MO_VF_Neg {} -> needLlvm
762
763 _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
764 where
765 --------------------
766 triv_op width instr = trivialCode width op (Just op) x y
767 where op = instr (intFormat width)
768
769 imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
770 imulMayOflo rep a b = do
771 (a_reg, a_code) <- getNonClobberedReg a
772 b_code <- getAnyReg b
773 let
774 shift_amt = case rep of
775 W32 -> 31
776 W64 -> 63
777 _ -> panic "shift_amt"
778
779 format = intFormat rep
780 code = a_code `appOL` b_code eax `appOL`
781 toOL [
782 IMUL2 format (OpReg a_reg), -- result in %edx:%eax
783 SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
784 -- sign extend lower part
785 SUB format (OpReg edx) (OpReg eax)
786 -- compare against upper
787 -- eax==0 if high part == sign extended low part
788 ]
789 return (Fixed format eax code)
790
791 --------------------
792 shift_code :: Width
793 -> (Format -> Operand -> Operand -> Instr)
794 -> CmmExpr
795 -> CmmExpr
796 -> NatM Register
797
798 {- Case1: shift length as immediate -}
799 shift_code width instr x (CmmLit lit) = do
800 x_code <- getAnyReg x
801 let
802 format = intFormat width
803 code dst
804 = x_code dst `snocOL`
805 instr format (OpImm (litToImm lit)) (OpReg dst)
806 return (Any format code)
807
808 {- Case2: shift length is complex (non-immediate)
809 * y must go in %ecx.
810 * we cannot do y first *and* put its result in %ecx, because
811 %ecx might be clobbered by x.
812 * if we do y second, then x cannot be
813 in a clobbered reg. Also, we cannot clobber x's reg
814 with the instruction itself.
815 * so we can either:
816 - do y first, put its result in a fresh tmp, then copy it to %ecx later
817 - do y second and put its result into %ecx. x gets placed in a fresh
818 tmp. This is likely to be better, because the reg alloc can
819 eliminate this reg->reg move here (it won't eliminate the other one,
820 because the move is into the fixed %ecx).
821 -}
822 shift_code width instr x y{-amount-} = do
823 x_code <- getAnyReg x
824 let format = intFormat width
825 tmp <- getNewRegNat format
826 y_code <- getAnyReg y
827 let
828 code = x_code tmp `appOL`
829 y_code ecx `snocOL`
830 instr format (OpReg ecx) (OpReg tmp)
831 return (Fixed format tmp code)
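-- A sketch of what the non-immediate case emits (hypothetical registers;
-- the move into tmp is the one the allocator can coalesce away):
--
-- movl %x, %tmp
-- movl %y, %ecx
-- shll %cl, %tmp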
832
833 --------------------
834 add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
835 add_code rep x (CmmLit (CmmInt y _))
836 | is32BitInteger y = add_int rep x y
837 add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
838 where format = intFormat rep
839 -- TODO: There are other interesting patterns we want to replace
840 -- with a LEA, e.g. `(x + offset) + (y << shift)`.
841
842 --------------------
843 sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
844 sub_code rep x (CmmLit (CmmInt y _))
845 | is32BitInteger (-y) = add_int rep x (-y)
846 sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
847
848 -- our three-operand add instruction:
849 add_int width x y = do
850 (x_reg, x_code) <- getSomeReg x
851 let
852 format = intFormat width
853 imm = ImmInt (fromInteger y)
854 code dst
855 = x_code `snocOL`
856 LEA format
857 (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
858 (OpReg dst)
859 --
860 return (Any format code)
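-- E.g. @x + 12@ then becomes a single LEA instead of a MOV plus an ADD
-- (hypothetical registers):
--
-- leal 12(%eax), %ebx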
861
862 ----------------------
863 div_code width signed quotient x y = do
864 (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
865 x_code <- getAnyReg x
866 let
867 format = intFormat width
868 widen | signed = CLTD format
869 | otherwise = XOR format (OpReg edx) (OpReg edx)
870
871 instr | signed = IDIV
872 | otherwise = DIV
873
874 code = y_code `appOL`
875 x_code eax `appOL`
876 toOL [widen, instr format y_op]
877
878 result | quotient = eax
879 | otherwise = edx
880
881 return (Fixed format result code)
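-- For a signed W32 quotient this produces roughly (hypothetical operand y):
--
-- movl <x>, %eax
-- cltd -- sign-extend %eax into %edx
-- idivl <y> -- quotient in %eax, remainder in %edx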
882
883
884 getRegister' _ _ (CmmLoad mem pk)
885 | isFloatType pk
886 = do
887 Amode addr mem_code <- getAmode mem
888 use_sse2 <- sse2Enabled
889 loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
890
891 getRegister' _ is32Bit (CmmLoad mem pk)
892 | is32Bit && not (isWord64 pk)
893 = do
894 code <- intLoadCode instr mem
895 return (Any format code)
896 where
897 width = typeWidth pk
898 format = intFormat width
899 instr = case width of
900 W8 -> MOVZxL II8
901 _other -> MOV format
902 -- We always zero-extend 8-bit loads, if we
903 -- can't think of anything better. This is because
904 -- we can't guarantee access to an 8-bit variant of every register
905 -- (esi and edi don't have 8-bit variants), so to make things
906 -- simpler we do our 8-bit arithmetic with full 32-bit registers.
907
908 -- Simpler memory load code on x86_64
909 getRegister' _ is32Bit (CmmLoad mem pk)
910 | not is32Bit
911 = do
912 code <- intLoadCode (MOV format) mem
913 return (Any format code)
914 where format = intFormat $ typeWidth pk
915
916 getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
917 = let
918 format = intFormat width
919
920 -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
921 format1 = if is32Bit then format
922 else case format of
923 II64 -> II32
924 _ -> format
925 code dst
926 = unitOL (XOR format1 (OpReg dst) (OpReg dst))
927 in
928 return (Any format code)
929
930 -- optimisation for loading small literals on x86_64: take advantage
931 -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
932 -- instruction forms are shorter.
933 getRegister' dflags is32Bit (CmmLit lit)
934 | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
935 = let
936 imm = litToImm lit
937 code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
938 in
939 return (Any II64 code)
940 where
941 isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
942 isBigLit _ = False
943 -- note1: not the same as (not.is32BitLit), because that checks for
944 -- signed literals that fit in 32 bits, but we want unsigned
945 -- literals here.
946 -- note2: all labels are small, because we're assuming the
947 -- small memory model (see gcc docs, -mcmodel=small).
948
949 getRegister' dflags _ (CmmLit lit)
950 = do let format = cmmTypeFormat (cmmLitType dflags lit)
951 imm = litToImm lit
952 code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
953 return (Any format code)
954
955 getRegister' _ _ other
956 | isVecExpr other = needLlvm
957 | otherwise = pprPanic "getRegister(x86)" (ppr other)
958
959
960 intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
961 -> NatM (Reg -> InstrBlock)
962 intLoadCode instr mem = do
963 Amode src mem_code <- getAmode mem
964 return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
965
966 -- Compute an expression into *any* register, adding the appropriate
967 -- move instruction if necessary.
968 getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
969 getAnyReg expr = do
970 r <- getRegister expr
971 anyReg r
972
973 anyReg :: Register -> NatM (Reg -> InstrBlock)
974 anyReg (Any _ code) = return code
975 anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
976
977 -- A bit like getSomeReg, but we want a reg that can be byte-addressed.
978 -- Fixed registers might not be byte-addressable, so we make sure we've
979 -- got a temporary, inserting an extra reg copy if necessary.
980 getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
981 getByteReg expr = do
982 is32Bit <- is32BitPlatform
983 if is32Bit
984 then do r <- getRegister expr
985 case r of
986 Any rep code -> do
987 tmp <- getNewRegNat rep
988 return (tmp, code tmp)
989 Fixed rep reg code
990 | isVirtualReg reg -> return (reg,code)
991 | otherwise -> do
992 tmp <- getNewRegNat rep
993 return (tmp, code `snocOL` reg2reg rep reg tmp)
994 -- ToDo: could optimise slightly by checking for
995 -- byte-addressable real registers, but that will
996 -- happen very rarely if at all.
997 else getSomeReg expr -- all regs are byte-addressable on x86_64
998
999 -- Another variant: this time we want the result in a register that cannot
1000 -- be modified by code to evaluate an arbitrary expression.
1001 getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
1002 getNonClobberedReg expr = do
1003 dflags <- getDynFlags
1004 r <- getRegister expr
1005 case r of
1006 Any rep code -> do
1007 tmp <- getNewRegNat rep
1008 return (tmp, code tmp)
1009 Fixed rep reg code
1010 -- only certain regs can be clobbered
1011 | reg `elem` instrClobberedRegs (targetPlatform dflags)
1012 -> do
1013 tmp <- getNewRegNat rep
1014 return (tmp, code `snocOL` reg2reg rep reg tmp)
1015 | otherwise ->
1016 return (reg, code)
1017
1018 reg2reg :: Format -> Reg -> Reg -> Instr
1019 reg2reg format src dst
1020 | format == FF80 = GMOV src dst
1021 | otherwise = MOV format (OpReg src) (OpReg dst)
1022
1023
1024 --------------------------------------------------------------------------------
1025 getAmode :: CmmExpr -> NatM Amode
1026 getAmode e = do is32Bit <- is32BitPlatform
1027 getAmode' is32Bit e
1028
1029 getAmode' :: Bool -> CmmExpr -> NatM Amode
1030 getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
1031 getAmode $ mangleIndexTree dflags r n
1032
1033 getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
1034 CmmLit displacement])
1035 | not is32Bit
1036 = return $ Amode (ripRel (litToImm displacement)) nilOL
1037
1038
1039 -- This is all just ridiculous, since it carefully undoes
1040 -- what mangleIndexTree has just done.
1041 getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
1042 | is32BitLit is32Bit lit
1043 -- ASSERT(rep == II32)???
1044 = do (x_reg, x_code) <- getSomeReg x
1045 let off = ImmInt (-(fromInteger i))
1046 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1047
1048 getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
1049 | is32BitLit is32Bit lit
1050 -- ASSERT(rep == II32)???
1051 = do (x_reg, x_code) <- getSomeReg x
1052 let off = litToImm lit
1053 return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
1054
1055 -- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
1056 -- recognised by the next rule.
1057 getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
1058 b@(CmmLit _)])
1059 = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])
1060
1061 -- Matches: (x + offset) + (y << shift)
1062 getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
1063 CmmMachOp (MO_Shl _)
1064 [y, CmmLit (CmmInt shift _)]])
1065 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1066 = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)
1067
1068 getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
1069 [y, CmmLit (CmmInt shift _)]])
1070 | shift == 0 || shift == 1 || shift == 2 || shift == 3
1071 = x86_complex_amode x y shift 0
1072
1073 getAmode' _ (CmmMachOp (MO_Add _)
1074 [x, CmmMachOp (MO_Add _)
1075 [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
1076 CmmLit (CmmInt offset _)]])
1077 | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
1078 && is32BitInteger offset
1079 = x86_complex_amode x y shift offset
1080
1081 getAmode' _ (CmmMachOp (MO_Add _) [x,y])
1082 = x86_complex_amode x y 0 0
1083
1084 getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
1085 = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
1086
1087 getAmode' _ expr = do
1088 (reg,code) <- getSomeReg expr
1089 return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
1090
1091 -- | Like 'getAmode', but on 32-bit use simple register addressing
1092 -- (i.e. no index register). This stops us from running out of
1093 -- registers on x86 when using instructions such as cmpxchg, which can
1094 -- use up to three virtual registers and one fixed register.
1095 getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
1096 getSimpleAmode dflags is32Bit addr
1097 | is32Bit = do
1098 addr_code <- getAnyReg addr
1099 addr_r <- getNewRegNat (intFormat (wordWidth dflags))
1100 let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
1101 return $! Amode amode (addr_code addr_r)
1102 | otherwise = getAmode addr
1103
1104 x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
1105 x86_complex_amode base index shift offset
1106 = do (x_reg, x_code) <- getNonClobberedReg base
1107 -- x must be in a temp, because it has to stay live over y_code
1108 -- we could compare x_reg and y_reg and do something better here...
1109 (y_reg, y_code) <- getSomeReg index
1110 let
1111 code = x_code `appOL` y_code
1112 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
1113 n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
1114 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1115 code)
1116
1117
1118
1119
1120 -- -----------------------------------------------------------------------------
1121 -- getOperand: sometimes any operand will do.
1122
1123 -- getNonClobberedOperand: the value of the operand will remain valid across
1124 -- the computation of an arbitrary expression, unless the expression
1125 -- is computed directly into a register which the operand refers to
1126 -- (see trivialCode where this function is used for an example).
1127
1128 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1129 getNonClobberedOperand (CmmLit lit) = do
1130 use_sse2 <- sse2Enabled
1131 if use_sse2 && isSuitableFloatingPointLit lit
1132 then do
1133 let CmmFloat _ w = lit
1134 Amode addr code <- memConstant (widthInBytes w) lit
1135 return (OpAddr addr, code)
1136 else do
1137
1138 is32Bit <- is32BitPlatform
1139 dflags <- getDynFlags
1140 if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
1141 then return (OpImm (litToImm lit), nilOL)
1142 else getNonClobberedOperand_generic (CmmLit lit)
1143
1144 getNonClobberedOperand (CmmLoad mem pk) = do
1145 is32Bit <- is32BitPlatform
1146 use_sse2 <- sse2Enabled
1147 if (not (isFloatType pk) || use_sse2)
1148 && (if is32Bit then not (isWord64 pk) else True)
1149 then do
1150 dflags <- getDynFlags
1151 let platform = targetPlatform dflags
1152 Amode src mem_code <- getAmode mem
1153 (src',save_code) <-
1154 if (amodeCouldBeClobbered platform src)
1155 then do
1156 tmp <- getNewRegNat (archWordFormat is32Bit)
1157 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1158 unitOL (LEA (archWordFormat is32Bit)
1159 (OpAddr src)
1160 (OpReg tmp)))
1161 else
1162 return (src, nilOL)
1163 return (OpAddr src', mem_code `appOL` save_code)
1164 else do
1165 getNonClobberedOperand_generic (CmmLoad mem pk)
1166
1167 getNonClobberedOperand e = getNonClobberedOperand_generic e
1168
1169 getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
1170 getNonClobberedOperand_generic e = do
1171 (reg, code) <- getNonClobberedReg e
1172 return (OpReg reg, code)
1173
1174 amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
1175 amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)
1176
1177 regClobbered :: Platform -> Reg -> Bool
1178 regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
1179 regClobbered _ _ = False
1180
1181 -- getOperand: the operand is not required to remain valid across the
1182 -- computation of an arbitrary expression.
1183 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1184
1185 getOperand (CmmLit lit) = do
1186 use_sse2 <- sse2Enabled
1187 if (use_sse2 && isSuitableFloatingPointLit lit)
1188 then do
1189 let CmmFloat _ w = lit
1190 Amode addr code <- memConstant (widthInBytes w) lit
1191 return (OpAddr addr, code)
1192 else do
1193
1194 is32Bit <- is32BitPlatform
1195 dflags <- getDynFlags
1196 if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
1197 then return (OpImm (litToImm lit), nilOL)
1198 else getOperand_generic (CmmLit lit)
1199
1200 getOperand (CmmLoad mem pk) = do
1201 is32Bit <- is32BitPlatform
1202 use_sse2 <- sse2Enabled
1203 if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
1204 then do
1205 Amode src mem_code <- getAmode mem
1206 return (OpAddr src, mem_code)
1207 else
1208 getOperand_generic (CmmLoad mem pk)
1209
1210 getOperand e = getOperand_generic e
1211
1212 getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
1213 getOperand_generic e = do
1214 (reg, code) <- getSomeReg e
1215 return (OpReg reg, code)
1216
1217 isOperand :: Bool -> CmmExpr -> Bool
1218 isOperand _ (CmmLoad _ _) = True
1219 isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
1220 || isSuitableFloatingPointLit lit
1221 isOperand _ _ = False
1222
1223 memConstant :: Int -> CmmLit -> NatM Amode
1224 memConstant align lit = do
1225 lbl <- getNewLabelNat
1226 dflags <- getDynFlags
1227 (addr, addr_code) <- if target32Bit (targetPlatform dflags)
1228 then do dynRef <- cmmMakeDynamicReference
1229 dflags
1230 DataReference
1231 lbl
1232 Amode addr addr_code <- getAmode dynRef
1233 return (addr, addr_code)
1234 else return (ripRel (ImmCLbl lbl), nilOL)
1235 let code =
1236 LDATA ReadOnlyData (align, Statics lbl [CmmStaticLit lit])
1237 `consOL` addr_code
1238 return (Amode addr code)
1239
1240
1241 loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
1242 loadFloatAmode use_sse2 w addr addr_code = do
1243 let format = floatFormat w
1244 code dst = addr_code `snocOL`
1245 if use_sse2
1246 then MOV format (OpAddr addr) (OpReg dst)
1247 else GLD format addr dst
1248 return (Any (if use_sse2 then format else FF80) code)
1249
1250
1251 -- if we want a floating-point literal as an operand, we can
1252 -- use it directly from memory. However, if the literal is
1253 -- zero, we're better off generating it into a register using
1254 -- xor.
1255 isSuitableFloatingPointLit :: CmmLit -> Bool
1256 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1257 isSuitableFloatingPointLit _ = False
1258
1259 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1260 getRegOrMem e@(CmmLoad mem pk) = do
1261 is32Bit <- is32BitPlatform
1262 use_sse2 <- sse2Enabled
1263 if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
1264 then do
1265 Amode src mem_code <- getAmode mem
1266 return (OpAddr src, mem_code)
1267 else do
1268 (reg, code) <- getNonClobberedReg e
1269 return (OpReg reg, code)
1270 getRegOrMem e = do
1271 (reg, code) <- getNonClobberedReg e
1272 return (OpReg reg, code)
1273
1274 is32BitLit :: Bool -> CmmLit -> Bool
1275 is32BitLit is32Bit (CmmInt i W64)
1276 | not is32Bit
1277 = -- assume that labels are in the range 0-2^31-1: this assumes the
1278 -- small memory model (see gcc docs, -mcmodel=small).
1279 is32BitInteger i
1280 is32BitLit _ _ = True
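-- A quick check on a 64-bit target (hypothetical GHCi session):
--
-- >>> is32BitLit False (CmmInt 0x7fffffff W64)
-- True
-- >>> is32BitLit False (CmmInt 0x100000000 W64)
-- False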
1281
1282
1283
1284
1285 -- Set up a condition code for a conditional branch.
1286
1287 getCondCode :: CmmExpr -> NatM CondCode
1288
1289 -- yes, they really do seem to want exactly the same!
1290
1291 getCondCode (CmmMachOp mop [x, y])
1292 =
1293 case mop of
1294 MO_F_Eq W32 -> condFltCode EQQ x y
1295 MO_F_Ne W32 -> condFltCode NE x y
1296 MO_F_Gt W32 -> condFltCode GTT x y
1297 MO_F_Ge W32 -> condFltCode GE x y
1298 MO_F_Lt W32 -> condFltCode LTT x y
1299 MO_F_Le W32 -> condFltCode LE x y
1300
1301 MO_F_Eq W64 -> condFltCode EQQ x y
1302 MO_F_Ne W64 -> condFltCode NE x y
1303 MO_F_Gt W64 -> condFltCode GTT x y
1304 MO_F_Ge W64 -> condFltCode GE x y
1305 MO_F_Lt W64 -> condFltCode LTT x y
1306 MO_F_Le W64 -> condFltCode LE x y
1307
1308 _ -> condIntCode (machOpToCond mop) x y
1309
1310 getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)
1311
1312 machOpToCond :: MachOp -> Cond
1313 machOpToCond mo = case mo of
1314 MO_Eq _ -> EQQ
1315 MO_Ne _ -> NE
1316 MO_S_Gt _ -> GTT
1317 MO_S_Ge _ -> GE
1318 MO_S_Lt _ -> LTT
1319 MO_S_Le _ -> LE
1320 MO_U_Gt _ -> GU
1321 MO_U_Ge _ -> GEU
1322 MO_U_Lt _ -> LU
1323 MO_U_Le _ -> LEU
1324 _other -> pprPanic "machOpToCond" (pprMachOp mo)
1325
1326
1327 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1328 -- passed back up the tree.
1329
1330 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1331 condIntCode cond x y = do is32Bit <- is32BitPlatform
1332 condIntCode' is32Bit cond x y
1333
1334 condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1335
1336 -- memory vs immediate
1337 condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
1338 | is32BitLit is32Bit lit = do
1339 Amode x_addr x_code <- getAmode x
1340 let
1341 imm = litToImm lit
1342 code = x_code `snocOL`
1343 CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr)
1344 --
1345 return (CondCode False cond code)
1346
1347 -- anything vs zero, using a mask
1348 -- TODO: Add some sanity checking!!!!
1349 condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
1350 | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
1351 = do
1352 (x_reg, x_code) <- getSomeReg x
1353 let
1354 code = x_code `snocOL`
1355 TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1356 --
1357 return (CondCode False cond code)
1358
1359 -- anything vs zero
1360 condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
1361 (x_reg, x_code) <- getSomeReg x
1362 let
1363 code = x_code `snocOL`
1364 TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg)
1365 --
1366 return (CondCode False cond code)
1367
1368 -- anything vs operand
1369 condIntCode' is32Bit cond x y
1370 | isOperand is32Bit y = do
1371 dflags <- getDynFlags
1372 (x_reg, x_code) <- getNonClobberedReg x
1373 (y_op, y_code) <- getOperand y
1374 let
1375 code = x_code `appOL` y_code `snocOL`
1376 CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg)
1377 return (CondCode False cond code)
1378 -- operand vs. anything: invert the comparison so that we can use a
1379 -- single comparison instruction.
1380 | isOperand is32Bit x
1381 , Just revcond <- maybeFlipCond cond = do
1382 dflags <- getDynFlags
1383 (y_reg, y_code) <- getNonClobberedReg y
1384 (x_op, x_code) <- getOperand x
1385 let
1386 code = y_code `appOL` x_code `snocOL`
1387 CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg)
1388 return (CondCode False revcond code)
1389
1390 -- anything vs anything
1391 condIntCode' _ cond x y = do
1392 dflags <- getDynFlags
1393 (y_reg, y_code) <- getNonClobberedReg y
1394 (x_op, x_code) <- getRegOrMem x
1395 let
1396 code = y_code `appOL`
1397 x_code `snocOL`
1398 CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op
1399 return (CondCode False cond code)
1400
1401
1402
1403 --------------------------------------------------------------------------------
1404 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1405
1406 condFltCode cond x y
1407 = if_sse2 condFltCode_sse2 condFltCode_x87
1408 where
1409
1410 condFltCode_x87
1411 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1412 (x_reg, x_code) <- getNonClobberedReg x
1413 (y_reg, y_code) <- getSomeReg y
1414 let
1415 code = x_code `appOL` y_code `snocOL`
1416 GCMP cond x_reg y_reg
1417 -- The GCMP insn does the test and sets the zero flag if comparable
1418 -- and true. Hence we always supply EQQ as the condition to test.
1419 return (CondCode True EQQ code)
1420
1421 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1422 -- an operand, but the right must be a reg. We can probably do better
1423 -- than this general case...
1424 condFltCode_sse2 = do
1425 dflags <- getDynFlags
1426 (x_reg, x_code) <- getNonClobberedReg x
1427 (y_op, y_code) <- getOperand y
1428 let
1429 code = x_code `appOL`
1430 y_code `snocOL`
1431 CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg)
1432 -- NB(1): we need to use the unsigned comparison operators on the
1433 -- result of this comparison.
1434 return (CondCode True (condToUnsigned cond) code)
1435
1436 -- -----------------------------------------------------------------------------
1437 -- Generating assignments
1438
1439 -- Assignments are really at the heart of the whole code generation
1440 -- business. Almost all top-level nodes of any real importance are
1441 -- assignments, which correspond to loads, stores, or register
1442 -- transfers. If we're really lucky, some of the register transfers
1443 -- will go away, because we can use the destination register to
1444 -- complete the code generation for the right hand side. This only
1445 -- fails when the right hand side is forced into a fixed register
1446 -- (e.g. the result of a call).
1447
1448 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1449 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1450
1451 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1452 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1453
1454
1455 -- integer assignment to memory
1456
1457 -- specific case of adding/subtracting an integer to a particular address.
1458 -- ToDo: catch other cases where we can use an operation directly on a memory
1459 -- address.
1460 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1461 CmmLit (CmmInt i _)])
1462 | addr == addr2, pk /= II64 || is32BitInteger i,
1463 Just instr <- check op
1464 = do Amode amode code_addr <- getAmode addr
1465 let code = code_addr `snocOL`
1466 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
1467 return code
1468 where
1469 check (MO_Add _) = Just ADD
1470 check (MO_Sub _) = Just SUB
1471 check _ = Nothing
1472 -- ToDo: more?
1473
1474 -- general case
1475 assignMem_IntCode pk addr src = do
1476 is32Bit <- is32BitPlatform
1477 Amode addr code_addr <- getAmode addr
1478 (code_src, op_src) <- get_op_RI is32Bit src
1479 let
1480 code = code_src `appOL`
1481 code_addr `snocOL`
1482 MOV pk op_src (OpAddr addr)
1483 -- NOTE: op_src is stable, so it will still be valid
1484 -- after code_addr. This may involve the introduction
1485 -- of an extra MOV to a temporary register, but we hope
1486 -- the register allocator will get rid of it.
1487 --
1488 return code
1489 where
1490 get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
1491 get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
1492 = return (nilOL, OpImm (litToImm lit))
1493 get_op_RI _ op
1494 = do (reg,code) <- getNonClobberedReg op
1495 return (code, OpReg reg)
1496
1497
1498 -- Assign; dst is a reg, rhs is mem
1499 assignReg_IntCode pk reg (CmmLoad src _) = do
1500 load_code <- intLoadCode (MOV pk) src
1501 dflags <- getDynFlags
1502 let platform = targetPlatform dflags
1503 return (load_code (getRegisterReg platform False{-no sse2-} reg))
1504
1505 -- dst is a reg, but src could be anything
1506 assignReg_IntCode _ reg src = do
1507 dflags <- getDynFlags
1508 let platform = targetPlatform dflags
1509 code <- getAnyReg src
1510 return (code (getRegisterReg platform False{-no sse2-} reg))
1511
1512
1513 -- Floating point assignment to memory
1514 assignMem_FltCode pk addr src = do
1515 (src_reg, src_code) <- getNonClobberedReg src
1516 Amode addr addr_code <- getAmode addr
1517 use_sse2 <- sse2Enabled
1518 let
1519 code = src_code `appOL`
1520 addr_code `snocOL`
1521 if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
1522 else GST pk src_reg addr
1523 return code
1524
1525 -- Floating point assignment to a register/temporary
1526 assignReg_FltCode _ reg src = do
1527 use_sse2 <- sse2Enabled
1528 src_code <- getAnyReg src
1529 dflags <- getDynFlags
1530 let platform = targetPlatform dflags
1531 return (src_code (getRegisterReg platform use_sse2 reg))
1532
1533
1534 genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
1535
1536 genJump (CmmLoad mem _) regs = do
1537 Amode target code <- getAmode mem
1538 return (code `snocOL` JMP (OpAddr target) regs)
1539
1540 genJump (CmmLit lit) regs = do
1541 return (unitOL (JMP (OpImm (litToImm lit)) regs))
1542
1543 genJump expr regs = do
1544 (reg,code) <- getSomeReg expr
1545 return (code `snocOL` JMP (OpReg reg) regs)
1546
1547
1548 -- -----------------------------------------------------------------------------
1549 -- Unconditional branches
1550
1551 genBranch :: BlockId -> NatM InstrBlock
1552 genBranch = return . toOL . mkJumpInstr
1553
1554
1555
1556 -- -----------------------------------------------------------------------------
1557 -- Conditional jumps
1558
1559 {-
1560 Conditional jumps are always to local labels, so we can use branch
1561 instructions. We peek at the arguments to decide what kind of
1562 comparison to do.
1563
1564 I386: First, we have to ensure that the condition
1565 codes are set according to the supplied comparison operation.
1566 -}
1567
1568 genCondJump
1569 :: BlockId -- the branch target
1570 -> CmmExpr -- the condition on which to branch
1571 -> NatM InstrBlock
1572
1573 genCondJump id expr = do
1574 is32Bit <- is32BitPlatform
1575 genCondJump' is32Bit id expr
1576
1577 genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock
1578
1579 -- 64-bit integer comparisons on 32-bit
1580 genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
1581 | is32Bit, Just W64 <- maybeIntComparison mop = do
1582 ChildCode64 code1 r1_lo <- iselExpr64 e1
1583 ChildCode64 code2 r2_lo <- iselExpr64 e2
1584 let r1_hi = getHiVRegFromLo r1_lo
1585 r2_hi = getHiVRegFromLo r2_lo
1586 cond = machOpToCond mop
1587 Just cond' = maybeFlipCond cond
1588 false <- getBlockIdNat
1589 return $ code1 `appOL` code2 `appOL` toOL [
1590 CMP II32 (OpReg r2_hi) (OpReg r1_hi),
1591 JXX cond true,
1592 JXX cond' false,
1593 CMP II32 (OpReg r2_lo) (OpReg r1_lo),
1594 JXX cond true,
1595 NEWBLOCK false ]
1596
1597 genCondJump' _ id bool = do
1598 CondCode is_float cond cond_code <- getCondCode bool
1599 use_sse2 <- sse2Enabled
1600 if not is_float || not use_sse2
1601 then
1602 return (cond_code `snocOL` JXX cond id)
1603 else do
1604 lbl <- getBlockIdNat
1605
1606 -- see comment with condFltReg
1607 let code = case cond of
1608 NE -> or_unordered
1609 GU -> plain_test
1610 GEU -> plain_test
1611 _ -> and_ordered
1612
1613 plain_test = unitOL (
1614 JXX cond id
1615 )
1616 or_unordered = toOL [
1617 JXX cond id,
1618 JXX PARITY id
1619 ]
1620 and_ordered = toOL [
1621 JXX PARITY lbl,
1622 JXX cond id,
1623 JXX ALWAYS lbl,
1624 NEWBLOCK lbl
1625 ]
1626 return (cond_code `appOL` code)
1627
1628 -- -----------------------------------------------------------------------------
1629 -- Generating C calls
1630
1631 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1632 -- @get_arg@, which moves the arguments to the correct registers/stack
1633 -- locations. Apart from that, the code is easy.
1634 --
1635 -- (If applicable) Do not fill the delay slots here; you will confuse the
1636 -- register allocator.
1637
1638 genCCall
1639 :: DynFlags
1640 -> Bool -- 32 bit platform?
1641 -> ForeignTarget -- function to call
1642 -> [CmmFormal] -- where to put the result
1643 -> [CmmActual] -- arguments (of mixed type)
1644 -> NatM InstrBlock
1645
1646 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1647
1648 -- Unroll memcpy calls if the source and destination pointers are at
1649 -- least DWORD aligned and the number of bytes to copy isn't too
1650 -- large. Otherwise, call C's memcpy.
1651 genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
1652 [dst, src, CmmLit (CmmInt n _)]
1653 | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
1654 code_dst <- getAnyReg dst
1655 dst_r <- getNewRegNat format
1656 code_src <- getAnyReg src
1657 src_r <- getNewRegNat format
1658 tmp_r <- getNewRegNat format
1659 return $ code_dst dst_r `appOL` code_src src_r `appOL`
1660 go dst_r src_r tmp_r (fromInteger n)
1661 where
1662 -- The number of instructions we will generate (approx). We need 2
1663 -- instructions per move.
1664 insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
1665
1666 format = if align .&. 4 /= 0 then II32 else (archWordFormat is32Bit)
1667
1668 -- The size of each move, in bytes.
1669 sizeBytes :: Integer
1670 sizeBytes = fromIntegral (formatInBytes format)
1671
1672 go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
1673 go dst src tmp i
1674 | i >= sizeBytes =
1675 unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
1676 unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
1677 go dst src tmp (i - sizeBytes)
1678 -- Deal with remaining bytes.
1679 | i >= 4 = -- Will never happen on 32-bit
1680 unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
1681 unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1682 go dst src tmp (i - 4)
1683 | i >= 2 =
1684 unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
1685 unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1686 go dst src tmp (i - 2)
1687 | i >= 1 =
1688 unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
1689 unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1690 go dst src tmp (i - 1)
1691 | otherwise = nilOL
1692 where
1693 src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
1694 (ImmInteger (n - i))
1695 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1696 (ImmInteger (n - i))
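
-- For illustration: with n = 10 and format = II32 the unrolled copy is
--
--     MOV    II32 0(src) tmp  ;  MOV II32 tmp 0(dst)
--     MOV    II32 4(src) tmp  ;  MOV II32 tmp 4(dst)
--     MOVZxL II16 8(src) tmp  ;  MOV II16 tmp 8(dst)
--
-- i.e. six instructions, matching insns = 2 * ((10 + 3) `div` 4) = 6.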
1697
1698 genCCall dflags _ (PrimTarget (MO_Memset align)) _
1699 [dst,
1700 CmmLit (CmmInt c _),
1701 CmmLit (CmmInt n _)]
1702 | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
1703 code_dst <- getAnyReg dst
1704 dst_r <- getNewRegNat format
1705 return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
1706 where
1707 (format, val) = case align .&. 3 of
1708 2 -> (II16, c2)
1709 0 -> (II32, c4)
1710 _ -> (II8, c)
1711 c2 = c `shiftL` 8 .|. c
1712 c4 = c2 `shiftL` 16 .|. c2
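          -- e.g. c = 0xAB gives c2 = 0xABAB and c4 = 0xABABABAB,
          -- replicating the fill byte across the width of each store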
1713
1714                 -- The number of instructions we will generate (approx). We need
1715                 -- one instruction per move.
1716 insns = (n + sizeBytes - 1) `div` sizeBytes
1717
1718 -- The size of each move, in bytes.
1719 sizeBytes :: Integer
1720 sizeBytes = fromIntegral (formatInBytes format)
1721
1722 go :: Reg -> Integer -> OrdList Instr
1723 go dst i
1724 -- TODO: Add movabs instruction and support 64-bit sets.
1725             | i >= sizeBytes = -- This might be smaller than the sizes below
1726 unitOL (MOV format (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1727 go dst (i - sizeBytes)
1728 | i >= 4 = -- Will never happen on 32-bit
1729 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1730 go dst (i - 4)
1731 | i >= 2 =
1732 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1733 go dst (i - 2)
1734 | i >= 1 =
1735 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1736 go dst (i - 1)
1737 | otherwise = nilOL
1738 where
1739 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1740 (ImmInteger (n - i))
1741
1742 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1743 -- write barrier compiles to no code on x86/x86-64;
1744 -- we keep it this long in order to prevent earlier optimisations.
1745
1746 genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
1747
1748 genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
1749 case n of
1750 0 -> genPrefetch src $ PREFETCH NTA format
1751 1 -> genPrefetch src $ PREFETCH Lvl2 format
1752 2 -> genPrefetch src $ PREFETCH Lvl1 format
1753 3 -> genPrefetch src $ PREFETCH Lvl0 format
1754 l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
1755             -- the C / LLVM prefetch convention uses levels 0, 1, 2, and 3;
1756             -- the corresponding x86 hints are NTA, T2, T1, and T0
1757 where
1758 format = archWordFormat is32bit
1759         -- we need to know the register width for pointers!
1760 genPrefetch inRegSrc prefetchCTor =
1761 do
1762 code_src <- getAnyReg inRegSrc
1763 src_r <- getNewRegNat format
1764 return $ code_src src_r `appOL`
1765 (unitOL (prefetchCTor (OpAddr
1766 ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
1767 -- prefetch always takes an address
1768
1769 genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
1770 let platform = targetPlatform dflags
1771 let dst_r = getRegisterReg platform False (CmmLocal dst)
1772 case width of
1773 W64 | is32Bit -> do
1774 ChildCode64 vcode rlo <- iselExpr64 src
1775 let dst_rhi = getHiVRegFromLo dst_r
1776 rhi = getHiVRegFromLo rlo
1777 return $ vcode `appOL`
1778 toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
1779 MOV II32 (OpReg rhi) (OpReg dst_r),
1780 BSWAP II32 dst_rhi,
1781 BSWAP II32 dst_r ]
1782 W16 -> do code_src <- getAnyReg src
1783 return $ code_src dst_r `appOL`
1784 unitOL (BSWAP II32 dst_r) `appOL`
1785 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
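                  -- e.g. 0x1234 becomes 0x34120000 after the 32-bit BSWAP
                  -- and 0x3412 after the 16-bit shift right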
1786 _ -> do code_src <- getAnyReg src
1787 return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)
1788 where
1789 format = intFormat width
1790
1791 genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1792 args@[src] = do
1793 sse4_2 <- sse4_2Enabled
1794 let platform = targetPlatform dflags
1795 if sse4_2
1796 then do code_src <- getAnyReg src
1797 src_r <- getNewRegNat format
1798 let dst_r = getRegisterReg platform False (CmmLocal dst)
1799 return $ code_src src_r `appOL`
1800 (if width == W8 then
1801                      -- The POPCNT instruction doesn't take an r/m8
1802 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1803 unitOL (POPCNT II16 (OpReg src_r) dst_r)
1804 else
1805 unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
1806 (if width == W8 || width == W16 then
1807 -- We used a 16-bit destination register above,
1808 -- so zero-extend
1809 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1810 else nilOL)
1811 else do
1812 targetExpr <- cmmMakeDynamicReference dflags
1813 CallReference lbl
1814 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1815 [NoHint] [NoHint]
1816 CmmMayReturn)
1817 genCCall dflags is32Bit target dest_regs args
1818 where
1819 format = intFormat width
1820 lbl = mkCmmCodeLabel primPackageKey (fsLit (popCntLabel width))
1821
1822 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
1823 | is32Bit && width == W64 = do
1824 -- Fallback to `hs_clz64` on i386
1825 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
1826 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1827 [NoHint] [NoHint]
1828 CmmMayReturn)
1829 genCCall dflags is32Bit target dest_regs args
1830
1831 | otherwise = do
1832 code_src <- getAnyReg src
1833 src_r <- getNewRegNat format
1834 tmp_r <- getNewRegNat format
1835 let dst_r = getRegisterReg platform False (CmmLocal dst)
1836
1837 -- The following insn sequence makes sure 'clz 0' has a defined value.
1838     -- Starting with Haswell, one could use the LZCNT insn instead.
1839 return $ code_src src_r `appOL` toOL
1840 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1841 [ BSR format (OpReg src_r) tmp_r
1842 , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
1843 , CMOV NE format (OpReg tmp_r) dst_r
1844 , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
1845 ]) -- NB: We don't need to zero-extend the result for the
1846 -- W8/W16 cases because the 'MOV' insn already
1847 -- took care of implicitly clearing the upper bits
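    -- Rough walk-through for W32: BSR yields the index of the highest set
    -- bit, so src = 1 gives tmp = 0 and dst = 0 `xor` 31 = 31; src = 0
    -- sets ZF, the CMOV is skipped, and dst = 63 `xor` 31 = 32.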
1848 where
1849 bw = widthInBits width
1850 platform = targetPlatform dflags
1851 format = if width == W8 then II16 else intFormat width
1852 lbl = mkCmmCodeLabel primPackageKey (fsLit (clzLabel width))
1853
1854 genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
1855 | is32Bit, width == W64 = do
1856 ChildCode64 vcode rlo <- iselExpr64 src
1857 let rhi = getHiVRegFromLo rlo
1858 dst_r = getRegisterReg platform False (CmmLocal dst)
1859 lbl1 <- getBlockIdNat
1860 lbl2 <- getBlockIdNat
1861 tmp_r <- getNewRegNat format
1862
1863 -- The following instruction sequence corresponds to the pseudo-code
1864 --
1865 -- if (src) {
1866 -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
1867 -- } else {
1868 -- dst = 64;
1869 -- }
1870 return $ vcode `appOL` toOL
1871 ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
1872 , OR II32 (OpReg rlo) (OpReg tmp_r)
1873 , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
1874 , JXX EQQ lbl2
1875 , JXX ALWAYS lbl1
1876
1877 , NEWBLOCK lbl1
1878 , BSF II32 (OpReg rhi) dst_r
1879 , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r)
1880 , BSF II32 (OpReg rlo) tmp_r
1881 , CMOV NE II32 (OpReg tmp_r) dst_r
1882 , JXX ALWAYS lbl2
1883
1884 , NEWBLOCK lbl2
1885 ])
1886
1887 | otherwise = do
1888 code_src <- getAnyReg src
1889 src_r <- getNewRegNat format
1890 tmp_r <- getNewRegNat format
1891 let dst_r = getRegisterReg platform False (CmmLocal dst)
1892
1893 -- The following insn sequence makes sure 'ctz 0' has a defined value.
1894     -- Starting with Haswell, one could use the TZCNT insn instead.
1895 return $ code_src src_r `appOL` toOL
1896 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1897 [ BSF format (OpReg src_r) tmp_r
1898 , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
1899 , CMOV NE format (OpReg tmp_r) dst_r
1900 ]) -- NB: We don't need to zero-extend the result for the
1901 -- W8/W16 cases because the 'MOV' insn already
1902 -- took care of implicitly clearing the upper bits
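    -- Rough walk-through for W32: BSF yields the index of the lowest set
    -- bit, so src = 8 gives tmp = 3 and the CMOV makes dst = 3; src = 0
    -- sets ZF, the CMOV is skipped, and dst keeps the 32 loaded by the MOV.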
1903 where
1904 bw = widthInBits width
1905 platform = targetPlatform dflags
1906 format = if width == W8 then II16 else intFormat width
1907
1908 genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
1909 targetExpr <- cmmMakeDynamicReference dflags
1910 CallReference lbl
1911 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1912 [NoHint] [NoHint]
1913 CmmMayReturn)
1914 genCCall dflags is32Bit target dest_regs args
1915 where
1916 lbl = mkCmmCodeLabel primPackageKey (fsLit (word2FloatLabel width))
1917
1918 genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
1919 Amode amode addr_code <-
1920 if amop `elem` [AMO_Add, AMO_Sub]
1921 then getAmode addr
1922 else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
1923 arg <- getNewRegNat format
1924 arg_code <- getAnyReg n
1925 use_sse2 <- sse2Enabled
1926 let platform = targetPlatform dflags
1927 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1928 code <- op_code dst_r arg amode
1929 return $ addr_code `appOL` arg_code arg `appOL` code
1930 where
1931 -- Code for the operation
1932 op_code :: Reg -- Destination reg
1933 -> Reg -- Register containing argument
1934 -> AddrMode -- Address of location to mutate
1935 -> NatM (OrdList Instr)
1936 op_code dst_r arg amode = case amop of
1937 -- In the common case where dst_r is a virtual register the
1938 -- final move should go away, because it's the last use of arg
1939 -- and the first use of dst_r.
1940 AMO_Add -> return $ toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
1941 , MOV format (OpReg arg) (OpReg dst_r)
1942 ]
1943 AMO_Sub -> return $ toOL [ NEGI format (OpReg arg)
1944 , LOCK (XADD format (OpReg arg) (OpAddr amode))
1945 , MOV format (OpReg arg) (OpReg dst_r)
1946 ]
1947 AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
1948 AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
1949 , NOT format dst
1950 ])
1951 AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
1952 AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
1953 where
1954 -- Simulate operation that lacks a dedicated instruction using
1955 -- cmpxchg.
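        --
        -- The loop generated below corresponds roughly to:
        --
        --     eax = *addr
        --   loop:
        --     dst = eax               -- old value, to be returned
        --     tmp = eax `op` arg
        --     LOCK CMPXCHG            -- if *addr == eax then *addr = tmp
        --                             -- else eax = *addr; flags := NE
        --     jne loop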
1956 cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
1957 -> NatM (OrdList Instr)
1958 cmpxchg_code instrs = do
1959 lbl <- getBlockIdNat
1960 tmp <- getNewRegNat format
1961 return $ toOL
1962 [ MOV format (OpAddr amode) (OpReg eax)
1963 , JXX ALWAYS lbl
1964 , NEWBLOCK lbl
1965 -- Keep old value so we can return it:
1966 , MOV format (OpReg eax) (OpReg dst_r)
1967 , MOV format (OpReg eax) (OpReg tmp)
1968 ]
1969 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
1970 [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
1971 , JXX NE lbl
1972 ]
1973
1974 format = intFormat width
1975
1976 genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
1977 load_code <- intLoadCode (MOV (intFormat width)) addr
1978 let platform = targetPlatform dflags
1979 use_sse2 <- sse2Enabled
1980 return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
1981
1982 genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1983 code <- assignMem_IntCode (intFormat width) addr val
1984 return $ code `snocOL` MFENCE
1985
1986 genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
1987 -- On x86 we don't have enough registers to use cmpxchg with a
1988 -- complicated addressing mode, so on that architecture we
1989 -- pre-compute the address first.
1990 Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
1991 newval <- getNewRegNat format
1992 newval_code <- getAnyReg new
1993 oldval <- getNewRegNat format
1994 oldval_code <- getAnyReg old
1995 use_sse2 <- sse2Enabled
1996 let platform = targetPlatform dflags
1997 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1998 code = toOL
1999 [ MOV format (OpReg oldval) (OpReg eax)
2000 , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
2001 , MOV format (OpReg eax) (OpReg dst_r)
2002 ]
2003 return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
2004 `appOL` code
2005 where
2006 format = intFormat width
2007
2008 genCCall _ is32Bit target dest_regs args = do
2009 dflags <- getDynFlags
2010 let platform = targetPlatform dflags
2011 case (target, dest_regs) of
2012 -- void return type prim op
2013 (PrimTarget op, []) ->
2014 outOfLineCmmOp op Nothing args
2015 -- we only cope with a single result for foreign calls
2016 (PrimTarget op, [r])
2017 | not is32Bit -> outOfLineCmmOp op (Just r) args
2018 | otherwise -> do
2019 l1 <- getNewLabelNat
2020 l2 <- getNewLabelNat
2021 sse2 <- sse2Enabled
2022 if sse2
2023 then
2024 outOfLineCmmOp op (Just r) args
2025 else case op of
2026 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
2027 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
2028
2029 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
2030 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
2031
2032 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
2033 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
2034
2035 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
2036 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
2037
2038 _other_op -> outOfLineCmmOp op (Just r) args
2039
2040 where
2041 actuallyInlineFloatOp instr format [x]
2042 = do res <- trivialUFCode format (instr format) x
2043 any <- anyReg res
2044 return (any (getRegisterReg platform False (CmmLocal r)))
2045
2046 actuallyInlineFloatOp _ _ args
2047 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
2048 ++ show (length args) ++ ")"
2049
2050 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
2051 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
2052 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
2053 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
2054 case args of
2055 [arg_x, arg_y] ->
2056 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
2057 let format = intFormat width
2058 lCode <- anyReg =<< trivialCode width (ADD_CC format)
2059 (Just (ADD_CC format)) arg_x arg_y
2060 let reg_l = getRegisterReg platform True (CmmLocal res_l)
2061 reg_h = getRegisterReg platform True (CmmLocal res_h)
2062 code = hCode reg_h `appOL`
2063 lCode reg_l `snocOL`
2064 ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
2065 return code
2066 _ -> panic "genCCall: Wrong number of arguments/results for add2"
2067 (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
2068 addSubIntC platform ADD_CC (Just . ADD_CC) width res_r res_c args
2069 (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
2070 addSubIntC platform SUB_CC (const Nothing) width res_r res_c args
2071 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
2072 case args of
2073 [arg_x, arg_y] ->
2074 do (y_reg, y_code) <- getRegOrMem arg_y
2075 x_code <- getAnyReg arg_x
2076 let format = intFormat width
2077 reg_h = getRegisterReg platform True (CmmLocal res_h)
2078 reg_l = getRegisterReg platform True (CmmLocal res_l)
2079 code = y_code `appOL`
2080 x_code rax `appOL`
2081 toOL [MUL2 format y_reg,
2082 MOV format (OpReg rdx) (OpReg reg_h),
2083 MOV format (OpReg rax) (OpReg reg_l)]
2084 return code
2085                  _ -> panic "genCCall: Wrong number of arguments/results for mul2"
2086
2087 _ -> if is32Bit
2088 then genCCall32' dflags target dest_regs args
2089 else genCCall64' dflags target dest_regs args
2090
2091 where divOp1 platform signed width results [arg_x, arg_y]
2092 = divOp platform signed width results Nothing arg_x arg_y
2093 divOp1 _ _ _ _ _
2094 = panic "genCCall: Wrong number of arguments for divOp1"
2095 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
2096 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
2097 divOp2 _ _ _ _ _
2098 = panic "genCCall: Wrong number of arguments for divOp2"
2099 divOp platform signed width [res_q, res_r]
2100 m_arg_x_high arg_x_low arg_y
2101 = do let format = intFormat width
2102 reg_q = getRegisterReg platform True (CmmLocal res_q)
2103 reg_r = getRegisterReg platform True (CmmLocal res_r)
2104 widen | signed = CLTD format
2105 | otherwise = XOR format (OpReg rdx) (OpReg rdx)
2106 instr | signed = IDIV
2107 | otherwise = DIV
2108 (y_reg, y_code) <- getRegOrMem arg_y
2109 x_low_code <- getAnyReg arg_x_low
2110 x_high_code <- case m_arg_x_high of
2111 Just arg_x_high ->
2112 getAnyReg arg_x_high
2113 Nothing ->
2114 return $ const $ unitOL widen
2115 return $ y_code `appOL`
2116 x_low_code rax `appOL`
2117 x_high_code rdx `appOL`
2118 toOL [instr format y_reg,
2119 MOV format (OpReg rax) (OpReg reg_q),
2120 MOV format (OpReg rdx) (OpReg reg_r)]
2121 divOp _ _ _ _ _ _ _
2122 = panic "genCCall: Wrong number of results for divOp"
2123
2124 addSubIntC platform instr mrevinstr width res_r res_c [arg_x, arg_y]
2125 = do let format = intFormat width
2126 rCode <- anyReg =<< trivialCode width (instr format)
2127 (mrevinstr format) arg_x arg_y
2128 reg_tmp <- getNewRegNat II8
2129 let reg_c = getRegisterReg platform True (CmmLocal res_c)
2130 reg_r = getRegisterReg platform True (CmmLocal res_r)
2131 code = rCode reg_r `snocOL`
2132 SETCC OFLO (OpReg reg_tmp) `snocOL`
2133 MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
2134 return code
2135 addSubIntC _ _ _ _ _ _ _
2136 = panic "genCCall: Wrong number of arguments/results for addSubIntC"
2137
2138 genCCall32' :: DynFlags
2139 -> ForeignTarget -- function to call
2140 -> [CmmFormal] -- where to put the result
2141 -> [CmmActual] -- arguments (of mixed type)
2142 -> NatM InstrBlock
2143 genCCall32' dflags target dest_regs args = do
2144 let
2145 prom_args = map (maybePromoteCArg dflags W32) args
2146
2147 -- Align stack to 16n for calls, assuming a starting stack
2148             -- alignment of 16n - word_size on procedure entry, which we
2149             -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2150 sizes = map (arg_size . cmmExprType dflags) (reverse args)
2151 raw_arg_size = sum sizes + wORD_SIZE dflags
2152 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
2153 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
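            -- e.g. two 4-byte args give raw_arg_size = 8 + 4 = 12,
            -- arg_pad_size = 4, and tot_arg_size = 12 (padding plus the
            -- pushed args, excluding the return address)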
2154 delta0 <- getDeltaNat
2155 setDeltaNat (delta0 - arg_pad_size)
2156
2157 use_sse2 <- sse2Enabled
2158 push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
2159 delta <- getDeltaNat
2160 MASSERT(delta == delta0 - tot_arg_size)
2161
2162 -- deal with static vs dynamic call targets
2163 (callinsns,cconv) <-
2164 case target of
2165 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2166 -> -- ToDo: stdcall arg sizes
2167 return (unitOL (CALL (Left fn_imm) []), conv)
2168 where fn_imm = ImmCLbl lbl
2169 ForeignTarget expr conv
2170 -> do { (dyn_r, dyn_c) <- getSomeReg expr
2171 ; ASSERT( isWord32 (cmmExprType dflags expr) )
2172 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
2173 PrimTarget _
2174 -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
2175                         ++ "probably because there are too many return values."
2176
2177 let push_code
2178 | arg_pad_size /= 0
2179 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2180 DELTA (delta0 - arg_pad_size)]
2181 `appOL` concatOL push_codes
2182 | otherwise
2183 = concatOL push_codes
2184
2185 -- Deallocate parameters after call for ccall;
2186 -- but not for stdcall (callee does it)
2187 --
2188 -- We have to pop any stack padding we added
2189 -- even if we are doing stdcall, though (#5052)
2190 pop_size
2191 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
2192 | otherwise = tot_arg_size
2193
2194 call = callinsns `appOL`
2195 toOL (
2196 (if pop_size==0 then [] else
2197 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
2198 ++
2199 [DELTA delta0]
2200 )
2201 setDeltaNat delta0
2202
2203 dflags <- getDynFlags
2204 let platform = targetPlatform dflags
2205
2206 let
2207 -- assign the results, if necessary
2208 assign_code [] = nilOL
2209 assign_code [dest]
2210 | isFloatType ty =
2211 if use_sse2
2212 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
2213 EAIndexNone
2214 (ImmInt 0)
2215 fmt = floatFormat w
2216 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
2217 DELTA (delta0 - b),
2218 GST fmt fake0 tmp_amode,
2219 MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
2220 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
2221 DELTA delta0]
2222 else unitOL (GMOV fake0 r_dest)
2223 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
2224 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
2225 | otherwise = unitOL (MOV (intFormat w)
2226 (OpReg eax)
2227 (OpReg r_dest))
2228 where
2229 ty = localRegType dest
2230 w = typeWidth ty
2231 b = widthInBytes w
2232 r_dest_hi = getHiVRegFromLo r_dest
2233 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
2234 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
2235
2236 return (push_code `appOL`
2237 call `appOL`
2238 assign_code dest_regs)
2239
2240 where
2241 arg_size :: CmmType -> Int -- Width in bytes
2242 arg_size ty = widthInBytes (typeWidth ty)
2243
2244 roundTo a x | x `mod` a == 0 = x
2245 | otherwise = x + a - (x `mod` a)
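        -- e.g. roundTo 16 12 == 16 and roundTo 16 21 == 32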
2246
2247 push_arg :: Bool -> CmmActual {-current argument-}
2248 -> NatM InstrBlock -- code
2249
2250 push_arg use_sse2 arg -- we don't need the hints on x86
2251 | isWord64 arg_ty = do
2252 ChildCode64 code r_lo <- iselExpr64 arg
2253 delta <- getDeltaNat
2254 setDeltaNat (delta - 8)
2255 let
2256 r_hi = getHiVRegFromLo r_lo
2257 return ( code `appOL`
2258 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
2259 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
2260 DELTA (delta-8)]
2261 )
2262
2263 | isFloatType arg_ty = do
2264 (reg, code) <- getSomeReg arg
2265 delta <- getDeltaNat
2266 setDeltaNat (delta-size)
2267 return (code `appOL`
2268 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
2269 DELTA (delta-size),
2270 let addr = AddrBaseIndex (EABaseReg esp)
2271 EAIndexNone
2272 (ImmInt 0)
2273 format = floatFormat (typeWidth arg_ty)
2274 in
2275 if use_sse2
2276 then MOV format (OpReg reg) (OpAddr addr)
2277 else GST format reg addr
2278 ]
2279 )
2280
2281 | otherwise = do
2282 (operand, code) <- getOperand arg
2283 delta <- getDeltaNat
2284 setDeltaNat (delta-size)
2285 return (code `snocOL`
2286 PUSH II32 operand `snocOL`
2287 DELTA (delta-size))
2288
2289 where
2290 arg_ty = cmmExprType dflags arg
2291 size = arg_size arg_ty -- Byte size
2292
2293 genCCall64' :: DynFlags
2294 -> ForeignTarget -- function to call
2295 -> [CmmFormal] -- where to put the result
2296 -> [CmmActual] -- arguments (of mixed type)
2297 -> NatM InstrBlock
2298 genCCall64' dflags target dest_regs args = do
2299 -- load up the register arguments
2300 let prom_args = map (maybePromoteCArg dflags W32) args
2301
2302 (stack_args, int_regs_used, fp_regs_used, load_args_code)
2303 <-
2304 if platformOS platform == OSMinGW32
2305 then load_args_win prom_args [] [] (allArgRegs platform) nilOL
2306 else do (stack_args, aregs, fregs, load_args_code)
2307 <- load_args prom_args (allIntArgRegs platform) (allFPArgRegs platform) nilOL
2308 let fp_regs_used = reverse (drop (length fregs) (reverse (allFPArgRegs platform)))
2309 int_regs_used = reverse (drop (length aregs) (reverse (allIntArgRegs platform)))
2310 return (stack_args, int_regs_used, fp_regs_used, load_args_code)
2311
2312 let
2313 arg_regs_used = int_regs_used ++ fp_regs_used
2314 arg_regs = [eax] ++ arg_regs_used
2315         -- for annotating the call instruction
2316 sse_regs = length fp_regs_used
2317 arg_stack_slots = if platformOS platform == OSMinGW32
2318 then length stack_args + length (allArgRegs platform)
2319 else length stack_args
2320 tot_arg_size = arg_size * arg_stack_slots
2321
2322
2323 -- Align stack to 16n for calls, assuming a starting stack
2324 -- alignment of 16n - word_size on procedure entry. Which we
2325 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2326 (real_size, adjust_rsp) <-
2327 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2328 then return (tot_arg_size, nilOL)
2329 else do -- we need to adjust...
2330 delta <- getDeltaNat
2331 setDeltaNat (delta - wORD_SIZE dflags)
2332 return (tot_arg_size + wORD_SIZE dflags, toOL [
2333 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2334 DELTA (delta - wORD_SIZE dflags) ])
2335
2336 -- push the stack args, right to left
2337 push_code <- push_args (reverse stack_args) nilOL
2338 -- On Win64, we also have to leave stack space for the arguments
2339 -- that we are passing in registers
2340 lss_code <- if platformOS platform == OSMinGW32
2341 then leaveStackSpace (length (allArgRegs platform))
2342 else return nilOL
2343 delta <- getDeltaNat
2344
2345 -- deal with static vs dynamic call targets
2346 (callinsns,_cconv) <-
2347 case target of
2348 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2349 -> -- ToDo: stdcall arg sizes
2350 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2351 where fn_imm = ImmCLbl lbl
2352 ForeignTarget expr conv
2353 -> do (dyn_r, dyn_c) <- getSomeReg expr
2354 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2355 PrimTarget _
2356 -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
2357                 ++ "probably because there are too many return values."
2358
2359 let
2360 -- The x86_64 ABI requires us to set %al to the number of SSE2
2361 -- registers that contain arguments, if the called routine
2362 -- is a varargs function. We don't know whether it's a
2363 -- varargs function or not, so we have to assume it is.
2364 --
2365 -- It's not safe to omit this assignment, even if the number
2366 -- of SSE2 regs in use is zero. If %al is larger than 8
2367 -- on entry to a varargs function, seg faults ensue.
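      --
      -- e.g. a call passing two doubles in %xmm0/%xmm1 has sse_regs = 2,
      -- so assign_eax 2 below emits movl $2, %eax before the CALL.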
2368 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
2369
2370 let call = callinsns `appOL`
2371 toOL (
2372 -- Deallocate parameters after call for ccall;
2373 -- stdcall has callee do it, but is not supported on
2374 -- x86_64 target (see #3336)
2375 (if real_size==0 then [] else
2376 [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2377 ++
2378 [DELTA (delta + real_size)]
2379 )
2380 setDeltaNat (delta + real_size)
2381
2382 let
2383 -- assign the results, if necessary
2384 assign_code [] = nilOL
2385 assign_code [dest] =
2386 case typeWidth rep of
2387 W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
2388 (OpReg xmm0)
2389 (OpReg r_dest))
2390 W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
2391 (OpReg xmm0)
2392 (OpReg r_dest))
2393 _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
2394 where
2395 rep = localRegType dest
2396 r_dest = getRegisterReg platform True (CmmLocal dest)
2397 assign_code _many = panic "genCCall.assign_code many"
2398
2399 return (load_args_code `appOL`
2400 adjust_rsp `appOL`
2401 push_code `appOL`
2402 lss_code `appOL`
2403 assign_eax sse_regs `appOL`
2404 call `appOL`
2405 assign_code dest_regs)
2406
2407 where platform = targetPlatform dflags
2408       arg_size = 8 -- always, at the moment
2409
2410 load_args :: [CmmExpr]
2411 -> [Reg] -- int regs avail for args
2412 -> [Reg] -- FP regs avail for args
2413 -> InstrBlock
2414 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2415 load_args args [] [] code = return (args, [], [], code)
2416 -- no more regs to use
2417 load_args [] aregs fregs code = return ([], aregs, fregs, code)
2418 -- no more args to push
2419 load_args (arg : rest) aregs fregs code
2420 | isFloatType arg_rep =
2421 case fregs of
2422 [] -> push_this_arg
2423 (r:rs) -> do
2424 arg_code <- getAnyReg arg
2425 load_args rest aregs rs (code `appOL` arg_code r)
2426 | otherwise =
2427 case aregs of
2428 [] -> push_this_arg
2429 (r:rs) -> do
2430 arg_code <- getAnyReg arg
2431 load_args rest rs fregs (code `appOL` arg_code r)
2432 where
2433 arg_rep = cmmExprType dflags arg
2434
2435 push_this_arg = do
2436 (args',ars,frs,code') <- load_args rest aregs fregs code
2437 return (arg:args', ars, frs, code')
2438
2439 load_args_win :: [CmmExpr]
2440 -> [Reg] -- used int regs
2441 -> [Reg] -- used FP regs
2442 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2443 -> InstrBlock
2444 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2445 load_args_win args usedInt usedFP [] code
2446 = return (args, usedInt, usedFP, code)
2447 -- no more regs to use
2448 load_args_win [] usedInt usedFP _ code
2449 = return ([], usedInt, usedFP, code)
2450 -- no more args to push
2451 load_args_win (arg : rest) usedInt usedFP
2452 ((ireg, freg) : regs) code
2453 | isFloatType arg_rep = do
2454 arg_code <- getAnyReg arg
2455 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2456 (code `appOL`
2457 arg_code freg `snocOL`
2458 -- If we are calling a varargs function
2459 -- then we need to define ireg as well
2460 -- as freg
2461 MOV II64 (OpReg freg) (OpReg ireg))
2462 | otherwise = do
2463 arg_code <- getAnyReg arg
2464 load_args_win rest (ireg : usedInt) usedFP regs
2465 (code `appOL` arg_code ireg)
2466 where
2467 arg_rep = cmmExprType dflags arg
2468
2469 push_args [] code = return code
2470 push_args (arg:rest) code
2471 | isFloatType arg_rep = do
2472 (arg_reg, arg_code) <- getSomeReg arg
2473 delta <- getDeltaNat
2474 setDeltaNat (delta-arg_size)
2475 let code' = code `appOL` arg_code `appOL` toOL [
2476 SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp),
2477 DELTA (delta-arg_size),
2478 MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2479 push_args rest code'
2480
2481 | otherwise = do
2482 ASSERT(width == W64) return ()
2483 (arg_op, arg_code) <- getOperand arg
2484 delta <- getDeltaNat
2485 setDeltaNat (delta-arg_size)
2486 let code' = code `appOL` arg_code `appOL` toOL [
2487 PUSH II64 arg_op,
2488 DELTA (delta-arg_size)]
2489 push_args rest code'
2490 where
2491 arg_rep = cmmExprType dflags arg
2492 width = typeWidth arg_rep
2493
2494 leaveStackSpace n = do
2495 delta <- getDeltaNat
2496 setDeltaNat (delta - n * arg_size)
2497 return $ toOL [
2498 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2499 DELTA (delta - n * arg_size)]
2500
2501 maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
2502 maybePromoteCArg dflags wto arg
2503 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
2504 | otherwise = arg
2505 where
2506 wfrom = cmmExprWidth dflags arg
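
-- For illustration: a W8 argument being passed to a W32 slot becomes
-- CmmMachOp (MO_UU_Conv W8 W32) [arg], mirroring C's integer promotion
-- of small arguments.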
2507
2508 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2509 outOfLineCmmOp mop res args
2510 = do
2511 dflags <- getDynFlags
2512 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
2513 let target = ForeignTarget targetExpr
2514 (ForeignConvention CCallConv [] [] CmmMayReturn)
2515
2516 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
2517 where
2518 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2519       -- TODO: Why is this ok? Under Linux this code will be in libm.so
2520       --       Is it because they're really implemented as primitive instructions by the assembler?  -- BL 2009/12/31
2521 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2522
2523 fn = case mop of
2524 MO_F32_Sqrt -> fsLit "sqrtf"
2525 MO_F32_Sin -> fsLit "sinf"
2526 MO_F32_Cos -> fsLit "cosf"
2527 MO_F32_Tan -> fsLit "tanf"
2528 MO_F32_Exp -> fsLit "expf"
2529 MO_F32_Log -> fsLit "logf"
2530
2531 MO_F32_Asin -> fsLit "asinf"
2532 MO_F32_Acos -> fsLit "acosf"
2533 MO_F32_Atan -> fsLit "atanf"
2534
2535 MO_F32_Sinh -> fsLit "sinhf"
2536 MO_F32_Cosh -> fsLit "coshf"
2537 MO_F32_Tanh -> fsLit "tanhf"
2538 MO_F32_Pwr -> fsLit "powf"
2539
2540 MO_F64_Sqrt -> fsLit "sqrt"
2541 MO_F64_Sin -> fsLit "sin"
2542 MO_F64_Cos -> fsLit "cos"
2543 MO_F64_Tan -> fsLit "tan"
2544 MO_F64_Exp -> fsLit "exp"
2545 MO_F64_Log -> fsLit "log"
2546
2547 MO_F64_Asin -> fsLit "asin"
2548 MO_F64_Acos -> fsLit "acos"
2549 MO_F64_Atan -> fsLit "atan"
2550
2551 MO_F64_Sinh -> fsLit "sinh"
2552 MO_F64_Cosh -> fsLit "cosh"
2553 MO_F64_Tanh -> fsLit "tanh"
2554 MO_F64_Pwr -> fsLit "pow"
2555
2556 MO_Memcpy _ -> fsLit "memcpy"
2557 MO_Memset _ -> fsLit "memset"
2558 MO_Memmove _ -> fsLit "memmove"
2559
2560 MO_PopCnt _ -> fsLit "popcnt"
2561 MO_BSwap _ -> fsLit "bswap"
2562 MO_Clz w -> fsLit $ clzLabel w
2563 MO_Ctz _ -> unsupported
2564
2565 MO_AtomicRMW _ _ -> fsLit "atomicrmw"
2566 MO_AtomicRead _ -> fsLit "atomicread"
2567 MO_AtomicWrite _ -> fsLit "atomicwrite"
2568 MO_Cmpxchg _ -> fsLit "cmpxchg"
2569
2570 MO_UF_Conv _ -> unsupported
2571
2572 MO_S_QuotRem {} -> unsupported
2573 MO_U_QuotRem {} -> unsupported
2574 MO_U_QuotRem2 {} -> unsupported
2575 MO_Add2 {} -> unsupported
2576 MO_AddIntC {} -> unsupported
2577 MO_SubIntC {} -> unsupported
2578 MO_U_Mul2 {} -> unsupported
2579 MO_WriteBarrier -> unsupported
2580 MO_Touch -> unsupported
2581 (MO_Prefetch_Data _ ) -> unsupported
2582 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2583 ++ " not supported here")
2584
2585 -- -----------------------------------------------------------------------------
2586 -- Generating a table-branch
2587
2588 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2589
2590 genSwitch dflags expr targets
2591 | gopt Opt_PIC dflags
2592 = do
2593 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2594 lbl <- getNewLabelNat
2595 dflags <- getDynFlags
2596 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2597 (tableReg,t_code) <- getSomeReg $ dynRef
2598 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2599 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2600
2601 return $ if target32Bit (targetPlatform dflags)
2602 then e_code `appOL` t_code `appOL` toOL [
2603 ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
2604 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2605 ]
2606 else case platformOS (targetPlatform dflags) of
2607 OSDarwin ->
2608 -- on Mac OS X/x86_64, put the jump table
2609 -- in the text section to work around a
2610 -- limitation of the linker.
2611 -- ld64 is unable to handle the relocations for
2612 -- .quad L1 - L0
2613 -- if L0 is not preceded by a non-anonymous
2614 -- label in its section.
2615 e_code `appOL` t_code `appOL` toOL [
2616 ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
2617 JMP_TBL (OpReg tableReg) ids Text lbl
2618 ]
2619 _ ->
2620 -- HACK: On x86_64 binutils<2.17 is only able
2621 -- to generate PC32 relocations, hence we only
2622 -- get 32-bit offsets in the jump table. As
2623 -- these offsets are always negative we need
2624 -- to properly sign extend them to 64-bit.
2625 -- This hack should be removed in conjunction
2626 -- with the hack in PprMach.hs/pprDataItem
2627 -- once binutils 2.17 is standard.
2628 e_code `appOL` t_code `appOL` toOL [
2629 MOVSxL II32 op (OpReg reg),
2630 ADD (intFormat (wordWidth dflags)) (OpReg reg) (OpReg tableReg),
2631 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2632 ]
2633 | otherwise
2634 = do
2635 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2636 lbl <- getNewLabelNat
2637 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2638 code = e_code `appOL` toOL [
2639 JMP_TBL op ids ReadOnlyData lbl
2640 ]
2641 return code
2642 where (offset, ids) = switchTargetsToTable targets
2643
2644 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2645 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2646 = Just (createJumpTable dflags ids section lbl)
2647 generateJumpTableForInstr _ _ = Nothing
2648
2649 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2650 -> GenCmmDecl (Alignment, CmmStatics) h g
2651 createJumpTable dflags ids section lbl
2652 = let jumpTable
2653 | gopt Opt_PIC dflags =
2654 let jumpTableEntryRel Nothing
2655 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2656 jumpTableEntryRel (Just blockid)
2657 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2658 where blockLabel = mkAsmTempLabel (getUnique blockid)
2659 in map jumpTableEntryRel ids
2660 | otherwise = map (jumpTableEntry dflags) ids
2661 in CmmData section (1, Statics lbl jumpTable)
2662
2663 -- -----------------------------------------------------------------------------
2664 -- 'condIntReg' and 'condFltReg': condition codes into registers
2665
2666 -- Turn those condition codes into integers now (when they appear on
2667 -- the right hand side of an assignment).
2668 --
2669 -- (If applicable) Do not fill the delay slots here; you will confuse the
2670 -- register allocator.
2671
2672 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2673
2674 condIntReg cond x y = do
2675 CondCode _ cond cond_code <- condIntCode cond x y
2676 tmp <- getNewRegNat II8
2677 let
2678 code dst = cond_code `appOL` toOL [
2679 SETCC cond (OpReg tmp),
2680 MOVZxL II8 (OpReg tmp) (OpReg dst)
2681 ]
2682 return (Any II32 code)
2683
2684
2685
2686 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2687 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2688 where
2689 condFltReg_x87 = do
2690 CondCode _ cond cond_code <- condFltCode cond x y
2691 tmp <- getNewRegNat II8
2692 let
2693 code dst = cond_code `appOL` toOL [
2694 SETCC cond (OpReg tmp),
2695 MOVZxL II8 (OpReg tmp) (OpReg dst)
2696 ]
2697 return (Any II32 code)
2698
2699 condFltReg_sse2 = do
2700 CondCode _ cond cond_code <- condFltCode cond x y
2701 tmp1 <- getNewRegNat (archWordFormat is32Bit)
2702 tmp2 <- getNewRegNat (archWordFormat is32Bit)
2703 let
2704 -- We have to worry about unordered operands (eg. comparisons
2705 -- against NaN). If the operands are unordered, the comparison
2706 -- sets the parity flag, carry flag and zero flag.
2707 -- All comparisons are supposed to return false for unordered
2708 -- operands except for !=, which returns true.
2709 --
2710 -- Optimisation: we don't have to test the parity flag if we
2711 -- know the test has already excluded the unordered case: eg >
2712 -- and >= test for a zero carry flag, which can only occur for
2713 -- ordered operands.
2714 --
2715 -- ToDo: by reversing comparisons we could avoid testing the
2716 -- parity flag in more cases.
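        --
        -- e.g. for NE on unordered operands (a NaN), the comparison sets
        -- ZF, PF and CF, so SETNE alone would wrongly produce False;
        -- OR-ing in the parity bit (or_unordered below) gives the
        -- required True.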
2717
2718 code dst =
2719 cond_code `appOL`
2720 (case cond of
2721 NE -> or_unordered dst
2722 GU -> plain_test dst
2723 GEU -> plain_test dst
2724 _ -> and_ordered dst)
2725
2726 plain_test dst = toOL [
2727 SETCC cond (OpReg tmp1),
2728 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2729 ]
2730 or_unordered dst = toOL [
2731 SETCC cond (OpReg tmp1),
2732 SETCC PARITY (OpReg tmp2),
2733 OR II8 (OpReg tmp1) (OpReg tmp2),
2734 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2735 ]
2736 and_ordered dst = toOL [
2737 SETCC cond (OpReg tmp1),
2738 SETCC NOTPARITY (OpReg tmp2),
2739 AND II8 (OpReg tmp1) (OpReg tmp2),
2740 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2741 ]
2742 return (Any II32 code)
2743
2744
2745 -- -----------------------------------------------------------------------------
2746 -- 'trivial*Code': deal with trivial instructions
2747
2748 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2749 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2750 -- Only look for constants on the right hand side, because that's
2751 -- where the generic optimizer will have put them.
2752
2753 -- Similarly, for unary instructions, we don't have to worry about
2754 -- matching an StInt as the argument, because genericOpt will already
2755 -- have handled the constant-folding.
2756
2757
2758 {-
2759 The Rules of the Game are:
2760
2761 * You cannot assume anything about the destination register dst;
2762 it may be anything, including a fixed reg.
2763
2764 * You may compute an operand into a fixed reg, but you may not
2765 subsequently change the contents of that fixed reg. If you
2766 want to do so, first copy the value either to a temporary
2767 or into dst. You are free to modify dst even if it happens
2768 to be a fixed reg -- that's not your problem.
2769
2770 * You cannot assume that a fixed reg will stay live over an
2771 arbitrary computation. The same applies to the dst reg.
2772
2773 * Temporary regs obtained from getNewRegNat are distinct from
2774 each other and from all other regs, and stay live over
2775 arbitrary computations.
2776
2777 --------------------
2778
2779 SDM's version of The Rules:
2780
2781 * If getRegister returns Any, that means it can generate correct
2782 code which places the result in any register, period. Even if that
2783 register happens to be read during the computation.
2784
2785 Corollary #1: this means that if you are generating code for an
2786 operation with two arbitrary operands, you cannot assign the result
2787 of the first operand into the destination register before computing
2788 the second operand. The second operand might require the old value
2789 of the destination register.
2790
2791 Corollary #2: A function might be able to generate more efficient
2792 code if it knows the destination register is a new temporary (and
2793 therefore not read by any of the sub-computations).
2794
2795 * If getRegister returns Any, then the code it generates may modify only:
2796 (a) fresh temporaries
2797 (b) the destination register
2798 (c) known registers (eg. %ecx is used by shifts)
2799 In particular, it may *not* modify global registers, unless the global
2800 register happens to be the destination register.
2801 -}
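
{-
To make Corollary #1 concrete, a minimal sketch: for a two-operand
instruction the unsafe scheme would be

    code dst = a_code dst `appOL` b_code `snocOL` instr b_op (OpReg dst)

since computing b may still read the old contents of dst. genTrivialCode
below therefore computes b first, into an operand that survives the
computation of a, and only then computes a straight into dst:

    code dst = b_code `appOL` a_code dst `snocOL` instr b_op (OpReg dst)
-}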
2802
2803 trivialCode :: Width -> (Operand -> Operand -> Instr)
2804 -> Maybe (Operand -> Operand -> Instr)
2805 -> CmmExpr -> CmmExpr -> NatM Register
2806 trivialCode width instr m a b
2807 = do is32Bit <- is32BitPlatform
2808 trivialCode' is32Bit width instr m a b
2809
2810 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
2811 -> Maybe (Operand -> Operand -> Instr)
2812 -> CmmExpr -> CmmExpr -> NatM Register
2813 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
2814 | is32BitLit is32Bit lit_a = do
2815 b_code <- getAnyReg b
2816 let
2817 code dst
2818 = b_code dst `snocOL`
2819 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2820 return (Any (intFormat width) code)
2821
2822 trivialCode' _ width instr _ a b
2823 = genTrivialCode (intFormat width) instr a b
2824
2825 -- This is re-used for floating pt instructions too.
2826 genTrivialCode :: Format -> (Operand -> Operand -> Instr)
2827 -> CmmExpr -> CmmExpr -> NatM Register
2828 genTrivialCode rep instr a b = do
2829 (b_op, b_code) <- getNonClobberedOperand b
2830 a_code <- getAnyReg a
2831 tmp <- getNewRegNat rep
2832 let
2833 -- We want the value of b to stay alive across the computation of a.
2834 -- But, we want to calculate a straight into the destination register,
2835 -- because the instruction only has two operands (dst := dst `op` src).
2836 -- The troublesome case is when the result of b is in the same register
2837 -- as the destination reg. In this case, we have to save b in a
2838 -- new temporary across the computation of a.
2839 code dst
2840 | dst `regClashesWithOp` b_op =
2841 b_code `appOL`
2842 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2843 a_code dst `snocOL`
2844 instr (OpReg tmp) (OpReg dst)
2845 | otherwise =
2846 b_code `appOL`
2847 a_code dst `snocOL`
2848 instr b_op (OpReg dst)
2849 return (Any rep code)
2850
2851 regClashesWithOp :: Reg -> Operand -> Bool
2852 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2853 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2854 _ `regClashesWithOp` _ = False
2855
2856 -----------
2857
2858 trivialUCode :: Format -> (Operand -> Instr)
2859 -> CmmExpr -> NatM Register
2860 trivialUCode rep instr x = do
2861 x_code <- getAnyReg x
2862 let
2863 code dst =
2864 x_code dst `snocOL`
2865 instr (OpReg dst)
2866 return (Any rep code)
2867
2868 -----------
2869
2870 trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr)
2871 -> CmmExpr -> CmmExpr -> NatM Register
2872 trivialFCode_x87 instr x y = do
2873 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2874 (y_reg, y_code) <- getSomeReg y
2875 let
2876 format = FF80 -- always, on x87
2877 code dst =
2878 x_code `appOL`
2879 y_code `snocOL`
2880 instr format x_reg y_reg dst
2881 return (Any format code)
2882
2883 trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
2884 -> CmmExpr -> CmmExpr -> NatM Register
2885 trivialFCode_sse2 pk instr x y
2886 = genTrivialCode format (instr format) x y
2887 where format = floatFormat pk
2888
2889
2890 trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2891 trivialUFCode format instr x = do
2892 (x_reg, x_code) <- getSomeReg x
2893 let
2894 code dst =
2895 x_code `snocOL`
2896 instr x_reg dst
2897 return (Any format code)
2898
2899
2900 --------------------------------------------------------------------------------
2901 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2902 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2903 where
2904 coerce_x87 = do
2905 (x_reg, x_code) <- getSomeReg x
2906 let
2907 opc = case to of W32 -> GITOF; W64 -> GITOD;
2908 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2909 ++ show n ++ ")"
2910 code dst = x_code `snocOL` opc x_reg dst
2911 -- ToDo: works for non-II32 reps?
2912 return (Any FF80 code)
2913
2914 coerce_sse2 = do
2915 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2916 let
2917 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2918 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2919 ++ show n ++ ")"
2920 code dst = x_code `snocOL` opc (intFormat from) x_op dst
2921 return (Any (floatFormat to) code)
2922 -- works even if the destination rep is <II32
2923
2924 --------------------------------------------------------------------------------
2925 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2926 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2927 where
2928 coerceFP2Int_x87 = do
2929 (x_reg, x_code) <- getSomeReg x
2930 let
2931 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2932 n -> panic $ "coerceFP2Int.x87: unhandled width ("
2933 ++ show n ++ ")"
2934 code dst = x_code `snocOL` opc x_reg dst
2935 -- ToDo: works for non-II32 reps?
2936 return (Any (intFormat to) code)
2937
2938 coerceFP2Int_sse2 = do
2939 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2940 let
2941 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
2942                    n -> panic $ "coerceFP2Int.sse: unhandled width ("
2943 ++ show n ++ ")"
2944 code dst = x_code `snocOL` opc (intFormat to) x_op dst
2945 return (Any (intFormat to) code)
2946 -- works even if the destination rep is <II32
2947
2948
2949 --------------------------------------------------------------------------------
2950 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2951 coerceFP2FP to x = do
2952 use_sse2 <- sse2Enabled
2953 (x_reg, x_code) <- getSomeReg x
2954 let
2955 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2956 n -> panic $ "coerceFP2FP: unhandled width ("
2957 ++ show n ++ ")"
2958 | otherwise = GDTOF
2959 code dst = x_code `snocOL` opc x_reg dst
2960 return (Any (if use_sse2 then floatFormat to else FF80) code)
2961
2962 --------------------------------------------------------------------------------
2963
2964 sse2NegCode :: Width -> CmmExpr -> NatM Register
2965 sse2NegCode w x = do
2966 let fmt = floatFormat w
2967 x_code <- getAnyReg x
2968 -- This is how gcc does it, so it can't be that bad:
2969 let
2970 const | FF32 <- fmt = CmmInt 0x80000000 W32
2971 | otherwise = CmmInt 0x8000000000000000 W64
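      -- XOR-ing with this constant flips only the IEEE-754 sign bit,
      -- e.g. 1.0 :: Float (0x3F800000) becomes -1.0 (0xBF800000)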
2972 Amode amode amode_code <- memConstant (widthInBytes w) const
2973 tmp <- getNewRegNat fmt
2974 let
2975 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2976 MOV fmt (OpAddr amode) (OpReg tmp),
2977 XOR fmt (OpReg tmp) (OpReg dst)
2978 ]
2979 --
2980 return (Any fmt code)
2981
2982 isVecExpr :: CmmExpr -> Bool
2983 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
2984 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
2985 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
2986 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
2987 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
2988 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
2989 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
2990 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
2991 isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
2992 isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
2993 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
2994 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
2995 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
2996 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
2997 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
2998 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
2999 isVecExpr _ = False
3000
3001 needLlvm :: NatM a
3002 needLlvm =
3003 sorry $ unlines ["The native code generator does not support vector"
3004 ,"instructions. Please use -fllvm."]