{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Debug            ( DebugBlock(..) )
import Instruction
import PIC
import NCGMonad
import Format
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primPackageKey )
import PprCmm           ()
import CmmUtils
import CmmSwitch
import Cmm
import Hoopl
import CLabel
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import FastBool         ( isFastTrue )
import DynFlags
import Util

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
  dflags <- getDynFlags
  return (isSse2Enabled dflags)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
  dflags <- getDynFlags
  return (isSse4_2Enabled dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
  b <- sse2Enabled
  if b then sse2 else x87
cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)


stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL
    CmmUnwind {}   -> return nilOL

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode format reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode format reg src
        where ty = cmmRegType dflags reg
              format = cmmTypeFormat ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode format addr src
        where ty = cmmExprType dflags src
              format = cmmTypeFormat ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id          -> genBranch id
    CmmCondBranch arg true false -> do b1 <- genCondJump true arg
                                       b2 <- genBranch false
                                       return (b1 `appOL` b2)
    CmmSwitch arg ids     -> do dflags <- getDynFlags
                                genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
                                dflags <- getDynFlags
                                genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--   They are really trees of insns to facilitate fast appending, where a
--   left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
--      Reg is the lower 32-bit temporary which contains the result.
--      Use getHiVRegFromLo to find the other VRegUnique.
--
--      The rules of this simplified insn selection game are therefore
--      that the returned Reg may be modified.
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Registers passed up the tree.  If the stix code forces the register
--      to live in a pre-decided machine register, it comes out as @Fixed@;
--      otherwise, it comes out as @Any@, and the parent can decide which
--      register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)


swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let fmt = cmmTypeFormat pk in
    if isFloatFormat fmt && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u fmt)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
--      A CmmInt is intended to be truncated to the appropriate
--      number of bits, so here we truncate it to Int64.  This is
--      important because e.g. -1 as a CmmInt might be either
--      -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
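
-- A quick sketch of the truncation at work: a W64 CmmInt carrying
-- 18446744073709551615 (i.e. -1 truncated to 64 bits) becomes
-- (-1 :: Int64) here and is accepted as a 32-bit immediate, whereas
-- 0x80000000 stays positive as an Int64 and is correctly rejected.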

-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)

-- | The dual to getAnyReg: compute an expression into a register, but
--   we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
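
-- A rough sketch of the output: a 64-bit store to [addr] on i386 comes
-- out as two 32-bit moves, low word first (little-endian):
--     movl %v_lo, 0(addr)
--     movl %v_hi, 4(addr)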


assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
          )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)
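
-- A sketch of the addition cases above: 64-bit addition on a 32-bit
-- target is a carry chain. For (2^32 - 1) + 1 the low-word ADD wraps
-- to 0 and sets the carry flag, which the high-word ADC then folds in:
--     movl %e1_lo, %rlo ; addl %e2_lo, %rlo   -- rlo = 0, CF = 1
--     movl %e1_hi, %rhi ; adcl %e2_hi, %rhi   -- rhi = e1_hi + e2_hi + 1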

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register; it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 format | not use_sse2 && isFloatFormat fmt = FF80
                        | otherwise                         = fmt
               --
               let platform = targetPlatform dflags
               return (Fixed format
                             (getRegisterReg platform use_sse2 reg)
                             nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                                  [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                                  [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
  ChildCode64 code rlo <- iselExpr64 x
  return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
          format = floatFormat w
          code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any format code)

    | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_format)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source: it
           has to be an Imm, CL, or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op = instr (intFormat width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt  = case rep of
                           W32 -> 31
                           W64 -> 63
                           _ -> panic "shift_amt"

             format = intFormat rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB format (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed format eax code)

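    -- A worked sketch of the overflow test at W32: for a = b = 2^20 the
    -- true product 2^40 does not fit, so %edx differs from the sign
    -- extension sar(%eax, 31) and the final SUB leaves %eax non-zero;
    -- for small products the high part equals the sign extension and
    -- %eax ends up 0.
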
    --------------------
    shift_code :: Width
               -> (Format -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               format = intFormat width
               code dst
                  = x_code dst `snocOL`
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let format = intFormat width
        tmp <- getNewRegNat format
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            format = intFormat width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA format
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any format code)

    ----------------------
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr format y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed format result code)

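    -- As a sketch, a signed W32 division x `quot` y therefore emits
    -- roughly:
    --     movl  x, %eax
    --     cltd            -- sign-extend %eax into %edx:%eax
    --     idivl y         -- quotient -> %eax, remainder -> %edx
    -- with the unsigned variant zeroing %edx via XOR instead of CLTD.
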

getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
    Amode addr mem_code <- getAmode mem
    use_sse2 <- sse2Enabled
    loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any format code)
  where
    width = typeWidth pk
    format = intFormat width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV format
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV format) mem
    return (Any format code)
  where format = intFormat $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        format = intFormat width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        format1 = if is32Bit then format
                             else case format of
                                    II64 -> II32
                                    _    -> format
        code dst
           = unitOL (XOR format1 (OpReg dst) (OpReg dst))
    in
        return (Any format code)

-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).
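        -- e.g. a sketch: loading the W64 literal 42 comes out as
        --     movl $42, %eax    -- upper 32 bits of %rax are cleared
        -- rather than the longer movq encoding.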

getRegister' dflags _ (CmmLit lit)
  = do let format = cmmTypeFormat (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
       return (Any format code)

getRegister' _ _ other
  | isVecExpr other  = needLlvm
  | otherwise        = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)

reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst
   | format == FF80 = GMOV src dst
   | otherwise      = MOV format (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                           CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                           b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])

-- Matches: (x + offset) + (y << shift)
getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
                                   CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)

-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register). This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
getSimpleAmode dflags is32Bit addr
  | is32Bit = do
      addr_code <- getAnyReg addr
      addr_r <- getNewRegNat (intFormat (wordWidth dflags))
      let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
      return $! Amode amode (addr_code addr_r)
  | otherwise = getAmode addr

x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
              code = x_code `appOL` y_code
              base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
                                   n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
               code)
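
-- A sketch of the result: base b, index i, shift 3, offset 16 yields
-- the single addressing mode 16(%b,%i,8), i.e. b + 8*i + 16.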




-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && (if is32Bit then not (isWord64 pk) else True)
    then do
      dflags <- getDynFlags
      let platform = targetPlatform dflags
      Amode src mem_code <- getAmode mem
      (src',save_code) <-
        if (amodeCouldBeClobbered platform src)
                then do
                   tmp <- getNewRegNat (archWordFormat is32Bit)
                   return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                           unitOL (LEA (archWordFormat is32Bit)
                                       (OpAddr src)
                                       (OpReg tmp)))
                else
                   return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else do
      getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = isFastTrue (freeReg platform rr)
regClobbered _ _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do

      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else
       getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: Bool -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _) = True
isOperand is32Bit (CmmLit lit)  = is32BitLit is32Bit lit
                                  || isSuitableFloatingPointLit lit
isOperand _ _            = False

memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  dflags <- getDynFlags
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA ReadOnlyData (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)
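
-- A sketch of what this produces on x86_64 for a double constant d:
-- an LDATA entry in the read-only section,
--     .Lcn:  .double d
-- plus the rip-relative amode .Lcn(%rip), so the constant can be used
-- directly as a memory operand.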


loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let format = floatFormat w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV format (OpAddr addr) (OpReg dst)
                    else GLD format addr dst
  return (Any (if use_sse2 then format else FF80) code)


-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
     then do
       Amode src mem_code <- getAmode mem
       return (OpAddr src, mem_code)
     else do
       (reg, code) <- getNonClobberedReg e
       return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: Bool -> CmmLit -> Bool
is32BitLit is32Bit (CmmInt i W64)
 | not is32Bit
    = -- assume that labels are in the range 0-2^31-1: this assumes the
      -- small memory model (see gcc docs, -mcmodel=small).
      is32BitInteger i
is32BitLit _ _ = True




-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      _ -> condIntCode (machOpToCond mop) x y

getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)

machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
  MO_Eq _   -> EQQ
  MO_Ne _   -> NE
  MO_S_Gt _ -> GTT
  MO_S_Ge _ -> GE
  MO_S_Lt _ -> LTT
  MO_S_Le _ -> LE
  MO_U_Gt _ -> GU
  MO_U_Ge _ -> GEU
  MO_U_Lt _ -> LU
  MO_U_Le _ -> LEU
  _other -> pprPanic "machOpToCond" (pprMachOp mo)


-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do is32Bit <- is32BitPlatform
                          condIntCode' is32Bit cond x y

condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate
condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
 | is32BitLit is32Bit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
                  CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
    | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
         code = x_code `snocOL`
                TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)

-- anything vs zero
condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
                  TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand
condIntCode' is32Bit cond x y
 | isOperand is32Bit y = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
-- operand vs. anything: invert the comparison so that we can use a
-- single comparison instruction.
 | isOperand is32Bit x
 , Just revcond <- maybeFlipCond cond = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)

-- anything vs anything
condIntCode' _ cond x y = do
  dflags <- getDynFlags
  (y_reg, y_code) <- getNonClobberedReg y
  (x_op, x_code) <- getRegOrMem x
  let
        code = y_code `appOL`
               x_code `snocOL`
                  CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op
  return (CondCode False cond code)



--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  = if_sse2 condFltCode_sse2 condFltCode_x87
  where

  condFltCode_x87
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
      (x_reg, x_code) <- getNonClobberedReg x
      (y_reg, y_code) <- getSomeReg y
      let
        code = x_code `appOL` y_code `snocOL`
                GCMP cond x_reg y_reg
      -- The GCMP insn does the test and sets the zero flag if comparable
      -- and true.  Hence we always supply EQQ as the condition to test.
      return (CondCode True EQQ code)

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
                  CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)
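    -- (A sketch of why: ucomiss/ucomisd set ZF/CF/PF like an unsigned
    -- compare, so e.g. "x > y" must be tested with the unsigned GU
    -- condition (CF=0 and ZF=0) rather than the signed GTT one.)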

-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock


-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
   | addr == addr2, pk /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode pk addr src = do
    is32Bit <- is32BitPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI is32Bit src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV pk op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)


-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
  load_code <- intLoadCode (MOV pk) src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform False{-no sse2-} reg))

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  code <- getAnyReg src
  return (code (getRegisterReg platform False{-no sse2-} reg))


-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  use_sse2 <- sse2Enabled
  let
        code = src_code `appOL`
               addr_code `snocOL`
                if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
                            else GST pk src_reg addr
  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do
  use_sse2 <- sse2Enabled
  src_code <- getAnyReg src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (src_code (getRegisterReg platform use_sse2 reg))


genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock

genJump (CmmLoad mem _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

genJump (CmmLit lit) regs = do
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)


-- -----------------------------------------------------------------------------
--  Unconditional branches

genBranch :: BlockId -> NatM InstrBlock
genBranch = return . toOL . mkJumpInstr



-- -----------------------------------------------------------------------------
--  Conditional jumps

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.

I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}

genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump id expr = do
  is32Bit <- is32BitPlatform
  genCondJump' is32Bit id expr

genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock

-- 64-bit integer comparisons on 32-bit
genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
  | is32Bit, Just W64 <- maybeIntComparison mop = do
  ChildCode64 code1 r1_lo <- iselExpr64 e1
  ChildCode64 code2 r2_lo <- iselExpr64 e2
  let r1_hi = getHiVRegFromLo r1_lo
      r2_hi = getHiVRegFromLo r2_lo
      cond = machOpToCond mop
      Just cond' = maybeFlipCond cond
  false <- getBlockIdNat
  return $ code1 `appOL` code2 `appOL` toOL [
    CMP II32 (OpReg r2_hi) (OpReg r1_hi),
    JXX cond true,
    JXX cond' false,
    CMP II32 (OpReg r2_lo) (OpReg r1_lo),
    JXX cond true,
    NEWBLOCK false ]

genCondJump' _ id bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  use_sse2 <- sse2Enabled
  if not is_float || not use_sse2
    then
        return (cond_code `snocOL` JXX cond id)
    else do
        lbl <- getBlockIdNat

        -- see comment with condFltReg
        let code = case cond of
                        NE  -> or_unordered
                        GU  -> plain_test
                        GEU -> plain_test
                        _   -> and_ordered

            plain_test = unitOL (
                  JXX cond id
                )
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ]
            and_ordered = toOL [
                  JXX PARITY lbl,
                  JXX cond id,
                  JXX ALWAYS lbl,
                  NEWBLOCK lbl
                ]
        return (cond_code `appOL` code)
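        -- (A sketch of the NaN handling: ucomis* raise the parity flag
        -- on an unordered result, so for NE we branch if PF is set as
        -- well, while ordered-only conditions first jump past the
        -- branch whenever PF is set.)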

-- -----------------------------------------------------------------------------
--  Generating C calls

-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
-- @get_arg@, which moves the arguments to the correct registers/stack
-- locations.  Apart from that, the code is easy.
--
-- (If applicable) Do not fill the delay slots here; you will confuse the
-- register allocator.

genCCall
    :: DynFlags
    -> Bool                     -- 32 bit platform?
    -> ForeignTarget            -- function to call
    -> [CmmFormal]              -- where to put the result
    -> [CmmActual]              -- arguments (of mixed type)
    -> NatM InstrBlock

-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

-- Unroll memcpy calls if the source and destination pointers are at
-- least DWORD aligned and the number of bytes to copy isn't too
-- large.  Otherwise, call C's memcpy.
genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
         [dst, src, CmmLit (CmmInt n _)]
    | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
        code_dst <- getAnyReg dst
        dst_r <- getNewRegNat format
        code_src <- getAnyReg src
        src_r <- getNewRegNat format
        tmp_r <- getNewRegNat format
        return $ code_dst dst_r `appOL` code_src src_r `appOL`
                 go dst_r src_r tmp_r (fromInteger n)
  where
    -- The number of instructions we will generate (approx). We need 2
    -- instructions per move.
    insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)

    format = if align .&. 4 /= 0 then II32 else (archWordFormat is32Bit)

    -- The size of each move, in bytes.
    sizeBytes :: Integer
    sizeBytes = fromIntegral (formatInBytes format)

    go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
    go dst src tmp i
        | i >= sizeBytes =
            unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - sizeBytes)
        -- Deal with remaining bytes.
        | i >= 4 =  -- Will never happen on 32-bit
            unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 4)
        | i >= 2 =
            unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 2)
        | i >= 1 =
            unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
            unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
            go dst src tmp (i - 1)
        | otherwise = nilOL
      where
        src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
                   (ImmInteger (n - i))
        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
                   (ImmInteger (n - i))
1698
1699 genCCall dflags _ (PrimTarget (MO_Memset align)) _
1700 [dst,
1701 CmmLit (CmmInt c _),
1702 CmmLit (CmmInt n _)]
1703 | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
1704 code_dst <- getAnyReg dst
1705 dst_r <- getNewRegNat format
1706 return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
1707 where
1708 (format, val) = case align .&. 3 of
1709 2 -> (II16, c2)
1710 0 -> (II32, c4)
1711 _ -> (II8, c)
1712 c2 = c `shiftL` 8 .|. c
1713 c4 = c2 `shiftL` 16 .|. c2
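-- For example, with c = 0xAB the replicated fill values are
-- c2 = 0xABAB and c4 = 0xABABABAB, so every store writes the byte
-- pattern at the widest width the alignment allows.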
1714
1715 -- The number of instructions we will generate (approx). We need 1
1716 -- instruction per move.
1717 insns = (n + sizeBytes - 1) `div` sizeBytes
1718
1719 -- The size of each move, in bytes.
1720 sizeBytes :: Integer
1721 sizeBytes = fromIntegral (formatInBytes format)
1722
1723 go :: Reg -> Integer -> OrdList Instr
1724 go dst i
1725 -- TODO: Add movabs instruction and support 64-bit sets.
1726 | i >= sizeBytes = -- sizeBytes might be smaller than the fixed sizes below
1727 unitOL (MOV format (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1728 go dst (i - sizeBytes)
1729 | i >= 4 = -- Will never happen on 32-bit
1730 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1731 go dst (i - 4)
1732 | i >= 2 =
1733 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1734 go dst (i - 2)
1735 | i >= 1 =
1736 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1737 go dst (i - 1)
1738 | otherwise = nilOL
1739 where
1740 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1741 (ImmInteger (n - i))
1742
1743 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1744 -- write barrier compiles to no code on x86/x86-64;
1745 -- we keep it this late in the pipeline so that earlier optimisations cannot move code across it.
1746
1747 genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
1748
1749 genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
1750 case n of
1751 0 -> genPrefetch src $ PREFETCH NTA format
1752 1 -> genPrefetch src $ PREFETCH Lvl2 format
1753 2 -> genPrefetch src $ PREFETCH Lvl1 format
1754 3 -> genPrefetch src $ PREFETCH Lvl0 format
1755 l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
1756 -- the C / LLVM prefetch convention is 0, 1, 2, and 3;
1757 -- the corresponding x86 hint names are NTA, T2, T1, and T0
1758 where
1759 format = archWordFormat is32bit
1760 -- we need to know the register width for pointers!
1761 genPrefetch inRegSrc prefetchCTor =
1762 do
1763 code_src <- getAnyReg inRegSrc
1764 src_r <- getNewRegNat format
1765 return $ code_src src_r `appOL`
1766 (unitOL (prefetchCTor (OpAddr
1767 ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
1768 -- prefetch always takes an address
1769
1770 genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
1771 let platform = targetPlatform dflags
1772 let dst_r = getRegisterReg platform False (CmmLocal dst)
1773 case width of
1774 W64 | is32Bit -> do
1775 ChildCode64 vcode rlo <- iselExpr64 src
1776 let dst_rhi = getHiVRegFromLo dst_r
1777 rhi = getHiVRegFromLo rlo
1778 return $ vcode `appOL`
1779 toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
1780 MOV II32 (OpReg rhi) (OpReg dst_r),
1781 BSWAP II32 dst_rhi,
1782 BSWAP II32 dst_r ]
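-- There is no 16-bit encoding of BSWAP, so for W16 we swap the
-- full 32-bit register and shift the result down by 16 bits.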
1783 W16 -> do code_src <- getAnyReg src
1784 return $ code_src dst_r `appOL`
1785 unitOL (BSWAP II32 dst_r) `appOL`
1786 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
1787 _ -> do code_src <- getAnyReg src
1788 return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)
1789 where
1790 format = intFormat width
1791
1792 genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1793 args@[src] = do
1794 sse4_2 <- sse4_2Enabled
1795 let platform = targetPlatform dflags
1796 if sse4_2
1797 then do code_src <- getAnyReg src
1798 src_r <- getNewRegNat format
1799 let dst_r = getRegisterReg platform False (CmmLocal dst)
1800 return $ code_src src_r `appOL`
1801 (if width == W8 then
1802 -- The POPCNT instruction doesn't take an r/m8
1803 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1804 unitOL (POPCNT II16 (OpReg src_r) dst_r)
1805 else
1806 unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
1807 (if width == W8 || width == W16 then
1808 -- We used a 16-bit destination register above,
1809 -- so zero-extend
1810 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1811 else nilOL)
1812 else do
1813 targetExpr <- cmmMakeDynamicReference dflags
1814 CallReference lbl
1815 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1816 [NoHint] [NoHint]
1817 CmmMayReturn)
1818 genCCall dflags is32Bit target dest_regs args
1819 where
1820 format = intFormat width
1821 lbl = mkCmmCodeLabel primPackageKey (fsLit (popCntLabel width))
1822
1823 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
1824 | is32Bit && width == W64 = do
1825 -- Fallback to `hs_clz64` on i386
1826 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
1827 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1828 [NoHint] [NoHint]
1829 CmmMayReturn)
1830 genCCall dflags is32Bit target dest_regs args
1831
1832 | otherwise = do
1833 code_src <- getAnyReg src
1834 src_r <- getNewRegNat format
1835 tmp_r <- getNewRegNat format
1836 let dst_r = getRegisterReg platform False (CmmLocal dst)
1837
1838 -- The following insn sequence makes sure 'clz 0' has a defined value.
1839 -- Starting with Haswell, one could use the LZCNT insn instead.
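-- Worked example for W32 (bw = 32): for src /= 0, BSR leaves the
-- index k of the highest set bit in tmp_r and the CMOV moves it into
-- dst_r, so dst_r `xor` 31 = 31 - k, the leading-zero count; for
-- src == 0 the CMOV is skipped, dst_r stays 2*32-1 = 63, and
-- 63 `xor` 31 = 32.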
1840 return $ code_src src_r `appOL` toOL
1841 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1842 [ BSR format (OpReg src_r) tmp_r
1843 , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
1844 , CMOV NE format (OpReg tmp_r) dst_r
1845 , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
1846 ]) -- NB: We don't need to zero-extend the result for the
1847 -- W8/W16 cases because the 'MOV' insn already
1848 -- took care of implicitly clearing the upper bits
1849 where
1850 bw = widthInBits width
1851 platform = targetPlatform dflags
1852 format = if width == W8 then II16 else intFormat width
1853 lbl = mkCmmCodeLabel primPackageKey (fsLit (clzLabel width))
1854
1855 genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
1856 | is32Bit, width == W64 = do
1857 ChildCode64 vcode rlo <- iselExpr64 src
1858 let rhi = getHiVRegFromLo rlo
1859 dst_r = getRegisterReg platform False (CmmLocal dst)
1860 lbl1 <- getBlockIdNat
1861 lbl2 <- getBlockIdNat
1862 tmp_r <- getNewRegNat format
1863
1864 -- The following instruction sequence corresponds to the pseudo-code
1865 --
1866 -- if (src) {
1867 -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
1868 -- } else {
1869 -- dst = 64;
1870 -- }
1871 return $ vcode `appOL` toOL
1872 ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
1873 , OR II32 (OpReg rlo) (OpReg tmp_r)
1874 , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
1875 , JXX EQQ lbl2
1876 , JXX ALWAYS lbl1
1877
1878 , NEWBLOCK lbl1
1879 , BSF II32 (OpReg rhi) dst_r
1880 , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r)
1881 , BSF II32 (OpReg rlo) tmp_r
1882 , CMOV NE II32 (OpReg tmp_r) dst_r
1883 , JXX ALWAYS lbl2
1884
1885 , NEWBLOCK lbl2
1886 ])
1887
1888 | otherwise = do
1889 code_src <- getAnyReg src
1890 src_r <- getNewRegNat format
1891 tmp_r <- getNewRegNat format
1892 let dst_r = getRegisterReg platform False (CmmLocal dst)
1893
1894 -- The following insn sequence makes sure 'ctz 0' has a defined value.
1895 -- Starting with Haswell, one could use the TZCNT insn instead.
1896 return $ code_src src_r `appOL` toOL
1897 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1898 [ BSF format (OpReg src_r) tmp_r
1899 , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
1900 , CMOV NE format (OpReg tmp_r) dst_r
1901 ]) -- NB: We don't need to zero-extend the result for the
1902 -- W8/W16 cases because the 'MOV' insn already
1903 -- took care of implicitly clearing the upper bits
1904 where
1905 bw = widthInBits width
1906 platform = targetPlatform dflags
1907 format = if width == W8 then II16 else intFormat width
1908
1909 genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
1910 targetExpr <- cmmMakeDynamicReference dflags
1911 CallReference lbl
1912 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1913 [NoHint] [NoHint]
1914 CmmMayReturn)
1915 genCCall dflags is32Bit target dest_regs args
1916 where
1917 lbl = mkCmmCodeLabel primPackageKey (fsLit (word2FloatLabel width))
1918
1919 genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
1920 Amode amode addr_code <-
1921 if amop `elem` [AMO_Add, AMO_Sub]
1922 then getAmode addr
1923 else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
1924 arg <- getNewRegNat format
1925 arg_code <- getAnyReg n
1926 use_sse2 <- sse2Enabled
1927 let platform = targetPlatform dflags
1928 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1929 code <- op_code dst_r arg amode
1930 return $ addr_code `appOL` arg_code arg `appOL` code
1931 where
1932 -- Code for the operation
1933 op_code :: Reg -- Destination reg
1934 -> Reg -- Register containing argument
1935 -> AddrMode -- Address of location to mutate
1936 -> NatM (OrdList Instr)
1937 op_code dst_r arg amode = case amop of
1938 -- In the common case where dst_r is a virtual register the
1939 -- final move should go away, because it's the last use of arg
1940 -- and the first use of dst_r.
1941 AMO_Add -> return $ toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
1942 , MOV format (OpReg arg) (OpReg dst_r)
1943 ]
1944 AMO_Sub -> return $ toOL [ NEGI format (OpReg arg)
1945 , LOCK (XADD format (OpReg arg) (OpAddr amode))
1946 , MOV format (OpReg arg) (OpReg dst_r)
1947 ]
1948 AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
1949 AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
1950 , NOT format dst
1951 ])
1952 AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
1953 AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
1954 where
1955 -- Simulate operation that lacks a dedicated instruction using
1956 -- cmpxchg.
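-- Roughly the following C-style loop, returning the old value:
--
--   do { old = *addr; new = op(old, arg); }
--   while (!CAS(addr, old, new));
--   dst = old;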
1957 cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
1958 -> NatM (OrdList Instr)
1959 cmpxchg_code instrs = do
1960 lbl <- getBlockIdNat
1961 tmp <- getNewRegNat format
1962 return $ toOL
1963 [ MOV format (OpAddr amode) (OpReg eax)
1964 , JXX ALWAYS lbl
1965 , NEWBLOCK lbl
1966 -- Keep old value so we can return it:
1967 , MOV format (OpReg eax) (OpReg dst_r)
1968 , MOV format (OpReg eax) (OpReg tmp)
1969 ]
1970 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
1971 [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
1972 , JXX NE lbl
1973 ]
1974
1975 format = intFormat width
1976
1977 genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
1978 load_code <- intLoadCode (MOV (intFormat width)) addr
1979 let platform = targetPlatform dflags
1980 use_sse2 <- sse2Enabled
1981 return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
1982
1983 genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1984 code <- assignMem_IntCode (intFormat width) addr val
1985 return $ code `snocOL` MFENCE
1986
1987 genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
1988 -- On x86 we don't have enough registers to use cmpxchg with a
1989 -- complicated addressing mode, so on that architecture we
1990 -- pre-compute the address first.
1991 Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
1992 newval <- getNewRegNat format
1993 newval_code <- getAnyReg new
1994 oldval <- getNewRegNat format
1995 oldval_code <- getAnyReg old
1996 use_sse2 <- sse2Enabled
1997 let platform = targetPlatform dflags
1998 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1999 code = toOL
2000 [ MOV format (OpReg oldval) (OpReg eax)
2001 , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
2002 , MOV format (OpReg eax) (OpReg dst_r)
2003 ]
2004 return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
2005 `appOL` code
2006 where
2007 format = intFormat width
2008
2009 genCCall _ is32Bit target dest_regs args = do
2010 dflags <- getDynFlags
2011 let platform = targetPlatform dflags
2012 case (target, dest_regs) of
2013 -- void return type prim op
2014 (PrimTarget op, []) ->
2015 outOfLineCmmOp op Nothing args
2016 -- we only cope with a single result for foreign calls
2017 (PrimTarget op, [r])
2018 | not is32Bit -> outOfLineCmmOp op (Just r) args
2019 | otherwise -> do
2020 l1 <- getNewLabelNat
2021 l2 <- getNewLabelNat
2022 sse2 <- sse2Enabled
2023 if sse2
2024 then
2025 outOfLineCmmOp op (Just r) args
2026 else case op of
2027 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
2028 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
2029
2030 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
2031 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
2032
2033 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
2034 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
2035
2036 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
2037 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
2038
2039 _other_op -> outOfLineCmmOp op (Just r) args
2040
2041 where
2042 actuallyInlineFloatOp instr format [x]
2043 = do res <- trivialUFCode format (instr format) x
2044 any <- anyReg res
2045 return (any (getRegisterReg platform False (CmmLocal r)))
2046
2047 actuallyInlineFloatOp _ _ args
2048 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
2049 ++ show (length args) ++ ")"
2050
2051 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
2052 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
2053 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
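-- MO_Add2 computes a double-word sum: ADD_CC produces the low word
-- and sets the carry flag, which the trailing ADC of 0 folds into
-- the zero-initialised high word.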
2054 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
2055 case args of
2056 [arg_x, arg_y] ->
2057 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
2058 let format = intFormat width
2059 lCode <- anyReg =<< trivialCode width (ADD_CC format)
2060 (Just (ADD_CC format)) arg_x arg_y
2061 let reg_l = getRegisterReg platform True (CmmLocal res_l)
2062 reg_h = getRegisterReg platform True (CmmLocal res_h)
2063 code = hCode reg_h `appOL`
2064 lCode reg_l `snocOL`
2065 ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
2066 return code
2067 _ -> panic "genCCall: Wrong number of arguments/results for add2"
2068 (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
2069 addSubIntC platform ADD_CC (Just . ADD_CC) width res_r res_c args
2070 (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
2071 addSubIntC platform SUB_CC (const Nothing) width res_r res_c args
2072 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
2073 case args of
2074 [arg_x, arg_y] ->
2075 do (y_reg, y_code) <- getRegOrMem arg_y
2076 x_code <- getAnyReg arg_x
2077 let format = intFormat width
2078 reg_h = getRegisterReg platform True (CmmLocal res_h)
2079 reg_l = getRegisterReg platform True (CmmLocal res_l)
2080 code = y_code `appOL`
2081 x_code rax `appOL`
2082 toOL [MUL2 format y_reg,
2083 MOV format (OpReg rdx) (OpReg reg_h),
2084 MOV format (OpReg rax) (OpReg reg_l)]
2085 return code
2086 _ -> panic "genCCall: Wrong number of arguments/results for mul2"
2087
2088 _ -> if is32Bit
2089 then genCCall32' dflags target dest_regs args
2090 else genCCall64' dflags target dest_regs args
2091
2092 where divOp1 platform signed width results [arg_x, arg_y]
2093 = divOp platform signed width results Nothing arg_x arg_y
2094 divOp1 _ _ _ _ _
2095 = panic "genCCall: Wrong number of arguments for divOp1"
2096 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
2097 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
2098 divOp2 _ _ _ _ _
2099 = panic "genCCall: Wrong number of arguments for divOp2"
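-- x86 division takes the double-width dividend in rdx:rax and leaves
-- the quotient in rax and the remainder in rdx; 'widen' fills rdx
-- either by sign-extending rax (CLTD) or by zeroing it.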
2100 divOp platform signed width [res_q, res_r]
2101 m_arg_x_high arg_x_low arg_y
2102 = do let format = intFormat width
2103 reg_q = getRegisterReg platform True (CmmLocal res_q)
2104 reg_r = getRegisterReg platform True (CmmLocal res_r)
2105 widen | signed = CLTD format
2106 | otherwise = XOR format (OpReg rdx) (OpReg rdx)
2107 instr | signed = IDIV
2108 | otherwise = DIV
2109 (y_reg, y_code) <- getRegOrMem arg_y
2110 x_low_code <- getAnyReg arg_x_low
2111 x_high_code <- case m_arg_x_high of
2112 Just arg_x_high ->
2113 getAnyReg arg_x_high
2114 Nothing ->
2115 return $ const $ unitOL widen
2116 return $ y_code `appOL`
2117 x_low_code rax `appOL`
2118 x_high_code rdx `appOL`
2119 toOL [instr format y_reg,
2120 MOV format (OpReg rax) (OpReg reg_q),
2121 MOV format (OpReg rdx) (OpReg reg_r)]
2122 divOp _ _ _ _ _ _ _
2123 = panic "genCCall: Wrong number of results for divOp"
2124
2125 addSubIntC platform instr mrevinstr width res_r res_c [arg_x, arg_y]
2126 = do let format = intFormat width
2127 rCode <- anyReg =<< trivialCode width (instr format)
2128 (mrevinstr format) arg_x arg_y
2129 reg_tmp <- getNewRegNat II8
2130 let reg_c = getRegisterReg platform True (CmmLocal res_c)
2131 reg_r = getRegisterReg platform True (CmmLocal res_r)
2132 code = rCode reg_r `snocOL`
2133 SETCC OFLO (OpReg reg_tmp) `snocOL`
2134 MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
2135 return code
2136 addSubIntC _ _ _ _ _ _ _
2137 = panic "genCCall: Wrong number of arguments/results for addSubIntC"
2138
2139 genCCall32' :: DynFlags
2140 -> ForeignTarget -- function to call
2141 -> [CmmFormal] -- where to put the result
2142 -> [CmmActual] -- arguments (of mixed type)
2143 -> NatM InstrBlock
2144 genCCall32' dflags target dest_regs args = do
2145 let
2146 prom_args = map (maybePromoteCArg dflags W32) args
2147
2148 -- Align stack to 16n for calls, assuming a starting stack
2149 -- alignment of 16n - word_size on procedure entry, which we
2150 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2151 sizes = map (arg_size . cmmExprType dflags) (reverse args)
2152 raw_arg_size = sum sizes + wORD_SIZE dflags
2153 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
2154 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
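-- For example, with two 32-bit arguments: sizes = [4,4], so
-- raw_arg_size = 12 and arg_pad_size = 4; pushing the 4 padding
-- bytes plus 8 bytes of arguments leaves %esp 16-byte aligned at
-- the CALL, given the assumed 16n-4 alignment on entry.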
2155 delta0 <- getDeltaNat
2156 setDeltaNat (delta0 - arg_pad_size)
2157
2158 use_sse2 <- sse2Enabled
2159 push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
2160 delta <- getDeltaNat
2161 MASSERT(delta == delta0 - tot_arg_size)
2162
2163 -- deal with static vs dynamic call targets
2164 (callinsns,cconv) <-
2165 case target of
2166 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2167 -> -- ToDo: stdcall arg sizes
2168 return (unitOL (CALL (Left fn_imm) []), conv)
2169 where fn_imm = ImmCLbl lbl
2170 ForeignTarget expr conv
2171 -> do { (dyn_r, dyn_c) <- getSomeReg expr
2172 ; ASSERT( isWord32 (cmmExprType dflags expr) )
2173 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
2174 PrimTarget _
2175 -> panic $ "genCCall: Can't handle PrimTarget call type here, "
2176 ++ "probably because there are too many return values."
2177
2178 let push_code
2179 | arg_pad_size /= 0
2180 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2181 DELTA (delta0 - arg_pad_size)]
2182 `appOL` concatOL push_codes
2183 | otherwise
2184 = concatOL push_codes
2185
2186 -- Deallocate parameters after call for ccall;
2187 -- but not for stdcall (callee does it)
2188 --
2189 -- We have to pop any stack padding we added
2190 -- even if we are doing stdcall, though (#5052)
2191 pop_size
2192 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
2193 | otherwise = tot_arg_size
2194
2195 call = callinsns `appOL`
2196 toOL (
2197 (if pop_size==0 then [] else
2198 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
2199 ++
2200 [DELTA delta0]
2201 )
2202 setDeltaNat delta0
2203
2204 dflags <- getDynFlags
2205 let platform = targetPlatform dflags
2206
2207 let
2208 -- assign the results, if necessary
2209 assign_code [] = nilOL
2210 assign_code [dest]
2211 | isFloatType ty =
2212 if use_sse2
2213 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
2214 EAIndexNone
2215 (ImmInt 0)
2216 fmt = floatFormat w
2217 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
2218 DELTA (delta0 - b),
2219 GST fmt fake0 tmp_amode,
2220 MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
2221 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
2222 DELTA delta0]
2223 else unitOL (GMOV fake0 r_dest)
2224 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
2225 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
2226 | otherwise = unitOL (MOV (intFormat w)
2227 (OpReg eax)
2228 (OpReg r_dest))
2229 where
2230 ty = localRegType dest
2231 w = typeWidth ty
2232 b = widthInBytes w
2233 r_dest_hi = getHiVRegFromLo r_dest
2234 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
2235 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
2236
2237 return (push_code `appOL`
2238 call `appOL`
2239 assign_code dest_regs)
2240
2241 where
2242 arg_size :: CmmType -> Int -- Width in bytes
2243 arg_size ty = widthInBytes (typeWidth ty)
2244
2245 roundTo a x | x `mod` a == 0 = x
2246 | otherwise = x + a - (x `mod` a)
2247
2248 push_arg :: Bool -> CmmActual {-current argument-}
2249 -> NatM InstrBlock -- code
2250
2251 push_arg use_sse2 arg -- we don't need the hints on x86
2252 | isWord64 arg_ty = do
2253 ChildCode64 code r_lo <- iselExpr64 arg
2254 delta <- getDeltaNat
2255 setDeltaNat (delta - 8)
2256 let
2257 r_hi = getHiVRegFromLo r_lo
2258 return ( code `appOL`
2259 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
2260 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
2261 DELTA (delta-8)]
2262 )
2263
2264 | isFloatType arg_ty = do
2265 (reg, code) <- getSomeReg arg
2266 delta <- getDeltaNat
2267 setDeltaNat (delta-size)
2268 return (code `appOL`
2269 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
2270 DELTA (delta-size),
2271 let addr = AddrBaseIndex (EABaseReg esp)
2272 EAIndexNone
2273 (ImmInt 0)
2274 format = floatFormat (typeWidth arg_ty)
2275 in
2276 if use_sse2
2277 then MOV format (OpReg reg) (OpAddr addr)
2278 else GST format reg addr
2279 ]
2280 )
2281
2282 | otherwise = do
2283 (operand, code) <- getOperand arg
2284 delta <- getDeltaNat
2285 setDeltaNat (delta-size)
2286 return (code `snocOL`
2287 PUSH II32 operand `snocOL`
2288 DELTA (delta-size))
2289
2290 where
2291 arg_ty = cmmExprType dflags arg
2292 size = arg_size arg_ty -- Byte size
2293
2294 genCCall64' :: DynFlags
2295 -> ForeignTarget -- function to call
2296 -> [CmmFormal] -- where to put the result
2297 -> [CmmActual] -- arguments (of mixed type)
2298 -> NatM InstrBlock
2299 genCCall64' dflags target dest_regs args = do
2300 -- load up the register arguments
2301 let prom_args = map (maybePromoteCArg dflags W32) args
2302
2303 (stack_args, int_regs_used, fp_regs_used, load_args_code)
2304 <-
2305 if platformOS platform == OSMinGW32
2306 then load_args_win prom_args [] [] (allArgRegs platform) nilOL
2307 else do (stack_args, aregs, fregs, load_args_code)
2308 <- load_args prom_args (allIntArgRegs platform) (allFPArgRegs platform) nilOL
2309 let fp_regs_used = reverse (drop (length fregs) (reverse (allFPArgRegs platform)))
2310 int_regs_used = reverse (drop (length aregs) (reverse (allIntArgRegs platform)))
2311 return (stack_args, int_regs_used, fp_regs_used, load_args_code)
2312
2313 let
2314 arg_regs_used = int_regs_used ++ fp_regs_used
2315 arg_regs = [eax] ++ arg_regs_used
2316 -- for annotating the call instruction with the argument registers
2317 sse_regs = length fp_regs_used
2318 arg_stack_slots = if platformOS platform == OSMinGW32
2319 then length stack_args + length (allArgRegs platform)
2320 else length stack_args
2321 tot_arg_size = arg_size * arg_stack_slots
2322
2323
2324 -- Align stack to 16n for calls, assuming a starting stack
2325 -- alignment of 16n - word_size on procedure entry, which we
2326 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2327 (real_size, adjust_rsp) <-
2328 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2329 then return (tot_arg_size, nilOL)
2330 else do -- we need to adjust...
2331 delta <- getDeltaNat
2332 setDeltaNat (delta - wORD_SIZE dflags)
2333 return (tot_arg_size + wORD_SIZE dflags, toOL [
2334 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2335 DELTA (delta - wORD_SIZE dflags) ])
2336
2337 -- push the stack args, right to left
2338 push_code <- push_args (reverse stack_args) nilOL
2339 -- On Win64, we also have to leave stack space for the arguments
2340 -- that we are passing in registers
2341 lss_code <- if platformOS platform == OSMinGW32
2342 then leaveStackSpace (length (allArgRegs platform))
2343 else return nilOL
2344 delta <- getDeltaNat
2345
2346 -- deal with static vs dynamic call targets
2347 (callinsns,_cconv) <-
2348 case target of
2349 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2350 -> -- ToDo: stdcall arg sizes
2351 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2352 where fn_imm = ImmCLbl lbl
2353 ForeignTarget expr conv
2354 -> do (dyn_r, dyn_c) <- getSomeReg expr
2355 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2356 PrimTarget _
2357 -> panic $ "genCCall: Can't handle PrimTarget call type here, "
2358 ++ "probably because there are too many return values."
2359
2360 let
2361 -- The x86_64 ABI requires us to set %al to the number of SSE2
2362 -- registers that contain arguments, if the called routine
2363 -- is a varargs function. We don't know whether it's a
2364 -- varargs function or not, so we have to assume it is.
2365 --
2366 -- It's not safe to omit this assignment, even if the number
2367 -- of SSE2 regs in use is zero. If %al is larger than 8
2368 -- on entry to a varargs function, seg faults ensue.
2369 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
2370
2371 let call = callinsns `appOL`
2372 toOL (
2373 -- Deallocate parameters after call for ccall;
2374 -- stdcall has callee do it, but is not supported on
2375 -- x86_64 target (see #3336)
2376 (if real_size==0 then [] else
2377 [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2378 ++
2379 [DELTA (delta + real_size)]
2380 )
2381 setDeltaNat (delta + real_size)
2382
2383 let
2384 -- assign the results, if necessary
2385 assign_code [] = nilOL
2386 assign_code [dest] =
2387 case typeWidth rep of
2388 W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
2389 (OpReg xmm0)
2390 (OpReg r_dest))
2391 W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
2392 (OpReg xmm0)
2393 (OpReg r_dest))
2394 _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
2395 where
2396 rep = localRegType dest
2397 r_dest = getRegisterReg platform True (CmmLocal dest)
2398 assign_code _many = panic "genCCall.assign_code many"
2399
2400 return (load_args_code `appOL`
2401 adjust_rsp `appOL`
2402 push_code `appOL`
2403 lss_code `appOL`
2404 assign_eax sse_regs `appOL`
2405 call `appOL`
2406 assign_code dest_regs)
2407
2408 where platform = targetPlatform dflags
2409 arg_size = 8 -- always, at the moment
2410
2411 load_args :: [CmmExpr]
2412 -> [Reg] -- int regs avail for args
2413 -> [Reg] -- FP regs avail for args
2414 -> InstrBlock
2415 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
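-- Argument passing (non-Windows): each floating-point argument claims
-- the next free FP register and every other argument the next free
-- integer register; once the relevant pool is exhausted the argument
-- ends up in the returned list of stack arguments instead.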
2416 load_args args [] [] code = return (args, [], [], code)
2417 -- no more regs to use
2418 load_args [] aregs fregs code = return ([], aregs, fregs, code)
2419 -- no more args to push
2420 load_args (arg : rest) aregs fregs code
2421 | isFloatType arg_rep =
2422 case fregs of
2423 [] -> push_this_arg
2424 (r:rs) -> do
2425 arg_code <- getAnyReg arg
2426 load_args rest aregs rs (code `appOL` arg_code r)
2427 | otherwise =
2428 case aregs of
2429 [] -> push_this_arg
2430 (r:rs) -> do
2431 arg_code <- getAnyReg arg
2432 load_args rest rs fregs (code `appOL` arg_code r)
2433 where
2434 arg_rep = cmmExprType dflags arg
2435
2436 push_this_arg = do
2437 (args',ars,frs,code') <- load_args rest aregs fregs code
2438 return (arg:args', ars, frs, code')
2439
2440 load_args_win :: [CmmExpr]
2441 -> [Reg] -- used int regs
2442 -> [Reg] -- used FP regs
2443 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2444 -> InstrBlock
2445 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2446 load_args_win args usedInt usedFP [] code
2447 = return (args, usedInt, usedFP, code)
2448 -- no more regs to use
2449 load_args_win [] usedInt usedFP _ code
2450 = return ([], usedInt, usedFP, code)
2451 -- no more args to push
2452 load_args_win (arg : rest) usedInt usedFP
2453 ((ireg, freg) : regs) code
2454 | isFloatType arg_rep = do
2455 arg_code <- getAnyReg arg
2456 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2457 (code `appOL`
2458 arg_code freg `snocOL`
2459 -- If we are calling a varargs function
2460 -- then we need to define ireg as well
2461 -- as freg
2462 MOV II64 (OpReg freg) (OpReg ireg))
2463 | otherwise = do
2464 arg_code <- getAnyReg arg
2465 load_args_win rest (ireg : usedInt) usedFP regs
2466 (code `appOL` arg_code ireg)
2467 where
2468 arg_rep = cmmExprType dflags arg
2469
2470 push_args [] code = return code
2471 push_args (arg:rest) code
2472 | isFloatType arg_rep = do
2473 (arg_reg, arg_code) <- getSomeReg arg
2474 delta <- getDeltaNat
2475 setDeltaNat (delta-arg_size)
2476 let code' = code `appOL` arg_code `appOL` toOL [
2477 SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp),
2478 DELTA (delta-arg_size),
2479 MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2480 push_args rest code'
2481
2482 | otherwise = do
2483 ASSERT(width == W64) return ()
2484 (arg_op, arg_code) <- getOperand arg
2485 delta <- getDeltaNat
2486 setDeltaNat (delta-arg_size)
2487 let code' = code `appOL` arg_code `appOL` toOL [
2488 PUSH II64 arg_op,
2489 DELTA (delta-arg_size)]
2490 push_args rest code'
2491 where
2492 arg_rep = cmmExprType dflags arg
2493 width = typeWidth arg_rep
2494
2495 leaveStackSpace n = do
2496 delta <- getDeltaNat
2497 setDeltaNat (delta - n * arg_size)
2498 return $ toOL [
2499 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2500 DELTA (delta - n * arg_size)]
2501
2502 maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
2503 maybePromoteCArg dflags wto arg
2504 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
2505 | otherwise = arg
2506 where
2507 wfrom = cmmExprWidth dflags arg
2508
2509 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2510 outOfLineCmmOp mop res args
2511 = do
2512 dflags <- getDynFlags
2513 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
2514 let target = ForeignTarget targetExpr
2515 (ForeignConvention CCallConv [] [] CmmMayReturn)
2516
2517 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
2518 where
2519 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2520 -- TODO: Why is this ok? Under Linux this code will be in libm.so
2521 -- Is it because they're really implemented as primitive instructions by the assembler?? -- BL 2009/12/31
2522 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2523
2524 fn = case mop of
2525 MO_F32_Sqrt -> fsLit "sqrtf"
2526 MO_F32_Sin -> fsLit "sinf"
2527 MO_F32_Cos -> fsLit "cosf"
2528 MO_F32_Tan -> fsLit "tanf"
2529 MO_F32_Exp -> fsLit "expf"
2530 MO_F32_Log -> fsLit "logf"
2531
2532 MO_F32_Asin -> fsLit "asinf"
2533 MO_F32_Acos -> fsLit "acosf"
2534 MO_F32_Atan -> fsLit "atanf"
2535
2536 MO_F32_Sinh -> fsLit "sinhf"
2537 MO_F32_Cosh -> fsLit "coshf"
2538 MO_F32_Tanh -> fsLit "tanhf"
2539 MO_F32_Pwr -> fsLit "powf"
2540
2541 MO_F64_Sqrt -> fsLit "sqrt"
2542 MO_F64_Sin -> fsLit "sin"
2543 MO_F64_Cos -> fsLit "cos"
2544 MO_F64_Tan -> fsLit "tan"
2545 MO_F64_Exp -> fsLit "exp"
2546 MO_F64_Log -> fsLit "log"
2547
2548 MO_F64_Asin -> fsLit "asin"
2549 MO_F64_Acos -> fsLit "acos"
2550 MO_F64_Atan -> fsLit "atan"
2551
2552 MO_F64_Sinh -> fsLit "sinh"
2553 MO_F64_Cosh -> fsLit "cosh"
2554 MO_F64_Tanh -> fsLit "tanh"
2555 MO_F64_Pwr -> fsLit "pow"
2556
2557 MO_Memcpy _ -> fsLit "memcpy"
2558 MO_Memset _ -> fsLit "memset"
2559 MO_Memmove _ -> fsLit "memmove"
2560
2561 MO_PopCnt _ -> fsLit "popcnt"
2562 MO_BSwap _ -> fsLit "bswap"
2563 MO_Clz w -> fsLit $ clzLabel w
2564 MO_Ctz _ -> unsupported
2565
2566 MO_AtomicRMW _ _ -> fsLit "atomicrmw"
2567 MO_AtomicRead _ -> fsLit "atomicread"
2568 MO_AtomicWrite _ -> fsLit "atomicwrite"
2569 MO_Cmpxchg _ -> fsLit "cmpxchg"
2570
2571 MO_UF_Conv _ -> unsupported
2572
2573 MO_S_QuotRem {} -> unsupported
2574 MO_U_QuotRem {} -> unsupported
2575 MO_U_QuotRem2 {} -> unsupported
2576 MO_Add2 {} -> unsupported
2577 MO_AddIntC {} -> unsupported
2578 MO_SubIntC {} -> unsupported
2579 MO_U_Mul2 {} -> unsupported
2580 MO_WriteBarrier -> unsupported
2581 MO_Touch -> unsupported
2582 (MO_Prefetch_Data _ ) -> unsupported
2583 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2584 ++ " not supported here")
2585
2586 -- -----------------------------------------------------------------------------
2587 -- Generating a table-branch
2588
2589 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2590
2591 genSwitch dflags expr targets
2592 | gopt Opt_PIC dflags
2593 = do
2594 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2595 lbl <- getNewLabelNat
2596 dflags <- getDynFlags
2597 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2598 (tableReg,t_code) <- getSomeReg $ dynRef
2599 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2600 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2601
2602 return $ if target32Bit (targetPlatform dflags)
2603 then e_code `appOL` t_code `appOL` toOL [
2604 ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
2605 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2606 ]
2607 else case platformOS (targetPlatform dflags) of
2608 OSDarwin ->
2609 -- on Mac OS X/x86_64, put the jump table
2610 -- in the text section to work around a
2611 -- limitation of the linker.
2612 -- ld64 is unable to handle the relocations for
2613 -- .quad L1 - L0
2614 -- if L0 is not preceded by a non-anonymous
2615 -- label in its section.
2616 e_code `appOL` t_code `appOL` toOL [
2617 ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
2618 JMP_TBL (OpReg tableReg) ids Text lbl
2619 ]
2620 _ ->
2621 -- HACK: On x86_64 binutils<2.17 is only able
2622 -- to generate PC32 relocations, hence we only
2623 -- get 32-bit offsets in the jump table. As
2624 -- these offsets are always negative we need
2625 -- to properly sign extend them to 64-bit.
2626 -- This hack should be removed in conjunction
2627 -- with the hack in PprMach.hs/pprDataItem
2628 -- once binutils 2.17 is standard.
2629 e_code `appOL` t_code `appOL` toOL [
2630 MOVSxL II32 op (OpReg reg),
2631 ADD (intFormat (wordWidth dflags)) (OpReg reg) (OpReg tableReg),
2632 JMP_TBL (OpReg tableReg) ids ReadOnlyData lbl
2633 ]
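-- Non-PIC case: the table holds absolute addresses, so we can jump
-- through it directly, schematically 'jmp *lbl(,%reg,wordSize)'.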
2634 | otherwise
2635 = do
2636 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2637 lbl <- getNewLabelNat
2638 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2639 code = e_code `appOL` toOL [
2640 JMP_TBL op ids ReadOnlyData lbl
2641 ]
2642 return code
2643 where (offset, ids) = switchTargetsToTable targets
2644
2645 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2646 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2647 = Just (createJumpTable dflags ids section lbl)
2648 generateJumpTableForInstr _ _ = Nothing
2649
2650 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2651 -> GenCmmDecl (Alignment, CmmStatics) h g
2652 createJumpTable dflags ids section lbl
2653 = let jumpTable
2654 | gopt Opt_PIC dflags =
2655 let jumpTableEntryRel Nothing
2656 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2657 jumpTableEntryRel (Just blockid)
2658 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2659 where blockLabel = mkAsmTempLabel (getUnique blockid)
2660 in map jumpTableEntryRel ids
2661 | otherwise = map (jumpTableEntry dflags) ids
2662 in CmmData section (1, Statics lbl jumpTable)
2663
2664 -- -----------------------------------------------------------------------------
2665 -- 'condIntReg' and 'condFltReg': condition codes into registers
2666
2667 -- Turn those condition codes into integers now (when they appear on
2668 -- the right hand side of an assignment).
2669 --
2670 -- (If applicable) Do not fill the delay slots here; you will confuse the
2671 -- register allocator.
2672
2673 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2674
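-- SETcc writes only the low byte of its operand, hence the MOVZxL to
-- zero-extend the result to a full 32-bit value.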
2675 condIntReg cond x y = do
2676 CondCode _ cond cond_code <- condIntCode cond x y
2677 tmp <- getNewRegNat II8
2678 let
2679 code dst = cond_code `appOL` toOL [
2680 SETCC cond (OpReg tmp),
2681 MOVZxL II8 (OpReg tmp) (OpReg dst)
2682 ]
2683 return (Any II32 code)
2684
2685
2686
2687 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2688 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2689 where
2690 condFltReg_x87 = do
2691 CondCode _ cond cond_code <- condFltCode cond x y
2692 tmp <- getNewRegNat II8
2693 let
2694 code dst = cond_code `appOL` toOL [
2695 SETCC cond (OpReg tmp),
2696 MOVZxL II8 (OpReg tmp) (OpReg dst)
2697 ]
2698 return (Any II32 code)
2699
2700 condFltReg_sse2 = do
2701 CondCode _ cond cond_code <- condFltCode cond x y
2702 tmp1 <- getNewRegNat (archWordFormat is32Bit)
2703 tmp2 <- getNewRegNat (archWordFormat is32Bit)
2704 let
2705 -- We have to worry about unordered operands (eg. comparisons
2706 -- against NaN). If the operands are unordered, the comparison
2707 -- sets the parity flag, carry flag and zero flag.
2708 -- All comparisons are supposed to return false for unordered
2709 -- operands except for !=, which returns true.
2710 --
2711 -- Optimisation: we don't have to test the parity flag if we
2712 -- know the test has already excluded the unordered case: eg >
2713 -- and >= test for a zero carry flag, which can only occur for
2714 -- ordered operands.
2715 --
2716 -- ToDo: by reversing comparisons we could avoid testing the
2717 -- parity flag in more cases.
2718
2719 code dst =
2720 cond_code `appOL`
2721 (case cond of
2722 NE -> or_unordered dst
2723 GU -> plain_test dst
2724 GEU -> plain_test dst
2725 _ -> and_ordered dst)
2726
2727 plain_test dst = toOL [
2728 SETCC cond (OpReg tmp1),
2729 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2730 ]
2731 or_unordered dst = toOL [
2732 SETCC cond (OpReg tmp1),
2733 SETCC PARITY (OpReg tmp2),
2734 OR II8 (OpReg tmp1) (OpReg tmp2),
2735 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2736 ]
2737 and_ordered dst = toOL [
2738 SETCC cond (OpReg tmp1),
2739 SETCC NOTPARITY (OpReg tmp2),
2740 AND II8 (OpReg tmp1) (OpReg tmp2),
2741 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2742 ]
2743 return (Any II32 code)
2744
2745
2746 -- -----------------------------------------------------------------------------
2747 -- 'trivial*Code': deal with trivial instructions
2748
2749 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2750 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2751 -- Only look for constants on the right hand side, because that's
2752 -- where the generic optimizer will have put them.
2753
2754 -- Similarly, for unary instructions, we don't have to worry about
2755 -- matching a literal as the argument, because genericOpt will already
2756 -- have handled the constant-folding.
2757
2758
2759 {-
2760 The Rules of the Game are:
2761
2762 * You cannot assume anything about the destination register dst;
2763 it may be anything, including a fixed reg.
2764
2765 * You may compute an operand into a fixed reg, but you may not
2766 subsequently change the contents of that fixed reg. If you
2767 want to do so, first copy the value either to a temporary
2768 or into dst. You are free to modify dst even if it happens
2769 to be a fixed reg -- that's not your problem.
2770
2771 * You cannot assume that a fixed reg will stay live over an
2772 arbitrary computation. The same applies to the dst reg.
2773
2774 * Temporary regs obtained from getNewRegNat are distinct from
2775 each other and from all other regs, and stay live over
2776 arbitrary computations.
2777
2778 --------------------
2779
2780 SDM's version of The Rules:
2781
2782 * If getRegister returns Any, that means it can generate correct
2783 code which places the result in any register, period. Even if that
2784 register happens to be read during the computation.
2785
2786 Corollary #1: this means that if you are generating code for an
2787 operation with two arbitrary operands, you cannot assign the result
2788 of the first operand into the destination register before computing
2789 the second operand. The second operand might require the old value
2790 of the destination register.
2791
2792 Corollary #2: A function might be able to generate more efficient
2793 code if it knows the destination register is a new temporary (and
2794 therefore not read by any of the sub-computations).
2795
2796 * If getRegister returns Any, then the code it generates may modify only:
2797 (a) fresh temporaries
2798 (b) the destination register
2799 (c) known registers (eg. %ecx is used by shifts)
2800 In particular, it may *not* modify global registers, unless the global
2801 register happens to be the destination register.
2802 -}
2803
2804 trivialCode :: Width -> (Operand -> Operand -> Instr)
2805 -> Maybe (Operand -> Operand -> Instr)
2806 -> CmmExpr -> CmmExpr -> NatM Register
2807 trivialCode width instr m a b
2808 = do is32Bit <- is32BitPlatform
2809 trivialCode' is32Bit width instr m a b
2810
2811 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
2812 -> Maybe (Operand -> Operand -> Instr)
2813 -> CmmExpr -> CmmExpr -> NatM Register
2814 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
2815 | is32BitLit is32Bit lit_a = do
2816 b_code <- getAnyReg b
2817 let
2818 code dst
2819 = b_code dst `snocOL`
2820 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2821 return (Any (intFormat width) code)
2822
2823 trivialCode' _ width instr _ a b
2824 = genTrivialCode (intFormat width) instr a b
2825
2826 -- This is re-used for floating pt instructions too.
2827 genTrivialCode :: Format -> (Operand -> Operand -> Instr)
2828 -> CmmExpr -> CmmExpr -> NatM Register
2829 genTrivialCode rep instr a b = do
2830 (b_op, b_code) <- getNonClobberedOperand b
2831 a_code <- getAnyReg a
2832 tmp <- getNewRegNat rep
2833 let
2834 -- We want the value of b to stay alive across the computation of a.
2835 -- But, we want to calculate a straight into the destination register,
2836 -- because the instruction only has two operands (dst := dst `op` src).
2837 -- The troublesome case is when the result of b is in the same register
2838 -- as the destination reg. In this case, we have to save b in a
2839 -- new temporary across the computation of a.
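-- For instance, when computing 'a - b' and b's value happens to live
-- in dst: we first copy b into tmp, then compute a straight into dst,
-- and finally emit 'SUB tmp dst'.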
2840 code dst
2841 | dst `regClashesWithOp` b_op =
2842 b_code `appOL`
2843 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2844 a_code dst `snocOL`
2845 instr (OpReg tmp) (OpReg dst)
2846 | otherwise =
2847 b_code `appOL`
2848 a_code dst `snocOL`
2849 instr b_op (OpReg dst)
2850 return (Any rep code)
2851
2852 regClashesWithOp :: Reg -> Operand -> Bool
2853 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2854 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2855 _ `regClashesWithOp` _ = False
2856
2857 -----------
2858
2859 trivialUCode :: Format -> (Operand -> Instr)
2860 -> CmmExpr -> NatM Register
2861 trivialUCode rep instr x = do
2862 x_code <- getAnyReg x
2863 let
2864 code dst =
2865 x_code dst `snocOL`
2866 instr (OpReg dst)
2867 return (Any rep code)
2868
2869 -----------
2870
2871 trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr)
2872 -> CmmExpr -> CmmExpr -> NatM Register
2873 trivialFCode_x87 instr x y = do
2874 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2875 (y_reg, y_code) <- getSomeReg y
2876 let
2877 format = FF80 -- always, on x87
2878 code dst =
2879 x_code `appOL`
2880 y_code `snocOL`
2881 instr format x_reg y_reg dst
2882 return (Any format code)
2883
2884 trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
2885 -> CmmExpr -> CmmExpr -> NatM Register
2886 trivialFCode_sse2 pk instr x y
2887 = genTrivialCode format (instr format) x y
2888 where format = floatFormat pk
2889
2890
2891 trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2892 trivialUFCode format instr x = do
2893 (x_reg, x_code) <- getSomeReg x
2894 let
2895 code dst =
2896 x_code `snocOL`
2897 instr x_reg dst
2898 return (Any format code)
2899
2900
2901 --------------------------------------------------------------------------------
2902 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2903 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2904 where
2905 coerce_x87 = do
2906 (x_reg, x_code) <- getSomeReg x
2907 let
2908 opc = case to of W32 -> GITOF; W64 -> GITOD;
2909 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2910 ++ show n ++ ")"
2911 code dst = x_code `snocOL` opc x_reg dst
2912 -- ToDo: works for non-II32 reps?
2913 return (Any FF80 code)
2914
2915 coerce_sse2 = do
2916 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2917 let
2918 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2919 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2920 ++ show n ++ ")"
2921 code dst = x_code `snocOL` opc (intFormat from) x_op dst
2922 return (Any (floatFormat to) code)
2923 -- works even if the destination rep is <II32
2924
2925 --------------------------------------------------------------------------------
2926 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2927 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2928 where
2929 coerceFP2Int_x87 = do
2930 (x_reg, x_code) <- getSomeReg x
2931 let
2932 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2933 n -> panic $ "coerceFP2Int.x87: unhandled width ("
2934 ++ show n ++ ")"
2935 code dst = x_code `snocOL` opc x_reg dst
2936 -- ToDo: works for non-II32 reps?
2937 return (Any (intFormat to) code)
2938
2939 coerceFP2Int_sse2 = do
2940 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2941 let
2942 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
2943 n -> panic $ "coerceFP2Int.sse: unhandled width ("
2944 ++ show n ++ ")"
2945 code dst = x_code `snocOL` opc (intFormat to) x_op dst
2946 return (Any (intFormat to) code)
2947 -- works even if the destination rep is <II32
2948
2949
2950 --------------------------------------------------------------------------------
2951 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2952 coerceFP2FP to x = do
2953 use_sse2 <- sse2Enabled
2954 (x_reg, x_code) <- getSomeReg x
2955 let
2956 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2957 n -> panic $ "coerceFP2FP: unhandled width ("
2958 ++ show n ++ ")"
2959 | otherwise = GDTOF
2960 code dst = x_code `snocOL` opc x_reg dst
2961 return (Any (if use_sse2 then floatFormat to else FF80) code)
2962
2963 --------------------------------------------------------------------------------
2964
2965 sse2NegCode :: Width -> CmmExpr -> NatM Register
2966 sse2NegCode w x = do
2967 let fmt = floatFormat w
2968 x_code <- getAnyReg x
2969 -- This is how gcc does it, so it can't be that bad:
2970 let
2971 const | FF32 <- fmt = CmmInt 0x80000000 W32
2972 | otherwise = CmmInt 0x8000000000000000 W64
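-- The constants above are the IEEE-754 sign-bit masks for single and
-- double precision; XORing with them flips the sign bit, which (unlike
-- subtracting from zero) also negates zeros and NaNs correctly.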
2973 Amode amode amode_code <- memConstant (widthInBytes w) const
2974 tmp <- getNewRegNat fmt
2975 let
2976 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2977 MOV fmt (OpAddr amode) (OpReg tmp),
2978 XOR fmt (OpReg tmp) (OpReg dst)
2979 ]
2980 --
2981 return (Any fmt code)
2982
2983 isVecExpr :: CmmExpr -> Bool
2984 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
2985 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
2986 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
2987 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
2988 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
2989 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
2990 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
2991 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
2992 isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
2993 isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
2994 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
2995 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
2996 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
2997 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
2998 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
2999 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
3000 isVecExpr _ = False
3001
3002 needLlvm :: NatM a
3003 needLlvm =
3004 sorry $ unlines ["The native code generator does not support vector"
3005 ,"instructions. Please use -fllvm."]