{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Debug            ( DebugBlock(..) )
import Instruction
import PIC
import NCGMonad
import Format
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primUnitId )
import PprCmm           ()
import CmmUtils
import CmmSwitch
import Cmm
import Hoopl
import CLabel
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import DynFlags
import Util

import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
    dflags <- getDynFlags
    return (isSse2Enabled dflags)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
    dflags <- getDynFlags
    return (isSse4_2Enabled dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
    b <- sse2Enabled
    if b then sse2 else x87

cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
  let blocks = toBlockListEntryFirst graph
  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
  picBaseMb <- getPicBaseMaybeNat
  dflags <- getDynFlags
  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
      tops = proc : concat statics
      os   = platformOS $ targetPlatform dflags

  case picBaseMb of
      Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
      Nothing      -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail)  = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction.  We must split up the
  -- instruction stream into basic blocks again.  Also, we extract
  -- LDATAs here too.
  let
        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs

        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)


stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL
    CmmUnwind {}   -> return nilOL

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode format reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode format reg src
        where ty = cmmRegType dflags reg
              format = cmmTypeFormat ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode format addr src
        where ty = cmmExprType dflags src
              format = cmmTypeFormat ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id -> genBranch id
    CmmCondBranch arg true false _ -> do
      b1 <- genCondJump true arg
      b2 <- genBranch false
      return (b1 `appOL` b2)
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
        dflags <- getDynFlags
        genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr
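
-- For example (an illustrative sketch, not part of the module): appends are
-- cheap because the OrdList is a tree, and flattening it recovers the
-- left-to-right instruction order:
--
--   example :: InstrBlock
--   example = unitOL (COMMENT (fsLit "a")) `appOL`
--             (nilOL `snocOL` COMMENT (fsLit "b"))
--
--   -- fromOL example yields the "a" comment followed by the "b" comment.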


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
-- Reg is the lower 32-bit temporary which contains the result.
-- Use getHiVRegFromLo to find the other VRegUnique.
--
-- Rules of this simplified insn selection game are therefore that
-- the returned Reg may be modified
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg

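-- An illustrative sketch (not module code): the high 32 bits live in the
-- implicitly-paired sibling of the returned low register, so a consumer
-- typically does
--
--   do ChildCode64 code rlo <- iselExpr64 e
--      let rhi = getHiVRegFromLo rlo
--      -- emit 'code', then read the low word from rlo and the high
--      -- word from rhi
--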

-- | Registers passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)

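-- A minimal sketch (not module code) of how a parent consumes a Register;
-- this is essentially what getSomeReg below does:
--
--   useRegister :: Register -> NatM (Reg, InstrBlock)
--   useRegister (Fixed _ reg code) = return (reg, code)
--   useRegister (Any fmt mkCode)   = do
--       dst <- getNewRegNat fmt      -- the parent picks the destination
--       return (dst, mkCode dst)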

swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any   format codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let fmt = cmmTypeFormat pk in
    if isFloatFormat fmt && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u fmt)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
-- A CmmInt is intended to be truncated to the appropriate
-- number of bits, so here we truncate it to Int64.  This is
-- important because e.g. -1 as a CmmInt might be either
-- -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64

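-- A worked example of why the Int64 truncation matters (illustrative):
--
--   is32BitInteger (-1)                  == True
--   is32BitInteger 18446744073709551615  == True   -- same bits as -1
--   is32BitInteger 0x80000000            == False  -- needs 33 signed bits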

-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)
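
-- For instance (a sketch): mangleIndexTree dflags reg 8 rewrites the
-- CmmRegOff form into
--   CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt 8 w)]
-- so the rest of the instruction selector only has to handle MO_Add.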

-- | The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
          )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code = code1 `appOL`
               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                      ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                      MOV II32 (OpReg r1hi) (OpReg rhi),
                      ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code = code1 `appOL`
               code2 `appOL`
               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                      ADD II32 (OpReg r2lo) (OpReg rlo),
                      MOV II32 (OpReg r1hi) (OpReg rhi),
                      ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code = code1 `appOL`
               code2 `appOL`
               toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                      SUB II32 (OpReg r2lo) (OpReg rlo),
                      MOV II32 (OpReg r1hi) (OpReg rhi),
                      SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                          r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 format | not use_sse2 && isFloatFormat fmt = FF80
                        | otherwise                         = fmt
               --
               let platform = targetPlatform dflags
               return (Fixed format
                             (getRegisterReg platform use_sse2 reg)
                             nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                       [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                       [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
  if_sse2 float_const_sse2 float_const_x87
 where
  float_const_sse2
    | f == 0.0 = do
      let
        format = floatFormat w
        code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
        -- I don't know why there are xorpd, xorps, and pxor instructions.
        -- They all appear to do the same thing --SDM
      return (Any format code)

    | otherwise = do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode True w addr code

  float_const_x87 = case w of
    W64
      | f == 0.0 ->
        let code dst = unitOL (GLDZ dst)
        in  return (Any FF80 code)

      | f == 1.0 ->
        let code dst = unitOL (GLD1 dst)
        in  return (Any FF80 code)

    _otherwise -> do
      Amode addr code <- memConstant (widthInBytes w) lit
      loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
   where
        triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
        triv_ucode instr format = trivialUCode format (instr format) x

        -- signed or unsigned extension.
        integerExtend :: Width -> Width
                      -> (Format -> Operand -> Operand -> Instr)
                      -> CmmExpr -> NatM Register
        integerExtend from to instr expr = do
            (reg,e_code) <- if from == W8 then getByteReg expr
                                          else getSomeReg expr
            let
                code dst =
                  e_code `snocOL`
                  instr (intFormat from) (OpReg reg) (OpReg dst)
            return (Any (intFormat to) code)

        toI8Reg :: Width -> CmmExpr -> NatM Register
        toI8Reg new_rep expr
            = do codefn <- getAnyReg expr
                 return (Any (intFormat new_rep) codefn)
                -- HACK: use getAnyReg to get a byte-addressable register.
                -- If the source was a Fixed register, this will add the
                -- mov instruction to put it into the desired destination.
                -- We're assuming that the destination won't be a fixed
                -- non-byte-addressable register; it won't be, because all
                -- fixed registers are word-sized.

        toI16Reg = toI8Reg -- for now

        conversionNop :: Format -> CmmExpr -> NatM Register
        conversionNop new_format expr
            = do e_code <- getRegister' dflags is32Bit expr
                 return (swizzleRegisterRep e_code new_format)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

        {- Shift ops on x86s have constraints on their source, it
           either has to be Imm, CL or 1
            => trivialCode is not restrictive enough (sigh.)
        -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op = instr (intFormat width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt = case rep of
                           W32 -> 31
                           W64 -> 63
                           _   -> panic "shift_amt"

             format = intFormat rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB format (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed format eax code)

    --------------------
    shift_code :: Width
               -> (Format -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
          x_code <- getAnyReg x
          let
               format = intFormat width
               code dst
                  = x_code dst `snocOL`
                    instr format (OpImm (litToImm lit)) (OpReg dst)
          return (Any format code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let format = intFormat width
        tmp <- getNewRegNat format
        y_code <- getAnyReg y
        let
           code = x_code tmp `appOL`
                  y_code ecx `snocOL`
                  instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            format = intFormat width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA format
                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                        (OpReg dst)
        --
        return (Any format code)

    ----------------------
    div_code width signed quotient x y = do
           (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
           x_code <- getAnyReg x
           let
             format = intFormat width
             widen | signed    = CLTD format
                   | otherwise = XOR format (OpReg edx) (OpReg edx)

             instr | signed    = IDIV
                   | otherwise = DIV

             code = y_code `appOL`
                    x_code eax `appOL`
                    toOL [widen, instr format y_op]

             result | quotient  = eax
                    | otherwise = edx

           return (Fixed format result code)


getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
      Amode addr mem_code <- getAmode mem
      use_sse2 <- sse2Enabled
      loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
    code <- intLoadCode instr mem
    return (Any format code)
  where
    width = typeWidth pk
    format = intFormat width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV format
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
 | not is32Bit
  = do
    code <- intLoadCode (MOV format) mem
    return (Any format code)
  where format = intFormat $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        format = intFormat width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        format1 = if is32Bit then format
                             else case format of
                                    II64 -> II32
                                    _    -> format
        code dst
           = unitOL (XOR format1 (OpReg dst) (OpReg dst))
    in
        return (Any format code)

-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
   isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
   isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

getRegister' dflags _ (CmmLit lit)
  = do let format = cmmTypeFormat (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
       return (Any format code)

getRegister' _ _ other
  | isVecExpr other = needLlvm
  | otherwise       = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
                tmp <- getNewRegNat rep
                return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
                return (reg, code)

reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst
   | format == FF80 = GMOV src dst
   | otherwise      = MOV format (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                           CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                           b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])

-- Matches: (x + offset) + (y << shift)
getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
                                   CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                [x, CmmMachOp (MO_Add _)
                        [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                         CmmLit (CmmInt offset _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  && is32BitInteger offset
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)

-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register). This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
getSimpleAmode dflags is32Bit addr
  | is32Bit = do
      addr_code <- getAnyReg addr
      addr_r <- getNewRegNat (intFormat (wordWidth dflags))
      let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
      return $! Amode amode (addr_code addr_r)
  | otherwise = getAmode addr

x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
       -- x must be in a temp, because it has to stay live over y_code
       -- we could compare x_reg and y_reg and do something better here...
1110 (y_reg, y_code) <- getSomeReg index
1111 let
1112 code = x_code `appOL` y_code
1113 base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
1114 n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
1115 return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
1116 code)
1117
1118
1119
1120
1121 -- -----------------------------------------------------------------------------
1122 -- getOperand: sometimes any operand will do.
1123
1124 -- getNonClobberedOperand: the value of the operand will remain valid across
1125 -- the computation of an arbitrary expression, unless the expression
1126 -- is computed directly into a register which the operand refers to
1127 -- (see trivialCode where this function is used for an example).
1128
1129 getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1130 getNonClobberedOperand (CmmLit lit) = do
1131 use_sse2 <- sse2Enabled
1132 if use_sse2 && isSuitableFloatingPointLit lit
1133 then do
1134 let CmmFloat _ w = lit
1135 Amode addr code <- memConstant (widthInBytes w) lit
1136 return (OpAddr addr, code)
1137 else do
1138
1139 is32Bit <- is32BitPlatform
1140 dflags <- getDynFlags
1141 if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
1142 then return (OpImm (litToImm lit), nilOL)
1143 else getNonClobberedOperand_generic (CmmLit lit)
1144
1145 getNonClobberedOperand (CmmLoad mem pk) = do
1146 is32Bit <- is32BitPlatform
1147 use_sse2 <- sse2Enabled
1148 if (not (isFloatType pk) || use_sse2)
1149 && (if is32Bit then not (isWord64 pk) else True)
1150 then do
1151 dflags <- getDynFlags
1152 let platform = targetPlatform dflags
1153 Amode src mem_code <- getAmode mem
1154 (src',save_code) <-
1155 if (amodeCouldBeClobbered platform src)
1156 then do
1157 tmp <- getNewRegNat (archWordFormat is32Bit)
1158 return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
1159 unitOL (LEA (archWordFormat is32Bit)
1160 (OpAddr src)
1161 (OpReg tmp)))
1162 else
1163 return (src, nilOL)
1164 return (OpAddr src', mem_code `appOL` save_code)
1165 else do
1166 getNonClobberedOperand_generic (CmmLoad mem pk)
1167
1168 getNonClobberedOperand e = getNonClobberedOperand_generic e
1169
1170 getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
1171 getNonClobberedOperand_generic e = do
1172 (reg, code) <- getNonClobberedReg e
1173 return (OpReg reg, code)
1174
1175 amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
1176 amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)
1177
1178 regClobbered :: Platform -> Reg -> Bool
1179 regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
1180 regClobbered _ _ = False
1181
1182 -- getOperand: the operand is not required to remain valid across the
1183 -- computation of an arbitrary expression.
1184 getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
1185
1186 getOperand (CmmLit lit) = do
1187 use_sse2 <- sse2Enabled
1188 if (use_sse2 && isSuitableFloatingPointLit lit)
1189 then do
1190 let CmmFloat _ w = lit
1191 Amode addr code <- memConstant (widthInBytes w) lit
1192 return (OpAddr addr, code)
1193 else do
1194
1195 is32Bit <- is32BitPlatform
1196 dflags <- getDynFlags
1197 if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
1198 then return (OpImm (litToImm lit), nilOL)
1199 else getOperand_generic (CmmLit lit)
1200
1201 getOperand (CmmLoad mem pk) = do
1202 is32Bit <- is32BitPlatform
1203 use_sse2 <- sse2Enabled
1204 if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
1205 then do
1206 Amode src mem_code <- getAmode mem
1207 return (OpAddr src, mem_code)
1208 else
1209 getOperand_generic (CmmLoad mem pk)
1210
1211 getOperand e = getOperand_generic e
1212
1213 getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
1214 getOperand_generic e = do
1215 (reg, code) <- getSomeReg e
1216 return (OpReg reg, code)
1217
1218 isOperand :: Bool -> CmmExpr -> Bool
1219 isOperand _ (CmmLoad _ _) = True
1220 isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
1221 || isSuitableFloatingPointLit lit
1222 isOperand _ _ = False
1223
1224 memConstant :: Int -> CmmLit -> NatM Amode
1225 memConstant align lit = do
1226 lbl <- getNewLabelNat
1227 let rosection = Section ReadOnlyData lbl
1228 dflags <- getDynFlags
1229 (addr, addr_code) <- if target32Bit (targetPlatform dflags)
1230 then do dynRef <- cmmMakeDynamicReference
1231 dflags
1232 DataReference
1233 lbl
1234 Amode addr addr_code <- getAmode dynRef
1235 return (addr, addr_code)
1236 else return (ripRel (ImmCLbl lbl), nilOL)
1237 let code =
1238 LDATA rosection (align, Statics lbl [CmmStaticLit lit])
1239 `consOL` addr_code
1240 return (Amode addr code)
1241
1242
1243 loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
1244 loadFloatAmode use_sse2 w addr addr_code = do
1245 let format = floatFormat w
1246 code dst = addr_code `snocOL`
1247 if use_sse2
1248 then MOV format (OpAddr addr) (OpReg dst)
1249 else GLD format addr dst
1250 return (Any (if use_sse2 then format else FF80) code)
1251
1252
1253 -- if we want a floating-point literal as an operand, we can
1254 -- use it directly from memory. However, if the literal is
1255 -- zero, we're better off generating it into a register using
1256 -- xor.
1257 isSuitableFloatingPointLit :: CmmLit -> Bool
1258 isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
1259 isSuitableFloatingPointLit _ = False
1260
1261 getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
1262 getRegOrMem e@(CmmLoad mem pk) = do
1263 is32Bit <- is32BitPlatform
1264 use_sse2 <- sse2Enabled
1265 if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
1266 then do
1267 Amode src mem_code <- getAmode mem
1268 return (OpAddr src, mem_code)
1269 else do
1270 (reg, code) <- getNonClobberedReg e
1271 return (OpReg reg, code)
1272 getRegOrMem e = do
1273 (reg, code) <- getNonClobberedReg e
1274 return (OpReg reg, code)
1275
1276 is32BitLit :: Bool -> CmmLit -> Bool
1277 is32BitLit is32Bit (CmmInt i W64)
1278 | not is32Bit
1279 = -- assume that labels are in the range 0-2^31-1: this assumes the
1280 -- small memory model (see gcc docs, -mcmodel=small).
1281 is32BitInteger i
1282 is32BitLit _ _ = True
1283
1284
1285
1286
1287 -- Set up a condition code for a conditional branch.
1288
1289 getCondCode :: CmmExpr -> NatM CondCode
1290
1291 -- yes, they really do seem to want exactly the same!
1292
1293 getCondCode (CmmMachOp mop [x, y])
1294 =
1295 case mop of
1296 MO_F_Eq W32 -> condFltCode EQQ x y
1297 MO_F_Ne W32 -> condFltCode NE x y
1298 MO_F_Gt W32 -> condFltCode GTT x y
1299 MO_F_Ge W32 -> condFltCode GE x y
1300 MO_F_Lt W32 -> condFltCode LTT x y
1301 MO_F_Le W32 -> condFltCode LE x y
1302
1303 MO_F_Eq W64 -> condFltCode EQQ x y
1304 MO_F_Ne W64 -> condFltCode NE x y
1305 MO_F_Gt W64 -> condFltCode GTT x y
1306 MO_F_Ge W64 -> condFltCode GE x y
1307 MO_F_Lt W64 -> condFltCode LTT x y
1308 MO_F_Le W64 -> condFltCode LE x y
1309
1310 _ -> condIntCode (machOpToCond mop) x y
1311
1312 getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)
1313
1314 machOpToCond :: MachOp -> Cond
1315 machOpToCond mo = case mo of
1316 MO_Eq _ -> EQQ
1317 MO_Ne _ -> NE
1318 MO_S_Gt _ -> GTT
1319 MO_S_Ge _ -> GE
1320 MO_S_Lt _ -> LTT
1321 MO_S_Le _ -> LE
1322 MO_U_Gt _ -> GU
1323 MO_U_Ge _ -> GEU
1324 MO_U_Lt _ -> LU
1325 MO_U_Le _ -> LEU
1326 _other -> pprPanic "machOpToCond" (pprMachOp mo)
1327
1328
1329 -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
1330 -- passed back up the tree.
1331
1332 condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1333 condIntCode cond x y = do is32Bit <- is32BitPlatform
1334 condIntCode' is32Bit cond x y
1335
1336 condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1337
1338 -- memory vs immediate
1339 condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
1340 | is32BitLit is32Bit lit = do
1341 Amode x_addr x_code <- getAmode x
1342 let
1343 imm = litToImm lit
1344 code = x_code `snocOL`
1345 CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr)
1346 --
1347 return (CondCode False cond code)
1348
1349 -- anything vs zero, using a mask
1350 -- TODO: Add some sanity checking!!!!
1351 condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
1352 | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
1353 = do
1354 (x_reg, x_code) <- getSomeReg x
1355 let
1356 code = x_code `snocOL`
1357 TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
1358 --
1359 return (CondCode False cond code)
1360
1361 -- anything vs zero
1362 condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
1363 (x_reg, x_code) <- getSomeReg x
1364 let
1365 code = x_code `snocOL`
1366 TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg)
1367 --
1368 return (CondCode False cond code)
1369
1370 -- anything vs operand
1371 condIntCode' is32Bit cond x y
1372 | isOperand is32Bit y = do
1373 dflags <- getDynFlags
1374 (x_reg, x_code) <- getNonClobberedReg x
1375 (y_op, y_code) <- getOperand y
1376 let
1377 code = x_code `appOL` y_code `snocOL`
1378 CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg)
1379 return (CondCode False cond code)
1380 -- operand vs. anything: invert the comparison so that we can use a
1381 -- single comparison instruction.
1382 | isOperand is32Bit x
1383 , Just revcond <- maybeFlipCond cond = do
1384 dflags <- getDynFlags
1385 (y_reg, y_code) <- getNonClobberedReg y
1386 (x_op, x_code) <- getOperand x
1387 let
1388 code = y_code `appOL` x_code `snocOL`
1389 CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg)
1390 return (CondCode False revcond code)
1391
1392 -- anything vs anything
1393 condIntCode' _ cond x y = do
1394 dflags <- getDynFlags
1395 (y_reg, y_code) <- getNonClobberedReg y
1396 (x_op, x_code) <- getRegOrMem x
1397 let
1398 code = y_code `appOL`
1399 x_code `snocOL`
1400 CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op
1401 return (CondCode False cond code)
1402
1403
1404
1405 --------------------------------------------------------------------------------
1406 condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
1407
1408 condFltCode cond x y
1409 = if_sse2 condFltCode_sse2 condFltCode_x87
1410 where
1411
1412 condFltCode_x87
1413 = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
1414 (x_reg, x_code) <- getNonClobberedReg x
1415 (y_reg, y_code) <- getSomeReg y
1416 let
1417 code = x_code `appOL` y_code `snocOL`
1418 GCMP cond x_reg y_reg
1419 -- The GCMP insn does the test and sets the zero flag if comparable
1420 -- and true. Hence we always supply EQQ as the condition to test.
1421 return (CondCode True EQQ code)
1422
1423 -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
1424 -- an operand, but the right must be a reg. We can probably do better
1425 -- than this general case...
1426 condFltCode_sse2 = do
1427 dflags <- getDynFlags
1428 (x_reg, x_code) <- getNonClobberedReg x
1429 (y_op, y_code) <- getOperand y
1430 let
1431 code = x_code `appOL`
1432 y_code `snocOL`
1433 CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg)
1434 -- NB(1): we need to use the unsigned comparison operators on the
1435 -- result of this comparison.
1436 return (CondCode True (condToUnsigned cond) code)
1437
1438 -- -----------------------------------------------------------------------------
1439 -- Generating assignments
1440
1441 -- Assignments are really at the heart of the whole code generation
1442 -- business. Almost all top-level nodes of any real importance are
1443 -- assignments, which correspond to loads, stores, or register
1444 -- transfers. If we're really lucky, some of the register transfers
1445 -- will go away, because we can use the destination register to
1446 -- complete the code generation for the right hand side. This only
1447 -- fails when the right hand side is forced into a fixed register
1448 -- (e.g. the result of a call).
1449
1450 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1451 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1452
1453 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1454 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
1455
1456
1457 -- integer assignment to memory
1458
1459 -- specific case of adding/subtracting an integer to a particular address.
1460 -- ToDo: catch other cases where we can use an operation directly on a memory
1461 -- address.
1462 assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
1463 CmmLit (CmmInt i _)])
1464 | addr == addr2, pk /= II64 || is32BitInteger i,
1465 Just instr <- check op
1466 = do Amode amode code_addr <- getAmode addr
1467 let code = code_addr `snocOL`
1468 instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
1469 return code
1470 where
1471 check (MO_Add _) = Just ADD
1472 check (MO_Sub _) = Just SUB
1473 check _ = Nothing
1474 -- ToDo: more?
1475
1476 -- general case
1477 assignMem_IntCode pk addr src = do
1478 is32Bit <- is32BitPlatform
1479 Amode addr code_addr <- getAmode addr
1480 (code_src, op_src) <- get_op_RI is32Bit src
1481 let
1482 code = code_src `appOL`
1483 code_addr `snocOL`
1484 MOV pk op_src (OpAddr addr)
1485 -- NOTE: op_src is stable, so it will still be valid
1486 -- after code_addr. This may involve the introduction
1487 -- of an extra MOV to a temporary register, but we hope
1488 -- the register allocator will get rid of it.
1489 --
1490 return code
1491 where
1492 get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
1493 get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
1494 = return (nilOL, OpImm (litToImm lit))
1495 get_op_RI _ op
1496 = do (reg,code) <- getNonClobberedReg op
1497 return (code, OpReg reg)
1498
1499
1500 -- Assign; dst is a reg, rhs is mem
1501 assignReg_IntCode pk reg (CmmLoad src _) = do
1502 load_code <- intLoadCode (MOV pk) src
1503 dflags <- getDynFlags
1504 let platform = targetPlatform dflags
1505 return (load_code (getRegisterReg platform False{-no sse2-} reg))
1506
1507 -- dst is a reg, but src could be anything
1508 assignReg_IntCode _ reg src = do
1509 dflags <- getDynFlags
1510 let platform = targetPlatform dflags
1511 code <- getAnyReg src
1512 return (code (getRegisterReg platform False{-no sse2-} reg))
1513
1514
1515 -- Floating point assignment to memory
1516 assignMem_FltCode pk addr src = do
1517 (src_reg, src_code) <- getNonClobberedReg src
1518 Amode addr addr_code <- getAmode addr
1519 use_sse2 <- sse2Enabled
1520 let
1521 code = src_code `appOL`
1522 addr_code `snocOL`
1523 if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
1524 else GST pk src_reg addr
1525 return code
1526
1527 -- Floating point assignment to a register/temporary
1528 assignReg_FltCode _ reg src = do
1529 use_sse2 <- sse2Enabled
1530 src_code <- getAnyReg src
1531 dflags <- getDynFlags
1532 let platform = targetPlatform dflags
1533 return (src_code (getRegisterReg platform use_sse2 reg))
1534
1535
1536 genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
1537
1538 genJump (CmmLoad mem _) regs = do
1539 Amode target code <- getAmode mem
1540 return (code `snocOL` JMP (OpAddr target) regs)
1541
1542 genJump (CmmLit lit) regs = do
1543 return (unitOL (JMP (OpImm (litToImm lit)) regs))
1544
1545 genJump expr regs = do
1546 (reg,code) <- getSomeReg expr
1547 return (code `snocOL` JMP (OpReg reg) regs)
1548
1549
1550 -- -----------------------------------------------------------------------------
1551 -- Unconditional branches
1552
1553 genBranch :: BlockId -> NatM InstrBlock
1554 genBranch = return . toOL . mkJumpInstr
1555
1556
1557
1558 -- -----------------------------------------------------------------------------
1559 -- Conditional jumps
1560
1561 {-
1562 Conditional jumps are always to local labels, so we can use branch
1563 instructions. We peek at the arguments to decide what kind of
1564 comparison to do.
1565
1566 I386: First, we have to ensure that the condition
1567 codes are set according to the supplied comparison operation.
1568 -}
1569
1570 genCondJump
1571 :: BlockId -- the branch target
1572 -> CmmExpr -- the condition on which to branch
1573 -> NatM InstrBlock
1574
1575 genCondJump id expr = do
1576 is32Bit <- is32BitPlatform
1577 genCondJump' is32Bit id expr
1578
1579 genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock
1580
1581 -- 64-bit integer comparisons on 32-bit
1582 genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
1583 | is32Bit, Just W64 <- maybeIntComparison mop = do
1584 ChildCode64 code1 r1_lo <- iselExpr64 e1
1585 ChildCode64 code2 r2_lo <- iselExpr64 e2
1586 let r1_hi = getHiVRegFromLo r1_lo
1587 r2_hi = getHiVRegFromLo r2_lo
1588 cond = machOpToCond mop
1589 Just cond' = maybeFlipCond cond
1590 false <- getBlockIdNat
1591 return $ code1 `appOL` code2 `appOL` toOL [
1592 CMP II32 (OpReg r2_hi) (OpReg r1_hi),
1593 JXX cond true,
1594 JXX cond' false,
1595 CMP II32 (OpReg r2_lo) (OpReg r1_lo),
1596 JXX cond true,
1597 NEWBLOCK false ]
1598
1599 genCondJump' _ id bool = do
1600 CondCode is_float cond cond_code <- getCondCode bool
1601 use_sse2 <- sse2Enabled
1602 if not is_float || not use_sse2
1603 then
1604 return (cond_code `snocOL` JXX cond id)
1605 else do
1606 lbl <- getBlockIdNat
1607
1608 -- see comment with condFltReg
1609 let code = case cond of
1610 NE -> or_unordered
1611 GU -> plain_test
1612 GEU -> plain_test
1613 _ -> and_ordered
1614
1615 plain_test = unitOL (
1616 JXX cond id
1617 )
1618 or_unordered = toOL [
1619 JXX cond id,
1620 JXX PARITY id
1621 ]
1622 and_ordered = toOL [
1623 JXX PARITY lbl,
1624 JXX cond id,
1625 JXX ALWAYS lbl,
1626 NEWBLOCK lbl
1627 ]
1628 return (cond_code `appOL` code)
1629
1630 -- -----------------------------------------------------------------------------
1631 -- Generating C calls
1632
1633 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1634 -- @get_arg@, which moves the arguments to the correct registers/stack
1635 -- locations. Apart from that, the code is easy.
1636 --
1637 -- (If applicable) Do not fill the delay slots here; you will confuse the
1638 -- register allocator.
1639
1640 genCCall
1641 :: DynFlags
1642 -> Bool -- 32 bit platform?
1643 -> ForeignTarget -- function to call
1644 -> [CmmFormal] -- where to put the result
1645 -> [CmmActual] -- arguments (of mixed type)
1646 -> NatM InstrBlock
1647
1648 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1649
1650 -- Unroll memcpy calls if the source and destination pointers are at
1651 -- least DWORD aligned and the number of bytes to copy isn't too
1652 -- large. Otherwise, call C's memcpy.
1653 genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
1654 [dst, src, CmmLit (CmmInt n _)]
1655 | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
1656 code_dst <- getAnyReg dst
1657 dst_r <- getNewRegNat format
1658 code_src <- getAnyReg src
1659 src_r <- getNewRegNat format
1660 tmp_r <- getNewRegNat format
1661 return $ code_dst dst_r `appOL` code_src src_r `appOL`
1662 go dst_r src_r tmp_r (fromInteger n)
1663 where
1664 -- The number of instructions we will generate (approx). We need 2
1665 -- instructions per move.
1666 insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
1667
1668 format = if align .&. 4 /= 0 then II32 else (archWordFormat is32Bit)
1669
1670 -- The size of each move, in bytes.
1671 sizeBytes :: Integer
1672 sizeBytes = fromIntegral (formatInBytes format)
1673
1674 go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
1675 go dst src tmp i
1676 | i >= sizeBytes =
1677 unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
1678 unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
1679 go dst src tmp (i - sizeBytes)
1680 -- Deal with remaining bytes.
1681 | i >= 4 = -- Will never happen on 32-bit
1682 unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
1683 unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1684 go dst src tmp (i - 4)
1685 | i >= 2 =
1686 unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
1687 unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1688 go dst src tmp (i - 2)
1689 | i >= 1 =
1690 unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
1691 unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1692 go dst src tmp (i - 1)
1693 | otherwise = nilOL
1694 where
1695 src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
1696 (ImmInteger (n - i))
1697 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1698 (ImmInteger (n - i))
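-- Illustrative model of the schedule 'go' emits (a sketch, not part
-- of the build): each step uses the widest move that still fits, so
-- copying 11 bytes with 8-byte words yields moves of [8,2,1].
{-
moveSchedule :: Integer -> Integer -> [Integer]
moveSchedule sizeBytes i
  | i >= sizeBytes = sizeBytes : moveSchedule sizeBytes (i - sizeBytes)
  | i >= 4         = 4 : moveSchedule sizeBytes (i - 4)
  | i >= 2         = 2 : moveSchedule sizeBytes (i - 2)
  | i >= 1         = 1 : moveSchedule sizeBytes (i - 1)
  | otherwise      = []

-- moveSchedule 8 11 == [8,2,1]
-}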
1699
1700 genCCall dflags _ (PrimTarget (MO_Memset align)) _
1701 [dst,
1702 CmmLit (CmmInt c _),
1703 CmmLit (CmmInt n _)]
1704 | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
1705 code_dst <- getAnyReg dst
1706 dst_r <- getNewRegNat format
1707 return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
1708 where
1709 (format, val) = case align .&. 3 of
1710 2 -> (II16, c2)
1711 0 -> (II32, c4)
1712 _ -> (II8, c)
1713 c2 = c `shiftL` 8 .|. c
1714 c4 = c2 `shiftL` 16 .|. c2
1715
1716 -- The number of instructions we will generate (approx). We need 1
1717      -- instruction per move.
1718 insns = (n + sizeBytes - 1) `div` sizeBytes
1719
1720 -- The size of each move, in bytes.
1721 sizeBytes :: Integer
1722 sizeBytes = fromIntegral (formatInBytes format)
1723
1724 go :: Reg -> Integer -> OrdList Instr
1725 go dst i
1726 -- TODO: Add movabs instruction and support 64-bit sets.
1727        | i >= sizeBytes = -- sizeBytes may be smaller than the fixed sizes below
1728 unitOL (MOV format (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1729 go dst (i - sizeBytes)
1730 | i >= 4 = -- Will never happen on 32-bit
1731 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1732 go dst (i - 4)
1733 | i >= 2 =
1734 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1735 go dst (i - 2)
1736 | i >= 1 =
1737 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1738 go dst (i - 1)
1739 | otherwise = nilOL
1740 where
1741 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1742 (ImmInteger (n - i))
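-- Sketch of the fill-value replication used above (illustrative
-- only, not part of the build): the byte c is widened by OR-ing
-- shifted copies, e.g. c = 0xAB gives c2 = 0xABAB and c4 = 0xABABABAB.
{-
replicateByte :: Integer -> (Integer, Integer)
replicateByte c = (c2, c4)
  where c2 = c  `shiftL` 8  .|. c
        c4 = c2 `shiftL` 16 .|. c2

-- replicateByte 0xAB == (0xABAB, 0xABABABAB)
-}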
1743
1744 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1745 -- write barrier compiles to no code on x86/x86-64;
1746         -- we keep it this late to stop earlier passes optimising across it.
1747
1748 genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
1749
1750 genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
1751 case n of
1752 0 -> genPrefetch src $ PREFETCH NTA format
1753 1 -> genPrefetch src $ PREFETCH Lvl2 format
1754 2 -> genPrefetch src $ PREFETCH Lvl1 format
1755 3 -> genPrefetch src $ PREFETCH Lvl0 format
1756 l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
1757   -- The C/LLVM prefetch convention is 0, 1, 2, and 3;
1758   -- the corresponding x86 levels are NTA, 2, 1, and 0.
1759 where
1760 format = archWordFormat is32bit
1761 -- need to know what register width for pointers!
1762 genPrefetch inRegSrc prefetchCTor =
1763 do
1764 code_src <- getAnyReg inRegSrc
1765 src_r <- getNewRegNat format
1766 return $ code_src src_r `appOL`
1767 (unitOL (prefetchCTor (OpAddr
1768 ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
1769 -- prefetch always takes an address
1770
1771 genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
1772 let platform = targetPlatform dflags
1773 let dst_r = getRegisterReg platform False (CmmLocal dst)
1774 case width of
1775 W64 | is32Bit -> do
1776 ChildCode64 vcode rlo <- iselExpr64 src
1777 let dst_rhi = getHiVRegFromLo dst_r
1778 rhi = getHiVRegFromLo rlo
1779 return $ vcode `appOL`
1780 toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
1781 MOV II32 (OpReg rhi) (OpReg dst_r),
1782 BSWAP II32 dst_rhi,
1783 BSWAP II32 dst_r ]
1784 W16 -> do code_src <- getAnyReg src
1785 return $ code_src dst_r `appOL`
1786 unitOL (BSWAP II32 dst_r) `appOL`
1787 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
1788 _ -> do code_src <- getAnyReg src
1789 return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)
1790 where
1791 format = intFormat width
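-- Worked example of the W16 case above (a sketch): with 0x1234 in
-- the low half of a 32-bit register, BSWAP gives 0x34120000 and the
-- SHR by 16 leaves the desired 0x3412.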
1792
1793 genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1794 args@[src] = do
1795 sse4_2 <- sse4_2Enabled
1796 let platform = targetPlatform dflags
1797 if sse4_2
1798 then do code_src <- getAnyReg src
1799 src_r <- getNewRegNat format
1800 let dst_r = getRegisterReg platform False (CmmLocal dst)
1801 return $ code_src src_r `appOL`
1802 (if width == W8 then
1803                     -- The POPCNT instruction doesn't take an r/m8
1804 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1805 unitOL (POPCNT II16 (OpReg src_r) dst_r)
1806 else
1807 unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
1808 (if width == W8 || width == W16 then
1809 -- We used a 16-bit destination register above,
1810 -- so zero-extend
1811 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1812 else nilOL)
1813 else do
1814 targetExpr <- cmmMakeDynamicReference dflags
1815 CallReference lbl
1816 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1817 [NoHint] [NoHint]
1818 CmmMayReturn)
1819 genCCall dflags is32Bit target dest_regs args
1820 where
1821 format = intFormat width
1822 lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
1823
1824 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
1825 | is32Bit && width == W64 = do
1826 -- Fallback to `hs_clz64` on i386
1827 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
1828 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1829 [NoHint] [NoHint]
1830 CmmMayReturn)
1831 genCCall dflags is32Bit target dest_regs args
1832
1833 | otherwise = do
1834 code_src <- getAnyReg src
1835 src_r <- getNewRegNat format
1836 tmp_r <- getNewRegNat format
1837 let dst_r = getRegisterReg platform False (CmmLocal dst)
1838
1839 -- The following insn sequence makes sure 'clz 0' has a defined value.
1840     -- Starting with Haswell, one could use the LZCNT insn instead.
1841 return $ code_src src_r `appOL` toOL
1842 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1843 [ BSR format (OpReg src_r) tmp_r
1844 , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
1845 , CMOV NE format (OpReg tmp_r) dst_r
1846 , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
1847 ]) -- NB: We don't need to zero-extend the result for the
1848 -- W8/W16 cases because the 'MOV' insn already
1849 -- took care of implicitly clearing the upper bits
1850 where
1851 bw = widthInBits width
1852 platform = targetPlatform dflags
1853 format = if width == W8 then II16 else intFormat width
1854 lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width))
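-- Reference model of the BSR sequence above (a sketch, not
-- compiled): BSR finds the index of the highest set bit, and for a
-- power-of-two bit width bw, (bw-1) `xor` bsr x == bw-1 - bsr x,
-- i.e. the leading-zero count. For x == 0 the CMOV keeps the
-- preloaded 2*bw-1, and (2*bw-1) `xor` (bw-1) == bw, so clz 0 == bw.
{-
clzModel :: Int -> Integer -> Int
clzModel bw 0 = (2*bw - 1) `xor` (bw - 1)   -- == bw
clzModel bw x = (bw - 1) `xor` bsr x
  where bsr v = length (takeWhile (> 1) (iterate (`div` 2) v))
-}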
1855
1856 genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
1857 | is32Bit, width == W64 = do
1858 ChildCode64 vcode rlo <- iselExpr64 src
1859 let rhi = getHiVRegFromLo rlo
1860 dst_r = getRegisterReg platform False (CmmLocal dst)
1861 lbl1 <- getBlockIdNat
1862 lbl2 <- getBlockIdNat
1863 tmp_r <- getNewRegNat format
1864
1865 -- The following instruction sequence corresponds to the pseudo-code
1866 --
1867 -- if (src) {
1868 -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
1869 -- } else {
1870 -- dst = 64;
1871 -- }
1872 return $ vcode `appOL` toOL
1873 ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
1874 , OR II32 (OpReg rlo) (OpReg tmp_r)
1875 , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
1876 , JXX EQQ lbl2
1877 , JXX ALWAYS lbl1
1878
1879 , NEWBLOCK lbl1
1880 , BSF II32 (OpReg rhi) dst_r
1881 , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r)
1882 , BSF II32 (OpReg rlo) tmp_r
1883 , CMOV NE II32 (OpReg tmp_r) dst_r
1884 , JXX ALWAYS lbl2
1885
1886 , NEWBLOCK lbl2
1887 ])
1888
1889 | otherwise = do
1890 code_src <- getAnyReg src
1891 src_r <- getNewRegNat format
1892 tmp_r <- getNewRegNat format
1893 let dst_r = getRegisterReg platform False (CmmLocal dst)
1894
1895 -- The following insn sequence makes sure 'ctz 0' has a defined value.
1896     -- Starting with Haswell, one could use the TZCNT insn instead.
1897 return $ code_src src_r `appOL` toOL
1898 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1899 [ BSF format (OpReg src_r) tmp_r
1900 , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
1901 , CMOV NE format (OpReg tmp_r) dst_r
1902 ]) -- NB: We don't need to zero-extend the result for the
1903 -- W8/W16 cases because the 'MOV' insn already
1904 -- took care of implicitly clearing the upper bits
1905 where
1906 bw = widthInBits width
1907 platform = targetPlatform dflags
1908 format = if width == W8 then II16 else intFormat width
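-- Matching sketch for the scalar case above (illustrative only):
-- BSF yields the index of the lowest set bit, and the preloaded bw
-- survives the CMOV when the input is zero, so ctz 0 == bw.
{-
ctzModel :: Int -> Integer -> Int
ctzModel bw 0 = bw                                    -- CMOV default
ctzModel _  x = length (takeWhile even (iterate (`div` 2) x))
-}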
1909
1910 genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
1911 targetExpr <- cmmMakeDynamicReference dflags
1912 CallReference lbl
1913 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1914 [NoHint] [NoHint]
1915 CmmMayReturn)
1916 genCCall dflags is32Bit target dest_regs args
1917 where
1918 lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width))
1919
1920 genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
1921 Amode amode addr_code <-
1922 if amop `elem` [AMO_Add, AMO_Sub]
1923 then getAmode addr
1924 else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
1925 arg <- getNewRegNat format
1926 arg_code <- getAnyReg n
1927 use_sse2 <- sse2Enabled
1928 let platform = targetPlatform dflags
1929 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1930 code <- op_code dst_r arg amode
1931 return $ addr_code `appOL` arg_code arg `appOL` code
1932 where
1933 -- Code for the operation
1934 op_code :: Reg -- Destination reg
1935 -> Reg -- Register containing argument
1936 -> AddrMode -- Address of location to mutate
1937 -> NatM (OrdList Instr)
1938 op_code dst_r arg amode = case amop of
1939 -- In the common case where dst_r is a virtual register the
1940 -- final move should go away, because it's the last use of arg
1941 -- and the first use of dst_r.
1942 AMO_Add -> return $ toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
1943 , MOV format (OpReg arg) (OpReg dst_r)
1944 ]
1945 AMO_Sub -> return $ toOL [ NEGI format (OpReg arg)
1946 , LOCK (XADD format (OpReg arg) (OpAddr amode))
1947 , MOV format (OpReg arg) (OpReg dst_r)
1948 ]
1949 AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
1950 AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
1951 , NOT format dst
1952 ])
1953 AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
1954 AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
1955 where
1956 -- Simulate operation that lacks a dedicated instruction using
1957 -- cmpxchg.
1958 cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
1959 -> NatM (OrdList Instr)
1960 cmpxchg_code instrs = do
1961 lbl <- getBlockIdNat
1962 tmp <- getNewRegNat format
1963 return $ toOL
1964 [ MOV format (OpAddr amode) (OpReg eax)
1965 , JXX ALWAYS lbl
1966 , NEWBLOCK lbl
1967 -- Keep old value so we can return it:
1968 , MOV format (OpReg eax) (OpReg dst_r)
1969 , MOV format (OpReg eax) (OpReg tmp)
1970 ]
1971 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
1972 [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
1973 , JXX NE lbl
1974 ]
1975
1976 format = intFormat width
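-- The cmpxchg_code loop above corresponds to this compare-and-swap
-- retry loop (a sketch; 'casWord' is a hypothetical primitive that
-- returns the value actually found at the address, and the Foreign
-- imports are elided):
{-
atomicRMW :: (Word -> Word) -> Ptr Word -> IO Word
atomicRMW f addr = loop =<< peek addr
  where
    loop old = do
      seen <- casWord addr old (f old)   -- LOCK CMPXCHG
      if seen == old
        then return old                  -- success: hand back old value
        else loop seen                   -- retry with the value found
-}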
1977
1978 genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
1979 load_code <- intLoadCode (MOV (intFormat width)) addr
1980 let platform = targetPlatform dflags
1981 use_sse2 <- sse2Enabled
1982 return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
1983
1984 genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1985 code <- assignMem_IntCode (intFormat width) addr val
1986 return $ code `snocOL` MFENCE
1987
1988 genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
1989 -- On x86 we don't have enough registers to use cmpxchg with a
1990 -- complicated addressing mode, so on that architecture we
1991 -- pre-compute the address first.
1992 Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
1993 newval <- getNewRegNat format
1994 newval_code <- getAnyReg new
1995 oldval <- getNewRegNat format
1996 oldval_code <- getAnyReg old
1997 use_sse2 <- sse2Enabled
1998 let platform = targetPlatform dflags
1999 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
2000 code = toOL
2001 [ MOV format (OpReg oldval) (OpReg eax)
2002 , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
2003 , MOV format (OpReg eax) (OpReg dst_r)
2004 ]
2005 return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
2006 `appOL` code
2007 where
2008 format = intFormat width
2009
2010 genCCall _ is32Bit target dest_regs args = do
2011 dflags <- getDynFlags
2012 let platform = targetPlatform dflags
2013 case (target, dest_regs) of
2014 -- void return type prim op
2015 (PrimTarget op, []) ->
2016 outOfLineCmmOp op Nothing args
2017 -- we only cope with a single result for foreign calls
2018 (PrimTarget op, [r])
2019 | not is32Bit -> outOfLineCmmOp op (Just r) args
2020 | otherwise -> do
2021 l1 <- getNewLabelNat
2022 l2 <- getNewLabelNat
2023 sse2 <- sse2Enabled
2024 if sse2
2025 then
2026 outOfLineCmmOp op (Just r) args
2027 else case op of
2028 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
2029 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
2030
2031 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
2032 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
2033
2034 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
2035 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
2036
2037 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
2038 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
2039
2040 _other_op -> outOfLineCmmOp op (Just r) args
2041
2042 where
2043 actuallyInlineFloatOp instr format [x]
2044 = do res <- trivialUFCode format (instr format) x
2045 any <- anyReg res
2046 return (any (getRegisterReg platform False (CmmLocal r)))
2047
2048 actuallyInlineFloatOp _ _ args
2049 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
2050 ++ show (length args) ++ ")"
2051
2052 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
2053 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
2054 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
2055 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
2056 case args of
2057 [arg_x, arg_y] ->
2058 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
2059 let format = intFormat width
2060 lCode <- anyReg =<< trivialCode width (ADD_CC format)
2061 (Just (ADD_CC format)) arg_x arg_y
2062 let reg_l = getRegisterReg platform True (CmmLocal res_l)
2063 reg_h = getRegisterReg platform True (CmmLocal res_h)
2064 code = hCode reg_h `appOL`
2065 lCode reg_l `snocOL`
2066 ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
2067 return code
2068 _ -> panic "genCCall: Wrong number of arguments/results for add2"
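-- Worked example (sketch): for MO_Add2, res_l receives x + y via
-- ADD_CC (which sets the carry flag), and res_h, preloaded with 0,
-- collects the carry through the trailing 'ADC $0'.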
2069 (PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
2070 addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
2071 (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
2072 addSubIntC platform ADD_CC (Just . ADD_CC) OFLO width res_r res_c args
2073 (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
2074 addSubIntC platform SUB_CC (const Nothing) OFLO width res_r res_c args
2075 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
2076 case args of
2077 [arg_x, arg_y] ->
2078 do (y_reg, y_code) <- getRegOrMem arg_y
2079 x_code <- getAnyReg arg_x
2080 let format = intFormat width
2081 reg_h = getRegisterReg platform True (CmmLocal res_h)
2082 reg_l = getRegisterReg platform True (CmmLocal res_l)
2083 code = y_code `appOL`
2084 x_code rax `appOL`
2085 toOL [MUL2 format y_reg,
2086 MOV format (OpReg rdx) (OpReg reg_h),
2087 MOV format (OpReg rax) (OpReg reg_l)]
2088 return code
2089 _ -> panic "genCCall: Wrong number of arguments/results for mul2"
2090
2091 _ -> if is32Bit
2092 then genCCall32' dflags target dest_regs args
2093 else genCCall64' dflags target dest_regs args
2094
2095 where divOp1 platform signed width results [arg_x, arg_y]
2096 = divOp platform signed width results Nothing arg_x arg_y
2097 divOp1 _ _ _ _ _
2098 = panic "genCCall: Wrong number of arguments for divOp1"
2099 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
2100 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
2101 divOp2 _ _ _ _ _
2102 = panic "genCCall: Wrong number of arguments for divOp2"
2103 divOp platform signed width [res_q, res_r]
2104 m_arg_x_high arg_x_low arg_y
2105 = do let format = intFormat width
2106 reg_q = getRegisterReg platform True (CmmLocal res_q)
2107 reg_r = getRegisterReg platform True (CmmLocal res_r)
2108 widen | signed = CLTD format
2109 | otherwise = XOR format (OpReg rdx) (OpReg rdx)
2110 instr | signed = IDIV
2111 | otherwise = DIV
2112 (y_reg, y_code) <- getRegOrMem arg_y
2113 x_low_code <- getAnyReg arg_x_low
2114 x_high_code <- case m_arg_x_high of
2115 Just arg_x_high ->
2116 getAnyReg arg_x_high
2117 Nothing ->
2118 return $ const $ unitOL widen
2119 return $ y_code `appOL`
2120 x_low_code rax `appOL`
2121 x_high_code rdx `appOL`
2122 toOL [instr format y_reg,
2123 MOV format (OpReg rax) (OpReg reg_q),
2124 MOV format (OpReg rdx) (OpReg reg_r)]
2125 divOp _ _ _ _ _ _ _
2126 = panic "genCCall: Wrong number of results for divOp"
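-- Worked example (sketch): a W64 quotRem lands x in rax, widens
-- into rdx (CLTD sign-extends for signed, XOR zeroes for
-- unsigned), then IDIV/DIV leaves the quotient in rax and the
-- remainder in rdx, exactly as the two MOVs expect.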
2127
2128 addSubIntC platform instr mrevinstr cond width
2129 res_r res_c [arg_x, arg_y]
2130 = do let format = intFormat width
2131 rCode <- anyReg =<< trivialCode width (instr format)
2132 (mrevinstr format) arg_x arg_y
2133 reg_tmp <- getNewRegNat II8
2134 let reg_c = getRegisterReg platform True (CmmLocal res_c)
2135 reg_r = getRegisterReg platform True (CmmLocal res_r)
2136 code = rCode reg_r `snocOL`
2137 SETCC cond (OpReg reg_tmp) `snocOL`
2138 MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
2139
2140 return code
2141 addSubIntC _ _ _ _ _ _ _ _
2142 = panic "genCCall: Wrong number of arguments/results for addSubIntC"
2143
2144 genCCall32' :: DynFlags
2145 -> ForeignTarget -- function to call
2146 -> [CmmFormal] -- where to put the result
2147 -> [CmmActual] -- arguments (of mixed type)
2148 -> NatM InstrBlock
2149 genCCall32' dflags target dest_regs args = do
2150 let
2151 prom_args = map (maybePromoteCArg dflags W32) args
2152
2153         -- Align stack to 16n for calls, assuming a starting stack
2154         -- alignment of 16n - word_size on procedure entry, which we
2155         -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2156 sizes = map (arg_size . cmmExprType dflags) (reverse args)
2157 raw_arg_size = sum sizes + wORD_SIZE dflags
2158 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
2159 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
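        -- Worked example (a sketch): two W32 arguments and one W64
        -- argument give sizes = [4,4,8], raw_arg_size = 16 + 4
        -- (return address) = 20, arg_pad_size = roundTo 16 20 - 20 = 12,
        -- and tot_arg_size = 20 + 12 - 4 = 28 bytes to pop after the call.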
2160 delta0 <- getDeltaNat
2161 setDeltaNat (delta0 - arg_pad_size)
2162
2163 use_sse2 <- sse2Enabled
2164 push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
2165 delta <- getDeltaNat
2166 MASSERT(delta == delta0 - tot_arg_size)
2167
2168 -- deal with static vs dynamic call targets
2169 (callinsns,cconv) <-
2170 case target of
2171 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2172 -> -- ToDo: stdcall arg sizes
2173 return (unitOL (CALL (Left fn_imm) []), conv)
2174 where fn_imm = ImmCLbl lbl
2175 ForeignTarget expr conv
2176 -> do { (dyn_r, dyn_c) <- getSomeReg expr
2177 ; ASSERT( isWord32 (cmmExprType dflags expr) )
2178 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
2179 PrimTarget _
2180 -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
2181 ++ "probably because too many return values."
2182
2183 let push_code
2184 | arg_pad_size /= 0
2185 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2186 DELTA (delta0 - arg_pad_size)]
2187 `appOL` concatOL push_codes
2188 | otherwise
2189 = concatOL push_codes
2190
2191 -- Deallocate parameters after call for ccall;
2192 -- but not for stdcall (callee does it)
2193 --
2194 -- We have to pop any stack padding we added
2195 -- even if we are doing stdcall, though (#5052)
2196 pop_size
2197 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
2198 | otherwise = tot_arg_size
2199
2200 call = callinsns `appOL`
2201 toOL (
2202 (if pop_size==0 then [] else
2203 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
2204 ++
2205 [DELTA delta0]
2206 )
2207 setDeltaNat delta0
2208
2209 dflags <- getDynFlags
2210 let platform = targetPlatform dflags
2211
2212 let
2213 -- assign the results, if necessary
2214 assign_code [] = nilOL
2215 assign_code [dest]
2216 | isFloatType ty =
2217 if use_sse2
2218 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
2219 EAIndexNone
2220 (ImmInt 0)
2221 fmt = floatFormat w
2222 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
2223 DELTA (delta0 - b),
2224 GST fmt fake0 tmp_amode,
2225 MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
2226 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
2227 DELTA delta0]
2228 else unitOL (GMOV fake0 r_dest)
2229 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
2230 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
2231 | otherwise = unitOL (MOV (intFormat w)
2232 (OpReg eax)
2233 (OpReg r_dest))
2234 where
2235 ty = localRegType dest
2236 w = typeWidth ty
2237 b = widthInBytes w
2238 r_dest_hi = getHiVRegFromLo r_dest
2239 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
2240 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
2241
2242 return (push_code `appOL`
2243 call `appOL`
2244 assign_code dest_regs)
2245
2246 where
2247 arg_size :: CmmType -> Int -- Width in bytes
2248 arg_size ty = widthInBytes (typeWidth ty)
2249
2250 roundTo a x | x `mod` a == 0 = x
2251 | otherwise = x + a - (x `mod` a)
2252
2253 push_arg :: Bool -> CmmActual {-current argument-}
2254 -> NatM InstrBlock -- code
2255
2256 push_arg use_sse2 arg -- we don't need the hints on x86
2257 | isWord64 arg_ty = do
2258 ChildCode64 code r_lo <- iselExpr64 arg
2259 delta <- getDeltaNat
2260 setDeltaNat (delta - 8)
2261 let
2262 r_hi = getHiVRegFromLo r_lo
2263 return ( code `appOL`
2264 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
2265                       PUSH II32 (OpReg r_lo), DELTA (delta - 8)]
2267 )
2268
2269 | isFloatType arg_ty = do
2270 (reg, code) <- getSomeReg arg
2271 delta <- getDeltaNat
2272 setDeltaNat (delta-size)
2273 return (code `appOL`
2274 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
2275 DELTA (delta-size),
2276 let addr = AddrBaseIndex (EABaseReg esp)
2277 EAIndexNone
2278 (ImmInt 0)
2279 format = floatFormat (typeWidth arg_ty)
2280 in
2281 if use_sse2
2282 then MOV format (OpReg reg) (OpAddr addr)
2283 else GST format reg addr
2284 ]
2285 )
2286
2287 | otherwise = do
2288 (operand, code) <- getOperand arg
2289 delta <- getDeltaNat
2290 setDeltaNat (delta-size)
2291 return (code `snocOL`
2292 PUSH II32 operand `snocOL`
2293 DELTA (delta-size))
2294
2295 where
2296 arg_ty = cmmExprType dflags arg
2297 size = arg_size arg_ty -- Byte size
2298
2299 genCCall64' :: DynFlags
2300 -> ForeignTarget -- function to call
2301 -> [CmmFormal] -- where to put the result
2302 -> [CmmActual] -- arguments (of mixed type)
2303 -> NatM InstrBlock
2304 genCCall64' dflags target dest_regs args = do
2305 -- load up the register arguments
2306 let prom_args = map (maybePromoteCArg dflags W32) args
2307
2308 (stack_args, int_regs_used, fp_regs_used, load_args_code)
2309 <-
2310 if platformOS platform == OSMinGW32
2311 then load_args_win prom_args [] [] (allArgRegs platform) nilOL
2312 else do (stack_args, aregs, fregs, load_args_code)
2313 <- load_args prom_args (allIntArgRegs platform) (allFPArgRegs platform) nilOL
2314 let fp_regs_used = reverse (drop (length fregs) (reverse (allFPArgRegs platform)))
2315 int_regs_used = reverse (drop (length aregs) (reverse (allIntArgRegs platform)))
2316 return (stack_args, int_regs_used, fp_regs_used, load_args_code)
2317
2318 let
2319 arg_regs_used = int_regs_used ++ fp_regs_used
2320 arg_regs = [eax] ++ arg_regs_used
2321                     -- for annotating the call instruction with its register usage
2322 sse_regs = length fp_regs_used
2323 arg_stack_slots = if platformOS platform == OSMinGW32
2324 then length stack_args + length (allArgRegs platform)
2325 else length stack_args
2326 tot_arg_size = arg_size * arg_stack_slots
2327
2328
2329 -- Align stack to 16n for calls, assuming a starting stack
2330 -- alignment of 16n - word_size on procedure entry. Which we
2331 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2332 (real_size, adjust_rsp) <-
2333 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2334 then return (tot_arg_size, nilOL)
2335 else do -- we need to adjust...
2336 delta <- getDeltaNat
2337 setDeltaNat (delta - wORD_SIZE dflags)
2338 return (tot_arg_size + wORD_SIZE dflags, toOL [
2339 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2340 DELTA (delta - wORD_SIZE dflags) ])
2341
2342 -- push the stack args, right to left
2343 push_code <- push_args (reverse stack_args) nilOL
2344 -- On Win64, we also have to leave stack space for the arguments
2345 -- that we are passing in registers
2346 lss_code <- if platformOS platform == OSMinGW32
2347 then leaveStackSpace (length (allArgRegs platform))
2348 else return nilOL
2349 delta <- getDeltaNat
2350
2351 -- deal with static vs dynamic call targets
2352 (callinsns,_cconv) <-
2353 case target of
2354 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2355 -> -- ToDo: stdcall arg sizes
2356 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2357 where fn_imm = ImmCLbl lbl
2358 ForeignTarget expr conv
2359 -> do (dyn_r, dyn_c) <- getSomeReg expr
2360 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2361 PrimTarget _
2362 -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
2363 ++ "probably because too many return values."
2364
2365 let
2366 -- The x86_64 ABI requires us to set %al to the number of SSE2
2367 -- registers that contain arguments, if the called routine
2368 -- is a varargs function. We don't know whether it's a
2369 -- varargs function or not, so we have to assume it is.
2370 --
2371 -- It's not safe to omit this assignment, even if the number
2372 -- of SSE2 regs in use is zero. If %al is larger than 8
2373 -- on entry to a varargs function, seg faults ensue.
2374 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
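        -- Example (sketch): a call passing one argument in an XMM
        -- register gets 'assign_eax 1' here, so a varargs callee such
        -- as printf knows exactly one vector register holds an argument.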
2375
2376 let call = callinsns `appOL`
2377 toOL (
2378 -- Deallocate parameters after call for ccall;
2379 -- stdcall has callee do it, but is not supported on
2380 -- x86_64 target (see #3336)
2381 (if real_size==0 then [] else
2382 [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2383 ++
2384 [DELTA (delta + real_size)]
2385 )
2386 setDeltaNat (delta + real_size)
2387
2388 let
2389 -- assign the results, if necessary
2390 assign_code [] = nilOL
2391 assign_code [dest] =
2392 case typeWidth rep of
2393 W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
2394 (OpReg xmm0)
2395 (OpReg r_dest))
2396 W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
2397 (OpReg xmm0)
2398 (OpReg r_dest))
2399 _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
2400 where
2401 rep = localRegType dest
2402 r_dest = getRegisterReg platform True (CmmLocal dest)
2403 assign_code _many = panic "genCCall.assign_code many"
2404
2405 return (load_args_code `appOL`
2406 adjust_rsp `appOL`
2407 push_code `appOL`
2408 lss_code `appOL`
2409 assign_eax sse_regs `appOL`
2410 call `appOL`
2411 assign_code dest_regs)
2412
2413 where platform = targetPlatform dflags
2414       arg_size = 8 -- always, at the moment
2415
2416 load_args :: [CmmExpr]
2417 -> [Reg] -- int regs avail for args
2418 -> [Reg] -- FP regs avail for args
2419 -> InstrBlock
2420 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2421 load_args args [] [] code = return (args, [], [], code)
2422 -- no more regs to use
2423 load_args [] aregs fregs code = return ([], aregs, fregs, code)
2424 -- no more args to push
2425 load_args (arg : rest) aregs fregs code
2426 | isFloatType arg_rep =
2427 case fregs of
2428 [] -> push_this_arg
2429 (r:rs) -> do
2430 arg_code <- getAnyReg arg
2431 load_args rest aregs rs (code `appOL` arg_code r)
2432 | otherwise =
2433 case aregs of
2434 [] -> push_this_arg
2435 (r:rs) -> do
2436 arg_code <- getAnyReg arg
2437 load_args rest rs fregs (code `appOL` arg_code r)
2438 where
2439 arg_rep = cmmExprType dflags arg
2440
2441 push_this_arg = do
2442 (args',ars,frs,code') <- load_args rest aregs fregs code
2443 return (arg:args', ars, frs, code')
2444
2445 load_args_win :: [CmmExpr]
2446 -> [Reg] -- used int regs
2447 -> [Reg] -- used FP regs
2448 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2449 -> InstrBlock
2450 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock)
2451 load_args_win args usedInt usedFP [] code
2452 = return (args, usedInt, usedFP, code)
2453 -- no more regs to use
2454 load_args_win [] usedInt usedFP _ code
2455 = return ([], usedInt, usedFP, code)
2456 -- no more args to push
2457 load_args_win (arg : rest) usedInt usedFP
2458 ((ireg, freg) : regs) code
2459 | isFloatType arg_rep = do
2460 arg_code <- getAnyReg arg
2461 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2462 (code `appOL`
2463 arg_code freg `snocOL`
2464 -- If we are calling a varargs function
2465 -- then we need to define ireg as well
2466 -- as freg
2467 MOV II64 (OpReg freg) (OpReg ireg))
2468 | otherwise = do
2469 arg_code <- getAnyReg arg
2470 load_args_win rest (ireg : usedInt) usedFP regs
2471 (code `appOL` arg_code ireg)
2472 where
2473 arg_rep = cmmExprType dflags arg
2474
2475 push_args [] code = return code
2476 push_args (arg:rest) code
2477 | isFloatType arg_rep = do
2478 (arg_reg, arg_code) <- getSomeReg arg
2479 delta <- getDeltaNat
2480 setDeltaNat (delta-arg_size)
2481 let code' = code `appOL` arg_code `appOL` toOL [
2482 SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp),
2483 DELTA (delta-arg_size),
2484 MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2485 push_args rest code'
2486
2487 | otherwise = do
2488 ASSERT(width == W64) return ()
2489 (arg_op, arg_code) <- getOperand arg
2490 delta <- getDeltaNat
2491 setDeltaNat (delta-arg_size)
2492 let code' = code `appOL` arg_code `appOL` toOL [
2493 PUSH II64 arg_op,
2494 DELTA (delta-arg_size)]
2495 push_args rest code'
2496 where
2497 arg_rep = cmmExprType dflags arg
2498 width = typeWidth arg_rep
2499
2500 leaveStackSpace n = do
2501 delta <- getDeltaNat
2502 setDeltaNat (delta - n * arg_size)
2503 return $ toOL [
2504 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2505 DELTA (delta - n * arg_size)]
2506
2507 maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
2508 maybePromoteCArg dflags wto arg
2509 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
2510 | otherwise = arg
2511 where
2512 wfrom = cmmExprWidth dflags arg
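-- Example (sketch): an 8-bit argument is widened before the call, so
-- maybePromoteCArg dflags W32 on a W8 expression wraps it in
-- CmmMachOp (MO_UU_Conv W8 W32) [arg]; W32-or-wider arguments pass
-- through unchanged.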
2513
2514 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2515 outOfLineCmmOp mop res args
2516 = do
2517 dflags <- getDynFlags
2518 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
2519 let target = ForeignTarget targetExpr
2520 (ForeignConvention CCallConv [] [] CmmMayReturn)
2521
2522 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
2523 where
2524 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2525       -- TODO: Why is this OK? Under Linux this code will be in libm.so.
2526       -- Is it because they're really implemented as primitive instructions by the assembler?? -- BL 2009/12/31
2527 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2528
2529 fn = case mop of
2530 MO_F32_Sqrt -> fsLit "sqrtf"
2531 MO_F32_Sin -> fsLit "sinf"
2532 MO_F32_Cos -> fsLit "cosf"
2533 MO_F32_Tan -> fsLit "tanf"
2534 MO_F32_Exp -> fsLit "expf"
2535 MO_F32_Log -> fsLit "logf"
2536
2537 MO_F32_Asin -> fsLit "asinf"
2538 MO_F32_Acos -> fsLit "acosf"
2539 MO_F32_Atan -> fsLit "atanf"
2540
2541 MO_F32_Sinh -> fsLit "sinhf"
2542 MO_F32_Cosh -> fsLit "coshf"
2543 MO_F32_Tanh -> fsLit "tanhf"
2544 MO_F32_Pwr -> fsLit "powf"
2545
2546 MO_F64_Sqrt -> fsLit "sqrt"
2547 MO_F64_Sin -> fsLit "sin"
2548 MO_F64_Cos -> fsLit "cos"
2549 MO_F64_Tan -> fsLit "tan"
2550 MO_F64_Exp -> fsLit "exp"
2551 MO_F64_Log -> fsLit "log"
2552
2553 MO_F64_Asin -> fsLit "asin"
2554 MO_F64_Acos -> fsLit "acos"
2555 MO_F64_Atan -> fsLit "atan"
2556
2557 MO_F64_Sinh -> fsLit "sinh"
2558 MO_F64_Cosh -> fsLit "cosh"
2559 MO_F64_Tanh -> fsLit "tanh"
2560 MO_F64_Pwr -> fsLit "pow"
2561
2562 MO_Memcpy _ -> fsLit "memcpy"
2563 MO_Memset _ -> fsLit "memset"
2564 MO_Memmove _ -> fsLit "memmove"
2565
2566 MO_PopCnt _ -> fsLit "popcnt"
2567 MO_BSwap _ -> fsLit "bswap"
2568 MO_Clz w -> fsLit $ clzLabel w
2569 MO_Ctz _ -> unsupported
2570
2571 MO_AtomicRMW _ _ -> fsLit "atomicrmw"
2572 MO_AtomicRead _ -> fsLit "atomicread"
2573 MO_AtomicWrite _ -> fsLit "atomicwrite"
2574 MO_Cmpxchg _ -> fsLit "cmpxchg"
2575
2576 MO_UF_Conv _ -> unsupported
2577
2578 MO_S_QuotRem {} -> unsupported
2579 MO_U_QuotRem {} -> unsupported
2580 MO_U_QuotRem2 {} -> unsupported
2581 MO_Add2 {} -> unsupported
2582 MO_AddIntC {} -> unsupported
2583 MO_SubIntC {} -> unsupported
2584 MO_SubWordC {} -> unsupported
2585 MO_U_Mul2 {} -> unsupported
2586 MO_WriteBarrier -> unsupported
2587 MO_Touch -> unsupported
2588 (MO_Prefetch_Data _ ) -> unsupported
2589 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2590 ++ " not supported here")
2591
2592 -- -----------------------------------------------------------------------------
2593 -- Generating a table-branch
2594
2595 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2596
2597 genSwitch dflags expr targets
2598 | gopt Opt_PIC dflags
2599 = do
2600 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2601 lbl <- getNewLabelNat
2602 dflags <- getDynFlags
2603 let is32bit = target32Bit (targetPlatform dflags)
2604 os = platformOS (targetPlatform dflags)
2605 -- Might want to use .rodata.<function we're in> instead, but as
2606 -- long as it's something unique it'll work out since the
2607 -- references to the jump table are in the appropriate section.
2608 rosection = case os of
2609 -- on Mac OS X/x86_64, put the jump table in the text section to
2610 -- work around a limitation of the linker.
2611 -- ld64 is unable to handle the relocations for
2612 -- .quad L1 - L0
2613 -- if L0 is not preceded by a non-anonymous label in its section.
2614 OSDarwin | not is32bit -> Section Text lbl
2615 _ -> Section ReadOnlyData lbl
2616 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2617 (tableReg,t_code) <- getSomeReg $ dynRef
2618 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2619 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2620
2621 return $ if is32bit || os == OSDarwin
2622 then e_code `appOL` t_code `appOL` toOL [
2623 ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
2624 JMP_TBL (OpReg tableReg) ids rosection lbl
2625 ]
2626 else -- HACK: On x86_64 binutils<2.17 is only able to generate
2627 -- PC32 relocations, hence we only get 32-bit offsets in
2628 -- the jump table. As these offsets are always negative
2629 -- we need to properly sign extend them to 64-bit. This
2630 -- hack should be removed in conjunction with the hack in
2631 -- PprMach.hs/pprDataItem once binutils 2.17 is standard.
2632 e_code `appOL` t_code `appOL` toOL [
2633 MOVSxL II32 op (OpReg reg),
2634 ADD (intFormat (wordWidth dflags)) (OpReg reg)
2635 (OpReg tableReg),
2636 JMP_TBL (OpReg tableReg) ids rosection lbl
2637 ]
2638 | otherwise
2639 = do
2640 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2641 lbl <- getNewLabelNat
2642 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2643 code = e_code `appOL` toOL [
2644 JMP_TBL op ids (Section ReadOnlyData lbl) lbl
2645 ]
2646 return code
2647 where (offset, ids) = switchTargetsToTable targets
2648
2649 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2650 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2651 = Just (createJumpTable dflags ids section lbl)
2652 generateJumpTableForInstr _ _ = Nothing
2653
2654 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2655 -> GenCmmDecl (Alignment, CmmStatics) h g
2656 createJumpTable dflags ids section lbl
2657 = let jumpTable
2658 | gopt Opt_PIC dflags =
2659 let jumpTableEntryRel Nothing
2660 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2661 jumpTableEntryRel (Just blockid)
2662 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2663 where blockLabel = mkAsmTempLabel (getUnique blockid)
2664 in map jumpTableEntryRel ids
2665 | otherwise = map (jumpTableEntry dflags) ids
2666 in CmmData section (1, Statics lbl jumpTable)
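-- Example (sketch): under PIC a table entry for block L is the
-- relative word 'L - lbl' (CmmLabelDiffOff), which is why genSwitch
-- adds the table base back in before jumping; without PIC the table
-- holds absolute block addresses that can be jumped through directly.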
2667
2668 -- -----------------------------------------------------------------------------
2669 -- 'condIntReg' and 'condFltReg': condition codes into registers
2670
2671 -- Turn those condition codes into integers now (when they appear on
2672 -- the right hand side of an assignment).
2673 --
2674 -- (If applicable) Do not fill the delay slots here; you will confuse the
2675 -- register allocator.
2676
2677 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2678
2679 condIntReg cond x y = do
2680 CondCode _ cond cond_code <- condIntCode cond x y
2681 tmp <- getNewRegNat II8
2682 let
2683 code dst = cond_code `appOL` toOL [
2684 SETCC cond (OpReg tmp),
2685 MOVZxL II8 (OpReg tmp) (OpReg dst)
2686 ]
2687 return (Any II32 code)
2688
2689
2690
2691 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2692 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2693 where
2694 condFltReg_x87 = do
2695 CondCode _ cond cond_code <- condFltCode cond x y
2696 tmp <- getNewRegNat II8
2697 let
2698 code dst = cond_code `appOL` toOL [
2699 SETCC cond (OpReg tmp),
2700 MOVZxL II8 (OpReg tmp) (OpReg dst)
2701 ]
2702 return (Any II32 code)
2703
2704 condFltReg_sse2 = do
2705 CondCode _ cond cond_code <- condFltCode cond x y
2706 tmp1 <- getNewRegNat (archWordFormat is32Bit)
2707 tmp2 <- getNewRegNat (archWordFormat is32Bit)
2708 let
2709 -- We have to worry about unordered operands (eg. comparisons
2710 -- against NaN). If the operands are unordered, the comparison
2711 -- sets the parity flag, carry flag and zero flag.
2712 -- All comparisons are supposed to return false for unordered
2713 -- operands except for !=, which returns true.
2714 --
2715 -- Optimisation: we don't have to test the parity flag if we
2716 -- know the test has already excluded the unordered case: eg >
2717 -- and >= test for a zero carry flag, which can only occur for
2718 -- ordered operands.
2719 --
2720 -- ToDo: by reversing comparisons we could avoid testing the
2721 -- parity flag in more cases.
2722
2723 code dst =
2724 cond_code `appOL`
2725 (case cond of
2726 NE -> or_unordered dst
2727 GU -> plain_test dst
2728 GEU -> plain_test dst
2729 _ -> and_ordered dst)
2730
2731 plain_test dst = toOL [
2732 SETCC cond (OpReg tmp1),
2733 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2734 ]
2735 or_unordered dst = toOL [
2736 SETCC cond (OpReg tmp1),
2737 SETCC PARITY (OpReg tmp2),
2738 OR II8 (OpReg tmp1) (OpReg tmp2),
2739 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2740 ]
2741 and_ordered dst = toOL [
2742 SETCC cond (OpReg tmp1),
2743 SETCC NOTPARITY (OpReg tmp2),
2744 AND II8 (OpReg tmp1) (OpReg tmp2),
2745 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2746 ]
2747 return (Any II32 code)
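-- Worked example (sketch) of the NaN handling above: an unordered
-- SSE2 compare sets the parity flag. 'x /= y' must be True for NaN,
-- so or_unordered ORs the parity bit into the result; the 'above'
-- conditions (GU/GEU) require clear carry/zero flags, which an
-- unordered compare always sets, so plain_test suffices; every other
-- condition ANDs with NOTPARITY to reject NaNs.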
2748
2749
2750 -- -----------------------------------------------------------------------------
2751 -- 'trivial*Code': deal with trivial instructions
2752
2753 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2754 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2755 -- Only look for constants on the right hand side, because that's
2756 -- where the generic optimizer will have put them.
2757
2758 -- Similarly, for unary instructions, we don't have to worry about
2759 -- matching an StInt as the argument, because genericOpt will already
2760 -- have handled the constant-folding.
2761
2762
2763 {-
2764 The Rules of the Game are:
2765
2766 * You cannot assume anything about the destination register dst;
2767 it may be anything, including a fixed reg.
2768
2769 * You may compute an operand into a fixed reg, but you may not
2770 subsequently change the contents of that fixed reg. If you
2771 want to do so, first copy the value either to a temporary
2772 or into dst. You are free to modify dst even if it happens
2773 to be a fixed reg -- that's not your problem.
2774
2775 * You cannot assume that a fixed reg will stay live over an
2776 arbitrary computation. The same applies to the dst reg.
2777
2778 * Temporary regs obtained from getNewRegNat are distinct from
2779 each other and from all other regs, and stay live over
2780 arbitrary computations.
2781
2782 --------------------
2783
2784 SDM's version of The Rules:
2785
2786 * If getRegister returns Any, that means it can generate correct
2787 code which places the result in any register, period. Even if that
2788 register happens to be read during the computation.
2789
2790 Corollary #1: this means that if you are generating code for an
2791 operation with two arbitrary operands, you cannot assign the result
2792 of the first operand into the destination register before computing
2793 the second operand. The second operand might require the old value
2794 of the destination register.
2795
2796 Corollary #2: A function might be able to generate more efficient
2797 code if it knows the destination register is a new temporary (and
2798 therefore not read by any of the sub-computations).
2799
2800 * If getRegister returns Any, then the code it generates may modify only:
2801 (a) fresh temporaries
2802 (b) the destination register
2803 (c) known registers (eg. %ecx is used by shifts)
2804 In particular, it may *not* modify global registers, unless the global
2805 register happens to be the destination register.
2806 -}
2807
2808 trivialCode :: Width -> (Operand -> Operand -> Instr)
2809 -> Maybe (Operand -> Operand -> Instr)
2810 -> CmmExpr -> CmmExpr -> NatM Register
2811 trivialCode width instr m a b
2812 = do is32Bit <- is32BitPlatform
2813 trivialCode' is32Bit width instr m a b
2814
2815 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
2816 -> Maybe (Operand -> Operand -> Instr)
2817 -> CmmExpr -> CmmExpr -> NatM Register
2818 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
2819 | is32BitLit is32Bit lit_a = do
2820 b_code <- getAnyReg b
2821 let
2822 code dst
2823 = b_code dst `snocOL`
2824 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2825 return (Any (intFormat width) code)
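-- Example (sketch): for an expression like 'lit + e' with a
-- commutative instruction, the clause above computes e straight into
-- dst and then emits the reversed form, e.g. 'ADD $lit, dst', instead
-- of materialising the literal in a register first.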
2826
2827 trivialCode' _ width instr _ a b
2828 = genTrivialCode (intFormat width) instr a b
2829
2830 -- This is re-used for floating pt instructions too.
2831 genTrivialCode :: Format -> (Operand -> Operand -> Instr)
2832 -> CmmExpr -> CmmExpr -> NatM Register
2833 genTrivialCode rep instr a b = do
2834 (b_op, b_code) <- getNonClobberedOperand b
2835 a_code <- getAnyReg a
2836 tmp <- getNewRegNat rep
2837 let
2838 -- We want the value of b to stay alive across the computation of a.
2839 -- But, we want to calculate a straight into the destination register,
2840 -- because the instruction only has two operands (dst := dst `op` src).
2841 -- The troublesome case is when the result of b is in the same register
2842 -- as the destination reg. In this case, we have to save b in a
2843 -- new temporary across the computation of a.
2844 code dst
2845 | dst `regClashesWithOp` b_op =
2846 b_code `appOL`
2847 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2848 a_code dst `snocOL`
2849 instr (OpReg tmp) (OpReg dst)
2850 | otherwise =
2851 b_code `appOL`
2852 a_code dst `snocOL`
2853 instr b_op (OpReg dst)
2854 return (Any rep code)
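-- Example (sketch) of the clash case: generating 'x - y' when y's
-- operand happens to name the destination register. Computing x into
-- dst first would overwrite y, so y is parked in the fresh 'tmp'
-- before 'a_code dst' runs.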
2855
2856 regClashesWithOp :: Reg -> Operand -> Bool
2857 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2858 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2859 _ `regClashesWithOp` _ = False
2860
2861 -----------
2862
2863 trivialUCode :: Format -> (Operand -> Instr)
2864 -> CmmExpr -> NatM Register
2865 trivialUCode rep instr x = do
2866 x_code <- getAnyReg x
2867 let
2868 code dst =
2869 x_code dst `snocOL`
2870 instr (OpReg dst)
2871 return (Any rep code)
2872
2873 -----------
2874
2875 trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr)
2876 -> CmmExpr -> CmmExpr -> NatM Register
2877 trivialFCode_x87 instr x y = do
2878 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2879 (y_reg, y_code) <- getSomeReg y
2880 let
2881 format = FF80 -- always, on x87
2882 code dst =
2883 x_code `appOL`
2884 y_code `snocOL`
2885 instr format x_reg y_reg dst
2886 return (Any format code)
2887
2888 trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
2889 -> CmmExpr -> CmmExpr -> NatM Register
2890 trivialFCode_sse2 pk instr x y
2891 = genTrivialCode format (instr format) x y
2892 where format = floatFormat pk
2893
2894
2895 trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2896 trivialUFCode format instr x = do
2897 (x_reg, x_code) <- getSomeReg x
2898 let
2899 code dst =
2900 x_code `snocOL`
2901 instr x_reg dst
2902 return (Any format code)
2903
2904
2905 --------------------------------------------------------------------------------
2906 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2907 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2908 where
2909 coerce_x87 = do
2910 (x_reg, x_code) <- getSomeReg x
2911 let
2912 opc = case to of W32 -> GITOF; W64 -> GITOD;
2913 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2914 ++ show n ++ ")"
2915 code dst = x_code `snocOL` opc x_reg dst
2916 -- ToDo: works for non-II32 reps?
2917 return (Any FF80 code)
2918
2919 coerce_sse2 = do
2920 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2921 let
2922 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
2923 n -> panic $ "coerceInt2FP.sse: unhandled width ("
2924 ++ show n ++ ")"
2925 code dst = x_code `snocOL` opc (intFormat from) x_op dst
2926 return (Any (floatFormat to) code)
2927 -- works even if the destination rep is <II32
2928
2929 --------------------------------------------------------------------------------
2930 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2931 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
2932 where
2933 coerceFP2Int_x87 = do
2934 (x_reg, x_code) <- getSomeReg x
2935 let
2936 opc = case from of W32 -> GFTOI; W64 -> GDTOI
2937 n -> panic $ "coerceFP2Int.x87: unhandled width ("
2938 ++ show n ++ ")"
2939 code dst = x_code `snocOL` opc x_reg dst
2940 -- ToDo: works for non-II32 reps?
2941 return (Any (intFormat to) code)
2942
2943 coerceFP2Int_sse2 = do
2944 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2945 let
2946 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
2947                    n -> panic $ "coerceFP2Int.sse: unhandled width ("
2948 ++ show n ++ ")"
2949 code dst = x_code `snocOL` opc (intFormat to) x_op dst
2950 return (Any (intFormat to) code)
2951 -- works even if the destination rep is <II32
2952
2953
2954 --------------------------------------------------------------------------------
2955 coerceFP2FP :: Width -> CmmExpr -> NatM Register
2956 coerceFP2FP to x = do
2957 use_sse2 <- sse2Enabled
2958 (x_reg, x_code) <- getSomeReg x
2959 let
2960 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
2961 n -> panic $ "coerceFP2FP: unhandled width ("
2962 ++ show n ++ ")"
2963 | otherwise = GDTOF
2964 code dst = x_code `snocOL` opc x_reg dst
2965 return (Any (if use_sse2 then floatFormat to else FF80) code)
2966
2967 --------------------------------------------------------------------------------
2968
2969 sse2NegCode :: Width -> CmmExpr -> NatM Register
2970 sse2NegCode w x = do
2971 let fmt = floatFormat w
2972 x_code <- getAnyReg x
2973 -- This is how gcc does it, so it can't be that bad:
2974 let
2975 const | FF32 <- fmt = CmmInt 0x80000000 W32
2976 | otherwise = CmmInt 0x8000000000000000 W64
2977 Amode amode amode_code <- memConstant (widthInBytes w) const
2978 tmp <- getNewRegNat fmt
2979 let
2980 code dst = x_code dst `appOL` amode_code `appOL` toOL [
2981 MOV fmt (OpAddr amode) (OpReg tmp),
2982 XOR fmt (OpReg tmp) (OpReg dst)
2983 ]
2984 --
2985 return (Any fmt code)
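-- Reference model of the sign-bit trick above (a sketch; assumes
-- castDoubleToWord64/castWord64ToDouble from GHC.Float, available
-- since base-4.10):
{-
negateViaXor :: Double -> Double
negateViaXor d = castWord64ToDouble
                   (castDoubleToWord64 d `xor` 0x8000000000000000)
-}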
2986
2987 isVecExpr :: CmmExpr -> Bool
2988 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
2989 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
2990 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
2991 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
2992 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
2993 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
2994 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
2995 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
2996 isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
2997 isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
2998 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
2999 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
3000 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
3001 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
3002 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
3003 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
3004 isVecExpr _ = False
3005
3006 needLlvm :: NatM a
3007 needLlvm =
3008 sorry $ unlines ["The native code generator does not support vector"
3009 ,"instructions. Please use -fllvm."]