{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}

-- The default iteration limit is a bit too low for the definitions
-- in this module.
#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
#endif

-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------

-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.

module X86.CodeGen (
        cmmTopCodeGen,
        generateJumpTableForInstr,
        extractUnwindPoints,
        InstrBlock
)

where

#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"

-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Debug            ( DebugBlock(..), UnwindPoint(..), UnwindTable
                        , UnwindExpr(UwReg), toUnwindExpr )
import Instruction
import PIC
import NCGMonad
import Format
import Reg
import Platform

-- Our intermediate code:
import BasicTypes
import BlockId
import Module           ( primUnitId )
import PprCmm           ()
import CmmUtils
import CmmSwitch
import Cmm
import Hoopl
import CLabel
import CoreSyn          ( Tickish(..) )
import SrcLoc           ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )

-- The rest:
import ForeignCall      ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import DynFlags
import Util

import Control.Monad
import Data.Bits
import Data.Foldable    (fold)
import Data.Int
import Data.Maybe
import Data.Word

import qualified Data.Map as M

is32BitPlatform :: NatM Bool
is32BitPlatform = do
    dflags <- getDynFlags
    return $ target32Bit (targetPlatform dflags)

sse2Enabled :: NatM Bool
sse2Enabled = do
    dflags <- getDynFlags
    return (isSse2Enabled dflags)

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
    dflags <- getDynFlags
    return (isSse4_2Enabled dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
    b <- sse2Enabled
    if b then sse2 else x87

cmmTopCodeGen
        :: RawCmmDecl
        -> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]

cmmTopCodeGen (CmmProc info lab live graph) = do
    let blocks = toBlockListEntryFirst graph
    (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
    picBaseMb <- getPicBaseMaybeNat
    dflags <- getDynFlags
    let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
        tops = proc : concat statics
        os   = platformOS $ targetPlatform dflags

    case picBaseMb of
        Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
        Nothing      -> return tops

cmmTopCodeGen (CmmData sec dat) = do
  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic


basicBlockCodeGen
        :: CmmBlock
        -> NatM ( [NatBasicBlock Instr]
                , [NatCmmDecl (Alignment, CmmStatics) Instr])

basicBlockCodeGen block = do
  let (_, nodes, tail) = blockSplit block
      id = entryLabel block
      stmts = blockToList nodes
  -- Generate location directive
  dbg <- getDebugBlock (entryLabel block)
  loc_instrs <- case dblSourceTick =<< dbg of
    Just (SourceNote span name)
      -> do fileId <- getFileId (srcSpanFile span)
            let line = srcSpanStartLine span; col = srcSpanStartCol span
            return $ unitOL $ LOCATION fileId line col name
    _ -> return nilOL
  mid_instrs <- stmtsToInstrs stmts
  tail_instrs <- stmtToInstrs tail
  let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
  instrs' <- fold <$> traverse addSpUnwindings instrs
  -- code generation may introduce new basic block boundaries, which
  -- are indicated by the NEWBLOCK instruction. We must split up the
  -- instruction stream into basic blocks again. Also, we extract
  -- LDATAs here too.
  let
      (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'

      mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
        = ([], BasicBlock id instrs : blocks, statics)
      mkBlocks (LDATA sec dat) (instrs,blocks,statics)
        = (instrs, blocks, CmmData sec dat:statics)
      mkBlocks instr (instrs,blocks,statics)
        = (instr:instrs, blocks, statics)
  return (BasicBlock id top : other_blocks, statics)

-- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
-- in the @sp@ register. See Note [What is this unwinding business?] in Debug
-- for details.
addSpUnwindings :: Instr -> NatM (OrdList Instr)
addSpUnwindings instr@(DELTA d) = do
    dflags <- getDynFlags
    if debugLevel dflags >= 1
        then do lbl <- newBlockId
                let unwind = M.singleton MachSp (UwReg MachSp $ negate d)
                return $ toOL [ instr, UNWIND lbl unwind ]
        else return (unitOL instr)
addSpUnwindings instr = return $ unitOL instr
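
-- For illustration (the numbers are made up): given a stack adjustment
-- recorded as @DELTA (-16)@, and compiling with a debug level >= 1, the
-- pass above emits
--
--      DELTA (-16)
--      UNWIND lbl [ MachSp -> UwReg MachSp 16 ]
--
-- i.e. from this point on the previous Sp is recoverable as Sp + 16.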

stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
   = do instrss <- mapM stmtToInstrs stmts
        return (concatOL instrss)


stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
  dflags <- getDynFlags
  is32Bit <- is32BitPlatform
  case stmt of
    CmmComment s   -> return (unitOL (COMMENT s))
    CmmTick {}     -> return nilOL

    CmmUnwind regs -> do
      let to_unwind_entry :: (GlobalReg, CmmExpr) -> UnwindTable
          to_unwind_entry (reg, expr) = M.singleton reg (toUnwindExpr expr)
      case foldMap to_unwind_entry regs of
        tbl | M.null tbl -> return nilOL
            | otherwise  -> do
                lbl <- newBlockId
                return $ unitOL $ UNWIND lbl tbl

    CmmAssign reg src
      | isFloatType ty         -> assignReg_FltCode format reg src
      | is32Bit && isWord64 ty -> assignReg_I64Code      reg src
      | otherwise              -> assignReg_IntCode format reg src
        where ty = cmmRegType dflags reg
              format = cmmTypeFormat ty

    CmmStore addr src
      | isFloatType ty         -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code      addr src
      | otherwise              -> assignMem_IntCode format addr src
        where ty = cmmExprType dflags src
              format = cmmTypeFormat ty

    CmmUnsafeForeignCall target result_regs args
       -> genCCall dflags is32Bit target result_regs args

    CmmBranch id                   -> genBranch id
    CmmCondBranch arg true false _ -> do
      b1 <- genCondJump true arg
      b2 <- genBranch false
      return (b1 `appOL` b2)
    CmmSwitch arg ids -> do dflags <- getDynFlags
                            genSwitch dflags arg ids
    CmmCall { cml_target = arg
            , cml_args_regs = gregs } -> do
        dflags <- getDynFlags
        genJump arg (jumpRegs dflags gregs)
    _ ->
      panic "stmtToInstrs: statement should have been cps'd away"


jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
    where platform = targetPlatform dflags

--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
        = OrdList Instr


-- | Condition codes passed up the tree.
--
data CondCode
        = CondCode Bool Cond InstrBlock


-- | a.k.a "Register64"
-- Reg is the lower 32-bit temporary which contains the result.
-- Use getHiVRegFromLo to find the other VRegUnique.
--
-- The rule of this simplified insn selection game is therefore that
-- the returned Reg may be modified.
--
data ChildCode64
   = ChildCode64
        InstrBlock
        Reg


-- | Registers passed up the tree.  If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
--
data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)
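
-- A rough illustration: code for a literal usually comes back as
-- @Any fmt (\dst -> ...)@, letting the parent choose the destination,
-- whereas reading a global that is pinned to a real machine register
-- comes back as @Fixed fmt reg nilOL@.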


swizzleRegisterRep :: Register -> Format -> Register
swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
swizzleRegisterRep (Any _ codefn)     format = Any format codefn


-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg

getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
  = let fmt = cmmTypeFormat pk in
    if isFloatFormat fmt && not use_sse2
       then RegVirtual (mkVirtualReg u FF80)
       else RegVirtual (mkVirtualReg u fmt)

getRegisterReg platform _ (CmmGlobal mid)
  = case globalRegMaybe platform mid of
        Just reg -> RegReal $ reg
        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
        -- By this stage, the only MagicIds remaining should be the
        -- ones which map to a real machine register on this
        -- platform.  Hence ...


-- | Memory addressing modes passed up the tree.
data Amode
        = Amode AddrMode InstrBlock

{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}


-- | Check whether an integer will fit in 32 bits.
-- A CmmInt is intended to be truncated to the appropriate
-- number of bits, so here we truncate it to Int64.  This is
-- important because e.g. -1 as a CmmInt might be either
-- -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
  where i64 = fromIntegral i :: Int64
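
-- A worked example: the W64 bit pattern 0xffffffffffffffff truncates to
-- (-1 :: Int64) and is accepted, while 0x80000000 stays 2147483648 and is
-- rejected: sign-extended as a 32-bit immediate it would denote -2147483648.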


-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
    where blockLabel = mkAsmTempLabel (getUnique blockid)


-- -----------------------------------------------------------------------------
-- General things for putting together code sequences

-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
  where width = typeWidth (cmmRegType dflags reg)

-- | The dual to getAnyReg: compute an expression into a register, but
--   we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed _ reg code ->
        return (reg, code)


assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
  Amode addr addr_code <- getAmode addrTree
  ChildCode64 vcode rlo <- iselExpr64 valueTree
  let
        rhi = getHiVRegFromLo rlo

        -- Little-endian store
        mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
        mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
  return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)


assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
   let
         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
         r_dst_hi = getHiVRegFromLo r_dst_lo
         r_src_hi = getHiVRegFromLo r_src_lo
         mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
         mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
   return (
        vcode `snocOL` mov_lo `snocOL` mov_hi
     )

assignReg_I64Code _ _
   = panic "assignReg_I64Code(i386): invalid lvalue"


iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
  (rlo,rhi) <- getNewRegPairNat II32
  let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        code = toOL [
                MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
                MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
                ]
  return (ChildCode64 code rlo)

iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
   Amode addr addr_code <- getAmode addrTree
   (rlo,rhi) <- getNewRegPairNat II32
   let
        mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
        mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
   return (
            ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
                        rlo
     )

iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))

-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r = fromIntegral (fromIntegral i :: Word32)
        q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
        r1hi = getHiVRegFromLo r1lo
        code =  code1 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       ADD II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       ADC II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
   ChildCode64 code1 r1lo <- iselExpr64 e1
   ChildCode64 code2 r2lo <- iselExpr64 e2
   (rlo,rhi) <- getNewRegPairNat II32
   let
        r1hi = getHiVRegFromLo r1lo
        r2hi = getHiVRegFromLo r2lo
        code =  code1 `appOL`
                code2 `appOL`
                toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
                       SUB II32 (OpReg r2lo) (OpReg rlo),
                       MOV II32 (OpReg r1hi) (OpReg rhi),
                       SBB II32 (OpReg r2hi) (OpReg rhi) ]
   return (ChildCode64 code rlo)

iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
     fn <- getAnyReg expr
     r_dst_lo <- getNewRegNat II32
     let r_dst_hi = getHiVRegFromLo r_dst_lo
         code = fn r_dst_lo
     return (
             ChildCode64 (code `snocOL`
                          MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
                         r_dst_lo
            )

iselExpr64 expr
   = pprPanic "iselExpr64(i386)" (ppr expr)


--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
                   is32Bit <- is32BitPlatform
                   getRegister' dflags is32Bit e

getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register

getRegister' dflags is32Bit (CmmReg reg)
  = case reg of
        CmmGlobal PicBaseReg
         | is32Bit ->
            -- on x86_64, we have %rip for PicBaseReg, but it's not
            -- a full-featured register, it can only be used for
            -- rip-relative addressing.
            do reg' <- getPicBaseNat (archWordFormat is32Bit)
               return (Fixed (archWordFormat is32Bit) reg' nilOL)
        _ ->
            do use_sse2 <- sse2Enabled
               let
                 fmt = cmmTypeFormat (cmmRegType dflags reg)
                 format | not use_sse2 && isFloatFormat fmt = FF80
                        | otherwise                         = fmt
               --
               let platform = targetPlatform dflags
               return (Fixed format
                             (getRegisterReg platform use_sse2 reg)
                             nilOL)


getRegister' dflags is32Bit (CmmRegOff r n)
  = getRegister' dflags is32Bit $ mangleIndexTree dflags r n

-- for 32-bit architectures, support some 64 -> 32 bit conversions:
--   TO_W_(x), TO_W_(x >> 32)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
                       [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
                       [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 (getHiVRegFromLo rlo) code

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 rlo code

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
 | is32Bit = do
   ChildCode64 code rlo <- iselExpr64 x
   return $ Fixed II32 rlo code

getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
    if_sse2 float_const_sse2 float_const_x87
  where
    float_const_sse2
      | f == 0.0 = do
          let
            format = floatFormat w
            code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
            -- I don't know why there are xorpd, xorps, and pxor instructions.
            -- They all appear to do the same thing --SDM
          return (Any format code)

      | otherwise = do
          Amode addr code <- memConstant (widthInBytes w) lit
          loadFloatAmode True w addr code

    float_const_x87 = case w of
        W64
          | f == 0.0 ->
            let code dst = unitOL (GLDZ dst)
            in  return (Any FF80 code)

          | f == 1.0 ->
            let code dst = unitOL (GLD1 dst)
            in  return (Any FF80 code)

        _otherwise -> do
            Amode addr code <- memConstant (widthInBytes w) lit
            loadFloatAmode False w addr code

-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II32 code)

getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II32 code)

-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II8) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVZxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II16) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
 | not is32Bit = do
  code <- intLoadCode (MOVSxL II32) addr
  return (Any II64 code)

getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                                CmmLit displacement])
 | not is32Bit = do
      return $ Any II64 (\dst -> unitOL $
        LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))

getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
    sse2 <- sse2Enabled
    case mop of
      MO_F_Neg w
         | sse2      -> sse2NegCode w x
         | otherwise -> trivialUFCode FF80 (GNEG FF80) x

      MO_S_Neg w -> triv_ucode NEGI (intFormat w)
      MO_Not w   -> triv_ucode NOT  (intFormat w)

      -- Nop conversions
      MO_UU_Conv W32 W8  -> toI8Reg  W32 x
      MO_SS_Conv W32 W8  -> toI8Reg  W32 x
      MO_UU_Conv W16 W8  -> toI8Reg  W16 x
      MO_SS_Conv W16 W8  -> toI8Reg  W16 x
      MO_UU_Conv W32 W16 -> toI16Reg W32 x
      MO_SS_Conv W32 W16 -> toI16Reg W32 x

      MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
      MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
      MO_UU_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x
      MO_SS_Conv W64 W8  | not is32Bit -> toI8Reg  W64 x

      MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
      MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x

      -- widenings
      MO_UU_Conv W8  W32 -> integerExtend W8  W32 MOVZxL x
      MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
      MO_UU_Conv W8  W16 -> integerExtend W8  W16 MOVZxL x

      MO_SS_Conv W8  W32 -> integerExtend W8  W32 MOVSxL x
      MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
      MO_SS_Conv W8  W16 -> integerExtend W8  W16 MOVSxL x

      MO_UU_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVZxL x
      MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
      MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
      MO_SS_Conv W8  W64 | not is32Bit -> integerExtend W8  W64 MOVSxL x
      MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
      MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
        -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
        -- However, we don't want the register allocator to throw it
        -- away as an unnecessary reg-to-reg move, so we keep it in
        -- the form of a movzl and print it as a movl later.

      MO_FF_Conv W32 W64
        | sse2      -> coerceFP2FP W64 x
        | otherwise -> conversionNop FF80 x

      MO_FF_Conv W64 W32 -> coerceFP2FP W32 x

      MO_FS_Conv from to -> coerceFP2Int from to x
      MO_SF_Conv from to -> coerceInt2FP from to x

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VU_Quot {}    -> needLlvm
      MO_VU_Rem {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister" (pprMachOp mop)
  where
    triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
    triv_ucode instr format = trivialUCode format (instr format) x

    -- signed or unsigned extension.
    integerExtend :: Width -> Width
                  -> (Format -> Operand -> Operand -> Instr)
                  -> CmmExpr -> NatM Register
    integerExtend from to instr expr = do
        (reg,e_code) <- if from == W8 then getByteReg expr
                                      else getSomeReg expr
        let
            code dst =
              e_code `snocOL`
              instr (intFormat from) (OpReg reg) (OpReg dst)
        return (Any (intFormat to) code)

    toI8Reg :: Width -> CmmExpr -> NatM Register
    toI8Reg new_rep expr
        = do codefn <- getAnyReg expr
             return (Any (intFormat new_rep) codefn)
        -- HACK: use getAnyReg to get a byte-addressable register.
        -- If the source was a Fixed register, this will add the
        -- mov instruction to put it into the desired destination.
        -- We're assuming that the destination won't be a fixed
        -- non-byte-addressable register; it won't be, because all
        -- fixed registers are word-sized.

    toI16Reg = toI8Reg -- for now

    conversionNop :: Format -> CmmExpr -> NatM Register
    conversionNop new_format expr
        = do e_code <- getRegister' dflags is32Bit expr
             return (swizzleRegisterRep e_code new_format)


getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
  sse2 <- sse2Enabled
  case mop of
      MO_F_Eq _ -> condFltReg is32Bit EQQ x y
      MO_F_Ne _ -> condFltReg is32Bit NE  x y
      MO_F_Gt _ -> condFltReg is32Bit GTT x y
      MO_F_Ge _ -> condFltReg is32Bit GE  x y
      MO_F_Lt _ -> condFltReg is32Bit LTT x y
      MO_F_Le _ -> condFltReg is32Bit LE  x y

      MO_Eq _   -> condIntReg EQQ x y
      MO_Ne _   -> condIntReg NE  x y

      MO_S_Gt _ -> condIntReg GTT x y
      MO_S_Ge _ -> condIntReg GE  x y
      MO_S_Lt _ -> condIntReg LTT x y
      MO_S_Le _ -> condIntReg LE  x y

      MO_U_Gt _ -> condIntReg GU  x y
      MO_U_Ge _ -> condIntReg GEU x y
      MO_U_Lt _ -> condIntReg LU  x y
      MO_U_Le _ -> condIntReg LEU x y

      MO_F_Add w  | sse2      -> trivialFCode_sse2 w ADD  x y
                  | otherwise -> trivialFCode_x87    GADD x y
      MO_F_Sub w  | sse2      -> trivialFCode_sse2 w SUB  x y
                  | otherwise -> trivialFCode_x87    GSUB x y
      MO_F_Quot w | sse2      -> trivialFCode_sse2 w FDIV x y
                  | otherwise -> trivialFCode_x87    GDIV x y
      MO_F_Mul w  | sse2      -> trivialFCode_sse2 w MUL  x y
                  | otherwise -> trivialFCode_x87    GMUL x y

      MO_Add rep -> add_code rep x y
      MO_Sub rep -> sub_code rep x y

      MO_S_Quot rep -> div_code rep True  True  x y
      MO_S_Rem  rep -> div_code rep True  False x y
      MO_U_Quot rep -> div_code rep False True  x y
      MO_U_Rem  rep -> div_code rep False False x y

      MO_S_MulMayOflo rep -> imulMayOflo rep x y

      MO_Mul rep -> triv_op rep IMUL
      MO_And rep -> triv_op rep AND
      MO_Or  rep -> triv_op rep OR
      MO_Xor rep -> triv_op rep XOR

      {- Shift ops on x86 constrain the shift amount: it must be an
         immediate, %cl, or the constant 1
          => trivialCode is not restrictive enough (sigh.)
      -}
      MO_Shl rep   -> shift_code rep SHL x y {-False-}
      MO_U_Shr rep -> shift_code rep SHR x y {-False-}
      MO_S_Shr rep -> shift_code rep SAR x y {-False-}

      MO_V_Insert {}   -> needLlvm
      MO_V_Extract {}  -> needLlvm
      MO_V_Add {}      -> needLlvm
      MO_V_Sub {}      -> needLlvm
      MO_V_Mul {}      -> needLlvm
      MO_VS_Quot {}    -> needLlvm
      MO_VS_Rem {}     -> needLlvm
      MO_VS_Neg {}     -> needLlvm
      MO_VF_Insert {}  -> needLlvm
      MO_VF_Extract {} -> needLlvm
      MO_VF_Add {}     -> needLlvm
      MO_VF_Sub {}     -> needLlvm
      MO_VF_Mul {}     -> needLlvm
      MO_VF_Quot {}    -> needLlvm
      MO_VF_Neg {}     -> needLlvm

      _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
  where
    --------------------
    triv_op width instr = trivialCode width op (Just op) x y
                        where op = instr (intFormat width)

    imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
    imulMayOflo rep a b = do
         (a_reg, a_code) <- getNonClobberedReg a
         b_code <- getAnyReg b
         let
             shift_amt = case rep of
                           W32 -> 31
                           W64 -> 63
                           _   -> panic "shift_amt"

             format = intFormat rep
             code = a_code `appOL` b_code eax `appOL`
                        toOL [
                           IMUL2 format (OpReg a_reg),   -- result in %edx:%eax
                           SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
                                -- sign extend lower part
                           SUB format (OpReg edx) (OpReg eax)
                                -- compare against upper
                                -- eax==0 if high part == sign extended low part
                        ]
         return (Fixed format eax code)
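
    -- A worked (hypothetical) W32 case: for a = 3, b = 2^30 the IMUL2
    -- leaves 0xC0000000 in %eax and 0 in %edx.  The SAR sign-extends the
    -- low word, turning %eax into -1, which differs from the high word in
    -- %edx, so the SUB leaves %eax non-zero: the multiply may overflow.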

    --------------------
    shift_code :: Width
               -> (Format -> Operand -> Operand -> Instr)
               -> CmmExpr
               -> CmmExpr
               -> NatM Register

    {- Case1: shift length as immediate -}
    shift_code width instr x (CmmLit lit) = do
        x_code <- getAnyReg x
        let
            format = intFormat width
            code dst
               = x_code dst `snocOL`
                 instr format (OpImm (litToImm lit)) (OpReg dst)
        return (Any format code)

    {- Case2: shift length is complex (non-immediate)
      * y must go in %ecx.
      * we cannot do y first *and* put its result in %ecx, because
        %ecx might be clobbered by x.
      * if we do y second, then x cannot be
        in a clobbered reg.  Also, we cannot clobber x's reg
        with the instruction itself.
      * so we can either:
        - do y first, put its result in a fresh tmp, then copy it to %ecx later
        - do y second and put its result into %ecx.  x gets placed in a fresh
          tmp.  This is likely to be better, because the reg alloc can
          eliminate this reg->reg move here (it won't eliminate the other one,
          because the move is into the fixed %ecx).
    -}
    shift_code width instr x y{-amount-} = do
        x_code <- getAnyReg x
        let format = intFormat width
        tmp <- getNewRegNat format
        y_code <- getAnyReg y
        let
            code = x_code tmp `appOL`
                   y_code ecx `snocOL`
                   instr format (OpReg ecx) (OpReg tmp)
        return (Fixed format tmp code)

    --------------------
    add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    add_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger y = add_int rep x y
    add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
      where format = intFormat rep
    -- TODO: There are other interesting patterns we want to replace
    --     with a LEA, e.g. `(x + offset) + (y << shift)`.

    --------------------
    sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
    sub_code rep x (CmmLit (CmmInt y _))
        | is32BitInteger (-y) = add_int rep x (-y)
    sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y

    -- our three-operand add instruction:
    add_int width x y = do
        (x_reg, x_code) <- getSomeReg x
        let
            format = intFormat width
            imm = ImmInt (fromInteger y)
            code dst
               = x_code `snocOL`
                 LEA format
                     (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
                     (OpReg dst)
        --
        return (Any format code)

    ----------------------
    div_code width signed quotient x y = do
        (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
        x_code <- getAnyReg x
        let
            format = intFormat width
            widen | signed    = CLTD format
                  | otherwise = XOR format (OpReg edx) (OpReg edx)

            instr | signed    = IDIV
                  | otherwise = DIV

            code = y_code `appOL`
                   x_code eax `appOL`
                   toOL [widen, instr format y_op]

            result | quotient  = eax
                   | otherwise = edx

        return (Fixed format result code)


getRegister' _ _ (CmmLoad mem pk)
  | isFloatType pk
  = do
      Amode addr mem_code <- getAmode mem
      use_sse2 <- sse2Enabled
      loadFloatAmode use_sse2 (typeWidth pk) addr mem_code

getRegister' _ is32Bit (CmmLoad mem pk)
  | is32Bit && not (isWord64 pk)
  = do
      code <- intLoadCode instr mem
      return (Any format code)
  where
    width = typeWidth pk
    format = intFormat width
    instr = case width of
                W8     -> MOVZxL II8
                _other -> MOV format
        -- We always zero-extend 8-bit loads, if we
        -- can't think of anything better.  This is because
        -- we can't guarantee access to an 8-bit variant of every register
        -- (esi and edi don't have 8-bit variants), so to make things
        -- simpler we do our 8-bit arithmetic with full 32-bit registers.

-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
  | not is32Bit
  = do
      code <- intLoadCode (MOV format) mem
      return (Any format code)
  where format = intFormat $ typeWidth pk

getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
  = let
        format = intFormat width

        -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
        format1 = if is32Bit then format
                  else case format of
                        II64 -> II32
                        _    -> format
        code dst
           = unitOL (XOR format1 (OpReg dst) (OpReg dst))
    in
        return (Any format code)

-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
  | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
  = let
        imm = litToImm lit
        code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
    in
        return (Any II64 code)
  where
    isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
    isBigLit _ = False
        -- note1: not the same as (not.is32BitLit), because that checks for
        -- signed literals that fit in 32 bits, but we want unsigned
        -- literals here.
        -- note2: all labels are small, because we're assuming the
        -- small memory model (see gcc docs, -mcmodel=small).

getRegister' dflags _ (CmmLit lit)
  = do let format = cmmTypeFormat (cmmLitType dflags lit)
           imm = litToImm lit
           code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
       return (Any format code)

getRegister' _ _ other
  | isVecExpr other = needLlvm
  | otherwise       = pprPanic "getRegister(x86)" (ppr other)


intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
            -> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
  Amode src mem_code <- getAmode mem
  return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))

-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
  r <- getRegister expr
  anyReg r

anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code)          = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)

-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
  is32Bit <- is32BitPlatform
  if is32Bit
      then do r <- getRegister expr
              case r of
                Any rep code -> do
                    tmp <- getNewRegNat rep
                    return (tmp, code tmp)
                Fixed rep reg code
                    | isVirtualReg reg -> return (reg,code)
                    | otherwise -> do
                        tmp <- getNewRegNat rep
                        return (tmp, code `snocOL` reg2reg rep reg tmp)
                    -- ToDo: could optimise slightly by checking for
                    -- byte-addressable real registers, but that will
                    -- happen very rarely if at all.
      else getSomeReg expr -- all regs are byte-addressable on x86_64

-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do
  dflags <- getDynFlags
  r <- getRegister expr
  case r of
    Any rep code -> do
        tmp <- getNewRegNat rep
        return (tmp, code tmp)
    Fixed rep reg code
        -- only certain regs can be clobbered
        | reg `elem` instrClobberedRegs (targetPlatform dflags)
        -> do
            tmp <- getNewRegNat rep
            return (tmp, code `snocOL` reg2reg rep reg tmp)
        | otherwise ->
            return (reg, code)

reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst
   | format == FF80 = GMOV src dst
   | otherwise      = MOV format (OpReg src) (OpReg dst)


--------------------------------------------------------------------------------
getAmode :: CmmExpr -> NatM Amode
getAmode e = do is32Bit <- is32BitPlatform
                getAmode' is32Bit e

getAmode' :: Bool -> CmmExpr -> NatM Amode
getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
                                 getAmode $ mangleIndexTree dflags r n

getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
                                           CmmLit displacement])
 | not is32Bit
    = return $ Amode (ripRel (litToImm displacement)) nilOL


-- This is all just ridiculous, since it carefully undoes
-- what mangleIndexTree has just done.
getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = ImmInt (-(fromInteger i))
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
  | is32BitLit is32Bit lit
  -- ASSERT(rep == II32)???
  = do (x_reg, x_code) <- getSomeReg x
       let off = litToImm lit
       return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)

-- Turn (lit1 << n  + lit2) into  (lit2 + lit1 << n) so it will be
-- recognised by the next rule.
getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
                                           b@(CmmLit _)])
  = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])

-- Matches: (x + offset) + (y << shift)
getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
                                   CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)

getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
                                        [y, CmmLit (CmmInt shift _)]])
  | shift == 0 || shift == 1 || shift == 2 || shift == 3
  = x86_complex_amode x y shift 0

getAmode' _ (CmmMachOp (MO_Add _)
                 [x, CmmMachOp (MO_Add _)
                      [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
                       CmmLit (CmmInt offset _)]])
  | (shift == 0 || shift == 1 || shift == 2 || shift == 3)
    && is32BitInteger offset
  = x86_complex_amode x y shift offset

getAmode' _ (CmmMachOp (MO_Add _) [x,y])
  = x86_complex_amode x y 0 0

getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
  = return (Amode (ImmAddr (litToImm lit) 0) nilOL)

getAmode' _ expr = do
  (reg,code) <- getSomeReg expr
  return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)

-- | Like 'getAmode', but on 32-bit use simple register addressing
-- (i.e. no index register). This stops us from running out of
-- registers on x86 when using instructions such as cmpxchg, which can
-- use up to three virtual registers and one fixed register.
getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
getSimpleAmode dflags is32Bit addr
  | is32Bit = do
      addr_code <- getAnyReg addr
      addr_r <- getNewRegNat (intFormat (wordWidth dflags))
      let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
      return $! Amode amode (addr_code addr_r)
  | otherwise = getAmode addr

x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
x86_complex_amode base index shift offset
  = do (x_reg, x_code) <- getNonClobberedReg base
        -- x must be in a temp, because it has to stay live over y_code
        -- we could compare x_reg and y_reg and do something better here...
       (y_reg, y_code) <- getSomeReg index
       let
           code = x_code `appOL` y_code
           scale = case shift of
                     0 -> 1
                     1 -> 2
                     2 -> 4
                     3 -> 8
                     n -> panic $ "x86_complex_amode: unhandled shift! ("
                                  ++ show n ++ ")"
       return (Amode (AddrBaseIndex (EABaseReg x_reg)
                                    (EAIndex y_reg scale)
                                    (ImmInt (fromIntegral offset)))
                     code)
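
-- For example (hypothetical values): indexing a word array as
-- @(x + 8) + (i << 3)@ on x86_64 is matched above and becomes the single
-- addressing mode @8(x_reg, i_reg, 8)@: base register plus displacement 8,
-- with the index register scaled by 8.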




-- -----------------------------------------------------------------------------
-- getOperand: sometimes any operand will do.

-- getNonClobberedOperand: the value of the operand will remain valid across
-- the computation of an arbitrary expression, unless the expression
-- is computed directly into a register which the operand refers to
-- (see trivialCode where this function is used for an example).

getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if use_sse2 && isSuitableFloatingPointLit lit
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do
      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getNonClobberedOperand_generic (CmmLit lit)

getNonClobberedOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2)
      && (if is32Bit then not (isWord64 pk) else True)
    then do
      dflags <- getDynFlags
      let platform = targetPlatform dflags
      Amode src mem_code <- getAmode mem
      (src',save_code) <-
        if (amodeCouldBeClobbered platform src)
          then do
            tmp <- getNewRegNat (archWordFormat is32Bit)
            return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
                    unitOL (LEA (archWordFormat is32Bit)
                                (OpAddr src)
                                (OpReg tmp)))
          else
            return (src, nilOL)
      return (OpAddr src', mem_code `appOL` save_code)
    else do
      getNonClobberedOperand_generic (CmmLoad mem pk)

getNonClobberedOperand e = getNonClobberedOperand_generic e

getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getNonClobberedOperand_generic e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)

regClobbered :: Platform -> Reg -> Bool
regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
regClobbered _ _ = False

-- getOperand: the operand is not required to remain valid across the
-- computation of an arbitrary expression.
getOperand :: CmmExpr -> NatM (Operand, InstrBlock)

getOperand (CmmLit lit) = do
  use_sse2 <- sse2Enabled
  if (use_sse2 && isSuitableFloatingPointLit lit)
    then do
      let CmmFloat _ w = lit
      Amode addr code <- memConstant (widthInBytes w) lit
      return (OpAddr addr, code)
    else do
      is32Bit <- is32BitPlatform
      dflags <- getDynFlags
      if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
        then return (OpImm (litToImm lit), nilOL)
        else getOperand_generic (CmmLit lit)

getOperand (CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
    then do
      Amode src mem_code <- getAmode mem
      return (OpAddr src, mem_code)
    else
      getOperand_generic (CmmLoad mem pk)

getOperand e = getOperand_generic e

getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
getOperand_generic e = do
    (reg, code) <- getSomeReg e
    return (OpReg reg, code)

isOperand :: Bool -> CmmExpr -> Bool
isOperand _ (CmmLoad _ _) = True
isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
                                 || isSuitableFloatingPointLit lit
isOperand _ _ = False

memConstant :: Int -> CmmLit -> NatM Amode
memConstant align lit = do
  lbl <- getNewLabelNat
  let rosection = Section ReadOnlyData lbl
  dflags <- getDynFlags
  (addr, addr_code) <- if target32Bit (targetPlatform dflags)
                       then do dynRef <- cmmMakeDynamicReference
                                             dflags
                                             DataReference
                                             lbl
                               Amode addr addr_code <- getAmode dynRef
                               return (addr, addr_code)
                       else return (ripRel (ImmCLbl lbl), nilOL)
  let code =
        LDATA rosection (align, Statics lbl [CmmStaticLit lit])
        `consOL` addr_code
  return (Amode addr code)


loadFloatAmode :: Bool -> Width -> AddrMode -> InstrBlock -> NatM Register
loadFloatAmode use_sse2 w addr addr_code = do
  let format = floatFormat w
      code dst = addr_code `snocOL`
                 if use_sse2
                    then MOV format (OpAddr addr) (OpReg dst)
                    else GLD format addr dst
  return (Any (if use_sse2 then format else FF80) code)


-- if we want a floating-point literal as an operand, we can
-- use it directly from memory.  However, if the literal is
-- zero, we're better off generating it into a register using
-- xor.
isSuitableFloatingPointLit :: CmmLit -> Bool
isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
isSuitableFloatingPointLit _ = False

getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
getRegOrMem e@(CmmLoad mem pk) = do
  is32Bit <- is32BitPlatform
  use_sse2 <- sse2Enabled
  if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
    then do
      Amode src mem_code <- getAmode mem
      return (OpAddr src, mem_code)
    else do
      (reg, code) <- getNonClobberedReg e
      return (OpReg reg, code)
getRegOrMem e = do
    (reg, code) <- getNonClobberedReg e
    return (OpReg reg, code)

is32BitLit :: Bool -> CmmLit -> Bool
is32BitLit is32Bit (CmmInt i W64)
 | not is32Bit
    = -- assume that labels are in the range 0-2^31-1: this assumes the
      -- small memory model (see gcc docs, -mcmodel=small).
      is32BitInteger i
is32BitLit _ _ = True




-- Set up a condition code for a conditional branch.

getCondCode :: CmmExpr -> NatM CondCode

-- yes, they really do seem to want exactly the same!

getCondCode (CmmMachOp mop [x, y])
  =
    case mop of
      MO_F_Eq W32 -> condFltCode EQQ x y
      MO_F_Ne W32 -> condFltCode NE  x y
      MO_F_Gt W32 -> condFltCode GTT x y
      MO_F_Ge W32 -> condFltCode GE  x y
      MO_F_Lt W32 -> condFltCode LTT x y
      MO_F_Le W32 -> condFltCode LE  x y

      MO_F_Eq W64 -> condFltCode EQQ x y
      MO_F_Ne W64 -> condFltCode NE  x y
      MO_F_Gt W64 -> condFltCode GTT x y
      MO_F_Ge W64 -> condFltCode GE  x y
      MO_F_Lt W64 -> condFltCode LTT x y
      MO_F_Le W64 -> condFltCode LE  x y

      _ -> condIntCode (machOpToCond mop) x y

getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)

machOpToCond :: MachOp -> Cond
machOpToCond mo = case mo of
  MO_Eq _   -> EQQ
  MO_Ne _   -> NE
  MO_S_Gt _ -> GTT
  MO_S_Ge _ -> GE
  MO_S_Lt _ -> LTT
  MO_S_Le _ -> LE
  MO_U_Gt _ -> GU
  MO_U_Ge _ -> GEU
  MO_U_Lt _ -> LU
  MO_U_Le _ -> LEU
  _other -> pprPanic "machOpToCond" (pprMachOp mo)


-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.

condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
condIntCode cond x y = do is32Bit <- is32BitPlatform
                          condIntCode' is32Bit cond x y

condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode

-- memory vs immediate
condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
 | is32BitLit is32Bit lit = do
    Amode x_addr x_code <- getAmode x
    let
        imm  = litToImm lit
        code = x_code `snocOL`
               CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr)
    --
    return (CondCode False cond code)

-- anything vs zero, using a mask
-- TODO: Add some sanity checking!!!!
condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
    | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
    = do
      (x_reg, x_code) <- getSomeReg x
      let
          code = x_code `snocOL`
                 TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
      --
      return (CondCode False cond code)

-- anything vs zero
condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
    (x_reg, x_code) <- getSomeReg x
    let
        code = x_code `snocOL`
               TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg)
    --
    return (CondCode False cond code)

-- anything vs operand
condIntCode' is32Bit cond x y
 | isOperand is32Bit y = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op,  y_code) <- getOperand y
    let
        code = x_code `appOL` y_code `snocOL`
               CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg)
    return (CondCode False cond code)
-- operand vs. anything: invert the comparison so that we can use a
-- single comparison instruction.
 | isOperand is32Bit x
 , Just revcond <- maybeFlipCond cond = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op,  x_code) <- getOperand x
    let
        code = y_code `appOL` x_code `snocOL`
               CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg)
    return (CondCode False revcond code)

-- anything vs anything
condIntCode' _ cond x y = do
    dflags <- getDynFlags
    (y_reg, y_code) <- getNonClobberedReg y
    (x_op, x_code) <- getRegOrMem x
    let
        code = y_code `appOL`
               x_code `snocOL`
               CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op
    return (CondCode False cond code)



--------------------------------------------------------------------------------
condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode

condFltCode cond x y
  = if_sse2 condFltCode_sse2 condFltCode_x87
  where

  condFltCode_x87
    = ASSERT(cond `elem` ([EQQ, NE, LE, LTT, GE, GTT])) do
      (x_reg, x_code) <- getNonClobberedReg x
      (y_reg, y_code) <- getSomeReg y
      let
          code = x_code `appOL` y_code `snocOL`
                 GCMP cond x_reg y_reg
      -- The GCMP insn does the test and sets the zero flag if comparable
      -- and true.  Hence we always supply EQQ as the condition to test.
      return (CondCode True EQQ code)

  -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
  -- an operand, but the right must be a reg.  We can probably do better
  -- than this general case...
  condFltCode_sse2 = do
    dflags <- getDynFlags
    (x_reg, x_code) <- getNonClobberedReg x
    (y_op, y_code) <- getOperand y
    let
        code = x_code `appOL`
               y_code `snocOL`
               CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg)
        -- NB(1): we need to use the unsigned comparison operators on the
        -- result of this comparison.
    return (CondCode True (condToUnsigned cond) code)

-- -----------------------------------------------------------------------------
-- Generating assignments

-- Assignments are really at the heart of the whole code generation
-- business.  Almost all top-level nodes of any real importance are
-- assignments, which correspond to loads, stores, or register
-- transfers.  If we're really lucky, some of the register transfers
-- will go away, because we can use the destination register to
-- complete the code generation for the right hand side.  This only
-- fails when the right hand side is forced into a fixed register
-- (e.g. the result of a call).

assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock

assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock


-- integer assignment to memory

-- specific case of adding/subtracting an integer to a particular address.
-- ToDo: catch other cases where we can use an operation directly on a memory
-- address.
assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
                                         CmmLit (CmmInt i _)])
   | addr == addr2, pk /= II64 || is32BitInteger i,
     Just instr <- check op
   = do Amode amode code_addr <- getAmode addr
        let code = code_addr `snocOL`
                   instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
        return code
   where
        check (MO_Add _) = Just ADD
        check (MO_Sub _) = Just SUB
        check _ = Nothing
        -- ToDo: more?

-- general case
assignMem_IntCode pk addr src = do
    is32Bit <- is32BitPlatform
    Amode addr code_addr <- getAmode addr
    (code_src, op_src)   <- get_op_RI is32Bit src
    let
        code = code_src `appOL`
               code_addr `snocOL`
                  MOV pk op_src (OpAddr addr)
        -- NOTE: op_src is stable, so it will still be valid
        -- after code_addr.  This may involve the introduction
        -- of an extra MOV to a temporary register, but we hope
        -- the register allocator will get rid of it.
    --
    return code
  where
    get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand)   -- code, operator
    get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
      = return (nilOL, OpImm (litToImm lit))
    get_op_RI _ op
      = do (reg,code) <- getNonClobberedReg op
           return (code, OpReg reg)


-- Assign; dst is a reg, rhs is mem
assignReg_IntCode pk reg (CmmLoad src _) = do
  load_code <- intLoadCode (MOV pk) src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (load_code (getRegisterReg platform False{-no sse2-} reg))

-- dst is a reg, but src could be anything
assignReg_IntCode _ reg src = do
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  code <- getAnyReg src
  return (code (getRegisterReg platform False{-no sse2-} reg))


-- Floating point assignment to memory
assignMem_FltCode pk addr src = do
  (src_reg, src_code) <- getNonClobberedReg src
  Amode addr addr_code <- getAmode addr
  use_sse2 <- sse2Enabled
  let
        code = src_code `appOL`
               addr_code `snocOL`
               if use_sse2 then MOV pk (OpReg src_reg) (OpAddr addr)
                           else GST pk src_reg addr
  return code

-- Floating point assignment to a register/temporary
assignReg_FltCode _ reg src = do
  use_sse2 <- sse2Enabled
  src_code <- getAnyReg src
  dflags <- getDynFlags
  let platform = targetPlatform dflags
  return (src_code (getRegisterReg platform use_sse2 reg))


genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock

genJump (CmmLoad mem _) regs = do
  Amode target code <- getAmode mem
  return (code `snocOL` JMP (OpAddr target) regs)

genJump (CmmLit lit) regs = do
  return (unitOL (JMP (OpImm (litToImm lit)) regs))

genJump expr regs = do
  (reg,code) <- getSomeReg expr
  return (code `snocOL` JMP (OpReg reg) regs)


-- -----------------------------------------------------------------------------
-- Unconditional branches

genBranch :: BlockId -> NatM InstrBlock
genBranch = return . toOL . mkJumpInstr



-- -----------------------------------------------------------------------------
-- Conditional jumps

{-
Conditional jumps are always to local labels, so we can use branch
instructions.  We peek at the arguments to decide what kind of
comparison to do.

I386: First, we have to ensure that the condition
codes are set according to the supplied comparison operation.
-}

genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump id expr = do
  is32Bit <- is32BitPlatform
  genCondJump' is32Bit id expr

genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock

-- 64-bit integer comparisons on 32-bit
genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
  | is32Bit, Just W64 <- maybeIntComparison mop = do
      ChildCode64 code1 r1_lo <- iselExpr64 e1
      ChildCode64 code2 r2_lo <- iselExpr64 e2
      let r1_hi = getHiVRegFromLo r1_lo
          r2_hi = getHiVRegFromLo r2_lo
          cond = machOpToCond mop
          Just cond' = maybeFlipCond cond
      false <- getBlockIdNat
      return $ code1 `appOL` code2 `appOL` toOL [
        CMP II32 (OpReg r2_hi) (OpReg r1_hi),
        JXX cond true,
        JXX cond' false,
        CMP II32 (OpReg r2_lo) (OpReg r1_lo),
        JXX cond true,
        NEWBLOCK false ]

genCondJump' _ id bool = do
  CondCode is_float cond cond_code <- getCondCode bool
  use_sse2 <- sse2Enabled
  if not is_float || not use_sse2
    then
        return (cond_code `snocOL` JXX cond id)
    else do
        lbl <- getBlockIdNat

        -- see comment with condFltReg
        let code = case cond of
                        NE  -> or_unordered
                        GU  -> plain_test
                        GEU -> plain_test
                        _   -> and_ordered

            plain_test = unitOL (
                  JXX cond id
                )
            or_unordered = toOL [
                  JXX cond id,
                  JXX PARITY id
                ]
            and_ordered = toOL [
                  JXX PARITY lbl,
                  JXX cond id,
                  JXX ALWAYS lbl,
                  NEWBLOCK lbl
                ]
        return (cond_code `appOL` code)
1661
1662 -- -----------------------------------------------------------------------------
1663 -- Generating C calls
1664
1665 -- Now the biggest nightmare---calls. Most of the nastiness is buried in
1666 -- the argument-marshalling code (see @push_arg@ and @load_args@ below),
1667 -- which moves the arguments to the correct registers/stack locations. Apart from that, the code is easy.
1668 --
1669 -- (If applicable) Do not fill the delay slots here; you will confuse the
1670 -- register allocator.
1671
1672 genCCall
1673 :: DynFlags
1674 -> Bool -- 32 bit platform?
1675 -> ForeignTarget -- function to call
1676 -> [CmmFormal] -- where to put the result
1677 -> [CmmActual] -- arguments (of mixed type)
1678 -> NatM InstrBlock
1679
1680 -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1681
1682 -- Unroll memcpy calls if the source and destination pointers are at
1683 -- least DWORD aligned and the number of bytes to copy isn't too
1684 -- large. Otherwise, call C's memcpy.
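-- A concrete (illustrative) example: on x86-64, 'memcpy(dst, src, 10)'
-- with 4-byte alignment selects format = II32 and unrolls to roughly
--
--     movl   0(src), tmp ; movl tmp, 0(dst)
--     movl   4(src), tmp ; movl tmp, 4(dst)
--     movzwl 8(src), tmp ; movw tmp, 8(dst)
--
-- i.e. two 4-byte moves, then one 2-byte move for the remainder.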
1685 genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
1686 [dst, src, CmmLit (CmmInt n _)]
1687 | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
1688 code_dst <- getAnyReg dst
1689 dst_r <- getNewRegNat format
1690 code_src <- getAnyReg src
1691 src_r <- getNewRegNat format
1692 tmp_r <- getNewRegNat format
1693 return $ code_dst dst_r `appOL` code_src src_r `appOL`
1694 go dst_r src_r tmp_r (fromInteger n)
1695 where
1696 -- The number of instructions we will generate (approx). We need 2
1697 -- instructions per move.
1698 insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
1699
1700 format = if align .&. 4 /= 0 then II32 else (archWordFormat is32Bit)
1701
1702 -- The size of each move, in bytes.
1703 sizeBytes :: Integer
1704 sizeBytes = fromIntegral (formatInBytes format)
1705
1706 go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
1707 go dst src tmp i
1708 | i >= sizeBytes =
1709 unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
1710 unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
1711 go dst src tmp (i - sizeBytes)
1712 -- Deal with remaining bytes.
1713 | i >= 4 = -- Will never happen on 32-bit
1714 unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
1715 unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1716 go dst src tmp (i - 4)
1717 | i >= 2 =
1718 unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
1719 unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1720 go dst src tmp (i - 2)
1721 | i >= 1 =
1722 unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
1723 unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
1724 go dst src tmp (i - 1)
1725 | otherwise = nilOL
1726 where
1727 src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
1728 (ImmInteger (n - i))
1729 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1730 (ImmInteger (n - i))
1731
1732 genCCall dflags _ (PrimTarget (MO_Memset align)) _
1733 [dst,
1734 CmmLit (CmmInt c _),
1735 CmmLit (CmmInt n _)]
1736 | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
1737 code_dst <- getAnyReg dst
1738 dst_r <- getNewRegNat format
1739 return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
1740 where
1741 (format, val) = case align .&. 3 of
1742 2 -> (II16, c2)
1743 0 -> (II32, c4)
1744 _ -> (II8, c)
1745 c2 = c `shiftL` 8 .|. c
1746 c4 = c2 `shiftL` 16 .|. c2
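-- For instance (illustrative): with c = 0xAB we get c2 = 0xABAB and
-- c4 = 0xABABABAB, so a 4-byte-aligned 'memset(dst, 0xAB, 10)' becomes
-- two 4-byte stores of 0xABABABAB followed by one 2-byte store of 0xABAB.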
1747
1748 -- The number of instructions we will generate (approx). We need 1
1749 -- instruction per move.
1750 insns = (n + sizeBytes - 1) `div` sizeBytes
1751
1752 -- The size of each move, in bytes.
1753 sizeBytes :: Integer
1754 sizeBytes = fromIntegral (formatInBytes format)
1755
1756 go :: Reg -> Integer -> OrdList Instr
1757 go dst i
1758 -- TODO: Add movabs instruction and support 64-bit sets.
1759 | i >= sizeBytes = -- This might be smaller than the below sizes
1760 unitOL (MOV format (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
1761 go dst (i - sizeBytes)
1762 | i >= 4 = -- Will never happen on 32-bit
1763 unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
1764 go dst (i - 4)
1765 | i >= 2 =
1766 unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
1767 go dst (i - 2)
1768 | i >= 1 =
1769 unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
1770 go dst (i - 1)
1771 | otherwise = nilOL
1772 where
1773 dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
1774 (ImmInteger (n - i))
1775
1776 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
1777 -- a write barrier compiles to no code on x86/x86-64;
1778 -- we keep it around this long only so that earlier optimisations cannot reorder across it.
1779
1780 genCCall _ _ (PrimTarget MO_Touch) _ _ = return nilOL
1781
1782 genCCall _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
1783 case n of
1784 0 -> genPrefetch src $ PREFETCH NTA format
1785 1 -> genPrefetch src $ PREFETCH Lvl2 format
1786 2 -> genPrefetch src $ PREFETCH Lvl1 format
1787 3 -> genPrefetch src $ PREFETCH Lvl0 format
1788 l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
1789 -- the C / LLVM prefetch convention uses locality levels 0, 1, 2, and 3;
1790 -- the corresponding x86 variants are NTA, Lvl2, Lvl1, and Lvl0
1791 where
1792 format = archWordFormat is32bit
1793 -- we need to know the register width used for pointers!
1794 genPrefetch inRegSrc prefetchCTor =
1795 do
1796 code_src <- getAnyReg inRegSrc
1797 src_r <- getNewRegNat format
1798 return $ code_src src_r `appOL`
1799 unitOL (prefetchCTor (OpAddr
1800 (AddrBaseIndex (EABaseReg src_r) EAIndexNone (ImmInt 0))))
1801 -- prefetch always takes an address
1802
1803 genCCall dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
1804 let platform = targetPlatform dflags
1805 let dst_r = getRegisterReg platform False (CmmLocal dst)
1806 case width of
1807 W64 | is32Bit -> do
1808 ChildCode64 vcode rlo <- iselExpr64 src
1809 let dst_rhi = getHiVRegFromLo dst_r
1810 rhi = getHiVRegFromLo rlo
1811 return $ vcode `appOL`
1812 toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
1813 MOV II32 (OpReg rhi) (OpReg dst_r),
1814 BSWAP II32 dst_rhi,
1815 BSWAP II32 dst_r ]
1816 W16 -> do code_src <- getAnyReg src
1817 return $ code_src dst_r `appOL`
1818 unitOL (BSWAP II32 dst_r) `appOL`
1819 unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
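-- Illustrative note (not in the original): the W16 case works because,
-- e.g. for 0x1234, 'BSWAP II32' turns 0x00001234 into 0x34120000 and
-- the 'SHR' by 16 then leaves the desired 0x3412 in the register.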
1820 _ -> do code_src <- getAnyReg src
1821 return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)
1822 where
1823 format = intFormat width
1824
1825 genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
1826 args@[src] = do
1827 sse4_2 <- sse4_2Enabled
1828 let platform = targetPlatform dflags
1829 if sse4_2
1830 then do code_src <- getAnyReg src
1831 src_r <- getNewRegNat format
1832 let dst_r = getRegisterReg platform False (CmmLocal dst)
1833 return $ code_src src_r `appOL`
1834 (if width == W8 then
1835 -- The POPCNT instruction doesn't take an r/m8 operand
1836 unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
1837 unitOL (POPCNT II16 (OpReg src_r) dst_r)
1838 else
1839 unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
1840 (if width == W8 || width == W16 then
1841 -- We used a 16-bit destination register above,
1842 -- so zero-extend
1843 unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
1844 else nilOL)
1845 else do
1846 targetExpr <- cmmMakeDynamicReference dflags
1847 CallReference lbl
1848 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1849 [NoHint] [NoHint]
1850 CmmMayReturn)
1851 genCCall dflags is32Bit target dest_regs args
1852 where
1853 format = intFormat width
1854 lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
1855
1856 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
1857 | is32Bit && width == W64 = do
1858 -- Fallback to `hs_clz64` on i386
1859 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
1860 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1861 [NoHint] [NoHint]
1862 CmmMayReturn)
1863 genCCall dflags is32Bit target dest_regs args
1864
1865 | otherwise = do
1866 code_src <- getAnyReg src
1867 src_r <- getNewRegNat format
1868 tmp_r <- getNewRegNat format
1869 let dst_r = getRegisterReg platform False (CmmLocal dst)
1870
1871 -- The following insn sequence makes sure 'clz 0' has a defined value.
1872 -- Starting with Haswell, one could use the LZCNT insn instead.
1873 return $ code_src src_r `appOL` toOL
1874 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1875 [ BSR format (OpReg src_r) tmp_r
1876 , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
1877 , CMOV NE format (OpReg tmp_r) dst_r
1878 , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
1879 ]) -- NB: We don't need to zero-extend the result for the
1880 -- W8/W16 cases because the 'MOV' insn already
1881 -- took care of implicitly clearing the upper bits
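-- A worked example (illustrative): at W32, bw = 32, so dst is preloaded
-- with 63. For src = 1, BSR yields bit index 0 with ZF clear, CMOV NE
-- gives dst = 0, and 0 `xor` 31 = 31 = clz 1. For src = 0, BSR sets ZF,
-- dst keeps 63, and 63 `xor` 31 = 32, the defined value of 'clz 0'.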
1882 where
1883 bw = widthInBits width
1884 platform = targetPlatform dflags
1885 format = if width == W8 then II16 else intFormat width
1886 lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width))
1887
1888 genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src]
1889 | is32Bit, width == W64 = do
1890 ChildCode64 vcode rlo <- iselExpr64 src
1891 let rhi = getHiVRegFromLo rlo
1892 dst_r = getRegisterReg platform False (CmmLocal dst)
1893 lbl1 <- getBlockIdNat
1894 lbl2 <- getBlockIdNat
1895 tmp_r <- getNewRegNat format
1896
1897 -- The following instruction sequence corresponds to the pseudo-code
1898 --
1899 -- if (src) {
1900 -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
1901 -- } else {
1902 -- dst = 64;
1903 -- }
1904 return $ vcode `appOL` toOL
1905 ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
1906 , OR II32 (OpReg rlo) (OpReg tmp_r)
1907 , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
1908 , JXX EQQ lbl2
1909 , JXX ALWAYS lbl1
1910
1911 , NEWBLOCK lbl1
1912 , BSF II32 (OpReg rhi) dst_r
1913 , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r)
1914 , BSF II32 (OpReg rlo) tmp_r
1915 , CMOV NE II32 (OpReg tmp_r) dst_r
1916 , JXX ALWAYS lbl2
1917
1918 , NEWBLOCK lbl2
1919 ])
1920
1921 | otherwise = do
1922 code_src <- getAnyReg src
1923 src_r <- getNewRegNat format
1924 tmp_r <- getNewRegNat format
1925 let dst_r = getRegisterReg platform False (CmmLocal dst)
1926
1927 -- The following insn sequence makes sure 'ctz 0' has a defined value.
1928 -- Starting with Haswell, one could use the TZCNT insn instead.
1929 return $ code_src src_r `appOL` toOL
1930 ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
1931 [ BSF format (OpReg src_r) tmp_r
1932 , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
1933 , CMOV NE format (OpReg tmp_r) dst_r
1934 ]) -- NB: We don't need to zero-extend the result for the
1935 -- W8/W16 cases because the 'MOV' insn already
1936 -- took care of implicitly clearing the upper bits
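-- A worked example (illustrative): at W32, src = 8 makes BSF return bit
-- index 3 with ZF clear, so CMOV NE gives dst = 3 = ctz 8; for src = 0,
-- BSF sets ZF and dst keeps the preloaded bw = 32, the defined 'ctz 0'.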
1937 where
1938 bw = widthInBits width
1939 platform = targetPlatform dflags
1940 format = if width == W8 then II16 else intFormat width
1941
1942 genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
1943 targetExpr <- cmmMakeDynamicReference dflags
1944 CallReference lbl
1945 let target = ForeignTarget targetExpr (ForeignConvention CCallConv
1946 [NoHint] [NoHint]
1947 CmmMayReturn)
1948 genCCall dflags is32Bit target dest_regs args
1949 where
1950 lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width))
1951
1952 genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
1953 Amode amode addr_code <-
1954 if amop `elem` [AMO_Add, AMO_Sub]
1955 then getAmode addr
1956 else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
1957 arg <- getNewRegNat format
1958 arg_code <- getAnyReg n
1959 use_sse2 <- sse2Enabled
1960 let platform = targetPlatform dflags
1961 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
1962 code <- op_code dst_r arg amode
1963 return $ addr_code `appOL` arg_code arg `appOL` code
1964 where
1965 -- Code for the operation
1966 op_code :: Reg -- Destination reg
1967 -> Reg -- Register containing argument
1968 -> AddrMode -- Address of location to mutate
1969 -> NatM (OrdList Instr)
1970 op_code dst_r arg amode = case amop of
1971 -- In the common case where dst_r is a virtual register the
1972 -- final move should go away, because it's the last use of arg
1973 -- and the first use of dst_r.
1974 AMO_Add -> return $ toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
1975 , MOV format (OpReg arg) (OpReg dst_r)
1976 ]
1977 AMO_Sub -> return $ toOL [ NEGI format (OpReg arg)
1978 , LOCK (XADD format (OpReg arg) (OpAddr amode))
1979 , MOV format (OpReg arg) (OpReg dst_r)
1980 ]
1981 AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
1982 AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
1983 , NOT format dst
1984 ])
1985 AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
1986 AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
1987 where
1988 -- Simulate operation that lacks a dedicated instruction using
1989 -- cmpxchg.
1990 cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
1991 -> NatM (OrdList Instr)
1992 cmpxchg_code instrs = do
1993 lbl <- getBlockIdNat
1994 tmp <- getNewRegNat format
1995 return $ toOL
1996 [ MOV format (OpAddr amode) (OpReg eax)
1997 , JXX ALWAYS lbl
1998 , NEWBLOCK lbl
1999 -- Keep old value so we can return it:
2000 , MOV format (OpReg eax) (OpReg dst_r)
2001 , MOV format (OpReg eax) (OpReg tmp)
2002 ]
2003 `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
2004 [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
2005 , JXX NE lbl
2006 ]
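-- Illustratively (not part of the original), for AMO_And the loop is
-- roughly:
--
--     mov [addr], %eax
--   retry:
--     mov %eax, dst            -- remember the old value
--     mov %eax, tmp
--     and arg, tmp             -- compute the new value
--     lock cmpxchg tmp, [addr] -- store iff [addr] still equals %eax
--     jne retry                -- on failure %eax holds the fresh value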
2007
2008 format = intFormat width
2009
2010 genCCall dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] = do
2011 load_code <- intLoadCode (MOV (intFormat width)) addr
2012 let platform = targetPlatform dflags
2013 use_sse2 <- sse2Enabled
2014 return (load_code (getRegisterReg platform use_sse2 (CmmLocal dst)))
2015
2016 genCCall _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
2017 code <- assignMem_IntCode (intFormat width) addr val
2018 return $ code `snocOL` MFENCE
2019
2020 genCCall dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] = do
2021 -- On x86 we don't have enough registers to use cmpxchg with a
2022 -- complicated addressing mode, so on that architecture we
2023 -- pre-compute the address first.
2024 Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
2025 newval <- getNewRegNat format
2026 newval_code <- getAnyReg new
2027 oldval <- getNewRegNat format
2028 oldval_code <- getAnyReg old
2029 use_sse2 <- sse2Enabled
2030 let platform = targetPlatform dflags
2031 dst_r = getRegisterReg platform use_sse2 (CmmLocal dst)
2032 code = toOL
2033 [ MOV format (OpReg oldval) (OpReg eax)
2034 , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
2035 , MOV format (OpReg eax) (OpReg dst_r)
2036 ]
2037 return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
2038 `appOL` code
2039 where
2040 format = intFormat width
2041
2042 genCCall _ is32Bit target dest_regs args = do
2043 dflags <- getDynFlags
2044 let platform = targetPlatform dflags
2045 case (target, dest_regs) of
2046 -- void return type prim op
2047 (PrimTarget op, []) ->
2048 outOfLineCmmOp op Nothing args
2049 -- we only cope with a single result for foreign calls
2050 (PrimTarget op, [r])
2051 | not is32Bit -> outOfLineCmmOp op (Just r) args
2052 | otherwise -> do
2053 l1 <- getNewLabelNat
2054 l2 <- getNewLabelNat
2055 sse2 <- sse2Enabled
2056 if sse2
2057 then
2058 outOfLineCmmOp op (Just r) args
2059 else case op of
2060 MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args
2061 MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args
2062
2063 MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args
2064 MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args
2065
2066 MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args
2067 MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args
2068
2069 MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args
2070 MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args
2071
2072 _other_op -> outOfLineCmmOp op (Just r) args
2073
2074 where
2075 actuallyInlineFloatOp instr format [x]
2076 = do res <- trivialUFCode format (instr format) x
2077 any <- anyReg res
2078 return (any (getRegisterReg platform False (CmmLocal r)))
2079
2080 actuallyInlineFloatOp _ _ args
2081 = panic $ "genCCall.actuallyInlineFloatOp: bad number of arguments! ("
2082 ++ show (length args) ++ ")"
2083
2084 (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
2085 (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
2086 (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
2087 (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
2088 case args of
2089 [arg_x, arg_y] ->
2090 do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
2091 let format = intFormat width
2092 lCode <- anyReg =<< trivialCode width (ADD_CC format)
2093 (Just (ADD_CC format)) arg_x arg_y
2094 let reg_l = getRegisterReg platform True (CmmLocal res_l)
2095 reg_h = getRegisterReg platform True (CmmLocal res_h)
2096 code = hCode reg_h `appOL`
2097 lCode reg_l `snocOL`
2098 ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
2099 return code
2100 _ -> panic "genCCall: Wrong number of arguments/results for add2"
2101 (PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
2102 addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
2103 (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
2104 addSubIntC platform ADD_CC (Just . ADD_CC) OFLO width res_r res_c args
2105 (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
2106 addSubIntC platform SUB_CC (const Nothing) OFLO width res_r res_c args
2107 (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
2108 case args of
2109 [arg_x, arg_y] ->
2110 do (y_reg, y_code) <- getRegOrMem arg_y
2111 x_code <- getAnyReg arg_x
2112 let format = intFormat width
2113 reg_h = getRegisterReg platform True (CmmLocal res_h)
2114 reg_l = getRegisterReg platform True (CmmLocal res_l)
2115 code = y_code `appOL`
2116 x_code rax `appOL`
2117 toOL [MUL2 format y_reg,
2118 MOV format (OpReg rdx) (OpReg reg_h),
2119 MOV format (OpReg rax) (OpReg reg_l)]
2120 return code
2121 _ -> panic "genCCall: Wrong number of arguments/results for mul2"
2122
2123 _ -> if is32Bit
2124 then genCCall32' dflags target dest_regs args
2125 else genCCall64' dflags target dest_regs args
2126
2127 where divOp1 platform signed width results [arg_x, arg_y]
2128 = divOp platform signed width results Nothing arg_x arg_y
2129 divOp1 _ _ _ _ _
2130 = panic "genCCall: Wrong number of arguments for divOp1"
2131 divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
2132 = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
2133 divOp2 _ _ _ _ _
2134 = panic "genCCall: Wrong number of arguments for divOp2"
2135 divOp platform signed width [res_q, res_r]
2136 m_arg_x_high arg_x_low arg_y
2137 = do let format = intFormat width
2138 reg_q = getRegisterReg platform True (CmmLocal res_q)
2139 reg_r = getRegisterReg platform True (CmmLocal res_r)
2140 widen | signed = CLTD format
2141 | otherwise = XOR format (OpReg rdx) (OpReg rdx)
2142 instr | signed = IDIV
2143 | otherwise = DIV
2144 (y_reg, y_code) <- getRegOrMem arg_y
2145 x_low_code <- getAnyReg arg_x_low
2146 x_high_code <- case m_arg_x_high of
2147 Just arg_x_high ->
2148 getAnyReg arg_x_high
2149 Nothing ->
2150 return $ const $ unitOL widen
2151 return $ y_code `appOL`
2152 x_low_code rax `appOL`
2153 x_high_code rdx `appOL`
2154 toOL [instr format y_reg,
2155 MOV format (OpReg rax) (OpReg reg_q),
2156 MOV format (OpReg rdx) (OpReg reg_r)]
2157 divOp _ _ _ _ _ _ _
2158 = panic "genCCall: Wrong number of results for divOp"
2159
2160 addSubIntC platform instr mrevinstr cond width
2161 res_r res_c [arg_x, arg_y]
2162 = do let format = intFormat width
2163 rCode <- anyReg =<< trivialCode width (instr format)
2164 (mrevinstr format) arg_x arg_y
2165 reg_tmp <- getNewRegNat II8
2166 let reg_c = getRegisterReg platform True (CmmLocal res_c)
2167 reg_r = getRegisterReg platform True (CmmLocal res_r)
2168 code = rCode reg_r `snocOL`
2169 SETCC cond (OpReg reg_tmp) `snocOL`
2170 MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
2171
2172 return code
2173 addSubIntC _ _ _ _ _ _ _ _
2174 = panic "genCCall: Wrong number of arguments/results for addSubIntC"
2175
2176 genCCall32' :: DynFlags
2177 -> ForeignTarget -- function to call
2178 -> [CmmFormal] -- where to put the result
2179 -> [CmmActual] -- arguments (of mixed type)
2180 -> NatM InstrBlock
2181 genCCall32' dflags target dest_regs args = do
2182 let
2183 prom_args = map (maybePromoteCArg dflags W32) args
2184
2185 -- Align stack to 16n for calls, assuming a starting stack
2186 -- alignment of 16n - word_size on procedure entry, which we
2187 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2188 sizes = map (arg_size . cmmExprType dflags) (reverse args)
2189 raw_arg_size = sum sizes + wORD_SIZE dflags
2190 arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
2191 tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
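-- A worked example (illustrative): pushing two 4-byte arguments gives
-- raw_arg_size = 8 + 4 (for the return address) = 12, hence
-- arg_pad_size = 4 and tot_arg_size = 12; after the pad and the pushes,
-- esp is again 16-byte aligned at the call instruction.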
2192 delta0 <- getDeltaNat
2193 setDeltaNat (delta0 - arg_pad_size)
2194
2195 use_sse2 <- sse2Enabled
2196 push_codes <- mapM (push_arg use_sse2) (reverse prom_args)
2197 delta <- getDeltaNat
2198 MASSERT(delta == delta0 - tot_arg_size)
2199
2200 -- deal with static vs dynamic call targets
2201 (callinsns,cconv) <-
2202 case target of
2203 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2204 -> -- ToDo: stdcall arg sizes
2205 return (unitOL (CALL (Left fn_imm) []), conv)
2206 where fn_imm = ImmCLbl lbl
2207 ForeignTarget expr conv
2208 -> do { (dyn_r, dyn_c) <- getSomeReg expr
2209 ; ASSERT( isWord32 (cmmExprType dflags expr) )
2210 return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
2211 PrimTarget _
2212 -> panic $ "genCCall: Can't handle PrimTarget call type here; "
2213 ++ "probably because there are too many return values."
2214
2215 let push_code
2216 | arg_pad_size /= 0
2217 = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
2218 DELTA (delta0 - arg_pad_size)]
2219 `appOL` concatOL push_codes
2220 | otherwise
2221 = concatOL push_codes
2222
2223 -- Deallocate parameters after call for ccall;
2224 -- but not for stdcall (callee does it)
2225 --
2226 -- We have to pop any stack padding we added
2227 -- even if we are doing stdcall, though (#5052)
2228 pop_size
2229 | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
2230 | otherwise = tot_arg_size
2231
2232 call = callinsns `appOL`
2233 toOL (
2234 (if pop_size==0 then [] else
2235 [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
2236 ++
2237 [DELTA delta0]
2238 )
2239 setDeltaNat delta0
2240
2241 dflags <- getDynFlags
2242 let platform = targetPlatform dflags
2243
2244 let
2245 -- assign the results, if necessary
2246 assign_code [] = nilOL
2247 assign_code [dest]
2248 | isFloatType ty =
2249 if use_sse2
2250 then let tmp_amode = AddrBaseIndex (EABaseReg esp)
2251 EAIndexNone
2252 (ImmInt 0)
2253 fmt = floatFormat w
2254 in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
2255 DELTA (delta0 - b),
2256 GST fmt fake0 tmp_amode,
2257 MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
2258 ADD II32 (OpImm (ImmInt b)) (OpReg esp),
2259 DELTA delta0]
2260 else unitOL (GMOV fake0 r_dest)
2261 | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
2262 MOV II32 (OpReg edx) (OpReg r_dest_hi)]
2263 | otherwise = unitOL (MOV (intFormat w)
2264 (OpReg eax)
2265 (OpReg r_dest))
2266 where
2267 ty = localRegType dest
2268 w = typeWidth ty
2269 b = widthInBytes w
2270 r_dest_hi = getHiVRegFromLo r_dest
2271 r_dest = getRegisterReg platform use_sse2 (CmmLocal dest)
2272 assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
2273
2274 return (push_code `appOL`
2275 call `appOL`
2276 assign_code dest_regs)
2277
2278 where
2279 arg_size :: CmmType -> Int -- Width in bytes
2280 arg_size ty = widthInBytes (typeWidth ty)
2281
2282 roundTo a x | x `mod` a == 0 = x
2283 | otherwise = x + a - (x `mod` a)
2284
2285 push_arg :: Bool -> CmmActual {-current argument-}
2286 -> NatM InstrBlock -- code
2287
2288 push_arg use_sse2 arg -- we don't need the hints on x86
2289 | isWord64 arg_ty = do
2290 ChildCode64 code r_lo <- iselExpr64 arg
2291 delta <- getDeltaNat
2292 setDeltaNat (delta - 8)
2293 let r_hi = getHiVRegFromLo r_lo
2294 return ( code `appOL`
2295 toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
2296 PUSH II32 (OpReg r_lo), DELTA (delta - 8),
2297 DELTA (delta-8)]
2298 )
2299
2300 | isFloatType arg_ty = do
2301 (reg, code) <- getSomeReg arg
2302 delta <- getDeltaNat
2303 setDeltaNat (delta-size)
2304 return (code `appOL`
2305 toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
2306 DELTA (delta-size),
2307 let addr = AddrBaseIndex (EABaseReg esp)
2308 EAIndexNone
2309 (ImmInt 0)
2310 format = floatFormat (typeWidth arg_ty)
2311 in
2312 if use_sse2
2313 then MOV format (OpReg reg) (OpAddr addr)
2314 else GST format reg addr
2315 ]
2316 )
2317
2318 | otherwise = do
2319 (operand, code) <- getOperand arg
2320 delta <- getDeltaNat
2321 setDeltaNat (delta-size)
2322 return (code `snocOL`
2323 PUSH II32 operand `snocOL`
2324 DELTA (delta-size))
2325
2326 where
2327 arg_ty = cmmExprType dflags arg
2328 size = arg_size arg_ty -- Byte size
2329
2330 genCCall64' :: DynFlags
2331 -> ForeignTarget -- function to call
2332 -> [CmmFormal] -- where to put the result
2333 -> [CmmActual] -- arguments (of mixed type)
2334 -> NatM InstrBlock
2335 genCCall64' dflags target dest_regs args = do
2336 -- load up the register arguments
2337 let prom_args = map (maybePromoteCArg dflags W32) args
2338
2339 (stack_args, int_regs_used, fp_regs_used, load_args_code, assign_args_code)
2340 <-
2341 if platformOS platform == OSMinGW32
2342 then load_args_win prom_args [] [] (allArgRegs platform) nilOL
2343 else do
2344 (stack_args, aregs, fregs, load_args_code, assign_args_code)
2345 <- load_args prom_args (allIntArgRegs platform)
2346 (allFPArgRegs platform)
2347 nilOL nilOL
2348 let used_regs rs as = reverse (drop (length rs) (reverse as))
2349 fregs_used = used_regs fregs (allFPArgRegs platform)
2350 aregs_used = used_regs aregs (allIntArgRegs platform)
2351 return (stack_args, aregs_used, fregs_used, load_args_code
2352 , assign_args_code)
2353
2354 let
2355 arg_regs_used = int_regs_used ++ fp_regs_used
2356 arg_regs = [eax] ++ arg_regs_used
2357 -- for annotating the call instruction
2358 sse_regs = length fp_regs_used
2359 arg_stack_slots = if platformOS platform == OSMinGW32
2360 then length stack_args + length (allArgRegs platform)
2361 else length stack_args
2362 tot_arg_size = arg_size * arg_stack_slots
2363
2364
2365 -- Align stack to 16n for calls, assuming a starting stack
2366 -- alignment of 16n - word_size on procedure entry. Which we
2367 -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
2368 (real_size, adjust_rsp) <-
2369 if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
2370 then return (tot_arg_size, nilOL)
2371 else do -- we need to adjust...
2372 delta <- getDeltaNat
2373 setDeltaNat (delta - wORD_SIZE dflags)
2374 return (tot_arg_size + wORD_SIZE dflags, toOL [
2375 SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
2376 DELTA (delta - wORD_SIZE dflags) ])
2377
2378 -- push the stack args, right to left
2379 push_code <- push_args (reverse stack_args) nilOL
2380 -- On Win64, we also have to leave stack space for the arguments
2381 -- that we are passing in registers
2382 lss_code <- if platformOS platform == OSMinGW32
2383 then leaveStackSpace (length (allArgRegs platform))
2384 else return nilOL
2385 delta <- getDeltaNat
2386
2387 -- deal with static vs dynamic call targets
2388 (callinsns,_cconv) <-
2389 case target of
2390 ForeignTarget (CmmLit (CmmLabel lbl)) conv
2391 -> -- ToDo: stdcall arg sizes
2392 return (unitOL (CALL (Left fn_imm) arg_regs), conv)
2393 where fn_imm = ImmCLbl lbl
2394 ForeignTarget expr conv
2395 -> do (dyn_r, dyn_c) <- getSomeReg expr
2396 return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
2397 PrimTarget _
2398 -> panic $ "genCCall: Can't handle PrimTarget call type here; "
2399 ++ "probably because there are too many return values."
2400
2401 let
2402 -- The x86_64 ABI requires us to set %al to the number of SSE2
2403 -- registers that contain arguments, if the called routine
2404 -- is a varargs function. We don't know whether it's a
2405 -- varargs function or not, so we have to assume it is.
2406 --
2407 -- It's not safe to omit this assignment, even if the number
2408 -- of SSE2 regs in use is zero. If %al is larger than 8
2409 -- on entry to a varargs function, seg faults ensue.
2410 assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
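-- For example (illustrative): a varargs call passing one double in
-- %xmm0 is preceded by 'assign_eax 1', i.e.
--
--     MOV II32 (OpImm (ImmInt 1)) (OpReg eax)   -- movl $1, %eax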
2411
2412 let call = callinsns `appOL`
2413 toOL (
2414 -- Deallocate parameters after call for ccall;
2415 -- stdcall has callee do it, but is not supported on
2416 -- x86_64 target (see #3336)
2417 (if real_size==0 then [] else
2418 [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
2419 ++
2420 [DELTA (delta + real_size)]
2421 )
2422 setDeltaNat (delta + real_size)
2423
2424 let
2425 -- assign the results, if necessary
2426 assign_code [] = nilOL
2427 assign_code [dest] =
2428 case typeWidth rep of
2429 W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
2430 (OpReg xmm0)
2431 (OpReg r_dest))
2432 W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
2433 (OpReg xmm0)
2434 (OpReg r_dest))
2435 _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
2436 where
2437 rep = localRegType dest
2438 r_dest = getRegisterReg platform True (CmmLocal dest)
2439 assign_code _many = panic "genCCall.assign_code many"
2440
2441 return (adjust_rsp `appOL`
2442 push_code `appOL`
2443 load_args_code `appOL`
2444 assign_args_code `appOL`
2445 lss_code `appOL`
2446 assign_eax sse_regs `appOL`
2447 call `appOL`
2448 assign_code dest_regs)
2449
2450 where platform = targetPlatform dflags
2451 arg_size = 8 -- always, at the moment (every stack slot is 8 bytes on x86_64)
2452
2453
2454 load_args :: [CmmExpr]
2455 -> [Reg] -- int regs avail for args
2456 -> [Reg] -- FP regs avail for args
2457 -> InstrBlock -- code computing args
2458 -> InstrBlock -- code assigning args to ABI regs
2459 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
2460 -- no more regs to use
2461 load_args args [] [] code acode =
2462 return (args, [], [], code, acode)
2463
2464 -- no more args to push
2465 load_args [] aregs fregs code acode =
2466 return ([], aregs, fregs, code, acode)
2467
2468 load_args (arg : rest) aregs fregs code acode
2469 | isFloatType arg_rep = case fregs of
2470 [] -> push_this_arg
2471 (r:rs) -> do
2472 (code',acode') <- reg_this_arg r
2473 load_args rest aregs rs code' acode'
2474 | otherwise = case aregs of
2475 [] -> push_this_arg
2476 (r:rs) -> do
2477 (code',acode') <- reg_this_arg r
2478 load_args rest rs fregs code' acode'
2479 where
2480
2481 -- put arg into the list of stack pushed args
2482 push_this_arg = do
2483 (args',ars,frs,code',acode')
2484 <- load_args rest aregs fregs code acode
2485 return (arg:args', ars, frs, code', acode')
2486
2487 -- pass the arg into the given register
2488 reg_this_arg r
2489 -- "operand" args can be directly assigned into r
2490 | isOperand False arg = do
2491 arg_code <- getAnyReg arg
2492 return (code, (acode `appOL` arg_code r))
2493 -- The last non-operand arg can be directly assigned after its
2494 -- computation without going into a temporary register
2495 | all (isOperand False) rest = do
2496 arg_code <- getAnyReg arg
2497 return (code `appOL` arg_code r,acode)
2498
2499 -- other args need to be computed beforehand to avoid clobbering
2500 -- previously assigned registers used to pass parameters (see
2501 -- #11792, #12614). They are assigned into temporary registers
2502 -- and get assigned to proper call ABI registers after they all
2503 -- have been computed.
2504 | otherwise = do
2505 arg_code <- getAnyReg arg
2506 tmp <- getNewRegNat arg_fmt
2507 let
2508 code' = code `appOL` arg_code tmp
2509 acode' = acode `snocOL` reg2reg arg_fmt tmp r
2510 return (code',acode')
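-- An illustrative case (not from the original): an argument such as
-- 'x `shiftL` y' is compiled using %ecx (see the Rules of the Game
-- below), so computing it after %rcx has already been loaded with an
-- earlier argument would clobber that argument; hence it goes into a
-- temporary first and is only moved to its ABI register by 'acode'.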
2511
2512 arg_rep = cmmExprType dflags arg
2513 arg_fmt = cmmTypeFormat arg_rep
2514
2515 load_args_win :: [CmmExpr]
2516 -> [Reg] -- used int regs
2517 -> [Reg] -- used FP regs
2518 -> [(Reg, Reg)] -- (int, FP) regs avail for args
2519 -> InstrBlock
2520 -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
2521 -- no more regs to use
2522 load_args_win args usedInt usedFP [] code
2523 = return (args, usedInt, usedFP, code, nilOL)
2524 -- no more args to push
2525 load_args_win [] usedInt usedFP _ code
2526 = return ([], usedInt, usedFP, code, nilOL)
2527 load_args_win (arg : rest) usedInt usedFP
2528 ((ireg, freg) : regs) code
2529 | isFloatType arg_rep = do
2530 arg_code <- getAnyReg arg
2531 load_args_win rest (ireg : usedInt) (freg : usedFP) regs
2532 (code `appOL`
2533 arg_code freg `snocOL`
2534 -- If we are calling a varargs function
2535 -- then we need to define ireg as well
2536 -- as freg
2537 MOV II64 (OpReg freg) (OpReg ireg))
2538 | otherwise = do
2539 arg_code <- getAnyReg arg
2540 load_args_win rest (ireg : usedInt) usedFP regs
2541 (code `appOL` arg_code ireg)
2542 where
2543 arg_rep = cmmExprType dflags arg
2544
2545 push_args [] code = return code
2546 push_args (arg:rest) code
2547 | isFloatType arg_rep = do
2548 (arg_reg, arg_code) <- getSomeReg arg
2549 delta <- getDeltaNat
2550 setDeltaNat (delta-arg_size)
2551 let code' = code `appOL` arg_code `appOL` toOL [
2552 SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp),
2553 DELTA (delta-arg_size),
2554 MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
2555 push_args rest code'
2556
2557 | otherwise = do
2558 ASSERT(width == W64) return ()
2559 (arg_op, arg_code) <- getOperand arg
2560 delta <- getDeltaNat
2561 setDeltaNat (delta-arg_size)
2562 let code' = code `appOL` arg_code `appOL` toOL [
2563 PUSH II64 arg_op,
2564 DELTA (delta-arg_size)]
2565 push_args rest code'
2566 where
2567 arg_rep = cmmExprType dflags arg
2568 width = typeWidth arg_rep
2569
2570 leaveStackSpace n = do
2571 delta <- getDeltaNat
2572 setDeltaNat (delta - n * arg_size)
2573 return $ toOL [
2574 SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
2575 DELTA (delta - n * arg_size)]
2576
2577 maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
2578 maybePromoteCArg dflags wto arg
2579 | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
2580 | otherwise = arg
2581 where
2582 wfrom = cmmExprWidth dflags arg
2583
2584 outOfLineCmmOp :: CallishMachOp -> Maybe CmmFormal -> [CmmActual] -> NatM InstrBlock
2585 outOfLineCmmOp mop res args
2586 = do
2587 dflags <- getDynFlags
2588 targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
2589 let target = ForeignTarget targetExpr
2590 (ForeignConvention CCallConv [] [] CmmMayReturn)
2591
2592 stmtToInstrs (CmmUnsafeForeignCall target (catMaybes [res]) args)
2593 where
2594 -- Assume we can call these functions directly, and that they're not in a dynamic library.
2595 -- TODO: Why is this ok? Under Linux this code will be in libm.so
2596 -- Is it because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
2597 lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
2598
2599 fn = case mop of
2600 MO_F32_Sqrt -> fsLit "sqrtf"
2601 MO_F32_Sin -> fsLit "sinf"
2602 MO_F32_Cos -> fsLit "cosf"
2603 MO_F32_Tan -> fsLit "tanf"
2604 MO_F32_Exp -> fsLit "expf"
2605 MO_F32_Log -> fsLit "logf"
2606
2607 MO_F32_Asin -> fsLit "asinf"
2608 MO_F32_Acos -> fsLit "acosf"
2609 MO_F32_Atan -> fsLit "atanf"
2610
2611 MO_F32_Sinh -> fsLit "sinhf"
2612 MO_F32_Cosh -> fsLit "coshf"
2613 MO_F32_Tanh -> fsLit "tanhf"
2614 MO_F32_Pwr -> fsLit "powf"
2615
2616 MO_F64_Sqrt -> fsLit "sqrt"
2617 MO_F64_Sin -> fsLit "sin"
2618 MO_F64_Cos -> fsLit "cos"
2619 MO_F64_Tan -> fsLit "tan"
2620 MO_F64_Exp -> fsLit "exp"
2621 MO_F64_Log -> fsLit "log"
2622
2623 MO_F64_Asin -> fsLit "asin"
2624 MO_F64_Acos -> fsLit "acos"
2625 MO_F64_Atan -> fsLit "atan"
2626
2627 MO_F64_Sinh -> fsLit "sinh"
2628 MO_F64_Cosh -> fsLit "cosh"
2629 MO_F64_Tanh -> fsLit "tanh"
2630 MO_F64_Pwr -> fsLit "pow"
2631
2632 MO_Memcpy _ -> fsLit "memcpy"
2633 MO_Memset _ -> fsLit "memset"
2634 MO_Memmove _ -> fsLit "memmove"
2635
2636 MO_PopCnt _ -> fsLit "popcnt"
2637 MO_BSwap _ -> fsLit "bswap"
2638 MO_Clz w -> fsLit $ clzLabel w
2639 MO_Ctz _ -> unsupported
2640
2641 MO_AtomicRMW _ _ -> fsLit "atomicrmw"
2642 MO_AtomicRead _ -> fsLit "atomicread"
2643 MO_AtomicWrite _ -> fsLit "atomicwrite"
2644 MO_Cmpxchg _ -> fsLit "cmpxchg"
2645
2646 MO_UF_Conv _ -> unsupported
2647
2648 MO_S_QuotRem {} -> unsupported
2649 MO_U_QuotRem {} -> unsupported
2650 MO_U_QuotRem2 {} -> unsupported
2651 MO_Add2 {} -> unsupported
2652 MO_AddIntC {} -> unsupported
2653 MO_SubIntC {} -> unsupported
2654 MO_SubWordC {} -> unsupported
2655 MO_U_Mul2 {} -> unsupported
2656 MO_WriteBarrier -> unsupported
2657 MO_Touch -> unsupported
2658 (MO_Prefetch_Data _ ) -> unsupported
2659 unsupported = panic ("outOfLineCmmOp: " ++ show mop
2660 ++ " not supported here")
2661
2662 -- -----------------------------------------------------------------------------
2663 -- Generating a table-branch
2664
2665 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2666
2667 genSwitch dflags expr targets
2668 | gopt Opt_PIC dflags
2669 = do
2670 (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset)
2671 -- getNonClobberedReg because it needs to survive across t_code
2672 lbl <- getNewLabelNat
2673 dflags <- getDynFlags
2674 let is32bit = target32Bit (targetPlatform dflags)
2675 os = platformOS (targetPlatform dflags)
2676 -- Might want to use .rodata.<function we're in> instead, but as
2677 -- long as it's something unique it'll work out since the
2678 -- references to the jump table are in the appropriate section.
2679 rosection = case os of
2680 -- on Mac OS X/x86_64, put the jump table in the text section to
2681 -- work around a limitation of the linker.
2682 -- ld64 is unable to handle the relocations for
2683 -- .quad L1 - L0
2684 -- if L0 is not preceded by a non-anonymous label in its section.
2685 OSDarwin | not is32bit -> Section Text lbl
2686 _ -> Section ReadOnlyData lbl
2687 dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2688 (tableReg,t_code) <- getSomeReg $ dynRef
2689 let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
2690 (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
2691
2692 offsetReg <- getNewRegNat (intFormat (wordWidth dflags))
2693 return $ if is32bit || os == OSDarwin
2694 then e_code `appOL` t_code `appOL` toOL [
2695 ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
2696 JMP_TBL (OpReg tableReg) ids rosection lbl
2697 ]
2698 else -- HACK: On x86_64 binutils<2.17 is only able to generate
2699 -- PC32 relocations, hence we only get 32-bit offsets in
2700 -- the jump table. As these offsets are always negative
2701 -- we need to properly sign extend them to 64-bit. This
2702 -- hack should be removed in conjunction with the hack in
2703 -- PprMach.hs/pprDataItem once binutils 2.17 is standard.
2704 e_code `appOL` t_code `appOL` toOL [
2705 MOVSxL II32 op (OpReg offsetReg),
2706 ADD (intFormat (wordWidth dflags))
2707 (OpReg offsetReg)
2708 (OpReg tableReg),
2709 JMP_TBL (OpReg tableReg) ids rosection lbl
2710 ]
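-- Illustratively (x86-64 PIC, not part of the original), the second
-- branch emits roughly
--
--     movslq (%table,%reg,8), %offset  -- sign-extend the 32-bit entry
--     addq   %offset, %table
--     jmp    *%table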
2711 | otherwise
2712 = do
2713 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2714 lbl <- getNewLabelNat
2715 let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
2716 code = e_code `appOL` toOL [
2717 JMP_TBL op ids (Section ReadOnlyData lbl) lbl
2718 ]
2719 return code
2720 where (offset, ids) = switchTargetsToTable targets
2721
2722 generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, CmmStatics) Instr)
2723 generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
2724 = Just (createJumpTable dflags ids section lbl)
2725 generateJumpTableForInstr _ _ = Nothing
2726
2727 createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
2728 -> GenCmmDecl (Alignment, CmmStatics) h g
2729 createJumpTable dflags ids section lbl
2730 = let jumpTable
2731 | gopt Opt_PIC dflags =
2732 let jumpTableEntryRel Nothing
2733 = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2734 jumpTableEntryRel (Just blockid)
2735 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
2736 where blockLabel = mkAsmTempLabel (getUnique blockid)
2737 in map jumpTableEntryRel ids
2738 | otherwise = map (jumpTableEntry dflags) ids
2739 in CmmData section (1, Statics lbl jumpTable)
2740
2741 extractUnwindPoints :: [Instr] -> [UnwindPoint]
2742 extractUnwindPoints instrs =
2743 [ UnwindPoint lbl unwinds | UNWIND lbl unwinds <- instrs]
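-- For example (an illustrative sketch): an instruction stream containing
-- 'UNWIND lbl tbl', where tbl maps Sp to an expression like 'UwReg Sp 0',
-- contributes 'UnwindPoint lbl tbl' to the result, from which DWARF
-- unwind information is later emitted.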
2744
2745 -- -----------------------------------------------------------------------------
2746 -- 'condIntReg' and 'condFltReg': condition codes into registers
2747
2748 -- Turn those condition codes into integers now (when they appear on
2749 -- the right hand side of an assignment).
2750 --
2751 -- (If applicable) Do not fill the delay slots here; you will confuse the
2752 -- register allocator.
2753
2754 condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2755
2756 condIntReg cond x y = do
2757 CondCode _ cond cond_code <- condIntCode cond x y
2758 tmp <- getNewRegNat II8
2759 let
2760 code dst = cond_code `appOL` toOL [
2761 SETCC cond (OpReg tmp),
2762 MOVZxL II8 (OpReg tmp) (OpReg dst)
2763 ]
2764 return (Any II32 code)
2765
2766
2767
2768 condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
2769 condFltReg is32Bit cond x y = if_sse2 condFltReg_sse2 condFltReg_x87
2770 where
2771 condFltReg_x87 = do
2772 CondCode _ cond cond_code <- condFltCode cond x y
2773 tmp <- getNewRegNat II8
2774 let
2775 code dst = cond_code `appOL` toOL [
2776 SETCC cond (OpReg tmp),
2777 MOVZxL II8 (OpReg tmp) (OpReg dst)
2778 ]
2779 return (Any II32 code)
2780
2781 condFltReg_sse2 = do
2782 CondCode _ cond cond_code <- condFltCode cond x y
2783 tmp1 <- getNewRegNat (archWordFormat is32Bit)
2784 tmp2 <- getNewRegNat (archWordFormat is32Bit)
2785 let
2786 -- We have to worry about unordered operands (eg. comparisons
2787 -- against NaN). If the operands are unordered, the comparison
2788 -- sets the parity flag, carry flag and zero flag.
2789 -- All comparisons are supposed to return false for unordered
2790 -- operands except for !=, which returns true.
2791 --
2792 -- Optimisation: we don't have to test the parity flag if we
2793 -- know the test has already excluded the unordered case: eg >
2794 -- and >= test for a zero carry flag, which can only occur for
2795 -- ordered operands.
2796 --
2797 -- ToDo: by reversing comparisons we could avoid testing the
2798 -- parity flag in more cases.
2799
2800 code dst =
2801 cond_code `appOL`
2802 (case cond of
2803 NE -> or_unordered dst
2804 GU -> plain_test dst
2805 GEU -> plain_test dst
2806 _ -> and_ordered dst)
2807
2808 plain_test dst = toOL [
2809 SETCC cond (OpReg tmp1),
2810 MOVZxL II8 (OpReg tmp1) (OpReg dst)
2811 ]
2812 or_unordered dst = toOL [
2813 SETCC cond (OpReg tmp1),
2814 SETCC PARITY (OpReg tmp2),
2815 OR II8 (OpReg tmp1) (OpReg tmp2),
2816 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2817 ]
2818 and_ordered dst = toOL [
2819 SETCC cond (OpReg tmp1),
2820 SETCC NOTPARITY (OpReg tmp2),
2821 AND II8 (OpReg tmp1) (OpReg tmp2),
2822 MOVZxL II8 (OpReg tmp2) (OpReg dst)
2823 ]
2824 return (Any II32 code)
2825
2826
2827 -- -----------------------------------------------------------------------------
2828 -- 'trivial*Code': deal with trivial instructions
2829
2830 -- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2831 -- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2832 -- Only look for constants on the right hand side, because that's
2833 -- where the generic optimizer will have put them.
2834
2835 -- Similarly, for unary instructions, we don't have to worry about
2836 -- matching an integer literal as the argument, because genericOpt
2837 -- will already have handled the constant-folding.
2838
2839
2840 {-
2841 The Rules of the Game are:
2842
2843 * You cannot assume anything about the destination register dst;
2844 it may be anything, including a fixed reg.
2845
2846 * You may compute an operand into a fixed reg, but you may not
2847 subsequently change the contents of that fixed reg. If you
2848 want to do so, first copy the value either to a temporary
2849 or into dst. You are free to modify dst even if it happens
2850 to be a fixed reg -- that's not your problem.
2851
2852 * You cannot assume that a fixed reg will stay live over an
2853 arbitrary computation. The same applies to the dst reg.
2854
2855 * Temporary regs obtained from getNewRegNat are distinct from
2856 each other and from all other regs, and stay live over
2857 arbitrary computations.
2858
2859 --------------------
2860
2861 SDM's version of The Rules:
2862
2863 * If getRegister returns Any, that means it can generate correct
2864 code which places the result in any register, period. Even if that
2865 register happens to be read during the computation.
2866
2867 Corollary #1: this means that if you are generating code for an
2868 operation with two arbitrary operands, you cannot assign the result
2869 of the first operand into the destination register before computing
2870 the second operand. The second operand might require the old value
2871 of the destination register.
2872
2873 Corollary #2: A function might be able to generate more efficient
2874 code if it knows the destination register is a new temporary (and
2875 therefore not read by any of the sub-computations).
2876
2877 * If getRegister returns Any, then the code it generates may modify only:
2878 (a) fresh temporaries
2879 (b) the destination register
2880 (c) known registers (eg. %ecx is used by shifts)
2881 In particular, it may *not* modify global registers, unless the global
2882 register happens to be the destination register.
2883 -}
2884
2885 trivialCode :: Width -> (Operand -> Operand -> Instr)
2886 -> Maybe (Operand -> Operand -> Instr)
2887 -> CmmExpr -> CmmExpr -> NatM Register
2888 trivialCode width instr m a b
2889 = do is32Bit <- is32BitPlatform
2890 trivialCode' is32Bit width instr m a b
2891
2892 trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
2893 -> Maybe (Operand -> Operand -> Instr)
2894 -> CmmExpr -> CmmExpr -> NatM Register
2895 trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
2896 | is32BitLit is32Bit lit_a = do
2897 b_code <- getAnyReg b
2898 let
2899 code dst
2900 = b_code dst `snocOL`
2901 revinstr (OpImm (litToImm lit_a)) (OpReg dst)
2902 return (Any (intFormat width) code)
2903
2904 trivialCode' _ width instr _ a b
2905 = genTrivialCode (intFormat width) instr a b
2906
2907 -- This is re-used for floating pt instructions too.
2908 genTrivialCode :: Format -> (Operand -> Operand -> Instr)
2909 -> CmmExpr -> CmmExpr -> NatM Register
2910 genTrivialCode rep instr a b = do
2911 (b_op, b_code) <- getNonClobberedOperand b
2912 a_code <- getAnyReg a
2913 tmp <- getNewRegNat rep
2914 let
2915 -- We want the value of b to stay alive across the computation of a.
2916 -- But, we want to calculate a straight into the destination register,
2917 -- because the instruction only has two operands (dst := dst `op` src).
2918 -- The troublesome case is when the result of b is in the same register
2919 -- as the destination reg. In this case, we have to save b in a
2920 -- new temporary across the computation of a.
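-- For instance (illustrative): generating 'dst := a + b' when b's
-- value happens to land in dst gives
--
--     MOV rep b_op (OpReg tmp)    -- save b before a overwrites dst
--     ... a computed into dst ...
--     ADD rep (OpReg tmp) (OpReg dst)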
2921 code dst
2922 | dst `regClashesWithOp` b_op =
2923 b_code `appOL`
2924 unitOL (MOV rep b_op (OpReg tmp)) `appOL`
2925 a_code dst `snocOL`
2926 instr (OpReg tmp) (OpReg dst)
2927 | otherwise =
2928 b_code `appOL`
2929 a_code dst `snocOL`
2930 instr b_op (OpReg dst)
2931 return (Any rep code)
2932
2933 regClashesWithOp :: Reg -> Operand -> Bool
2934 reg `regClashesWithOp` OpReg reg2 = reg == reg2
2935 reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
2936 _ `regClashesWithOp` _ = False
2937
2938 -----------
2939
2940 trivialUCode :: Format -> (Operand -> Instr)
2941 -> CmmExpr -> NatM Register
2942 trivialUCode rep instr x = do
2943 x_code <- getAnyReg x
2944 let
2945 code dst =
2946 x_code dst `snocOL`
2947 instr (OpReg dst)
2948 return (Any rep code)
2949
2950 -----------
2951
2952 trivialFCode_x87 :: (Format -> Reg -> Reg -> Reg -> Instr)
2953 -> CmmExpr -> CmmExpr -> NatM Register
2954 trivialFCode_x87 instr x y = do
2955 (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too
2956 (y_reg, y_code) <- getSomeReg y
2957 let
2958 format = FF80 -- always, on x87
2959 code dst =
2960 x_code `appOL`
2961 y_code `snocOL`
2962 instr format x_reg y_reg dst
2963 return (Any format code)
2964
2965 trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
2966 -> CmmExpr -> CmmExpr -> NatM Register
2967 trivialFCode_sse2 pk instr x y
2968 = genTrivialCode format (instr format) x y
2969 where format = floatFormat pk
2970
2971
2972 trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
2973 trivialUFCode format instr x = do
2974 (x_reg, x_code) <- getSomeReg x
2975 let
2976 code dst =
2977 x_code `snocOL`
2978 instr x_reg dst
2979 return (Any format code)
2980
2981
2982 --------------------------------------------------------------------------------
2983 coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2984 coerceInt2FP from to x = if_sse2 coerce_sse2 coerce_x87
2985 where
2986 coerce_x87 = do
2987 (x_reg, x_code) <- getSomeReg x
2988 let
2989 opc = case to of W32 -> GITOF; W64 -> GITOD;
2990 n -> panic $ "coerceInt2FP.x87: unhandled width ("
2991 ++ show n ++ ")"
2992 code dst = x_code `snocOL` opc x_reg dst
2993 -- ToDo: works for non-II32 reps?
2994 return (Any FF80 code)
2995
2996 coerce_sse2 = do
2997 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
2998 let
2999 opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
3000 n -> panic $ "coerceInt2FP.sse: unhandled width ("
3001 ++ show n ++ ")"
3002 code dst = x_code `snocOL` opc (intFormat from) x_op dst
3003 return (Any (floatFormat to) code)
3004 -- works even if the destination rep is <II32
3005
3006 --------------------------------------------------------------------------------
3007 coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
3008 coerceFP2Int from to x = if_sse2 coerceFP2Int_sse2 coerceFP2Int_x87
3009 where
3010 coerceFP2Int_x87 = do
3011 (x_reg, x_code) <- getSomeReg x
3012 let
3013 opc = case from of W32 -> GFTOI; W64 -> GDTOI
3014 n -> panic $ "coerceFP2Int.x87: unhandled width ("
3015 ++ show n ++ ")"
3016 code dst = x_code `snocOL` opc x_reg dst
3017 -- ToDo: works for non-II32 reps?
3018 return (Any (intFormat to) code)
3019
3020 coerceFP2Int_sse2 = do
3021 (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
3022 let
3023 opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
3024 n -> panic $ "coerceFP2Int.sse: unhandled width ("
3025 ++ show n ++ ")"
3026 code dst = x_code `snocOL` opc (intFormat to) x_op dst
3027 return (Any (intFormat to) code)
3028 -- works even if the destination rep is <II32
3029
3030
3031 --------------------------------------------------------------------------------
3032 coerceFP2FP :: Width -> CmmExpr -> NatM Register
3033 coerceFP2FP to x = do
3034 use_sse2 <- sse2Enabled
3035 (x_reg, x_code) <- getSomeReg x
3036 let
3037 opc | use_sse2 = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
3038 n -> panic $ "coerceFP2FP: unhandled width ("
3039 ++ show n ++ ")"
3040 | otherwise = GDTOF
3041 code dst = x_code `snocOL` opc x_reg dst
3042 return (Any (if use_sse2 then floatFormat to else FF80) code)
3043
3044 --------------------------------------------------------------------------------
3045
3046 sse2NegCode :: Width -> CmmExpr -> NatM Register
3047 sse2NegCode w x = do
3048 let fmt = floatFormat w
3049 x_code <- getAnyReg x
3050 -- This is how gcc does it, so it can't be that bad:
3051 let
3052 const | FF32 <- fmt = CmmInt 0x80000000 W32
3053 | otherwise = CmmInt 0x8000000000000000 W64
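-- For example (illustrative): negating 1.0 :: Float XORs its IEEE-754
-- bit pattern 0x3F800000 with the mask 0x80000000, giving 0xBF800000,
-- which is exactly -1.0.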
3054 Amode amode amode_code <- memConstant (widthInBytes w) const
3055 tmp <- getNewRegNat fmt
3056 let
3057 code dst = x_code dst `appOL` amode_code `appOL` toOL [
3058 MOV fmt (OpAddr amode) (OpReg tmp),
3059 XOR fmt (OpReg tmp) (OpReg dst)
3060 ]
3061 --
3062 return (Any fmt code)
3063
3064 isVecExpr :: CmmExpr -> Bool
3065 isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
3066 isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
3067 isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
3068 isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
3069 isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
3070 isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
3071 isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
3072 isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
3073 isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
3074 isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
3075 isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
3076 isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
3077 isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
3078 isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
3079 isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
3080 isVecExpr (CmmMachOp _ [e]) = isVecExpr e
3081 isVecExpr _ = False
3082
3083 needLlvm :: NatM a
3084 needLlvm =
3085 sorry $ unlines ["The native code generator does not support vector"
3086 ,"instructions. Please use -fllvm."]