Add new mbmi and mbmi2 compiler flags
[ghc.git] / compiler / cmm / CmmCallConv.hs
1 {-# LANGUAGE CPP #-}
2
3 module CmmCallConv (
4 ParamLocation(..),
5 assignArgumentsPos,
6 assignStack,
7 realArgRegsCover
8 ) where
9
10 #include "HsVersions.h"
11
12 import GhcPrelude
13
14 import CmmExpr
15 import SMRep
16 import Cmm (Convention(..))
17 import PprCmm ()
18
19 import DynFlags
20 import Platform
21 import Outputable
22
23 -- Calculate the 'GlobalReg' or stack locations for function call
24 -- parameters as used by the Cmm calling convention.
25
26 data ParamLocation
27 = RegisterParam GlobalReg
28 | StackParam ByteOff
29
30 instance Outputable ParamLocation where
31 ppr (RegisterParam g) = ppr g
32 ppr (StackParam p) = ppr p
33
34 -- |
35 -- Given a list of arguments, and a function that tells their types,
36 -- return a list showing where each argument is passed
37 --
38 assignArgumentsPos :: DynFlags
39 -> ByteOff -- stack offset to start with
40 -> Convention
41 -> (a -> CmmType) -- how to get a type from an arg
42 -> [a] -- args
43 -> (
44 ByteOff -- bytes of stack args
45 , [(a, ParamLocation)] -- args and locations
46 )
47
48 assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
49 where
50 regs = case (reps, conv) of
51 (_, NativeNodeCall) -> getRegsWithNode dflags
52 (_, NativeDirectCall) -> getRegsWithoutNode dflags
53 ([_], NativeReturn) -> allRegs dflags
54 (_, NativeReturn) -> getRegsWithNode dflags
55 -- GC calling convention *must* put values in registers
56 (_, GC) -> allRegs dflags
57 (_, Slow) -> nodeOnly
58 -- The calling conventions first assign arguments to registers,
59 -- then switch to the stack when we first run out of registers
60 -- (even if there are still available registers for args of a
61 -- different type). When returning an unboxed tuple, we also
62 -- separate the stack arguments by pointerhood.
63 (reg_assts, stk_args) = assign_regs [] reps regs
64 (stk_off, stk_assts) = assignStack dflags off arg_ty stk_args
65 assignments = reg_assts ++ stk_assts
66
67 assign_regs assts [] _ = (assts, [])
68 assign_regs assts (r:rs) regs | isVecType ty = vec
69 | isFloatType ty = float
70 | otherwise = int
71 where vec = case (w, regs) of
72 (W128, (vs, fs, ds, ls, s:ss))
73 | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
74 (W256, (vs, fs, ds, ls, s:ss))
75 | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
76 (W512, (vs, fs, ds, ls, s:ss))
77 | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
78 _ -> (assts, (r:rs))
79 float = case (w, regs) of
80 (W32, (vs, fs, ds, ls, s:ss))
81 | passFloatInXmm -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
82 (W32, (vs, f:fs, ds, ls, ss))
83 | not passFloatInXmm -> k (RegisterParam f, (vs, fs, ds, ls, ss))
84 (W64, (vs, fs, ds, ls, s:ss))
85 | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss))
86 (W64, (vs, fs, d:ds, ls, ss))
87 | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss))
88 (W80, _) -> panic "F80 unsupported register type"
89 _ -> (assts, (r:rs))
90 int = case (w, regs) of
91 (W128, _) -> panic "W128 unsupported register type"
92 (_, (v:vs, fs, ds, ls, ss)) | widthInBits w <= widthInBits (wordWidth dflags)
93 -> k (RegisterParam (v gcp), (vs, fs, ds, ls, ss))
94 (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags)
95 -> k (RegisterParam l, (vs, fs, ds, ls, ss))
96 _ -> (assts, (r:rs))
97 k (asst, regs') = assign_regs ((r, asst) : assts) rs regs'
98 ty = arg_ty r
99 w = typeWidth ty
100 gcp | isGcPtrType ty = VGcPtr
101 | otherwise = VNonGcPtr
102 passFloatInXmm = passFloatArgsInXmm dflags
103
104 passFloatArgsInXmm :: DynFlags -> Bool
105 passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
106 ArchX86_64 -> True
107 _ -> False
108
109 -- On X86_64, we always pass 128-bit-wide vectors in registers. On 32-bit X86
110 -- and for all larger vector sizes on X86_64, LLVM's GHC calling convention
111 -- does not currently pass vectors in registers. The patch to update the GHC
112 -- calling convention to support passing SIMD vectors in registers is small and
113 -- well-contained, so it may make it into LLVM 3.4. The hidden
114 -- -fllvm-pass-vectors-in-regs flag will generate LLVM code that attempts to
115 -- pass vectors in registers, but it must only be used with a version of LLVM
116 -- that has an updated GHC calling convention.
117 passVectorInReg :: Width -> DynFlags -> Bool
118 passVectorInReg W128 dflags = case platformArch (targetPlatform dflags) of
119 ArchX86_64 -> True
120 _ -> gopt Opt_LlvmPassVectorsInRegisters dflags
121 passVectorInReg _ dflags = gopt Opt_LlvmPassVectorsInRegisters dflags
122
123 assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
124 -> (
125 ByteOff -- bytes of stack args
126 , [(a, ParamLocation)] -- args and locations
127 )
128 assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args)
129 where
130 assign_stk offset assts [] = (offset, assts)
131 assign_stk offset assts (r:rs)
132 = assign_stk off' ((r, StackParam off') : assts) rs
133 where w = typeWidth (arg_ty r)
134 off' = offset + size
135 -- Stack arguments always take a whole number of words, we never
136 -- pack them unlike constructor fields.
137 size = roundUpToWords dflags (widthInBytes w)
138
139 -----------------------------------------------------------------------------
140 -- Local information about the registers available
141
142 type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs.
143 , [GlobalReg] -- floats
144 , [GlobalReg] -- doubles
145 , [GlobalReg] -- longs (int64 and word64)
146 , [Int] -- XMM (floats and doubles)
147 )
148
149 -- Vanilla registers can contain pointers, Ints, Chars.
150 -- Floats and doubles have separate register supplies.
151 --
152 -- We take these register supplies from the *real* registers, i.e. those
153 -- that are guaranteed to map to machine registers.
154
155 getRegsWithoutNode, getRegsWithNode :: DynFlags -> AvailRegs
156 getRegsWithoutNode dflags =
157 ( filter (\r -> r VGcPtr /= node) (realVanillaRegs dflags)
158 , realFloatRegs dflags
159 , realDoubleRegs dflags
160 , realLongRegs dflags
161 , realXmmRegNos dflags)
162
163 -- getRegsWithNode uses R1/node even if it isn't a register
164 getRegsWithNode dflags =
165 ( if null (realVanillaRegs dflags)
166 then [VanillaReg 1]
167 else realVanillaRegs dflags
168 , realFloatRegs dflags
169 , realDoubleRegs dflags
170 , realLongRegs dflags
171 , realXmmRegNos dflags)
172
173 allFloatRegs, allDoubleRegs, allLongRegs :: DynFlags -> [GlobalReg]
174 allVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg]
175 allXmmRegs :: DynFlags -> [Int]
176
177 allVanillaRegs dflags = map VanillaReg $ regList (mAX_Vanilla_REG dflags)
178 allFloatRegs dflags = map FloatReg $ regList (mAX_Float_REG dflags)
179 allDoubleRegs dflags = map DoubleReg $ regList (mAX_Double_REG dflags)
180 allLongRegs dflags = map LongReg $ regList (mAX_Long_REG dflags)
181 allXmmRegs dflags = regList (mAX_XMM_REG dflags)
182
183 realFloatRegs, realDoubleRegs, realLongRegs :: DynFlags -> [GlobalReg]
184 realVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg]
185 realXmmRegNos :: DynFlags -> [Int]
186
187 realVanillaRegs dflags = map VanillaReg $ regList (mAX_Real_Vanilla_REG dflags)
188 realFloatRegs dflags = map FloatReg $ regList (mAX_Real_Float_REG dflags)
189 realDoubleRegs dflags = map DoubleReg $ regList (mAX_Real_Double_REG dflags)
190 realLongRegs dflags = map LongReg $ regList (mAX_Real_Long_REG dflags)
191
192 realXmmRegNos dflags
193 | isSse2Enabled dflags = regList (mAX_Real_XMM_REG dflags)
194 | otherwise = []
195
196 regList :: Int -> [Int]
197 regList n = [1 .. n]
198
199 allRegs :: DynFlags -> AvailRegs
200 allRegs dflags = (allVanillaRegs dflags,
201 allFloatRegs dflags,
202 allDoubleRegs dflags,
203 allLongRegs dflags,
204 allXmmRegs dflags)
205
206 nodeOnly :: AvailRegs
207 nodeOnly = ([VanillaReg 1], [], [], [], [])
208
209 -- This returns the set of global registers that *cover* the machine registers
210 -- used for argument passing. On platforms where registers can overlap---right
211 -- now just x86-64, where Float and Double registers overlap---passing this set
212 -- of registers is guaranteed to preserve the contents of all live registers. We
213 -- only use this functionality in hand-written C-- code in the RTS.
214 realArgRegsCover :: DynFlags -> [GlobalReg]
215 realArgRegsCover dflags
216 | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++
217 realLongRegs dflags ++
218 map XmmReg (realXmmRegNos dflags)
219 | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++
220 realFloatRegs dflags ++
221 realDoubleRegs dflags ++
222 realLongRegs dflags ++
223 map XmmReg (realXmmRegNos dflags)