Fix #11407.
[ghc.git] / compiler / cmm / CmmCallConv.hs
1 {-# LANGUAGE CPP #-}
2
3 module CmmCallConv (
4 ParamLocation(..),
5 assignArgumentsPos,
6 assignStack,
7 realArgRegsCover
8 ) where
9
10 #include "HsVersions.h"
11
12 import CmmExpr
13 import SMRep
14 import Cmm (Convention(..))
15 import PprCmm ()
16
17 import DynFlags
18 import Platform
19 import Outputable
20
21 -- Calculate the 'GlobalReg' or stack locations for function call
22 -- parameters as used by the Cmm calling convention.
23
24 data ParamLocation
25 = RegisterParam GlobalReg
26 | StackParam ByteOff
27
28 instance Outputable ParamLocation where
29 ppr (RegisterParam g) = ppr g
30 ppr (StackParam p) = ppr p
31
32 -- |
33 -- Given a list of arguments, and a function that tells their types,
34 -- return a list showing where each argument is passed
35 --
36 assignArgumentsPos :: DynFlags
37 -> ByteOff -- stack offset to start with
38 -> Convention
39 -> (a -> CmmType) -- how to get a type from an arg
40 -> [a] -- args
41 -> (
42 ByteOff -- bytes of stack args
43 , [(a, ParamLocation)] -- args and locations
44 )
45
46 assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
47 where
48 regs = case (reps, conv) of
49 (_, NativeNodeCall) -> getRegsWithNode dflags
50 (_, NativeDirectCall) -> getRegsWithoutNode dflags
51 ([_], NativeReturn) -> allRegs dflags
52 (_, NativeReturn) -> getRegsWithNode dflags
53 -- GC calling convention *must* put values in registers
54 (_, GC) -> allRegs dflags
55 (_, Slow) -> nodeOnly
56 -- The calling conventions first assign arguments to registers,
57 -- then switch to the stack when we first run out of registers
58 -- (even if there are still available registers for args of a
59 -- different type). When returning an unboxed tuple, we also
60 -- separate the stack arguments by pointerhood.
61 (reg_assts, stk_args) = assign_regs [] reps regs
62 (stk_off, stk_assts) = assignStack dflags off arg_ty stk_args
63 assignments = reg_assts ++ stk_assts
64
65 assign_regs assts [] _ = (assts, [])
66 assign_regs assts (r:rs) regs | isVecType ty = vec
67 | isFloatType ty = float
68 | otherwise = int
69 where vec = case (w, regs) of
70 (W128, (vs, fs, ds, ls, s:ss))
71 | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
72 (W256, (vs, fs, ds, ls, s:ss))
73 | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
74 (W512, (vs, fs, ds, ls, s:ss))
75 | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
76 _ -> (assts, (r:rs))
77 float = case (w, regs) of
78 (W32, (vs, fs, ds, ls, s:ss))
79 | passFloatInXmm -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
80 (W32, (vs, f:fs, ds, ls, ss))
81 | not passFloatInXmm -> k (RegisterParam f, (vs, fs, ds, ls, ss))
82 (W64, (vs, fs, ds, ls, s:ss))
83 | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss))
84 (W64, (vs, fs, d:ds, ls, ss))
85 | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss))
86 (W80, _) -> panic "F80 unsupported register type"
87 _ -> (assts, (r:rs))
88 int = case (w, regs) of
89 (W128, _) -> panic "W128 unsupported register type"
90 (_, (v:vs, fs, ds, ls, ss)) | widthInBits w <= widthInBits (wordWidth dflags)
91 -> k (RegisterParam (v gcp), (vs, fs, ds, ls, ss))
92 (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags)
93 -> k (RegisterParam l, (vs, fs, ds, ls, ss))
94 _ -> (assts, (r:rs))
95 k (asst, regs') = assign_regs ((r, asst) : assts) rs regs'
96 ty = arg_ty r
97 w = typeWidth ty
98 gcp | isGcPtrType ty = VGcPtr
99 | otherwise = VNonGcPtr
100 passFloatInXmm = passFloatArgsInXmm dflags
101
102 passFloatArgsInXmm :: DynFlags -> Bool
103 passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of
104 ArchX86_64 -> True
105 _ -> False
106
107 -- On X86_64, we always pass 128-bit-wide vectors in registers. On 32-bit X86
108 -- and for all larger vector sizes on X86_64, LLVM's GHC calling convention
109 -- does not currently pass vectors in registers. The patch to update the GHC
110 -- calling convention to support passing SIMD vectors in registers is small and
111 -- well-contained, so it may make it into LLVM 3.4. The hidden
112 -- -fllvm-pass-vectors-in-regs flag will generate LLVM code that attempts to
113 -- pass vectors in registers, but it must only be used with a version of LLVM
114 -- that has an updated GHC calling convention.
115 passVectorInReg :: Width -> DynFlags -> Bool
116 passVectorInReg W128 dflags = case platformArch (targetPlatform dflags) of
117 ArchX86_64 -> True
118 _ -> gopt Opt_LlvmPassVectorsInRegisters dflags
119 passVectorInReg _ dflags = gopt Opt_LlvmPassVectorsInRegisters dflags
120
121 assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a]
122 -> (
123 ByteOff -- bytes of stack args
124 , [(a, ParamLocation)] -- args and locations
125 )
126 assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args)
127 where
128 assign_stk offset assts [] = (offset, assts)
129 assign_stk offset assts (r:rs)
130 = assign_stk off' ((r, StackParam off') : assts) rs
131 where w = typeWidth (arg_ty r)
132 size = (((widthInBytes w - 1) `div` word_size) + 1) * word_size
133 off' = offset + size
134 word_size = wORD_SIZE dflags
135
136 -----------------------------------------------------------------------------
137 -- Local information about the registers available
138
139 type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs.
140 , [GlobalReg] -- floats
141 , [GlobalReg] -- doubles
142 , [GlobalReg] -- longs (int64 and word64)
143 , [Int] -- XMM (floats and doubles)
144 )
145
146 -- Vanilla registers can contain pointers, Ints, Chars.
147 -- Floats and doubles have separate register supplies.
148 --
149 -- We take these register supplies from the *real* registers, i.e. those
150 -- that are guaranteed to map to machine registers.
151
152 getRegsWithoutNode, getRegsWithNode :: DynFlags -> AvailRegs
153 getRegsWithoutNode dflags =
154 ( filter (\r -> r VGcPtr /= node) (realVanillaRegs dflags)
155 , realFloatRegs dflags
156 , realDoubleRegs dflags
157 , realLongRegs dflags
158 , realXmmRegNos dflags)
159
160 -- getRegsWithNode uses R1/node even if it isn't a register
161 getRegsWithNode dflags =
162 ( if null (realVanillaRegs dflags)
163 then [VanillaReg 1]
164 else realVanillaRegs dflags
165 , realFloatRegs dflags
166 , realDoubleRegs dflags
167 , realLongRegs dflags
168 , realXmmRegNos dflags)
169
170 allFloatRegs, allDoubleRegs, allLongRegs :: DynFlags -> [GlobalReg]
171 allVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg]
172 allXmmRegs :: DynFlags -> [Int]
173
174 allVanillaRegs dflags = map VanillaReg $ regList (mAX_Vanilla_REG dflags)
175 allFloatRegs dflags = map FloatReg $ regList (mAX_Float_REG dflags)
176 allDoubleRegs dflags = map DoubleReg $ regList (mAX_Double_REG dflags)
177 allLongRegs dflags = map LongReg $ regList (mAX_Long_REG dflags)
178 allXmmRegs dflags = regList (mAX_XMM_REG dflags)
179
180 realFloatRegs, realDoubleRegs, realLongRegs :: DynFlags -> [GlobalReg]
181 realVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg]
182 realXmmRegNos :: DynFlags -> [Int]
183
184 realVanillaRegs dflags = map VanillaReg $ regList (mAX_Real_Vanilla_REG dflags)
185 realFloatRegs dflags = map FloatReg $ regList (mAX_Real_Float_REG dflags)
186 realDoubleRegs dflags = map DoubleReg $ regList (mAX_Real_Double_REG dflags)
187 realLongRegs dflags = map LongReg $ regList (mAX_Real_Long_REG dflags)
188
189 realXmmRegNos dflags
190 | isSse2Enabled dflags = regList (mAX_Real_XMM_REG dflags)
191 | otherwise = []
192
193 regList :: Int -> [Int]
194 regList n = [1 .. n]
195
196 allRegs :: DynFlags -> AvailRegs
197 allRegs dflags = (allVanillaRegs dflags,
198 allFloatRegs dflags,
199 allDoubleRegs dflags,
200 allLongRegs dflags,
201 allXmmRegs dflags)
202
203 nodeOnly :: AvailRegs
204 nodeOnly = ([VanillaReg 1], [], [], [], [])
205
206 -- This returns the set of global registers that *cover* the machine registers
207 -- used for argument passing. On platforms where registers can overlap---right
208 -- now just x86-64, where Float and Double registers overlap---passing this set
209 -- of registers is guaranteed to preserve the contents of all live registers. We
210 -- only use this functionality in hand-written C-- code in the RTS.
211 realArgRegsCover :: DynFlags -> [GlobalReg]
212 realArgRegsCover dflags
213 | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++
214 realLongRegs dflags ++
215 map XmmReg (realXmmRegNos dflags)
216 | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++
217 realFloatRegs dflags ++
218 realDoubleRegs dflags ++
219 realLongRegs dflags ++
220 map XmmReg (realXmmRegNos dflags)