Pass Platform down to halfWordWidth
[ghc.git] / compiler / cmm / CmmType.hs
1
2 module CmmType
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13 , halfWordMask
14 , narrowU, narrowS
15 )
16 where
17
18 #include "HsVersions.h"
19
20 import Constants
21 import FastString
22 import Outputable
23 import Platform
24
25 import Data.Word
26 import Data.Int
27
28 -----------------------------------------------------------------------------
29 -- CmmType
30 -----------------------------------------------------------------------------
31
32 -- NOTE: CmmType is an abstract type, not exported from this
33 -- module so you can easily change its representation
34 --
35 -- However Width is exported in a concrete way,
36 -- and is used extensively in pattern-matching
37
-- | The type of a Cmm value: a category ('CmmCat') paired with a 'Width'.
-- The constructor is not exported; build values with 'cmmBits',
-- 'cmmFloat' or one of the predefined constants in this module.
data CmmType
  = CmmType CmmCat Width
40
-- | The "category" of a 'CmmType' (not exported).
-- See Note [Signed vs unsigned] at the end
data CmmCat
  = GcPtrCat   -- GC pointer
  | BitsCat    -- Non-pointer
  | FloatCat   -- Float
  deriving (Eq)
47
-- Rendered as the category tag immediately followed by the width in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType cat wid -> ppr cat <> ppr (widthInBits wid)
50
-- Floats print as "F"; every other category (GC pointers included)
-- prints as "I".
instance Outputable CmmCat where
  ppr cat = case cat of
    FloatCat -> ptext (sLit "F")
    _        -> ptext (sLit "I")
54
55 -- Why is CmmType stratified? For native code generation,
56 -- most of the time you just want to know what sort of register
57 -- to put the thing in, and for this you need to know how
58 -- many bits the thing has and whether it goes in a floating-point
59 -- register. By contrast, the distinction between GcPtr and
60 -- GcNonPtr is of interest to only a few parts of the code generator.
61
62 -------- Equality on CmmType --------------
63 -- CmmType is *not* an instance of Eq; sometimes we care about the
64 -- Gc/NonGc distinction, and sometimes we don't
65 -- So we use an explicit function to force you to think about it
-- Exact equality: both the category and the width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType catA widA) (CmmType catB widB)
  | catA /= catB = False
  | otherwise    = widA == widB
68
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers
--
-- Two types are equal here when their widths match and they are either
-- both floats or both non-floats; GcPtr vs. non-pointer is ignored.
cmmEqType_ignoring_ptrhood (CmmType catA widA) (CmmType catB widB)
  = isFloatCat catA == isFloatCat catB && widA == widB
  where
    isFloatCat FloatCat = True
    isFloatCat _        = False
79
80 --- Simple operations on CmmType -----
-- | Project the 'Width' out of a 'CmmType', discarding the category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ wid -> wid
83
-- Build a CmmType of the given width in the non-pointer bits category
-- (cmmBits) or in the float category (cmmFloat).
cmmBits, cmmFloat :: Width -> CmmType
cmmBits  wid = CmmType BitsCat  wid
cmmFloat wid = CmmType FloatCat wid
87
-------- Common CmmTypes ------------
-- Fixed-width non-pointer bit types (b*) and float types (f*)
b8, b16, b32, b64, f32, f64 :: CmmType
b8  = CmmType BitsCat  W8
b16 = CmmType BitsCat  W16
b32 = CmmType BitsCat  W32
b64 = CmmType BitsCat  W64
f32 = CmmType FloatCat W32
f64 = CmmType FloatCat W64
97
-- CmmTypes of native word widths

-- | Non-pointer bits of the native word width ('wordWidth').
bWord :: CmmType
bWord = CmmType BitsCat wordWidth
101
-- | Non-pointer bits of half the native word width, as reported by
-- 'halfWordWidth' for the given platform.
bHalfWord :: Platform -> CmmType
bHalfWord = cmmBits . halfWordWidth
104
-- | A GC pointer of the native word width ('wordWidth').
gcWord :: CmmType
gcWord =
  CmmType GcPtrCat wordWidth
107
-- Non-pointer bit types whose widths are cIntWidth and cLongWidth
-- respectively.
cInt, cLong :: CmmType
cInt  = CmmType BitsCat cIntWidth
cLong = CmmType BitsCat cLongWidth
111
112
------------ Predicates ----------------
-- Category tests; the width of the type is not inspected.
isFloatType, isGcPtrType :: CmmType -> Bool
isFloatType (CmmType cat _) = cat == FloatCat

isGcPtrType (CmmType cat _) = cat == GcPtrCat
120
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
-- isWord32 / isWord64 hold for non-float types (gc-ptrs and plain bits
-- alike) of the given width; isFloat32 / isFloat64 hold for floats only.

-- The category is examined before the width in every predicate below.
isWord64 (CmmType cat wid) = case cat of
  FloatCat -> False
  _        -> wid == W64

isWord32 (CmmType cat wid) = case cat of
  FloatCat -> False
  _        -> wid == W32

isFloat32 (CmmType cat wid) = cat == FloatCat && wid == W32

isFloat64 (CmmType cat wid) = cat == FloatCat && wid == W64
138
139 -----------------------------------------------------------------------------
140 -- Width
141 -----------------------------------------------------------------------------
142
-- | The widths a Cmm value can have.
-- The derived Ord instance is relied upon, so the constructors must stay
-- in exactly this order.
data Width
  = W8
  | W16
  | W32
  | W64
  | W80   -- Extended double-precision float,
          -- used in x86 native codegen only.
          -- (we use Ord, so it'd better be in this order)
  | W128
  deriving (Eq, Ord, Show)
149
-- A width prints as its constructor name, via mrStr.
instance Outputable Width where
  ppr = ptext . mrStr
152
-- | The literal string naming each 'Width' constructor.
mrStr :: Width -> LitString
mrStr wid = case wid of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W80  -> sLit "W80"
  W128 -> sLit "W128"
160
161
-------- Common Widths ------------
-- | Width of a native machine word, chosen from 'wORD_SIZE':
-- 4 gives 'W32' and 8 gives 'W64'.
-- Panics for any other word size; the message names this function and
-- reports the offending size (it used to name the non-existent
-- MachOp.wordRep).
wordWidth :: Width
wordWidth = case wORD_SIZE of
  4 -> W32
  8 -> W64
  n -> panic ("CmmType.wordWidth: Unknown word size " ++ show n)
167
-- | Width of half a native machine word: 'W16' when 'wORD_SIZE' is 4,
-- 'W32' when it is 8.
-- The 'Platform' argument is accepted but not yet consulted; the result
-- is still derived from the global 'wORD_SIZE'.
-- Panics for any other word size; the message names this function and
-- reports the offending size (it used to name the non-existent
-- MachOp.halfWordRep).
halfWordWidth :: Platform -> Width
halfWordWidth _ = case wORD_SIZE of
  4 -> W16
  8 -> W32
  n -> panic ("CmmType.halfWordWidth: Unknown word size " ++ show n)
173
-- | Mask with all bits of the low half-word set: 0xFFFF when 'wORD_SIZE'
-- is 4, 0xFFFFFFFF when it is 8.
-- Panics for any other word size; the message names this module and
-- function and reports the offending size.
halfWordMask :: Integer
halfWordMask = case wORD_SIZE of
  4 -> 0xFFFF
  8 -> 0xFFFFFFFF
  n -> panic ("CmmType.halfWordMask: Unknown word size " ++ show n)
178
-- cIntWidth is the Width for a C-language 'int'; it is selected at
-- compile time from the SIZEOF_INT macro.
cIntWidth :: Width
#if SIZEOF_INT == 4
cIntWidth = W32
#elif SIZEOF_INT == 8
cIntWidth = W64
#endif

-- cLongWidth is selected the same way from the SIZEOF_LONG macro.
cLongWidth :: Width
#if SIZEOF_LONG == 4
cLongWidth = W32
#elif SIZEOF_LONG == 8
cLongWidth = W64
#endif
192
-- | Number of bits in a value of the given width.
widthInBits :: Width -> Int
widthInBits wid = case wid of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W80  -> 80
  W128 -> 128
200
-- | Number of bytes in a value of the given width.
-- Every width is a whole number of bytes, so this is the bit count
-- divided by eight (W80 gives 10).
widthInBytes :: Width -> Int
widthInBytes wid = widthInBits wid `div` 8
208
-- | The width that occupies the given number of bytes.
-- Panics when no 'Width' has that size.
widthFromBytes :: Int -> Width
widthFromBytes nbytes = case nbytes of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  10 -> W80
  16 -> W128
  _  -> pprPanic "no width for given number of bytes" (ppr nbytes)
217
-- log_2 of the width in bytes, useful for generating shifts.
-- W80 is 10 bytes, which is not a power of two, so it panics.
widthInLog :: Width -> Int
widthInLog wid = case wid of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W80  -> panic "widthInLog: F80"
226
-- widening / narrowing

-- | Narrow an 'Integer' to the given width, reading the result as
-- unsigned: the value is converted through the fixed-size word type of
-- that width and back to 'Integer'.
-- Only W8, W16, W32 and W64 are supported. Any other width panics, and
-- the message names this function and the width (the old "narrowTo"
-- message was shared with narrowS and identified neither).
narrowU :: Width -> Integer -> Integer
narrowU wid x = case wid of
  W8  -> fromIntegral (fromIntegral x :: Word8)
  W16 -> fromIntegral (fromIntegral x :: Word16)
  W32 -> fromIntegral (fromIntegral x :: Word32)
  W64 -> fromIntegral (fromIntegral x :: Word64)
  _   -> pprPanic "narrowU: unsupported width" (ppr wid)
235
-- | Narrow an 'Integer' to the given width, reading the result as
-- signed: the value is converted through the fixed-size int type of
-- that width and back to 'Integer'.
-- Only W8, W16, W32 and W64 are supported. Any other width panics, and
-- the message names this function and the width (the old "narrowTo"
-- message was shared with narrowU and identified neither).
narrowS :: Width -> Integer -> Integer
narrowS wid x = case wid of
  W8  -> fromIntegral (fromIntegral x :: Int8)
  W16 -> fromIntegral (fromIntegral x :: Int16)
  W32 -> fromIntegral (fromIntegral x :: Int32)
  W64 -> fromIntegral (fromIntegral x :: Int64)
  _   -> pprPanic "narrowS: unsupported width" (ppr wid)
242
243 -------------------------------------------------------------------------
244 {- Note [Signed vs unsigned]
245 ~~~~~~~~~~~~~~~~~~~~~~~~~
246 Should a CmmType include a signed vs. unsigned distinction?
247
248 This is very much like a "hint" in C-- terminology: it isn't necessary
249 in order to generate correct code, but it might be useful in that the
250 compiler can generate better code if it has access to higher-level
251 hints about data. This is important at call boundaries, because the
252 definition of a function is not visible at all of its call sites, so
253 the compiler cannot infer the hints.
254
255 Here in Cmm, we're taking a slightly different approach. We include
256 the int vs. float hint in the CmmType, because (a) the majority of
257 platforms have a strong distinction between float and int registers,
258 and (b) we don't want to do any heavyweight hint-inference in the
259 native code backend in order to get good code. We're treating the
260 hint more like a type: our Cmm is always completely consistent with
261 respect to hints. All coercions between float and int are explicit.
262
263 What about the signed vs. unsigned hint? This information might be
264 useful if we want to keep sub-word-sized values in word-size
265 registers, which we must do if we only have word-sized registers.
266
267 On such a system, there are two straightforward conventions for
268 representing sub-word-sized values:
269
270 (a) Leave the upper bits undefined. Comparison operations must
271 sign- or zero-extend both operands before comparing them,
272 depending on whether the comparison is signed or unsigned.
273
274 (b) Always keep the values sign- or zero-extended as appropriate.
275 Arithmetic operations must narrow the result to the appropriate
276 size.
277
278 A clever compiler might not use either (a) or (b) exclusively, instead
279 it would attempt to minimize the coercions by analysis: the same kind
280 of analysis that propagates hints around. In Cmm we don't want to
281 have to do this, so we plump for having richer types and keeping the
282 type information consistent.
283
284 If signed/unsigned hints are missing from CmmType, then the only
285 choice we have is (a), because we don't know whether the result of an
286 operation should be sign- or zero-extended.
287
288 Many architectures have extending load operations, which work well
289 with (b). To make use of them with (a), you need to know whether the
290 value is going to be sign- or zero-extended by an enclosing comparison
291 (for example), which involves knowing about the context. This is
292 doable but more complex.
293
294 Further complicating the issue is foreign calls: a foreign calling
295 convention can specify that signed 8-bit quantities are passed as
296 sign-extended 32 bit quantities, for example (this is the case on the
297 PowerPC). So we *do* need sign information on foreign call arguments.
298
299 Pros for adding signed vs. unsigned to CmmType:
300
301 - It would let us use convention (b) above, and get easier
302 code generation for extending loads.
303
304 - Less information required on foreign calls.
305
306 - MachOp type would be simpler
307
308 Cons:
309
310 - More complexity
311
312 - What is the CmmType for a VanillaReg? Currently it is
313 always wordRep, but now we have to decide whether it is
314 signed or unsigned. The same VanillaReg can thus have
315 different CmmType in different parts of the program.
316
317 - Extra coercions cluttering up expressions.
318
319 Currently for GHC, the foreign call point is moot, because we do our
320 own promotion of sub-word-sized values to word-sized values. The Int8
321 type is represented by an Int# which is kept sign-extended at all times
322 (this is slightly naughty, because we're making assumptions about the
323 C calling convention rather early on in the compiler). However, given
324 this, the cons outweigh the pros.
325
326 -}
327