Merge branch 'master' into type-nats
[ghc.git] / compiler / cmm / CmmType.hs
1
2 module CmmType
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13 , narrowU, narrowS
14 )
15 where
16
17 #include "HsVersions.h"
18
19 import Constants
20 import FastString
21 import Outputable
22
23 import Data.Word
24 import Data.Int
25
26 -----------------------------------------------------------------------------
27 -- CmmType
28 -----------------------------------------------------------------------------
29
30 -- NOTE: CmmType is an abstract type: its constructor is not exported
31 -- from this module, so you can easily change its representation
32 --
33 -- However Width is exported in a concrete way,
34 -- and is used extensively in pattern-matching
35
-- The central type of this module: a category paired with a width.
-- The export list exposes the type without its constructor.
data CmmType
  = CmmType CmmCat Width
38
-- The "category" half of a CmmType.  Not exported from this module.
-- There is no signed/unsigned distinction here;
-- see Note [Signed vs unsigned] at the end of the file.
data CmmCat
  = GcPtrCat    -- GC pointer
  | BitsCat     -- Non-pointer
  | FloatCat    -- Float
  deriving (Eq)
45
-- Print a type as its category tag immediately followed by its
-- width in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType cat wid -> ppr cat <> ppr (widthInBits wid)
48
-- Category tags: "F" for floats, "I" for every other category, so
-- GC pointers and plain bits are printed identically.
instance Outputable CmmCat where
  ppr cat = case cat of
    FloatCat -> ptext (sLit "F")
    _        -> ptext (sLit "I")
-- Temp Jan 08: the more descriptive tags below are disabled
--      FloatCat -> ptext (sLit "float")
--      BitsCat  -> ptext (sLit "bits")
--      GcPtrCat -> ptext (sLit "gcptr")
56
57 -- Why is CmmType stratified? For native code generation,
58 -- most of the time you just want to know what sort of register
59 -- to put the thing in, and for this you need to know how
60 -- many bits the thing has and whether it goes in a floating-point
61 -- register. By contrast, the distinction between GcPtr and
62 -- GcNonPtr is of interest to only a few parts of the code generator.
63
64 -------- Equality on CmmType --------------
65 -- CmmType is *not* an instance of Eq; sometimes we care about the
66 -- Gc/NonGc distinction, and sometimes we don't
67 -- So we use an explicit function to force you to think about it
-- Exact equality: the categories (so GC-pointer-hood counts) and
-- the widths must both agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType catA widA) (CmmType catB widB)
  | catA == catB = widA == widB
  | otherwise    = False
70
-- Weaker equality that does not distinguish GC pointers from plain
-- bits: two types are equal when they are both floats or both
-- non-floats, and their widths agree.
-- This equality is temporary; it is used in CmmLint because the RTS
-- files are not yet well-typed with respect to pointers.
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType catA widA) (CmmType catB widB)
  = sameFloatness && widA == widB
  where
    -- True when both categories are FloatCat or neither is
    sameFloatness = isFloatCat catA == isFloatCat catB

    isFloatCat FloatCat = True
    isFloatCat _        = False
81
82 --- Simple operations on CmmType -----
-- Project the width out of a type, discarding the category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ wid -> wid
85
-- Build a non-pointer bits type, or a float type, of the given width.
cmmBits, cmmFloat :: Width -> CmmType
cmmBits  wid = CmmType BitsCat  wid
cmmFloat wid = CmmType FloatCat wid
89
90 -------- Common CmmTypes ------------
-- Non-pointer bit types (b*) and float types (f*) of fixed widths
b8, b16, b32, b64, f32, f64 :: CmmType
b8  = CmmType BitsCat  W8
b16 = CmmType BitsCat  W16
b32 = CmmType BitsCat  W32
b64 = CmmType BitsCat  W64
f32 = CmmType FloatCat W32
f64 = CmmType FloatCat W64
99
-- Types whose width is derived from the native word width:
-- a non-pointer word, a non-pointer half word, and a GC-pointer word.
bWord, bHalfWord, gcWord :: CmmType
bWord     = CmmType BitsCat  wordWidth
bHalfWord = CmmType BitsCat  halfWordWidth
gcWord    = CmmType GcPtrCat wordWidth
105
-- Non-pointer bit types with the widths given by cIntWidth and
-- cLongWidth respectively.
cInt, cLong :: CmmType
cInt  = CmmType BitsCat cIntWidth
cLong = CmmType BitsCat cLongWidth
109
110
111 ------------ Predicates ----------------
-- Category tests; the width is ignored in both cases.
isFloatType, isGcPtrType :: CmmType -> Bool
isFloatType (CmmType cat _) = cat == FloatCat

isGcPtrType (CmmType cat _) = cat == GcPtrCat
118
-- isWord32 / isWord64 hold for non-float types of that width, whether
-- or not they are GC pointers.
-- isFloat32 / isFloat64 hold for float types of that width.
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool

isWord64 (CmmType cat wid) = wid == W64 && cat `elem` [BitsCat, GcPtrCat]

isWord32 (CmmType cat wid) = wid == W32 && cat `elem` [BitsCat, GcPtrCat]

isFloat32 (CmmType cat wid) = cat == FloatCat && wid == W32

isFloat64 (CmmType cat wid) = cat == FloatCat && wid == W64
136
137 -----------------------------------------------------------------------------
138 -- Width
139 -----------------------------------------------------------------------------
140
-- The widths a Cmm value can have.
-- The derived Ord instance is used, so the constructors must stay in
-- this order.
data Width
  = W8
  | W16
  | W32
  | W64
  | W80     -- Extended double-precision float,
            -- used in x86 native codegen only.
  | W128
  deriving (Eq, Ord, Show)
147
-- A width is printed as the literal string chosen by mrStr.
instance Outputable Width where
  ppr = ptext . mrStr
150
-- The printed name of each width; it matches the constructor name.
mrStr :: Width -> LitString
mrStr wid = case wid of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W80  -> sLit "W80"
  W128 -> sLit "W128"
158
159
160 -------- Common Widths ------------
-- wordWidth is the Width matching wORD_SIZE (a size in bytes), and
-- halfWordWidth is the Width of half that size.  Only word sizes of
-- 4 and 8 bytes are handled; anything else panics, and the panic
-- message names the function in this module that failed.
wordWidth, halfWordWidth :: Width
wordWidth
  | wORD_SIZE == 4 = W32
  | wORD_SIZE == 8 = W64
  | otherwise      = panic "CmmType.wordWidth: Unknown word size"

halfWordWidth
  | wORD_SIZE == 4 = W16
  | wORD_SIZE == 8 = W32
  | otherwise      = panic "CmmType.halfWordWidth: Unknown word size"
169
-- cIntWidth is the Width for a C-language 'int', and cLongWidth the
-- Width for a C-language 'long'.  Both are selected at preprocessing
-- time from SIZEOF_INT / SIZEOF_LONG.  Only sizes of 4 and 8 bytes are
-- supported; any other value stops the build with an explicit error
-- instead of leaving the type signature without a binding.
cIntWidth, cLongWidth :: Width
#if SIZEOF_INT == 4
cIntWidth = W32
#elif SIZEOF_INT == 8
cIntWidth = W64
#else
#error CmmType.cIntWidth: unsupported SIZEOF_INT (expected 4 or 8)
#endif

#if SIZEOF_LONG == 4
cLongWidth = W32
#elif SIZEOF_LONG == 8
cLongWidth = W64
#else
#error CmmType.cLongWidth: unsupported SIZEOF_LONG (expected 4 or 8)
#endif
183
-- The number of bits in a value of the given width.
widthInBits :: Width -> Int
widthInBits wid = case wid of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W80  -> 80
  W128 -> 128
191
-- The number of bytes in a value of the given width.
widthInBytes :: Width -> Int
widthInBytes wid = case wid of
  W8   -> 1
  W16  -> 2
  W32  -> 4
  W64  -> 8
  W80  -> 10
  W128 -> 16
199
-- Inverse of widthInBytes: the Width occupying exactly the given
-- number of bytes.  Panics, reporting the byte count, when no Width
-- has that size.
widthFromBytes :: Int -> Width
widthFromBytes nbytes = case nbytes of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  10 -> W80
  16 -> W128
  _  -> pprPanic "no width for given number of bytes" (ppr nbytes)
208
-- Base-2 logarithm of the width in bytes, useful for generating
-- shifts.  W80 (10 bytes) is not a power of two, so it panics.
widthInLog :: Width -> Int
widthInLog wid = case wid of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W80  -> panic "widthInLog: F80"
217
218 -- widening / narrowing
219
-- Narrow an Integer to the given width, reading the result as
-- unsigned: the value is converted to the fixed-size WordN type of
-- that width and back to Integer.
-- Only W8, W16, W32 and W64 are supported; any other width panics,
-- reporting this function's name and the offending width.
narrowU :: Width -> Integer -> Integer
narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
narrowU w   _ = pprPanic "narrowU: unsupported width" (ppr w)
226
-- Narrow an Integer to the given width, reading the result as
-- signed: the value is converted to the fixed-size IntN type of that
-- width and back to Integer.
-- Only W8, W16, W32 and W64 are supported; any other width panics,
-- reporting this function's name and the offending width.
narrowS :: Width -> Integer -> Integer
narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
narrowS w   _ = pprPanic "narrowS: unsupported width" (ppr w)
233
234 -------------------------------------------------------------------------
235 {- Note [Signed vs unsigned]
236 ~~~~~~~~~~~~~~~~~~~~~~~~~
237 Should a CmmType include a signed vs. unsigned distinction?
238
239 This is very much like a "hint" in C-- terminology: it isn't necessary
240 in order to generate correct code, but it might be useful in that the
241 compiler can generate better code if it has access to higher-level
242 hints about data. This is important at call boundaries, because the
243 definition of a function is not visible at all of its call sites, so
244 the compiler cannot infer the hints.
245
246 Here in Cmm, we're taking a slightly different approach. We include
247 the int vs. float hint in the MachRep, because (a) the majority of
248 platforms have a strong distinction between float and int registers,
249 and (b) we don't want to do any heavyweight hint-inference in the
250 native code backend in order to get good code. We're treating the
251 hint more like a type: our Cmm is always completely consistent with
252 respect to hints. All coercions between float and int are explicit.
253
254 What about the signed vs. unsigned hint? This information might be
255 useful if we want to keep sub-word-sized values in word-size
256 registers, which we must do if we only have word-sized registers.
257
258 On such a system, there are two straightforward conventions for
259 representing sub-word-sized values:
260
261 (a) Leave the upper bits undefined. Comparison operations must
262 sign- or zero-extend both operands before comparing them,
263 depending on whether the comparison is signed or unsigned.
264
265 (b) Always keep the values sign- or zero-extended as appropriate.
266 Arithmetic operations must narrow the result to the appropriate
267 size.
268
269 A clever compiler might not use either (a) or (b) exclusively, instead
270 it would attempt to minimize the coercions by analysis: the same kind
271 of analysis that propagates hints around. In Cmm we don't want to
272 have to do this, so we plump for having richer types and keeping the
273 type information consistent.
274
275 If signed/unsigned hints are missing from MachRep, then the only
276 choice we have is (a), because we don't know whether the result of an
277 operation should be sign- or zero-extended.
278
279 Many architectures have extending load operations, which work well
280 with (b). To make use of them with (a), you need to know whether the
281 value is going to be sign- or zero-extended by an enclosing comparison
282 (for example), which involves knowing about the context. This is
283 doable but more complex.
284
285 Further complicating the issue is foreign calls: a foreign calling
286 convention can specify that signed 8-bit quantities are passed as
287 sign-extended 32 bit quantities, for example (this is the case on the
288 PowerPC). So we *do* need sign information on foreign call arguments.
289
290 Pros for adding signed vs. unsigned to MachRep:
291
292 - It would let us use convention (b) above, and get easier
293 code generation for extending loads.
294
295 - Less information required on foreign calls.
296
297 - MachOp type would be simpler
298
299 Cons:
300
301 - More complexity
302
303 - What is the MachRep for a VanillaReg? Currently it is
304 always wordRep, but now we have to decide whether it is
305 signed or unsigned. The same VanillaReg can thus have
306 different MachReps in different parts of the program.
307
308 - Extra coercions cluttering up expressions.
309
310 Currently for GHC, the foreign call point is moot, because we do our
311 own promotion of sub-word-sized values to word-sized values. The Int8
312 type is represented by an Int# which is kept sign-extended at all times
313 (this is slightly naughty, because we're making assumptions about the
314 C calling convention rather early on in the compiler). However, given
315 this, the cons outweigh the pros.
316
317 -}
318