Merge remote-tracking branch 'origin/type-nats' into type-nats-merge
[ghc.git] / compiler / cmm / CmmType.hs
1
-- Types and widths of Cmm values.
module CmmType (
        -- The Cmm type itself (abstract), its standard values,
        -- constructors, accessors and predicates
        CmmType,
        b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord,
        cInt, cLong,
        cmmBits, cmmFloat,
        typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood,
        isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32,

        -- Widths (exported concretely) and operations on them
        Width(..),
        widthInBits, widthInBytes, widthInLog, widthFromBytes,
        wordWidth, halfWordWidth, cIntWidth, cLongWidth,
        narrowU, narrowS
  ) where
16
17 #include "HsVersions.h"
18
19 import Constants
20 import FastString
21 import Outputable
22
23 import Data.Word
24 import Data.Int
25
26 -----------------------------------------------------------------------------
27 -- CmmType
28 -----------------------------------------------------------------------------
29
30 -- NOTE: CmmType is an abstract type: its constructor is not exported
31 -- from this module, so you can easily change its representation
32 --
33 -- However Width is exported in a concrete way,
34 -- and is used extensively in pattern-matching
35
-- The important one!  A Cmm type is a category paired with a width.
-- See Note [Signed vs unsigned] at the end
data CmmType = CmmType CmmCat Width

-- The "category" half of a CmmType (not exported)
data CmmCat
  = GcPtrCat            -- GC pointer
  | BitsCat             -- Non-pointer
  | FloatCat            -- Float
  deriving (Eq)
45
-- A CmmType prints as its category tag immediately followed by its
-- width in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType cat wid -> ppr cat <> ppr (widthInBits wid)
48
-- Floats print as "F"; every other category (GC pointers and plain
-- bits alike) prints as "I".
instance Outputable CmmCat where
  ppr cat = ptext (case cat of
                     FloatCat -> sLit "F"
                     _        -> sLit "I")
52
53 -- Why is CmmType stratified? For native code generation,
54 -- most of the time you just want to know what sort of register
55 -- to put the thing in, and for this you need to know how
56 -- many bits the thing has and whether it goes in a floating-point
57 -- register. By contrast, the distinction between GcPtr and
58 -- GcNonPtr is of interest to only a few parts of the code generator.
59
60 -------- Equality on CmmType --------------
61 -- CmmType is *not* an instance of Eq; sometimes we care about the
62 -- Gc/NonGc distinction, and sometimes we don't
63 -- So we use an explicit function to force you to think about it
-- Exact equality: two types are equal only when both their category
-- and their width agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType cat1 wid1) (CmmType cat2 wid2)
  = (cat1, wid1) == (cat2, wid2)
66
-- Equality that does not distinguish GC pointers from non-pointer bits:
-- two types are equal when they agree on float vs. non-float and on
-- width.
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType cat1 wid1) (CmmType cat2 wid2)
  = isFloat cat1 == isFloat cat2 && wid1 == wid2
  where
    -- GcPtrCat and BitsCat both count as "word" here (ignores GcPtr)
    isFloat FloatCat = True
    isFloat _word    = False
77
78 --- Simple operations on CmmType -----
-- Extract the width of a CmmType, discarding its category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _cat wid -> wid
81
-- Build a non-pointer bits type of the given width.
cmmBits :: Width -> CmmType
cmmBits wid = CmmType BitsCat wid

-- Build a floating-point type of the given width.
cmmFloat :: Width -> CmmType
cmmFloat wid = CmmType FloatCat wid
85
86 -------- Common CmmTypes ------------
-- Non-pointer bits types of specific widths
b8, b16, b32, b64 :: CmmType
b8  = CmmType BitsCat W8
b16 = CmmType BitsCat W16
b32 = CmmType BitsCat W32
b64 = CmmType BitsCat W64

-- Floating-point types of specific widths
f32, f64 :: CmmType
f32 = CmmType FloatCat W32
f64 = CmmType FloatCat W64
95
-- Non-pointer types as wide as the native word and the native half word
bWord, bHalfWord :: CmmType
bWord     = CmmType BitsCat wordWidth
bHalfWord = CmmType BitsCat halfWordWidth

-- A GC pointer of native word width
gcWord :: CmmType
gcWord = CmmType GcPtrCat wordWidth
101
-- Non-pointer bits type of width cIntWidth
cInt :: CmmType
cInt = CmmType BitsCat cIntWidth

-- Non-pointer bits type of width cLongWidth
cLong :: CmmType
cLong = CmmType BitsCat cLongWidth
105
106
107 ------------ Predicates ----------------
-- True exactly for types in the float category, whatever their width.
isFloatType :: CmmType -> Bool
isFloatType (CmmType cat _) = cat == FloatCat

-- True exactly for types in the GC-pointer category, whatever their width.
isGcPtrType :: CmmType -> Bool
isGcPtrType (CmmType cat _) = cat == GcPtrCat
114
-- isWord32 / isWord64 are true of 32- / 64-bit non-floats
-- (both gc-ptrs and otherwise).
isWord32, isWord64 :: CmmType -> Bool
isWord64 (CmmType cat wid) = cat /= FloatCat && wid == W64
isWord32 (CmmType cat wid) = cat /= FloatCat && wid == W32

-- isFloat32 / isFloat64 are true of 32- / 64-bit floats only.
isFloat32, isFloat64 :: CmmType -> Bool
isFloat32 (CmmType cat wid) = cat == FloatCat && wid == W32
isFloat64 (CmmType cat wid) = cat == FloatCat && wid == W64
132
133 -----------------------------------------------------------------------------
134 -- Width
135 -----------------------------------------------------------------------------
136
-- The widths a Cmm value can have.
-- The derived Ord instance is used, so the constructors had better
-- stay in this order.
data Width
  = W8
  | W16
  | W32
  | W64
  | W80         -- Extended double-precision float,
                -- used in x86 native codegen only.
  | W128
  deriving (Eq, Ord, Show)
143
-- A Width prints as the name of its constructor.
instance Outputable Width where
  ppr = ptext . mrStr

-- The constructor name of each Width, as a literal string
mrStr :: Width -> LitString
mrStr wid = case wid of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W80  -> sLit "W80"
  W128 -> sLit "W128"
154
155
156 -------- Common Widths ------------
-- Width of a native word, selected by wORD_SIZE (4 gives W32, 8 gives
-- W64; presumably wORD_SIZE is the word size in bytes -- confirm
-- against Constants).  Any other word size panics.
wordWidth :: Width
wordWidth = case wORD_SIZE of
  4 -> W32
  8 -> W64
  -- The message names this function; it used to name the long-gone
  -- MachOp.wordRep, which made the panic hard to trace.
  _ -> panic "CmmType.wordWidth: Unknown word size"

-- Width of half a native word, selected by wORD_SIZE (4 gives W16,
-- 8 gives W32).  Any other word size panics.
halfWordWidth :: Width
halfWordWidth = case wORD_SIZE of
  4 -> W16
  8 -> W32
  _ -> panic "CmmType.halfWordWidth: Unknown word size"
165
-- cIntWidth is the Width for a C-language 'int'; it is picked at
-- compile time by the preprocessor test below.
-- NOTE(review): sizes other than 4 and 8 have no branch, so in that
-- case the signature is left without a binding.
cIntWidth :: Width
#if SIZEOF_INT == 4
cIntWidth = W32
#elif SIZEOF_INT == 8
cIntWidth = W64
#endif

-- cLongWidth is chosen the same way from the size tested below
-- (presumably that of a C-language 'long' -- confirm).
cLongWidth :: Width
#if SIZEOF_LONG == 4
cLongWidth = W32
#elif SIZEOF_LONG == 8
cLongWidth = W64
#endif
179
-- Number of bits in a value of the given width.
widthInBits :: Width -> Int
widthInBits wid = case wid of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W80  -> 80
  W128 -> 128
187
-- Number of bytes in a value of the given width.
widthInBytes :: Width -> Int
widthInBytes wid = case wid of
  W8   -> 1
  W16  -> 2
  W32  -> 4
  W64  -> 8
  W80  -> 10
  W128 -> 16
195
-- Inverse of widthInBytes: the Width that occupies the given number
-- of bytes.  Panics (printing the byte count) when no Width has that
-- size.
widthFromBytes :: Int -> Width
widthFromBytes nbytes = case nbytes of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  10 -> W80
  16 -> W128
  _  -> pprPanic "no width for given number of bytes" (ppr nbytes)
204
-- log_2 of the width in bytes, useful for generating shifts.
-- W80 is 10 bytes wide, which is not a power of two, so it panics.
widthInLog :: Width -> Int
widthInLog wid = case wid of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W80  -> panic "widthInLog: F80"
213
214 -- widening / narrowing
215
-- Narrow an Integer to the given width, reading the result as an
-- unsigned value.  This is done by converting to the fixed-size Word
-- type of that width and back again.
-- Only W8, W16, W32 and W64 are supported; any other width panics.
narrowU :: Width -> Integer -> Integer
narrowU wid x = case wid of
  W8  -> fromIntegral (fromIntegral x :: Word8)
  W16 -> fromIntegral (fromIntegral x :: Word16)
  W32 -> fromIntegral (fromIntegral x :: Word32)
  W64 -> fromIntegral (fromIntegral x :: Word64)
  -- Name this function and the offending width; the old message
  -- ("narrowTo") matched no function and was shared with narrowS.
  _   -> pprPanic "narrowU: unsupported width" (ppr wid)
222
-- Narrow an Integer to the given width, reading the result as a
-- signed value.  This is done by converting to the fixed-size Int
-- type of that width and back again.
-- Only W8, W16, W32 and W64 are supported; any other width panics.
narrowS :: Width -> Integer -> Integer
narrowS wid x = case wid of
  W8  -> fromIntegral (fromIntegral x :: Int8)
  W16 -> fromIntegral (fromIntegral x :: Int16)
  W32 -> fromIntegral (fromIntegral x :: Int32)
  W64 -> fromIntegral (fromIntegral x :: Int64)
  -- Name this function and the offending width; the old message
  -- ("narrowTo") matched no function and was shared with narrowU.
  _   -> pprPanic "narrowS: unsupported width" (ppr wid)
229
230 -------------------------------------------------------------------------
231 {- Note [Signed vs unsigned]
232 ~~~~~~~~~~~~~~~~~~~~~~~~~
233 Should a CmmType include a signed vs. unsigned distinction?
234
235 This is very much like a "hint" in C-- terminology: it isn't necessary
236 in order to generate correct code, but it might be useful in that the
237 compiler can generate better code if it has access to higher-level
238 hints about data. This is important at call boundaries, because the
239 definition of a function is not visible at all of its call sites, so
240 the compiler cannot infer the hints.
241
242 Here in Cmm, we're taking a slightly different approach. We include
243 the int vs. float hint in the CmmType, because (a) the majority of
244 platforms have a strong distinction between float and int registers,
245 and (b) we don't want to do any heavyweight hint-inference in the
246 native code backend in order to get good code. We're treating the
247 hint more like a type: our Cmm is always completely consistent with
248 respect to hints. All coercions between float and int are explicit.
249
250 What about the signed vs. unsigned hint? This information might be
251 useful if we want to keep sub-word-sized values in word-size
252 registers, which we must do if we only have word-sized registers.
253
254 On such a system, there are two straightforward conventions for
255 representing sub-word-sized values:
256
257 (a) Leave the upper bits undefined. Comparison operations must
258 sign- or zero-extend both operands before comparing them,
259 depending on whether the comparison is signed or unsigned.
260
261 (b) Always keep the values sign- or zero-extended as appropriate.
262 Arithmetic operations must narrow the result to the appropriate
263 size.
264
265 A clever compiler might not use either (a) or (b) exclusively, instead
266 it would attempt to minimize the coercions by analysis: the same kind
267 of analysis that propagates hints around. In Cmm we don't want to
268 have to do this, so we plump for having richer types and keeping the
269 type information consistent.
270
271 If signed/unsigned hints are missing from CmmType, then the only
272 choice we have is (a), because we don't know whether the result of an
273 operation should be sign- or zero-extended.
274
275 Many architectures have extending load operations, which work well
276 with (b). To make use of them with (a), you need to know whether the
277 value is going to be sign- or zero-extended by an enclosing comparison
278 (for example), which involves knowing about the context. This is
279 doable but more complex.
280
281 Further complicating the issue are foreign calls: a foreign calling
282 convention can specify that signed 8-bit quantities are passed as
283 sign-extended 32 bit quantities, for example (this is the case on the
284 PowerPC). So we *do* need sign information on foreign call arguments.
285
286 Pros for adding signed vs. unsigned to CmmType:
287
288 - It would let us use convention (b) above, and get easier
289 code generation for extending loads.
290
291 - Less information required on foreign calls.
292
293 - MachOp type would be simpler
294
295 Cons:
296
297 - More complexity
298
299 - What is the CmmType for a VanillaReg? Currently it is
300 always wordRep, but now we have to decide whether it is
301 signed or unsigned. The same VanillaReg can thus have
302 different CmmType in different parts of the program.
303
304 - Extra coercions cluttering up expressions.
305
306 Currently for GHC, the foreign call point is moot, because we do our
307 own promotion of sub-word-sized values to word-sized values. The Int8
308 type is represented by an Int# which is kept sign-extended at all times
309 (this is slightly naughty, because we're making assumptions about the
310 C calling convention rather early on in the compiler). However, given
311 this, the cons outweigh the pros.
312
313 -}
314