Merge remote-tracking branch 'origin/master' into tc-untouchables
[ghc.git] / compiler / cmm / CmmType.hs
1
2 module CmmType
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13 , halfWordMask
14 , narrowU, narrowS
15 )
16 where
17
18 #include "HsVersions.h"
19
20 import DynFlags
21 import FastString
22 import Outputable
23
24 import Data.Word
25 import Data.Int
26
27 -----------------------------------------------------------------------------
28 -- CmmType
29 -----------------------------------------------------------------------------
30
31 -- NOTE: CmmType is an abstract type: its constructor is not exported
32 -- from this module, so you can easily change its representation
33 --
34 -- However Width is exported in a concrete way,
35 -- and is used extensively in pattern-matching
36
-- | The type of a Cmm value (the important one!): a category paired with
-- a 'Width'.  The export list exposes the type without its constructor.
data CmmType = CmmType CmmCat Width
39
-- | The "category" of a 'CmmType' (not exported).
data CmmCat
  = GcPtrCat   -- ^ GC pointer
  | BitsCat    -- ^ Non-pointer
  | FloatCat   -- ^ Float
  deriving (Eq)
45 -- See Note [Signed vs unsigned] at the end
46
-- | A 'CmmType' is printed as its category immediately followed by its
-- width in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType category width -> ppr category <> ppr (widthInBits width)
49
-- | Floats print as @F@; every other category (GC pointer or plain bits)
-- prints as @I@.
instance Outputable CmmCat where
  ppr cat = case cat of
    FloatCat -> ptext (sLit "F")
    _        -> ptext (sLit "I")
53
54 -- Why is CmmType stratified? For native code generation,
55 -- most of the time you just want to know what sort of register
56 -- to put the thing in, and for this you need to know how
57 -- many bits the thing has and whether it goes in a floating-point
58 -- register. By contrast, the distinction between GcPtr and
59 -- GcNonPtr is of interest to only a few parts of the code generator.
60
61 -------- Equality on CmmType --------------
62 -- CmmType is *not* an instance of Eq; sometimes we care about the
63 -- Gc/NonGc distinction, and sometimes we don't
64 -- So we use an explicit function to force you to think about it
-- | Exact equality: both the category and the width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType catA widA) (CmmType catB widB)
  | catA /= catB = False
  | otherwise    = widA == widB
67
-- | Equality that ignores the GC-pointer / non-pointer distinction: two
-- types are equal when they agree on being float or non-float and have
-- the same width.
--
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType catA widA) (CmmType catB widB)
  = sameFloatness && widA == widB
  where
    -- GcPtrCat and BitsCat are deliberately lumped together here.
    sameFloatness = isFloatCat catA == isFloatCat catB

    isFloatCat FloatCat = True
    isFloatCat _        = False
78
79 --- Simple operations on CmmType -----
-- | Project the 'Width' out of a 'CmmType', discarding its category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ width -> width
82
-- | Build a non-pointer bits type of the given width.
cmmBits :: Width -> CmmType
cmmBits width = CmmType BitsCat width

-- | Build a floating-point type of the given width.
cmmFloat :: Width -> CmmType
cmmFloat width = CmmType FloatCat width
86
87 -------- Common CmmTypes ------------
88 -- Floats and words of specific widths
-- | Non-pointer bit types of 8, 16, 32 and 64 bits.
b8, b16, b32, b64 :: CmmType
b8  = cmmBits W8
b16 = cmmBits W16
b32 = cmmBits W32
b64 = cmmBits W64

-- | Floating-point types of 32 and 64 bits.
f32, f64 :: CmmType
f32 = cmmFloat W32
f64 = cmmFloat W64
96
97 -- CmmTypes of native word widths
-- | Non-pointer bits type whose width is the word width given by the
-- supplied 'DynFlags'.
bWord :: DynFlags -> CmmType
bWord = cmmBits . wordWidth
100
-- | Non-pointer bits type whose width is the half-word width given by the
-- supplied 'DynFlags'.
bHalfWord :: DynFlags -> CmmType
bHalfWord = cmmBits . halfWordWidth
103
-- | GC-pointer type whose width is the word width given by the supplied
-- 'DynFlags'.
gcWord :: DynFlags -> CmmType
gcWord = CmmType GcPtrCat . wordWidth
106
-- | Non-pointer bits type of width 'cIntWidth'.
cInt :: CmmType
cInt = cmmBits cIntWidth

-- | Non-pointer bits type of width 'cLongWidth'.
cLong :: CmmType
cLong = cmmBits cLongWidth
110
111
112 ------------ Predicates ----------------
-- | True exactly when the type is in the float category.
isFloatType :: CmmType -> Bool
isFloatType (CmmType cat _) = cat == FloatCat

-- | True exactly when the type is in the GC-pointer category.
isGcPtrType :: CmmType -> Bool
isGcPtrType (CmmType cat _) = cat == GcPtrCat
119
-- | Width-specific classification of a 'CmmType'.
--
-- 'isWord32' and 'isWord64' hold for non-float types of that width (both
-- gc-ptrs and otherwise); 'isFloat32' and 'isFloat64' hold only for floats
-- of that width.
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool

isWord64 (CmmType cat wid) = cat /= FloatCat && wid == W64

isWord32 (CmmType cat wid) = cat /= FloatCat && wid == W32

isFloat32 (CmmType cat wid) = cat == FloatCat && wid == W32

isFloat64 (CmmType cat wid) = cat == FloatCat && wid == W64
137
138 -----------------------------------------------------------------------------
139 -- Width
140 -----------------------------------------------------------------------------
141
-- | The widths a Cmm value may have.  The constructor is exported
-- concretely and pattern-matched on extensively.
--
-- (we use Ord, so it'd better be in this order)
data Width
  = W8
  | W16
  | W32
  | W64
  | W80    -- ^ Extended double-precision float,
           -- used in x86 native codegen only.
  | W128
  deriving (Eq, Ord, Show)
148
-- | A 'Width' is printed using the name produced by 'mrStr'.
instance Outputable Width where
  ppr = ptext . mrStr
151
-- | The printable name of a 'Width'; each name is spelled like its
-- constructor.
mrStr :: Width -> LitString
mrStr width = case width of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W80  -> sLit "W80"
  W128 -> sLit "W128"
159
160
161 -------- Common Widths ------------
-- | The 'Width' of a machine word, chosen from @wORD_SIZE dflags@:
-- 4 gives 'W32' and 8 gives 'W64'.
--
-- Any other word size panics.  The panic names this function and reports
-- the offending size (it previously named the unrelated @MachOp.wordRep@).
wordWidth :: DynFlags -> Width
wordWidth dflags = case wORD_SIZE dflags of
  4 -> W32
  8 -> W64
  n -> pprPanic "CmmType.wordWidth: Unknown word size" (ppr n)
167
-- | The 'Width' of half a machine word, chosen from @wORD_SIZE dflags@:
-- 4 gives 'W16' and 8 gives 'W32'.
--
-- Any other word size panics.  The panic names this function and reports
-- the offending size (it previously named the unrelated
-- @MachOp.halfWordRep@).
halfWordWidth :: DynFlags -> Width
halfWordWidth dflags = case wORD_SIZE dflags of
  4 -> W16
  8 -> W32
  n -> pprPanic "CmmType.halfWordWidth: Unknown word size" (ppr n)
173
-- | A mask with the low half-word bits set, chosen from
-- @wORD_SIZE dflags@: 4 gives @0xFFFF@ and 8 gives @0xFFFFFFFF@.
--
-- Any other word size panics.  The panic names this module (it previously
-- said @MachOp@) and reports the offending size.
halfWordMask :: DynFlags -> Integer
halfWordMask dflags = case wORD_SIZE dflags of
  4 -> 0xFFFF
  8 -> 0xFFFFFFFF
  n -> pprPanic "CmmType.halfWordMask: Unknown word size" (ppr n)
179
-- | 'cIntWidth' is the 'Width' for a C-language 'int', selected at compile
-- time from the @SIZEOF_INT@ macro.  Only sizes 4 and 8 have a definition.
cIntWidth :: Width
#if SIZEOF_INT == 8
cIntWidth = W64
#elif SIZEOF_INT == 4
cIntWidth = W32
#endif

-- | 'cLongWidth' is selected at compile time from the @SIZEOF_LONG@ macro.
-- Only sizes 4 and 8 have a definition.
cLongWidth :: Width
#if SIZEOF_LONG == 8
cLongWidth = W64
#elif SIZEOF_LONG == 4
cLongWidth = W32
#endif
193
-- | The number of bits in a 'Width'.
widthInBits :: Width -> Int
widthInBits width = case width of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W80  -> 80
  W128 -> 128
201
-- | The number of bytes in a 'Width'.
widthInBytes :: Width -> Int
widthInBytes width = case width of
  W8   -> 1
  W16  -> 2
  W32  -> 4
  W64  -> 8
  W80  -> 10
  W128 -> 16
209
-- | The 'Width' occupying the given number of bytes (the inverse of
-- 'widthInBytes').  Panics, reporting the byte count, when no 'Width' has
-- that size.
widthFromBytes :: Int -> Width
widthFromBytes nbytes = case nbytes of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  10 -> W80
  16 -> W128
  _  -> pprPanic "no width for given number of bytes" (ppr nbytes)
218
219 -- log_2 of the width in bytes, useful for generating shifts.
-- | Base-2 logarithm of the width in bytes.  'W80' is not a power of two
-- bytes, so it panics.
widthInLog :: Width -> Int
widthInLog width = case width of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W80  -> panic "widthInLog: F80"
227
228 -- widening / narrowing
229
-- | Narrow an 'Integer' to the given width, reading the result as
-- unsigned: the value is converted to the fixed-size unsigned word type of
-- that width and back.
--
-- Only 'W8', 'W16', 'W32' and 'W64' are supported.  Any other width panics
-- with a message naming this function and the width (the old message,
-- "narrowTo", was shared with 'narrowS' and identified neither).
narrowU :: Width -> Integer -> Integer
narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
narrowU w   _ = pprPanic "narrowU: unsupported width" (ppr w)
236
-- | Narrow an 'Integer' to the given width, reading the result as signed:
-- the value is converted to the fixed-size signed integer type of that
-- width and back.
--
-- Only 'W8', 'W16', 'W32' and 'W64' are supported.  Any other width panics
-- with a message naming this function and the width (the old message,
-- "narrowTo", was shared with 'narrowU' and identified neither).
narrowS :: Width -> Integer -> Integer
narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
narrowS w   _ = pprPanic "narrowS: unsupported width" (ppr w)
243
244 -------------------------------------------------------------------------
245 {- Note [Signed vs unsigned]
246 ~~~~~~~~~~~~~~~~~~~~~~~~~
247 Should a CmmType include a signed vs. unsigned distinction?
248
249 This is very much like a "hint" in C-- terminology: it isn't necessary
250 in order to generate correct code, but it might be useful in that the
251 compiler can generate better code if it has access to higher-level
252 hints about data. This is important at call boundaries, because the
253 definition of a function is not visible at all of its call sites, so
254 the compiler cannot infer the hints.
255
256 Here in Cmm, we're taking a slightly different approach. We include
257 the int vs. float hint in the CmmType, because (a) the majority of
258 platforms have a strong distinction between float and int registers,
259 and (b) we don't want to do any heavyweight hint-inference in the
260 native code backend in order to get good code. We're treating the
261 hint more like a type: our Cmm is always completely consistent with
262 respect to hints. All coercions between float and int are explicit.
263
264 What about the signed vs. unsigned hint? This information might be
265 useful if we want to keep sub-word-sized values in word-size
266 registers, which we must do if we only have word-sized registers.
267
268 On such a system, there are two straightforward conventions for
269 representing sub-word-sized values:
270
271 (a) Leave the upper bits undefined. Comparison operations must
272 sign- or zero-extend both operands before comparing them,
273 depending on whether the comparison is signed or unsigned.
274
275 (b) Always keep the values sign- or zero-extended as appropriate.
276 Arithmetic operations must narrow the result to the appropriate
277 size.
278
279 A clever compiler might not use either (a) or (b) exclusively, instead
280 it would attempt to minimize the coercions by analysis: the same kind
281 of analysis that propagates hints around. In Cmm we don't want to
282 have to do this, so we plump for having richer types and keeping the
283 type information consistent.
284
285 If signed/unsigned hints are missing from CmmType, then the only
286 choice we have is (a), because we don't know whether the result of an
287 operation should be sign- or zero-extended.
288
289 Many architectures have extending load operations, which work well
290 with (b). To make use of them with (a), you need to know whether the
291 value is going to be sign- or zero-extended by an enclosing comparison
292 (for example), which involves knowing about the context. This is
293 doable but more complex.
294
295 Further complicating the issue is foreign calls: a foreign calling
296 convention can specify that signed 8-bit quantities are passed as
297 sign-extended 32 bit quantities, for example (this is the case on the
298 PowerPC). So we *do* need sign information on foreign call arguments.
299
300 Pros for adding signed vs. unsigned to CmmType:
301
302 - It would let us use convention (b) above, and get easier
303 code generation for extending loads.
304
305 - Less information required on foreign calls.
306
307 - MachOp type would be simpler
308
309 Cons:
310
311 - More complexity
312
313 - What is the CmmType for a VanillaReg? Currently it is
314 always wordRep, but now we have to decide whether it is
315 signed or unsigned. The same VanillaReg can thus have
316 different CmmType in different parts of the program.
317
318 - Extra coercions cluttering up expressions.
319
320 Currently for GHC, the foreign call point is moot, because we do our
321 own promotion of sub-word-sized values to word-sized values. The Int8
322 type is represented by an Int# which is kept sign-extended at all times
323 (this is slightly naughty, because we're making assumptions about the
324 C calling convention rather early on in the compiler). However, given
325 this, the cons outweigh the pros.
326
327 -}
328