Merge remote-tracking branch 'origin/master' into tc-untouchables
[ghc.git] / compiler / cmm / CmmType.hs
1
2 module CmmType
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13 , halfWordMask
14 , narrowU, narrowS
15 )
16 where
17
18 #include "HsVersions.h"
19
20 import DynFlags
21 import FastString
22 import Outputable
23
24 import Data.Word
25 import Data.Int
26
27 -----------------------------------------------------------------------------
28 -- CmmType
29 -----------------------------------------------------------------------------
30
31 -- NOTE: CmmType is an abstract type: its constructor is not exported
32 -- from this module, so you can easily change its representation
33 --
34 -- However Width is exported in a concrete way,
35 -- and is used extensively in pattern-matching
36
-- | The type of a Cmm value: a category ('CmmCat') together with a 'Width'.
-- The module exports this type without its constructor (see the export list).
data CmmType = CmmType CmmCat Width
39
-- | The "category" of a 'CmmType'.  Not exported.
-- See Note [Signed vs unsigned] at the end of this file.
data CmmCat
  = GcPtrCat    -- ^ GC pointer
  | BitsCat     -- ^ Non-pointer
  | FloatCat    -- ^ Float
  deriving (Eq)
46
-- | Prints the category tag followed by the width in bits (joined with '<>').
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType category width -> ppr category <> ppr (widthInBits width)
49
-- | 'FloatCat' prints as @F@; every other category (GC pointers and plain
-- bits alike) prints as @I@.
instance Outputable CmmCat where
  ppr cat = case cat of
    FloatCat -> ptext $ sLit("F")
    _        -> ptext $ sLit("I")
53
54 -- Why is CmmType stratified? For native code generation,
55 -- most of the time you just want to know what sort of register
56 -- to put the thing in, and for this you need to know how
57 -- many bits the thing has and whether it goes in a floating-point
58 -- register. By contrast, the distinction between GcPtr and
59 -- GcNonPtr is of interest to only a few parts of the code generator.
60
61 -------- Equality on CmmType --------------
62 -- CmmType is *not* an instance of Eq; sometimes we care about the
63 -- Gc/NonGc distinction, and sometimes we don't
64 -- So we use an explicit function to force you to think about it
-- | Exact equality: both the category and the width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType cat1 wid1) (CmmType cat2 wid2)
  | cat1 == cat2 = wid1 == wid2
  | otherwise    = False
67
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers
--
-- Widths must match exactly.  Categories only need to agree on whether
-- they are floats, so a GC pointer and a plain non-pointer compare equal.
cmmEqType_ignoring_ptrhood (CmmType cat1 wid1) (CmmType cat2 wid2)
  = floatnessAgrees && wid1 == wid2
  where
    floatnessAgrees = isFloatCat cat1 == isFloatCat cat2

    isFloatCat FloatCat = True
    isFloatCat _nonFloat = False     -- Ignores GcPtr
78
79 --- Simple operations on CmmType -----
-- | Project the 'Width' out of a 'CmmType', discarding its category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ wid -> wid
82
-- | Build a non-pointer ('cmmBits') or floating-point ('cmmFloat') type
-- of the given width.
cmmBits, cmmFloat :: Width -> CmmType
cmmBits  wid = CmmType BitsCat  wid
cmmFloat wid = CmmType FloatCat wid
86
87 -------- Common CmmTypes ------------
88 -- Floats and words of specific widths
-- Non-pointer bit types of fixed width.
b8, b16, b32, b64 :: CmmType
b8  = CmmType BitsCat W8
b16 = CmmType BitsCat W16
b32 = CmmType BitsCat W32
b64 = CmmType BitsCat W64

-- Floating-point types of fixed width.
f32, f64 :: CmmType
f32 = CmmType FloatCat W32
f64 = CmmType FloatCat W64
96
97 -- CmmTypes of native word widths
-- | Non-pointer type whose width is 'wordWidth' for the given flags.
bWord :: DynFlags -> CmmType
bWord = cmmBits . wordWidth
100
-- | Non-pointer type whose width is 'halfWordWidth' for the given flags.
bHalfWord :: DynFlags -> CmmType
bHalfWord = cmmBits . halfWordWidth
103
-- | GC-pointer type whose width is 'wordWidth' for the given flags.
gcWord :: DynFlags -> CmmType
gcWord = CmmType GcPtrCat . wordWidth
106
-- | Non-pointer types whose widths are 'cIntWidth' and 'cLongWidth'
-- respectively for the given flags.
cInt, cLong :: DynFlags -> CmmType
cInt  = cmmBits . cIntWidth
cLong = cmmBits . cLongWidth
110
111
112 ------------ Predicates ----------------
-- | Category tests: is the type a float / a GC pointer?  The width is ignored.
isFloatType, isGcPtrType :: CmmType -> Bool
isFloatType (CmmType cat _) = cat == FloatCat

isGcPtrType (CmmType cat _) = cat == GcPtrCat
119
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
-- isWord32 / isWord64 hold for every non-float type of that width,
-- whether or not it is a GC pointer; isFloat32 / isFloat64 hold only
-- for floats of that width.

isWord64 (CmmType cat wid) = cat /= FloatCat && wid == W64

isWord32 (CmmType cat wid) = cat /= FloatCat && wid == W32

isFloat32 (CmmType cat wid) = cat == FloatCat && wid == W32

isFloat64 (CmmType cat wid) = cat == FloatCat && wid == W64
137
138 -----------------------------------------------------------------------------
139 -- Width
140 -----------------------------------------------------------------------------
141
-- | Bit widths of Cmm values.  The derived 'Ord' instance is relied upon,
-- so the constructors must stay in ascending order of size.
data Width
  = W8
  | W16
  | W32
  | W64
  | W80     -- ^ Extended double-precision float,
            -- used in x86 native codegen only.
            -- (we use Ord, so it'd better be in this order)
  | W128
  deriving (Eq, Ord, Show)
148
-- | A width is printed as the constructor-name string given by 'mrStr'.
instance Outputable Width where
  ppr = ptext . mrStr
151
-- | The literal string used when printing a 'Width': its constructor name.
mrStr :: Width -> LitString
mrStr wid = case wid of
  W8   -> sLit("W8")
  W16  -> sLit("W16")
  W32  -> sLit("W32")
  W64  -> sLit("W64")
  W80  -> sLit("W80")
  W128 -> sLit("W128")
159
160
161 -------- Common Widths ------------
-- | The 'Width' of a native word, chosen from the word size (in bytes)
-- given by 'wORD_SIZE': 4 gives 'W32', 8 gives 'W64'.
-- Panics, reporting the offending size, for any other value.
wordWidth :: DynFlags -> Width
wordWidth dflags = case wORD_SIZE dflags of
  4 -> W32
  8 -> W64
  s -> panic ("wordWidth: Unknown wORD_SIZE: " ++ show s)
167
-- | The 'Width' of half a native word, chosen from the word size (in bytes)
-- given by 'wORD_SIZE': 4 gives 'W16', 8 gives 'W32'.
-- Panics, reporting the offending size, for any other value.
halfWordWidth :: DynFlags -> Width
halfWordWidth dflags = case wORD_SIZE dflags of
  4 -> W16
  8 -> W32
  s -> panic ("halfWordWidth: Unknown wORD_SIZE: " ++ show s)
173
-- | A mask with all bits of a half word set: 0xFFFF when 'wORD_SIZE' is 4,
-- 0xFFFFFFFF when it is 8.
-- Panics, reporting the offending size, for any other value.
halfWordMask :: DynFlags -> Integer
halfWordMask dflags = case wORD_SIZE dflags of
  4 -> 0xFFFF
  8 -> 0xFFFFFFFF
  s -> panic ("halfWordMask: Unknown wORD_SIZE: " ++ show s)
179
-- cIntWidth is the Width for a C-language 'int', taken from cINT_SIZE;
-- cLongWidth is the Width for a C-language 'long', taken from cLONG_SIZE.
-- Both sizes are in bytes and only 4 and 8 are supported; anything else
-- panics with a message naming the function and the offending size.
cIntWidth, cLongWidth :: DynFlags -> Width
cIntWidth dflags = case cINT_SIZE dflags of
  4 -> W32
  8 -> W64
  s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s)
cLongWidth dflags = case cLONG_SIZE dflags of
  4 -> W32
  8 -> W64
  -- Previously reported itself as "cIntWidth", which pointed at the
  -- wrong function when the panic fired.
  s -> panic ("cLongWidth: Unknown cLONG_SIZE: " ++ show s)
190
-- | Number of bits in a value of the given width.
widthInBits :: Width -> Int
widthInBits wid = case wid of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W80  -> 80
  W128 -> 128
198
-- | Number of bytes in a value of the given width.  Every width is a
-- whole number of bytes (W80 is 10), so dividing the bit count by 8 is exact.
widthInBytes :: Width -> Int
widthInBytes wid = widthInBits wid `quot` 8
206
-- | The 'Width' with the given size in bytes.  Panics (printing the
-- byte count) when no width has that size.
widthFromBytes :: Int -> Width
widthFromBytes nbytes = case nbytes of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  10 -> W80
  16 -> W128
  _  -> pprPanic "no width for given number of bytes" (ppr nbytes)
215
-- | Base-2 logarithm of the width in bytes, useful for generating shifts.
-- Panics for 'W80', whose byte count is not a power of two.
widthInLog :: Width -> Int
widthInLog wid = case wid of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W80  -> panic "widthInLog: F80"
224
225 -- widening / narrowing
226
-- | Narrow an 'Integer' to the given width, reading the result back as an
-- unsigned value: the number is converted to the fixed-size @WordN@ type of
-- that width and then widened to 'Integer' again.
-- Only W8, W16, W32 and W64 are supported; any other width panics with a
-- message naming this function and the offending width.
narrowU :: Width -> Integer -> Integer
narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
narrowU w   _ = panic ("narrowU: unsupported width " ++ show w)
233
-- | Narrow an 'Integer' to the given width, reading the result back as a
-- signed value: the number is converted to the fixed-size @IntN@ type of
-- that width and then widened to 'Integer' again.
-- Only W8, W16, W32 and W64 are supported; any other width panics with a
-- message naming this function and the offending width.
narrowS :: Width -> Integer -> Integer
narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
narrowS w   _ = panic ("narrowS: unsupported width " ++ show w)
240
241 -------------------------------------------------------------------------
242 {- Note [Signed vs unsigned]
243 ~~~~~~~~~~~~~~~~~~~~~~~~~
244 Should a CmmType include a signed vs. unsigned distinction?
245
246 This is very much like a "hint" in C-- terminology: it isn't necessary
247 in order to generate correct code, but it might be useful in that the
248 compiler can generate better code if it has access to higher-level
249 hints about data. This is important at call boundaries, because the
250 definition of a function is not visible at all of its call sites, so
251 the compiler cannot infer the hints.
252
253 Here in Cmm, we're taking a slightly different approach. We include
254 the int vs. float hint in the CmmType, because (a) the majority of
255 platforms have a strong distinction between float and int registers,
256 and (b) we don't want to do any heavyweight hint-inference in the
257 native code backend in order to get good code. We're treating the
258 hint more like a type: our Cmm is always completely consistent with
259 respect to hints. All coercions between float and int are explicit.
260
261 What about the signed vs. unsigned hint? This information might be
262 useful if we want to keep sub-word-sized values in word-size
263 registers, which we must do if we only have word-sized registers.
264
265 On such a system, there are two straightforward conventions for
266 representing sub-word-sized values:
267
268 (a) Leave the upper bits undefined. Comparison operations must
269 sign- or zero-extend both operands before comparing them,
270 depending on whether the comparison is signed or unsigned.
271
272 (b) Always keep the values sign- or zero-extended as appropriate.
273 Arithmetic operations must narrow the result to the appropriate
274 size.
275
276 A clever compiler might not use either (a) or (b) exclusively, instead
277 it would attempt to minimize the coercions by analysis: the same kind
278 of analysis that propagates hints around. In Cmm we don't want to
279 have to do this, so we plump for having richer types and keeping the
280 type information consistent.
281
282 If signed/unsigned hints are missing from CmmType, then the only
283 choice we have is (a), because we don't know whether the result of an
284 operation should be sign- or zero-extended.
285
286 Many architectures have extending load operations, which work well
287 with (b). To make use of them with (a), you need to know whether the
288 value is going to be sign- or zero-extended by an enclosing comparison
289 (for example), which involves knowing about the context. This is
290 doable but more complex.
291
292 Further complicating the issue is foreign calls: a foreign calling
293 convention can specify that signed 8-bit quantities are passed as
294 sign-extended 32 bit quantities, for example (this is the case on the
295 PowerPC). So we *do* need sign information on foreign call arguments.
296
297 Pros for adding signed vs. unsigned to CmmType:
298
299 - It would let us use convention (b) above, and get easier
300 code generation for extending loads.
301
302 - Less information required on foreign calls.
303
304 - MachOp type would be simpler
305
306 Cons:
307
308 - More complexity
309
310 - What is the CmmType for a VanillaReg? Currently it is
311 always wordRep, but now we have to decide whether it is
312 signed or unsigned. The same VanillaReg can thus have
313 different CmmType in different parts of the program.
314
315 - Extra coercions cluttering up expressions.
316
317 Currently for GHC, the foreign call point is moot, because we do our
318 own promotion of sub-word-sized values to word-sized values. The Int8
319 type is represented by an Int# which is kept sign-extended at all times
320 (this is slightly naughty, because we're making assumptions about the
321 C calling convention rather early on in the compiler). However, given
322 this, the cons outweigh the pros.
323
324 -}
325