d6da5a4022add21abcdabb4b0e542e070aa61159
[ghc.git] / compiler / cmm / CmmType.hs
1
2 module CmmType
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13 , halfWordMask
14 , narrowU, narrowS
15 , rEP_CostCentreStack_mem_alloc
16 , rEP_CostCentreStack_scc_count
17 , rEP_StgEntCounter_allocs
18 )
19 where
20
21 #include "HsVersions.h"
22
23 import DynFlags
24 import FastString
25 import Outputable
26
27 import Data.Word
28 import Data.Int
29
30 -----------------------------------------------------------------------------
31 -- CmmType
32 -----------------------------------------------------------------------------
33
34 -- NOTE: CmmType is an abstract type: its constructor is not exported from
35 -- this module, so you can easily change its representation
36 --
37 -- However Width is exported in a concrete way,
38 -- and is used extensively in pattern-matching
39
-- The important one!  A Cmm type is a category (GC pointer, plain bits,
-- or float) paired with a width.
data CmmType
  = CmmType CmmCat Width
42
-- "Category" of a CmmType.  Not exported from this module.
-- There is deliberately no signed/unsigned distinction here:
-- see Note [Signed vs unsigned] at the end of the file.
data CmmCat
  = GcPtrCat    -- GC pointer
  | BitsCat     -- Non-pointer
  | FloatCat    -- Float
  deriving (Eq)
49
-- A type is printed as its category's letter immediately followed by
-- its width in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType cat wid -> ppr cat <> ppr (widthInBits wid)
52
-- Floats print as "F"; every other category (GC pointer or plain
-- bits) prints as "I".
instance Outputable CmmCat where
  ppr cat = ptext letter
    where
      letter = case cat of
        FloatCat -> sLit "F"
        _        -> sLit "I"
56
57 -- Why is CmmType stratified? For native code generation,
58 -- most of the time you just want to know what sort of register
59 -- to put the thing in, and for this you need to know how
60 -- many bits the thing has and whether it goes in a floating-point
61 -- register. By contrast, the distinction between GcPtr and
62 -- GcNonPtr is of interest to only a few parts of the code generator.
63
64 -------- Equality on CmmType --------------
-- There is intentionally no Eq instance for CmmType: some callers care
-- about the Gc/NonGc distinction and some do not, so each caller has to
-- pick an explicit comparison function.
--
-- cmmEqType is the exact comparison: both category and width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType lhs rhs = case (lhs, rhs) of
  (CmmType cat1 wid1, CmmType cat2 wid2) -> cat1 == cat2 && wid1 == wid2
70
-- A weaker equality that ignores the GcPtr / non-pointer distinction:
-- two types are equal when they have the same width and are either
-- both floats or both non-floats.
-- This equality is temporary; it is used in CmmLint because the RTS
-- files are not yet well-typed with respect to pointers.
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType cat1 wid1) (CmmType cat2 wid2)
  = sameFloatness && wid1 == wid2
  where
    -- Only float-vs-non-float matters; GcPtrCat and BitsCat are
    -- treated as interchangeable.
    sameFloatness = (cat1 == FloatCat) == (cat2 == FloatCat)
81
82 --- Simple operations on CmmType -----
-- Project out the width component of a type, discarding its category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ width -> width
85
-- Build a non-pointer (plain bits) type of the given width.
cmmBits :: Width -> CmmType
cmmBits width = CmmType BitsCat width

-- Build a floating-point type of the given width.
cmmFloat :: Width -> CmmType
cmmFloat width = CmmType FloatCat width
89
90 -------- Common CmmTypes ------------
91 -- Floats and words of specific widths
-- Non-pointer bit types of fixed width.
b8, b16, b32, b64 :: CmmType
b8  = CmmType BitsCat W8
b16 = CmmType BitsCat W16
b32 = CmmType BitsCat W32
b64 = CmmType BitsCat W64

-- Floating-point types of fixed width.
f32, f64 :: CmmType
f32 = CmmType FloatCat W32
f64 = CmmType FloatCat W64
99
100 -- CmmTypes of native word widths
-- Non-pointer type whose width is wordWidth for the given DynFlags.
bWord :: DynFlags -> CmmType
bWord = cmmBits . wordWidth
103
-- Non-pointer type whose width is halfWordWidth for the given DynFlags.
bHalfWord :: DynFlags -> CmmType
bHalfWord = cmmBits . halfWordWidth
106
-- GC-pointer type whose width is wordWidth for the given DynFlags.
gcWord :: DynFlags -> CmmType
gcWord = CmmType GcPtrCat . wordWidth
109
-- Non-pointer type of width cIntWidth for the given DynFlags.
cInt :: DynFlags -> CmmType
cInt = cmmBits . cIntWidth

-- Non-pointer type of width cLongWidth for the given DynFlags.
cLong :: DynFlags -> CmmType
cLong = cmmBits . cLongWidth
113
114
115 ------------ Predicates ----------------
-- True exactly when the type's category is FloatCat, whatever its width.
isFloatType :: CmmType -> Bool
isFloatType (CmmType cat _) = cat == FloatCat

-- True exactly when the type's category is GcPtrCat, whatever its width.
isGcPtrType :: CmmType -> Bool
isGcPtrType (CmmType cat _) = cat == GcPtrCat
122
-- isWord32 / isWord64 hold for non-float types of that width, whether
-- they are GC pointers or plain bits.
-- isFloat32 / isFloat64 hold for float types of that width.
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool

isWord64 (CmmType cat wid) = cat /= FloatCat && wid == W64

isWord32 (CmmType cat wid) = cat /= FloatCat && wid == W32

isFloat32 (CmmType cat wid) = cat == FloatCat && wid == W32

isFloat64 (CmmType cat wid) = cat == FloatCat && wid == W64
140
141 -----------------------------------------------------------------------------
142 -- Width
143 -----------------------------------------------------------------------------
144
-- Widths of Cmm values.
-- The derived Ord instance is relied upon, so the constructors must
-- stay listed in increasing order of size.
data Width
  = W8
  | W16
  | W32
  | W64
  | W80     -- Extended double-precision float,
            -- used in x86 native codegen only.
  | W128
  deriving (Eq, Ord, Show)
151
-- A width is printed using the literal name produced by mrStr.
instance Outputable Width where
  ppr = ptext . mrStr
154
-- The printable name of a width: the constructor's own name.
mrStr :: Width -> LitString
mrStr width = case width of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W80  -> sLit "W80"
  W128 -> sLit "W128"
162
163
164 -------- Common Widths ------------
-- Width selected by wORD_SIZE: 4 gives W32, 8 gives W64.
-- Any other word size is a panic.
wordWidth :: DynFlags -> Width
wordWidth dflags = case wORD_SIZE dflags of
  4 -> W32
  8 -> W64
  _ -> panic "MachOp.wordRep: Unknown word size"
170
-- Half of the width selected by wORD_SIZE: 4 gives W16, 8 gives W32.
-- Any other word size is a panic.
halfWordWidth :: DynFlags -> Width
halfWordWidth dflags = case wORD_SIZE dflags of
  4 -> W16
  8 -> W32
  _ -> panic "MachOp.halfWordRep: Unknown word size"
176
-- A mask with all bits of a half word set: 16 one-bits when
-- wORD_SIZE is 4, 32 one-bits when it is 8.
-- Any other word size is a panic.
halfWordMask :: DynFlags -> Integer
halfWordMask dflags = case wORD_SIZE dflags of
  4 -> 0xFFFF
  8 -> 0xFFFFFFFF
  _ -> panic "MachOp.halfWordMask: Unknown word size"
182
-- cIntWidth is the Width for a C-language 'int', selected by
-- cINT_SIZE (in bytes); cLongWidth is the analogous Width selected by
-- cLONG_SIZE.  Only sizes of 4 and 8 bytes are supported; any other
-- size is a panic that names the function and the offending size.
cIntWidth, cLongWidth :: DynFlags -> Width
cIntWidth dflags = case cINT_SIZE dflags of
  4 -> W32
  8 -> W64
  s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s)
cLongWidth dflags = case cLONG_SIZE dflags of
  4 -> W32
  8 -> W64
  -- This message used to say "cIntWidth", pointing at the wrong function.
  s -> panic ("cLongWidth: Unknown cLONG_SIZE: " ++ show s)
193
-- Number of bits in a value of the given width.
widthInBits :: Width -> Int
widthInBits width = case width of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W80  -> 80
  W128 -> 128
201
-- Number of bytes in a value of the given width.  Every width is a
-- whole number of bytes (W80 is 10 bytes), so this is the bit count
-- divided by eight.
widthInBytes :: Width -> Int
widthInBytes width = widthInBits width `div` 8
209
-- Inverse of widthInBytes: the width that occupies exactly the given
-- number of bytes.  Panics, reporting the number, when no width has
-- that size.
widthFromBytes :: Int -> Width
widthFromBytes n = case n of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  10 -> W80
  16 -> W128
  _  -> pprPanic "no width for given number of bytes" (ppr n)
218
-- Base-2 logarithm of the width in bytes, useful for generating shifts.
-- W80 is 10 bytes, which is not a power of two, so it panics.
widthInLog :: Width -> Int
widthInLog width = case width of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W80  -> panic "widthInLog: F80"
227
228 -- widening / narrowing
229
-- Narrow an Integer to the given width, reading the result as an
-- unsigned value.  This is done by converting through the fixed-size
-- WordN type of that width, which keeps only the low-order bits.
-- Only W8, W16, W32 and W64 are supported; any other width is a panic
-- that names this function and the offending width.
narrowU :: Width -> Integer -> Integer
narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
-- The message used to be "narrowTo", shared with narrowS, which
-- identified neither the function nor the width.
narrowU w   _ = panic ("narrowU: unsupported width " ++ show w)
236
-- Narrow an Integer to the given width, reading the result as a
-- signed value.  This is done by converting through the fixed-size
-- IntN type of that width, which keeps the low-order bits and
-- reinterprets them as two's complement.
-- Only W8, W16, W32 and W64 are supported; any other width is a panic
-- that names this function and the offending width.
narrowS :: Width -> Integer -> Integer
narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
-- The message used to be "narrowTo", shared with narrowU, which
-- identified neither the function nor the width.
narrowS w   _ = panic ("narrowS: unsupported width " ++ show w)
243
244 -------------------------------------------------------------------------
245
246 -- These don't really belong here, but I don't know where is best to
247 -- put them.
248
-- Non-pointer type whose size in bytes is the platform constant
-- pc_REP_CostCentreStack_mem_alloc.
rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType
rEP_CostCentreStack_mem_alloc dflags = cmmBits fieldWidth
  where
    platformConsts = sPlatformConstants (settings dflags)
    fieldWidth =
      widthFromBytes (pc_REP_CostCentreStack_mem_alloc platformConsts)
253
-- Non-pointer type whose size in bytes is the platform constant
-- pc_REP_CostCentreStack_scc_count.
rEP_CostCentreStack_scc_count :: DynFlags -> CmmType
rEP_CostCentreStack_scc_count dflags = cmmBits fieldWidth
  where
    platformConsts = sPlatformConstants (settings dflags)
    fieldWidth =
      widthFromBytes (pc_REP_CostCentreStack_scc_count platformConsts)
258
-- Non-pointer type whose size in bytes is the platform constant
-- pc_REP_StgEntCounter_allocs.
rEP_StgEntCounter_allocs :: DynFlags -> CmmType
rEP_StgEntCounter_allocs dflags = cmmBits fieldWidth
  where
    platformConsts = sPlatformConstants (settings dflags)
    fieldWidth =
      widthFromBytes (pc_REP_StgEntCounter_allocs platformConsts)
263
264 -------------------------------------------------------------------------
265 {- Note [Signed vs unsigned]
266 ~~~~~~~~~~~~~~~~~~~~~~~~~
267 Should a CmmType include a signed vs. unsigned distinction?
268
269 This is very much like a "hint" in C-- terminology: it isn't necessary
270 in order to generate correct code, but it might be useful in that the
271 compiler can generate better code if it has access to higher-level
272 hints about data. This is important at call boundaries, because the
273 definition of a function is not visible at all of its call sites, so
274 the compiler cannot infer the hints.
275
276 Here in Cmm, we're taking a slightly different approach. We include
277 the int vs. float hint in the CmmType, because (a) the majority of
278 platforms have a strong distinction between float and int registers,
279 and (b) we don't want to do any heavyweight hint-inference in the
280 native code backend in order to get good code. We're treating the
281 hint more like a type: our Cmm is always completely consistent with
282 respect to hints. All coercions between float and int are explicit.
283
284 What about the signed vs. unsigned hint? This information might be
285 useful if we want to keep sub-word-sized values in word-size
286 registers, which we must do if we only have word-sized registers.
287
288 On such a system, there are two straightforward conventions for
289 representing sub-word-sized values:
290
291 (a) Leave the upper bits undefined. Comparison operations must
292 sign- or zero-extend both operands before comparing them,
293 depending on whether the comparison is signed or unsigned.
294
295 (b) Always keep the values sign- or zero-extended as appropriate.
296 Arithmetic operations must narrow the result to the appropriate
297 size.
298
299 A clever compiler might not use either (a) or (b) exclusively, instead
300 it would attempt to minimize the coercions by analysis: the same kind
301 of analysis that propagates hints around. In Cmm we don't want to
302 have to do this, so we plump for having richer types and keeping the
303 type information consistent.
304
305 If signed/unsigned hints are missing from CmmType, then the only
306 choice we have is (a), because we don't know whether the result of an
307 operation should be sign- or zero-extended.
308
309 Many architectures have extending load operations, which work well
310 with (b). To make use of them with (a), you need to know whether the
311 value is going to be sign- or zero-extended by an enclosing comparison
312 (for example), which involves knowing about the context. This is
313 doable but more complex.
314
315 Further complicating the issue is foreign calls: a foreign calling
316 convention can specify that signed 8-bit quantities are passed as
317 sign-extended 32 bit quantities, for example (this is the case on the
318 PowerPC). So we *do* need sign information on foreign call arguments.
319
320 Pros for adding signed vs. unsigned to CmmType:
321
322 - It would let us use convention (b) above, and get easier
323 code generation for extending loads.
324
325 - Less information required on foreign calls.
326
327 - MachOp type would be simpler
328
329 Cons:
330
331 - More complexity
332
333 - What is the CmmType for a VanillaReg? Currently it is
334 always wordRep, but now we have to decide whether it is
335 signed or unsigned. The same VanillaReg can thus have
336 different CmmType in different parts of the program.
337
338 - Extra coercions cluttering up expressions.
339
340 Currently for GHC, the foreign call point is moot, because we do our
341 own promotion of sub-word-sized values to word-sized values. The Int8
342 type is represented by an Int# which is kept sign-extended at all times
343 (this is slightly naughty, because we're making assumptions about the
344 C calling convention rather early on in the compiler). However, given
345 this, the cons outweigh the pros.
346
347 -}
348