Produce new-style Cmm from the Cmm parser
[ghc.git] / compiler / cmm / CmmType.hs
1
2 module CmmType
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13 , halfWordMask
14 , narrowU, narrowS
15 , rEP_CostCentreStack_mem_alloc
16 , rEP_CostCentreStack_scc_count
17 , rEP_StgEntCounter_allocs
18
19 , ForeignHint(..)
20 )
21 where
22
23 #include "HsVersions.h"
24
25 import DynFlags
26 import FastString
27 import Outputable
28
29 import Data.Word
30 import Data.Int
31
32 -----------------------------------------------------------------------------
33 -- CmmType
34 -----------------------------------------------------------------------------
35
36 -- NOTE: CmmType is an abstract type: its constructor is not exported
37 -- from this module, so you can easily change its representation
38 --
39 -- However Width is exported in a concrete way,
40 -- and is used extensively in pattern-matching
41
-- | The type of a Cmm value: a category paired with a width.
--
-- There is deliberately no 'Eq' instance; compare values with
-- 'cmmEqType' or 'cmmEqType_ignoring_ptrhood' instead.
data CmmType            -- The important one!
  = CmmType
      CmmCat            -- which category the value belongs to
      Width             -- how wide the value is
44
-- | The "category" component of a 'CmmType' (not exported).
data CmmCat
  = GcPtrCat    -- GC pointer
  | BitsCat     -- Non-pointer
  | FloatCat    -- Float
  deriving (Eq)
  -- See Note [Signed vs unsigned] at the end
51
-- | Prints the category tag immediately followed by the width in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType category width -> ppr category <> ppr (widthInBits width)
54
-- | Each category is printed as a one-letter tag.
instance Outputable CmmCat where
  ppr category = case category of
    FloatCat -> ptext (sLit "F")
    GcPtrCat -> ptext (sLit "P")
    BitsCat  -> ptext (sLit "I")
59
60 -- Why is CmmType stratified? For native code generation,
61 -- most of the time you just want to know what sort of register
62 -- to put the thing in, and for this you need to know how
63 -- many bits the thing has and whether it goes in a floating-point
64 -- register. By contrast, the distinction between GcPtr and
65 -- GcNonPtr is of interest to only a few parts of the code generator.
66
67 -------- Equality on CmmType --------------
68 -- CmmType is *not* an instance of Eq; sometimes we care about the
69 -- Gc/NonGc distinction, and sometimes we don't
70 -- So we use an explicit function to force you to think about it
-- | Exact equality: both the category and the width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType catL widL) (CmmType catR widR)
  | catL /= catR = False
  | otherwise    = widL == widR
73
-- | Equality that does not distinguish GC pointers from plain bits:
-- two types are equal when they are both floats or both non-floats,
-- and their widths agree.
--
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType catA widA) (CmmType catB widB)
  = sameFloatness && widA == widB
  where
    -- Only the float / non-float split matters; GcPtr is ignored.
    sameFloatness = isFloatCat catA == isFloatCat catB

    isFloatCat FloatCat = True
    isFloatCat _other   = False
84
85 --- Simple operations on CmmType -----
-- | Extract the width component of a 'CmmType'.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ width -> width
88
-- | Build a non-pointer bits type of the given width.
cmmBits :: Width -> CmmType
cmmBits width = CmmType BitsCat width

-- | Build a floating-point type of the given width.
cmmFloat :: Width -> CmmType
cmmFloat width = CmmType FloatCat width
92
93 -------- Common CmmTypes ------------
94 -- Floats and words of specific widths
-- Non-pointer bit types of fixed width.
b8, b16, b32, b64 :: CmmType
b8  = CmmType BitsCat W8
b16 = CmmType BitsCat W16
b32 = CmmType BitsCat W32
b64 = CmmType BitsCat W64

-- Floating-point types of fixed width.
f32, f64 :: CmmType
f32 = CmmType FloatCat W32
f64 = CmmType FloatCat W64
102
103 -- CmmTypes of native word widths
-- | Non-pointer bits type whose width is 'wordWidth' for the given flags.
bWord :: DynFlags -> CmmType
bWord = cmmBits . wordWidth
106
-- | Non-pointer bits type whose width is 'halfWordWidth' for the given flags.
bHalfWord :: DynFlags -> CmmType
bHalfWord = cmmBits . halfWordWidth
109
-- | GC-pointer type whose width is 'wordWidth' for the given flags.
gcWord :: DynFlags -> CmmType
gcWord = CmmType GcPtrCat . wordWidth
112
-- | Non-pointer bits type with the width given by 'cIntWidth'.
cInt :: DynFlags -> CmmType
cInt = cmmBits . cIntWidth

-- | Non-pointer bits type with the width given by 'cLongWidth'.
cLong :: DynFlags -> CmmType
cLong = cmmBits . cLongWidth
116
117
118 ------------ Predicates ----------------
-- | True exactly when the type's category is 'FloatCat'.
isFloatType :: CmmType -> Bool
isFloatType (CmmType category _) = category == FloatCat

-- | True exactly when the type's category is 'GcPtrCat'.
isGcPtrType :: CmmType -> Bool
isGcPtrType (CmmType category _) = category == GcPtrCat
125
-- isWord32 / isWord64 are true of non-floats of that width
-- (both gc-ptrs and otherwise); isFloat32 / isFloat64 are true of
-- floats of that width.
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool

isWord64 (CmmType category width) = case category of
  FloatCat -> False
  _nonFloat -> width == W64

isWord32 (CmmType category width) = case category of
  FloatCat -> False
  _nonFloat -> width == W32

isFloat32 (CmmType category width) = category == FloatCat && width == W32

isFloat64 (CmmType category width) = category == FloatCat && width == W64
143
144 -----------------------------------------------------------------------------
145 -- Width
146 -----------------------------------------------------------------------------
147
-- | The widths a Cmm value can have.  The name of each constructor
-- carries the number of bits (see 'widthInBits').
data Width
  = W8
  | W16
  | W32
  | W64
  | W80     -- Extended double-precision float,
            -- used in x86 native codegen only.
            -- (we use Ord, so it'd better be in this order)
  | W128
  deriving (Eq, Ord, Show)
154
-- | A width is printed using the name given by 'mrStr'.
instance Outputable Width where
  ppr = ptext . mrStr
157
-- | The printed name of a width; identical to its constructor name.
mrStr :: Width -> LitString
mrStr width = case width of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W128 -> sLit "W128"
  W80  -> sLit "W80"
165
166
167 -------- Common Widths ------------
-- | Width selected by 'wORD_SIZE': 4 gives 'W32', 8 gives 'W64'.
-- Panics for any other size.
wordWidth :: DynFlags -> Width
wordWidth dflags = case wORD_SIZE dflags of
  4 -> W32
  8 -> W64
  _ -> panic "MachOp.wordRep: Unknown word size"
173
-- | Width selected by 'wORD_SIZE': 4 gives 'W16', 8 gives 'W32'.
-- Panics for any other size.
halfWordWidth :: DynFlags -> Width
halfWordWidth dflags = case wORD_SIZE dflags of
  4 -> W16
  8 -> W32
  _ -> panic "MachOp.halfWordRep: Unknown word size"
179
-- | All-ones mask selected by 'wORD_SIZE': 16 set bits for size 4,
-- 32 set bits for size 8.  Panics for any other size.
halfWordMask :: DynFlags -> Integer
halfWordMask dflags = case wORD_SIZE dflags of
  4 -> 0xFFFF
  8 -> 0xFFFFFFFF
  _ -> panic "MachOp.halfWordMask: Unknown word size"
185
-- | 'cIntWidth' is the 'Width' selected by 'cINT_SIZE' and 'cLongWidth'
-- the 'Width' selected by 'cLONG_SIZE'.  Sizes 4 and 8 map to 'W32'
-- and 'W64'; any other size is a panic that names the offending
-- function and reports the unexpected size.
cIntWidth, cLongWidth :: DynFlags -> Width
cIntWidth dflags = case cINT_SIZE dflags of
  4 -> W32
  8 -> W64
  s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s)
cLongWidth dflags = case cLONG_SIZE dflags of
  4 -> W32
  8 -> W64
  -- This message previously claimed to come from cIntWidth.
  s -> panic ("cLongWidth: Unknown cLONG_SIZE: " ++ show s)
196
-- | Number of bits in a value of the given width.
widthInBits :: Width -> Int
widthInBits width = case width of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W128 -> 128
  W80  -> 80
204
-- | Number of bytes in a value of the given width.
-- Every width is a whole number of bytes (W80 is 10), so this is
-- just the bit count divided by eight.
widthInBytes :: Width -> Int
widthInBytes width = widthInBits width `quot` 8
212
-- | Inverse of 'widthInBytes': the width occupying the given number
-- of bytes.  Panics (printing the byte count) when no width has
-- that size.
widthFromBytes :: Int -> Width
widthFromBytes nBytes = case nBytes of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  16 -> W128
  10 -> W80
  _  -> pprPanic "no width for given number of bytes" (ppr nBytes)
221
-- | log_2 of the width in bytes, useful for generating shifts.
-- 'W80' panics: its byte count (10) is not a power of two.
widthInLog :: Width -> Int
widthInLog width = case width of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W80  -> panic "widthInLog: F80"
230
231 -- widening / narrowing
232
-- | Narrow an 'Integer' to the given width, reading the result as
-- unsigned, by a round trip through the fixed-size 'Word' type of
-- that width.  Only W8, W16, W32 and W64 are supported; other
-- widths panic.
narrowU :: Width -> Integer -> Integer
narrowU width x = case width of
  W8  -> toInteger (fromInteger x :: Word8)
  W16 -> toInteger (fromInteger x :: Word16)
  W32 -> toInteger (fromInteger x :: Word32)
  W64 -> toInteger (fromInteger x :: Word64)
  _   -> panic "narrowTo"
239
-- | Narrow an 'Integer' to the given width, reading the result as
-- signed, by a round trip through the fixed-size 'Int' type of
-- that width.  Only W8, W16, W32 and W64 are supported; other
-- widths panic.
narrowS :: Width -> Integer -> Integer
narrowS width x = case width of
  W8  -> toInteger (fromInteger x :: Int8)
  W16 -> toInteger (fromInteger x :: Int16)
  W32 -> toInteger (fromInteger x :: Int32)
  W64 -> toInteger (fromInteger x :: Int64)
  _   -> panic "narrowTo"
246
247 -------------------------------------------------------------------------
248 -- Hints
249
250 -- Hints are extra type information we attach to the arguments and
251 -- results of a foreign call, where more type information is sometimes
252 -- needed by the ABI to make the correct kind of call.
253
-- | Used to give extra per-argument or per-result
-- information needed by foreign calling conventions
data ForeignHint
  = NoHint
  | AddrHint
  | SignedHint
  deriving (Eq)
259
260 -------------------------------------------------------------------------
261
262 -- These don't really belong here, but I don't know where is best to
263 -- put them.
264
-- | Non-pointer bits type whose byte size is read from the
-- @pc_REP_CostCentreStack_mem_alloc@ platform constant.
rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType
rEP_CostCentreStack_mem_alloc dflags = cmmBits (widthFromBytes nBytes)
  where
    platformConsts = sPlatformConstants (settings dflags)
    nBytes         = pc_REP_CostCentreStack_mem_alloc platformConsts
269
-- | Non-pointer bits type whose byte size is read from the
-- @pc_REP_CostCentreStack_scc_count@ platform constant.
rEP_CostCentreStack_scc_count :: DynFlags -> CmmType
rEP_CostCentreStack_scc_count dflags = cmmBits (widthFromBytes nBytes)
  where
    platformConsts = sPlatformConstants (settings dflags)
    nBytes         = pc_REP_CostCentreStack_scc_count platformConsts
274
-- | Non-pointer bits type whose byte size is read from the
-- @pc_REP_StgEntCounter_allocs@ platform constant.
rEP_StgEntCounter_allocs :: DynFlags -> CmmType
rEP_StgEntCounter_allocs dflags = cmmBits (widthFromBytes nBytes)
  where
    platformConsts = sPlatformConstants (settings dflags)
    nBytes         = pc_REP_StgEntCounter_allocs platformConsts
279
280 -------------------------------------------------------------------------
281 {- Note [Signed vs unsigned]
282 ~~~~~~~~~~~~~~~~~~~~~~~~~
283 Should a CmmType include a signed vs. unsigned distinction?
284
285 This is very much like a "hint" in C-- terminology: it isn't necessary
286 in order to generate correct code, but it might be useful in that the
287 compiler can generate better code if it has access to higher-level
288 hints about data. This is important at call boundaries, because the
289 definition of a function is not visible at all of its call sites, so
290 the compiler cannot infer the hints.
291
292 Here in Cmm, we're taking a slightly different approach. We include
293 the int vs. float hint in the CmmType, because (a) the majority of
294 platforms have a strong distinction between float and int registers,
295 and (b) we don't want to do any heavyweight hint-inference in the
296 native code backend in order to get good code. We're treating the
297 hint more like a type: our Cmm is always completely consistent with
298 respect to hints. All coercions between float and int are explicit.
299
300 What about the signed vs. unsigned hint? This information might be
301 useful if we want to keep sub-word-sized values in word-size
302 registers, which we must do if we only have word-sized registers.
303
304 On such a system, there are two straightforward conventions for
305 representing sub-word-sized values:
306
307 (a) Leave the upper bits undefined. Comparison operations must
308 sign- or zero-extend both operands before comparing them,
309 depending on whether the comparison is signed or unsigned.
310
311 (b) Always keep the values sign- or zero-extended as appropriate.
312 Arithmetic operations must narrow the result to the appropriate
313 size.
314
315 A clever compiler might not use either (a) or (b) exclusively, instead
316 it would attempt to minimize the coercions by analysis: the same kind
317 of analysis that propagates hints around. In Cmm we don't want to
318 have to do this, so we plump for having richer types and keeping the
319 type information consistent.
320
321 If signed/unsigned hints are missing from CmmType, then the only
322 choice we have is (a), because we don't know whether the result of an
323 operation should be sign- or zero-extended.
324
325 Many architectures have extending load operations, which work well
326 with (b). To make use of them with (a), you need to know whether the
327 value is going to be sign- or zero-extended by an enclosing comparison
328 (for example), which involves knowing about the context. This is
329 doable but more complex.
330
331 Further complicating the issue is foreign calls: a foreign calling
332 convention can specify that signed 8-bit quantities are passed as
333 sign-extended 32 bit quantities, for example (this is the case on the
334 PowerPC). So we *do* need sign information on foreign call arguments.
335
336 Pros for adding signed vs. unsigned to CmmType:
337
338 - It would let us use convention (b) above, and get easier
339 code generation for extending loads.
340
341 - Less information required on foreign calls.
342
343 - MachOp type would be simpler
344
345 Cons:
346
347 - More complexity
348
349 - What is the CmmType for a VanillaReg? Currently it is
350 always wordRep, but now we have to decide whether it is
351 signed or unsigned. The same VanillaReg can thus have
352 different CmmType in different parts of the program.
353
354 - Extra coercions cluttering up expressions.
355
356 Currently for GHC, the foreign call point is moot, because we do our
357 own promotion of sub-word-sized values to word-sized values. The Int8
358 type is represented by an Int# which is kept sign-extended at all times
359 (this is slightly naughty, because we're making assumptions about the
360 C calling convention rather early on in the compiler). However, given
361 this, the cons outweigh the pros.
362
363 -}
364