Merge branch 'master' of http://darcs.haskell.org/ghc
[ghc.git] / compiler / cmm / CmmType.hs
1
2 module CmmType
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, b128, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13 , halfWordMask
14 , narrowU, narrowS
15 , rEP_CostCentreStack_mem_alloc
16 , rEP_CostCentreStack_scc_count
17 , rEP_StgEntCounter_allocs
18
19 , ForeignHint(..)
20
21 , Length
22 , vec, vec2, vec4, vec8, vec16
23 , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8
24 , cmmVec
25 , vecLength, vecElemType
26 , isVecType
27 )
28 where
29
30 #include "HsVersions.h"
31
32 import DynFlags
33 import FastString
34 import Outputable
35
36 import Data.Word
37 import Data.Int
38
39 -----------------------------------------------------------------------------
40 -- CmmType
41 -----------------------------------------------------------------------------
42
43 -- NOTE: CmmType is an abstract type: its constructor is not exported
44 -- from this module, so you can easily change its representation
45 --
46 -- However Width is exported in a concrete way,
47 -- and is used extensively in pattern-matching
48
-- The important one!  A Cmm type is a category paired with a width.
-- The constructor is not exported, so the representation stays private
-- to this module.
data CmmType = CmmType CmmCat Width
51
-- The "category" of a CmmType (not exported).
-- See Note [Signed vs unsigned] at the end
data CmmCat
  = GcPtrCat              -- GC pointer
  | BitsCat               -- Non-pointer
  | FloatCat              -- Float
  | VecCat Length CmmCat  -- Vector: number of elements and element category
  deriving (Eq)
59
-- A CmmType prints as its category immediately followed by its width
-- in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType category width -> ppr category <> ppr (widthInBits width)
62
-- Scalar categories print as a single letter; a vector prints as its
-- element category, then "x", the element count, and a trailing "V".
instance Outputable CmmCat where
  ppr category = case category of
    FloatCat            -> ptext (sLit "F")
    GcPtrCat            -> ptext (sLit "P")
    BitsCat             -> ptext (sLit "I")
    VecCat len elemCat  -> ppr elemCat <> text "x" <> ppr len <> text "V"
68
69 -- Why is CmmType stratified? For native code generation,
70 -- most of the time you just want to know what sort of register
71 -- to put the thing in, and for this you need to know how
72 -- many bits thing has and whether it goes in a floating-point
73 -- register. By contrast, the distinction between GcPtr and
74 -- GcNonPtr is of interest to only a few parts of the code generator.
75
76 -------- Equality on CmmType --------------
77 -- CmmType is *not* an instance of Eq; sometimes we care about the
78 -- Gc/NonGc distinction, and sometimes we don't
79 -- So we use an explicit function to force you to think about it
-- Exact equality: both the category and the width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType catA widA) (CmmType catB widB)
  | catA /= catB = False
  | otherwise    = widA == widB
82
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers.
--
-- Widths must match exactly.  Categories are compared loosely: GcPtrCat
-- and BitsCat are treated as interchangeable, floats only match floats,
-- and vectors only match vectors of the same length whose element
-- categories match under the same loose comparison.
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType catA widA) (CmmType catB widB)
  = sameModuloPtr catA catB && widA == widB
  where
    sameModuloPtr :: CmmCat -> CmmCat -> Bool
    sameModuloPtr a b = case (a, b) of
      (FloatCat,     FloatCat)     -> True
      (FloatCat,     _)            -> False
      (_,            FloatCat)     -> False
      (VecCat n1 e1, VecCat n2 e2) -> n1 == n2 && sameModuloPtr e1 e2
      (VecCat {},    _)            -> False
      (_,            VecCat {})    -> False
      _                            -> True   -- Ignores GcPtr
98
99 --- Simple operations on CmmType -----
-- Project the width out of a CmmType, discarding the category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ width -> width
102
-- Build a non-pointer bits type, or a float type, of the given width.
cmmBits, cmmFloat :: Width -> CmmType
cmmBits  width = CmmType BitsCat  width
cmmFloat width = CmmType FloatCat width
106
107 -------- Common CmmTypes ------------
108 -- Floats and words of specific widths
-- bN is the non-pointer bits type of width N; fN is the float type of
-- width N.
b8, b16, b32, b64, b128, f32, f64 :: CmmType
b8   = CmmType BitsCat  W8
b16  = CmmType BitsCat  W16
b32  = CmmType BitsCat  W32
b64  = CmmType BitsCat  W64
b128 = CmmType BitsCat  W128
f32  = CmmType FloatCat W32
f64  = CmmType FloatCat W64
117
118 -- CmmTypes of native word widths
-- Non-pointer bits type of the native word width.
bWord :: DynFlags -> CmmType
bWord = cmmBits . wordWidth
121
-- Non-pointer bits type of half the native word width.
bHalfWord :: DynFlags -> CmmType
bHalfWord = cmmBits . halfWordWidth
124
-- GC-pointer type of the native word width.
gcWord :: DynFlags -> CmmType
gcWord = CmmType GcPtrCat . wordWidth
127
-- Non-pointer bits types whose widths are given by cIntWidth and
-- cLongWidth respectively.
cInt, cLong :: DynFlags -> CmmType
cInt  = cmmBits . cIntWidth
cLong = cmmBits . cLongWidth
131
132
133 ------------ Predicates ----------------
-- isFloatType holds exactly for types in the float category and
-- isGcPtrType exactly for types in the GC-pointer category; vectors
-- satisfy neither, whatever their element category.
isFloatType, isGcPtrType :: CmmType -> Bool
isFloatType (CmmType cat _) = case cat of
  FloatCat -> True
  _        -> False

isGcPtrType (CmmType cat _) = case cat of
  GcPtrCat -> True
  _        -> False
140
-- isWord32 / isWord64 are true of 32- / 64-bit scalar non-floats
-- (both gc-ptrs and otherwise); isFloat32 / isFloat64 are true of
-- 32- / 64-bit floats.  None of them is true of a vector type.
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool

isWord64 (CmmType cat wid) = wid == W64 && cat `elem` [BitsCat, GcPtrCat]

isWord32 (CmmType cat wid) = wid == W32 && cat `elem` [BitsCat, GcPtrCat]

isFloat32 (CmmType cat wid) = cat == FloatCat && wid == W32

isFloat64 (CmmType cat wid) = cat == FloatCat && wid == W64
158
159 -----------------------------------------------------------------------------
160 -- Width
161 -----------------------------------------------------------------------------
162
-- The width of a machine-level value.
-- (we use Ord, so the constructors had better stay in this order)
data Width
  = W8
  | W16
  | W32
  | W64
  | W80    -- Extended double-precision float,
           -- used in x86 native codegen only.
  | W128
  deriving (Eq, Ord, Show)
169
-- A Width prints as its constructor name (see mrStr).
instance Outputable Width where
  ppr = ptext . mrStr
172
-- The printable name of a Width, as a literal string.
mrStr :: Width -> LitString
mrStr width = case width of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W128 -> sLit "W128"
  W80  -> sLit "W80"
180
181
182 -------- Common Widths ------------
-- Width of a native word: W32 for a 4-byte word, W64 for an 8-byte
-- word; any other word size panics.
wordWidth :: DynFlags -> Width
wordWidth dflags = case wORD_SIZE dflags of
  4 -> W32
  8 -> W64
  _ -> panic "MachOp.wordRep: Unknown word size"
188
-- Width of half a native word: W16 for a 4-byte word, W32 for an
-- 8-byte word; any other word size panics.
halfWordWidth :: DynFlags -> Width
halfWordWidth dflags = case wORD_SIZE dflags of
  4 -> W16
  8 -> W32
  _ -> panic "MachOp.halfWordRep: Unknown word size"
194
-- Mask selecting the low half of a native word: 16 one-bits for a
-- 4-byte word, 32 one-bits for an 8-byte word; any other word size
-- panics.
halfWordMask :: DynFlags -> Integer
halfWordMask dflags = case wORD_SIZE dflags of
  4 -> 0xFFFF
  8 -> 0xFFFFFFFF
  _ -> panic "MachOp.halfWordMask: Unknown word size"
200
-- cIntWidth is the Width for a C-language 'int'; cLongWidth is the
-- corresponding Width selected by cLONG_SIZE.  Both sizes are read from
-- the DynFlags and interpreted as byte counts: 4 gives W32 and 8 gives
-- W64.  Any other size panics, naming the function and the offending
-- size.
cIntWidth, cLongWidth :: DynFlags -> Width
cIntWidth dflags = case cINT_SIZE dflags of
    4 -> W32
    8 -> W64
    s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s)
cLongWidth dflags = case cLONG_SIZE dflags of
    4 -> W32
    8 -> W64
    -- Report the correct function name (this used to say "cIntWidth").
    s -> panic ("cLongWidth: Unknown cLONG_SIZE: " ++ show s)
211
-- Number of bits in a value of the given width.
widthInBits :: Width -> Int
widthInBits width = case width of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W128 -> 128
  W80  -> 80
219
-- Number of bytes in a value of the given width.
widthInBytes :: Width -> Int
widthInBytes width = case width of
  W8   -> 1
  W16  -> 2
  W32  -> 4
  W64  -> 8
  W128 -> 16
  W80  -> 10
227
-- Inverse of widthInBytes: the Width occupying exactly the given number
-- of bytes.  Panics (reporting the byte count) when no Width has that
-- size.
widthFromBytes :: Int -> Width
widthFromBytes n = case n of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  16 -> W128
  10 -> W80
  _  -> pprPanic "no width for given number of bytes" (ppr n)
236
-- log_2 of the width in bytes, useful for generating shifts.
-- W80 is 10 bytes, which is not a power of two, so it panics.
widthInLog :: Width -> Int
widthInLog width = case width of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W80  -> panic "widthInLog: F80"
245
246 -- widening / narrowing
247
-- Narrow an Integer to the given width, interpreting the result as
-- unsigned: the value is wrapped through the fixed-size WordN type of
-- that width and converted back to Integer.
--
-- Only W8, W16, W32 and W64 are supported.  Any other width (W80, W128)
-- panics, reporting this function's name and the offending width.
narrowU :: Width -> Integer -> Integer
narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
narrowU w   _ = pprPanic "narrowU: unsupported width" (ppr w)
254
-- Narrow an Integer to the given width, interpreting the result as
-- signed: the value is wrapped through the fixed-size IntN type of that
-- width and converted back to Integer.
--
-- Only W8, W16, W32 and W64 are supported.  Any other width (W80, W128)
-- panics, reporting this function's name and the offending width.
narrowS :: Width -> Integer -> Integer
narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
narrowS w   _ = pprPanic "narrowS: unsupported width" (ppr w)
261
262 -----------------------------------------------------------------------------
263 -- SIMD
264 -----------------------------------------------------------------------------
265
-- Number of elements in a vector type.
type Length = Int
267
-- Build the vector type with the given number of elements of the given
-- element type.  The vector's width is the element width in bytes times
-- the element count; widthFromBytes panics if no Width has that size.
vec :: Length -> CmmType -> CmmType
vec len (CmmType elemCat elemWidth) =
  CmmType (VecCat len elemCat) (widthFromBytes totalBytes)
  where
    totalBytes :: Int
    totalBytes = len * widthInBytes elemWidth
273
-- vecN builds the N-element vector of the given element type.
vec2, vec4, vec8, vec16 :: CmmType -> CmmType
vec2  elemTy = vec 2  elemTy
vec4  elemTy = vec 4  elemTy
vec8  elemTy = vec 8  elemTy
vec16 elemTy = vec 16 elemTy
279
-- Common vector types, named vec<count><element type>.
vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType
vec2f64 = vec2  f64
vec2b64 = vec2  b64
vec4f32 = vec4  f32
vec4b32 = vec4  b32
vec8b16 = vec8  b16
vec16b8 = vec16 b8
287
-- Build the vector type with n elements of the given element type.
-- This is the same construction as vec, so it simply delegates to it.
cmmVec :: Int -> CmmType -> CmmType
cmmVec n elemTy = vec n elemTy
291
-- Number of elements in a vector type; panics on a non-vector type.
vecLength :: CmmType -> Length
vecLength ty = case ty of
  CmmType (VecCat len _) _ -> len
  _                        -> panic "vecLength: not a vector"
295
-- Element type of a vector type: the element category paired with the
-- vector's width in bytes divided by its element count.  Panics on a
-- non-vector type.
vecElemType :: CmmType -> CmmType
vecElemType ty = case ty of
  CmmType (VecCat len elemCat) vecWidth ->
    let elemBytes = widthInBytes vecWidth `div` len
    in  CmmType elemCat (widthFromBytes elemBytes)
  _ -> panic "vecElemType: not a vector"
302
-- True exactly when the type's category is a vector.
isVecType :: CmmType -> Bool
isVecType ty = case ty of
  CmmType (VecCat {}) _ -> True
  _                     -> False
306
307 -------------------------------------------------------------------------
308 -- Hints
309
310 -- Hints are extra type information we attach to the arguments and
311 -- results of a foreign call, where more type information is sometimes
312 -- needed by the ABI to make the correct kind of call.
313
-- Used to give extra per-argument or per-result
-- information needed by foreign calling conventions
data ForeignHint
  = NoHint
  | AddrHint
  | SignedHint
  deriving (Eq)
319
320 -------------------------------------------------------------------------
321
322 -- These don't really belong here, but I don't know where is best to
323 -- put them.
324
-- Non-pointer bits type whose size in bytes is the platform constant
-- pc_REP_CostCentreStack_mem_alloc, read from the DynFlags settings.
rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType
rEP_CostCentreStack_mem_alloc =
  cmmBits
    . widthFromBytes
    . pc_REP_CostCentreStack_mem_alloc
    . sPlatformConstants
    . settings
329
-- Non-pointer bits type whose size in bytes is the platform constant
-- pc_REP_CostCentreStack_scc_count, read from the DynFlags settings.
rEP_CostCentreStack_scc_count :: DynFlags -> CmmType
rEP_CostCentreStack_scc_count =
  cmmBits
    . widthFromBytes
    . pc_REP_CostCentreStack_scc_count
    . sPlatformConstants
    . settings
334
-- Non-pointer bits type whose size in bytes is the platform constant
-- pc_REP_StgEntCounter_allocs, read from the DynFlags settings.
rEP_StgEntCounter_allocs :: DynFlags -> CmmType
rEP_StgEntCounter_allocs =
  cmmBits
    . widthFromBytes
    . pc_REP_StgEntCounter_allocs
    . sPlatformConstants
    . settings
339
340 -------------------------------------------------------------------------
341 {- Note [Signed vs unsigned]
342 ~~~~~~~~~~~~~~~~~~~~~~~~~
343 Should a CmmType include a signed vs. unsigned distinction?
344
345 This is very much like a "hint" in C-- terminology: it isn't necessary
346 in order to generate correct code, but it might be useful in that the
347 compiler can generate better code if it has access to higher-level
348 hints about data. This is important at call boundaries, because the
349 definition of a function is not visible at all of its call sites, so
350 the compiler cannot infer the hints.
351
352 Here in Cmm, we're taking a slightly different approach. We include
353 the int vs. float hint in the CmmType, because (a) the majority of
354 platforms have a strong distinction between float and int registers,
355 and (b) we don't want to do any heavyweight hint-inference in the
356 native code backend in order to get good code. We're treating the
357 hint more like a type: our Cmm is always completely consistent with
358 respect to hints. All coercions between float and int are explicit.
359
360 What about the signed vs. unsigned hint? This information might be
361 useful if we want to keep sub-word-sized values in word-size
362 registers, which we must do if we only have word-sized registers.
363
364 On such a system, there are two straightforward conventions for
365 representing sub-word-sized values:
366
367 (a) Leave the upper bits undefined. Comparison operations must
368 sign- or zero-extend both operands before comparing them,
369 depending on whether the comparison is signed or unsigned.
370
371 (b) Always keep the values sign- or zero-extended as appropriate.
372 Arithmetic operations must narrow the result to the appropriate
373 size.
374
375 A clever compiler might not use either (a) or (b) exclusively, instead
376 it would attempt to minimize the coercions by analysis: the same kind
377 of analysis that propagates hints around. In Cmm we don't want to
378 have to do this, so we plump for having richer types and keeping the
379 type information consistent.
380
381 If signed/unsigned hints are missing from CmmType, then the only
382 choice we have is (a), because we don't know whether the result of an
383 operation should be sign- or zero-extended.
384
385 Many architectures have extending load operations, which work well
386 with (b). To make use of them with (a), you need to know whether the
387 value is going to be sign- or zero-extended by an enclosing comparison
388 (for example), which involves knowing about the context. This is
389 doable but more complex.
390
391 Further complicating the issue is foreign calls: a foreign calling
392 convention can specify that signed 8-bit quantities are passed as
393 sign-extended 32 bit quantities, for example (this is the case on the
394 PowerPC). So we *do* need sign information on foreign call arguments.
395
396 Pros for adding signed vs. unsigned to CmmType:
397
398 - It would let us use convention (b) above, and get easier
399 code generation for extending loads.
400
401 - Less information required on foreign calls.
402
403 - MachOp type would be simpler
404
405 Cons:
406
407 - More complexity
408
409 - What is the CmmType for a VanillaReg? Currently it is
410 always wordRep, but now we have to decide whether it is
411 signed or unsigned. The same VanillaReg can thus have
412 different CmmType in different parts of the program.
413
414 - Extra coercions cluttering up expressions.
415
416 Currently for GHC, the foreign call point is moot, because we do our
417 own promotion of sub-word-sized values to word-sized values. The Int8
418 type is represented by an Int# which is kept sign-extended at all times
419 (this is slightly naughty, because we're making assumptions about the
420 C calling convention rather early on in the compiler). However, given
421 this, the cons outweigh the pros.
422
423 -}
424