add a comment
[ghc.git] / compiler / cmm / CmmType.hs
1 {-# LANGUAGE CPP #-}
2
3 module CmmType
4 ( CmmType -- Abstract
5 , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord
6 , cInt, cLong
7 , cmmBits, cmmFloat
8 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
9 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
10
11 , Width(..)
12 , widthInBits, widthInBytes, widthInLog, widthFromBytes
13 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
14 , halfWordMask
15 , narrowU, narrowS
16 , rEP_CostCentreStack_mem_alloc
17 , rEP_CostCentreStack_scc_count
18 , rEP_StgEntCounter_allocs
19 , rEP_StgEntCounter_allocd
20
21 , ForeignHint(..)
22
23 , Length
24 , vec, vec2, vec4, vec8, vec16
25 , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8
26 , cmmVec
27 , vecLength, vecElemType
28 , isVecType
29 )
30 where
31
32 #include "HsVersions.h"
33
34 import DynFlags
35 import FastString
36 import Outputable
37
38 import Data.Word
39 import Data.Int
40
41 -----------------------------------------------------------------------------
42 -- CmmType
43 -----------------------------------------------------------------------------
44
45 -- NOTE: CmmType is an abstract type, not exported from this
46 -- module so you can easily change its representation
47 --
48 -- However Width is exported in a concrete way,
49 -- and is used extensively in pattern-matching
50
-- The important one!  A CmmType pairs a category (GC pointer, plain
-- bits, float or vector) with the total width of the value.  The
-- constructor is not exported, so clients go through the functions
-- defined in this module.
data CmmType
  = CmmType CmmCat Width
53
-- "Category" of a CmmType (not exported)
data CmmCat
  = GcPtrCat               -- GC pointer
  | BitsCat                -- Non-pointer
  | FloatCat               -- Float
  | VecCat Length CmmCat   -- Vector: element count and element category
  deriving (Eq)
  -- See Note [Signed vs unsigned] at the end
61
instance Outputable CmmType where
  -- The rendering of the category immediately followed by the width
  -- in bits.
  ppr (CmmType category width) = ppr category <> ppr bits
    where
      bits = widthInBits width
64
instance Outputable CmmCat where
  -- Scalars print as a single letter; a vector prints its element
  -- category, then "x", the element count, and a trailing "V".
  ppr category = case category of
    FloatCat            -> ptext (sLit "F")
    GcPtrCat            -> ptext (sLit "P")
    BitsCat             -> ptext (sLit "I")
    VecCat len elemCat  -> ppr elemCat <> text "x" <> ppr len <> text "V"
70
71 -- Why is CmmType stratified? For native code generation,
72 -- most of the time you just want to know what sort of register
73 -- to put the thing in, and for this you need to know how
74 -- many bits the thing has and whether it goes in a floating-point
75 -- register. By contrast, the distinction between GcPtr and
76 -- GcNonPtr is of interest to only a few parts of the code generator.
77
78 -------- Equality on CmmType --------------
79 -- CmmType is *not* an instance of Eq; sometimes we care about the
80 -- Gc/NonGc distinction, and sometimes we don't
81 -- So we use an explicit function to force you to think about it
cmmEqType :: CmmType -> CmmType -> Bool   -- Exact equality
-- Two types are equal only when both the category and the width agree.
cmmEqType (CmmType cat1 wid1) (CmmType cat2 wid2)
  | cat1 /= cat2 = False
  | otherwise    = wid1 == wid2
84
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers
--
-- Like cmmEqType, except that GC pointers and plain bits are treated
-- as the same category (also inside vectors).
cmmEqType_ignoring_ptrhood (CmmType cat1 wid1) (CmmType cat2 wid2)
  = weakEq cat1 cat2 && wid1 == wid2
  where
    weakEq :: CmmCat -> CmmCat -> Bool
    weakEq FloatCat FloatCat = True
    weakEq (VecCat len1 elemCat1) (VecCat len2 elemCat2)
      = len1 == len2 && weakEq elemCat1 elemCat2
    -- Any remaining pairing is equal only if both sides are word-like;
    -- a float or vector on just one side therefore compares unequal.
    weakEq catA catB = isWordCat catA && isWordCat catB

    -- GC pointers and plain bits: the categories whose difference
    -- this comparison ignores.
    isWordCat :: CmmCat -> Bool
    isWordCat GcPtrCat = True
    isWordCat BitsCat  = True
    isWordCat _        = False
100
101 --- Simple operations on CmmType -----
-- The width component of a CmmType; the category is ignored.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ width -> width
104
-- Build a non-pointer bits type, or a float type, of the given width.
cmmBits, cmmFloat :: Width -> CmmType
cmmBits  width = CmmType BitsCat  width
cmmFloat width = CmmType FloatCat width
108
109 -------- Common CmmTypes ------------
110 -- Floats and words of specific widths
-- Non-pointer bit types of fixed widths
b8, b16, b32, b64, b128, b256, b512 :: CmmType
b8   = cmmBits W8
b16  = cmmBits W16
b32  = cmmBits W32
b64  = cmmBits W64
b128 = cmmBits W128
b256 = cmmBits W256
b512 = cmmBits W512

-- Single- and double-width float types
f32, f64 :: CmmType
f32 = cmmFloat W32
f64 = cmmFloat W64
121
122 -- CmmTypes of native word widths
-- Non-pointer bits type as wide as a native word (see wordWidth).
bWord :: DynFlags -> CmmType
bWord = cmmBits . wordWidth
125
-- Non-pointer bits type as wide as half a native word (see halfWordWidth).
bHalfWord :: DynFlags -> CmmType
bHalfWord = cmmBits . halfWordWidth
128
-- GC-pointer type as wide as a native word.
gcWord :: DynFlags -> CmmType
gcWord = CmmType GcPtrCat . wordWidth
131
-- Non-pointer bits types matching the configured widths of the
-- C 'int' and 'long' types (see cIntWidth and cLongWidth).
cInt, cLong :: DynFlags -> CmmType
cInt  = cmmBits . cIntWidth
cLong = cmmBits . cLongWidth
135
136
137 ------------ Predicates ----------------
-- Category tests; the width plays no part.  A vector is neither a
-- float type nor a GC-pointer type, whatever its elements are.
isFloatType, isGcPtrType :: CmmType -> Bool
isFloatType (CmmType category _) = category == FloatCat

isGcPtrType (CmmType category _) = category == GcPtrCat
144
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
-- isWord32 / isWord64 are true of non-float scalars of that width
-- (both gc-ptrs and otherwise); vectors never qualify.
-- isFloat32 / isFloat64 are true of floats of exactly that width.

isWord64 (CmmType category width)
  = width == W64 && (category == BitsCat || category == GcPtrCat)

isWord32 (CmmType category width)
  = width == W32 && (category == BitsCat || category == GcPtrCat)

isFloat32 (CmmType category width)
  = category == FloatCat && width == W32

isFloat64 (CmmType category width)
  = category == FloatCat && width == W64
162
163 -----------------------------------------------------------------------------
164 -- Width
165 -----------------------------------------------------------------------------
166
-- The constructors are listed in increasing size because the derived
-- Ord instance is relied upon; keep them in this order.
data Width
  = W8
  | W16
  | W32
  | W64
  | W80    -- Extended double-precision float,
           -- used in x86 native codegen only.
           -- (we use Ord, so it'd better be in this order)
  | W128
  | W256
  | W512
  deriving (Eq, Ord, Show)
175
instance Outputable Width where
  -- Prints the constructor name, e.g. W32.
  ppr = ptext . mrStr
178
-- The constructor name of a Width as a literal string; used by the
-- Outputable instance.
mrStr :: Width -> LitString
mrStr width = case width of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W80  -> sLit "W80"
  W128 -> sLit "W128"
  W256 -> sLit "W256"
  W512 -> sLit "W512"
188
189
190 -------- Common Widths ------------
-- Width of a native word, as given by wORD_SIZE in the DynFlags.
-- Only 4- and 8-byte words are supported; any other size is a
-- compiler panic that names this function and reports the size.
wordWidth :: DynFlags -> Width
wordWidth dflags = case wORD_SIZE dflags of
  4 -> W32
  8 -> W64
  s -> panic ("wordWidth: Unknown wORD_SIZE: " ++ show s)
196
-- Width of half a native word, as given by wORD_SIZE in the DynFlags.
-- Only 4- and 8-byte words are supported; any other size is a
-- compiler panic that names this function and reports the size.
halfWordWidth :: DynFlags -> Width
halfWordWidth dflags = case wORD_SIZE dflags of
  4 -> W16
  8 -> W32
  s -> panic ("halfWordWidth: Unknown wORD_SIZE: " ++ show s)
202
-- Mask with all bits of the low half of a native word set:
-- 16 bits for 4-byte words, 32 bits for 8-byte words.
-- Any other wORD_SIZE is a compiler panic that names this function
-- and reports the size.
halfWordMask :: DynFlags -> Integer
halfWordMask dflags = case wORD_SIZE dflags of
  4 -> 0xFFFF
  8 -> 0xFFFFFFFF
  s -> panic ("halfWordMask: Unknown wORD_SIZE: " ++ show s)
208
-- cIntWidth is the Width for a C-language 'int', and
-- cLongWidth is the Width for a C-language 'long'.
-- Both read the size in bytes from the DynFlags (cINT_SIZE and
-- cLONG_SIZE); only 4 and 8 are supported, anything else panics
-- with a message naming the function that failed.
cIntWidth, cLongWidth :: DynFlags -> Width
cIntWidth dflags = case cINT_SIZE dflags of
  4 -> W32
  8 -> W64
  s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s)
cLongWidth dflags = case cLONG_SIZE dflags of
  4 -> W32
  8 -> W64
  s -> panic ("cLongWidth: Unknown cLONG_SIZE: " ++ show s)
219
-- Size of a Width in bits: eight times its size in bytes
-- (so W80, at 10 bytes, is 80 bits).
widthInBits :: Width -> Int
widthInBits width = 8 * widthInBytes width
229
-- Size of a Width in bytes.
widthInBytes :: Width -> Int
widthInBytes width = case width of
  W8   -> 1
  W16  -> 2
  W32  -> 4
  W64  -> 8
  W80  -> 10
  W128 -> 16
  W256 -> 32
  W512 -> 64
239
-- Inverse of widthInBytes: the Width occupying exactly the given
-- number of bytes.  Panics when no Width has that size.
widthFromBytes :: Int -> Width
widthFromBytes n = case n of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  10 -> W80
  16 -> W128
  32 -> W256
  64 -> W512
  _  -> pprPanic "no width for given number of bytes" (ppr n)
250
-- log_2 of the width in bytes, useful for generating shifts.
-- W80 is 10 bytes, which is not a power of two, so it panics.
widthInLog :: Width -> Int
widthInLog width = case width of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W256 -> 5
  W512 -> 6
  W80  -> panic "widthInLog: F80"
261
262 -- widening / narrowing
263
-- Narrow an Integer to the given width, reading the result as an
-- unsigned value: the argument is converted to the fixed-size word
-- type of that width and back again.
-- Only W8, W16, W32 and W64 are supported; any other width is a
-- compiler panic that names this function and the offending width.
narrowU :: Width -> Integer -> Integer
narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
narrowU w   _ = pprPanic "narrowU: unsupported width" (ppr w)
270
-- Narrow an Integer to the given width, reading the result as a
-- signed value: the argument is converted to the fixed-size int
-- type of that width and back again.
-- Only W8, W16, W32 and W64 are supported; any other width is a
-- compiler panic that names this function and the offending width.
narrowS :: Width -> Integer -> Integer
narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
narrowS w   _ = pprPanic "narrowS: unsupported width" (ppr w)
277
278 -----------------------------------------------------------------------------
279 -- SIMD
280 -----------------------------------------------------------------------------
281
-- Number of elements in a vector type.
type Length = Int
283
-- Build the vector type holding the given number of elements of the
-- given type.  The vector's width is the element width times the
-- element count; widthFromBytes panics if no Width has that size.
vec :: Length -> CmmType -> CmmType
vec len (CmmType elemCat elemWidth) =
  CmmType (VecCat len elemCat) (widthFromBytes totalBytes)
  where
    totalBytes :: Int
    totalBytes = len * widthInBytes elemWidth
289
-- Vectors of 2, 4, 8 and 16 elements of the given element type.
vec2, vec4, vec8, vec16 :: CmmType -> CmmType
vec2  elemTy = vec 2  elemTy
vec4  elemTy = vec 4  elemTy
vec8  elemTy = vec 8  elemTy
vec16 elemTy = vec 16 elemTy
295
-- The common 128-bit vector types, named <count><element type>.
vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType
vec2f64 = vec2  f64
vec2b64 = vec2  b64
vec4f32 = vec4  f32
vec4b32 = vec4  b32
vec8b16 = vec8  b16
vec16b8 = vec16 b8
303
-- Vector type with the given element count and element type;
-- this is the same construction as vec.
cmmVec :: Int -> CmmType -> CmmType
cmmVec = vec
307
-- Number of elements of a vector type; panics on a non-vector type.
vecLength :: CmmType -> Length
vecLength ty = case ty of
  CmmType (VecCat len _) _ -> len
  _                        -> panic "vecLength: not a vector"
311
-- Element type of a vector type: the element category paired with
-- the vector's width divided by its element count.
-- Panics on a non-vector type.
vecElemType :: CmmType -> CmmType
vecElemType ty = case ty of
  CmmType (VecCat len elemCat) totalWidth ->
    CmmType elemCat (widthFromBytes (widthInBytes totalWidth `div` len))
  _ -> panic "vecElemType: not a vector"
318
-- True exactly for vector types.
isVecType :: CmmType -> Bool
isVecType ty = case ty of
  CmmType (VecCat {}) _ -> True
  _                     -> False
322
323 -------------------------------------------------------------------------
324 -- Hints
325
326 -- Hints are extra type information we attach to the arguments and
327 -- results of a foreign call, where more type information is sometimes
328 -- needed by the ABI to make the correct kind of call.
329
-- Used to give extra per-argument or per-result
-- information needed by foreign calling conventions
data ForeignHint
  = NoHint
  | AddrHint
  | SignedHint
  deriving (Eq)
335
336 -------------------------------------------------------------------------
337
338 -- These don't really belong here, but I don't know where is best to
339 -- put them.
340
-- Non-pointer bits type whose size in bytes is the platform constant
-- pc_REP_CostCentreStack_mem_alloc.
rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType
rEP_CostCentreStack_mem_alloc dflags = cmmBits width
  where
    platformConstants = sPlatformConstants (settings dflags)
    width = widthFromBytes
              (pc_REP_CostCentreStack_mem_alloc platformConstants)
345
-- Non-pointer bits type whose size in bytes is the platform constant
-- pc_REP_CostCentreStack_scc_count.
rEP_CostCentreStack_scc_count :: DynFlags -> CmmType
rEP_CostCentreStack_scc_count dflags = cmmBits width
  where
    platformConstants = sPlatformConstants (settings dflags)
    width = widthFromBytes
              (pc_REP_CostCentreStack_scc_count platformConstants)
350
-- Non-pointer bits type whose size in bytes is the platform constant
-- pc_REP_StgEntCounter_allocs.
rEP_StgEntCounter_allocs :: DynFlags -> CmmType
rEP_StgEntCounter_allocs dflags = cmmBits width
  where
    platformConstants = sPlatformConstants (settings dflags)
    width = widthFromBytes
              (pc_REP_StgEntCounter_allocs platformConstants)
355
-- Non-pointer bits type whose size in bytes is the platform constant
-- pc_REP_StgEntCounter_allocd.
rEP_StgEntCounter_allocd :: DynFlags -> CmmType
rEP_StgEntCounter_allocd dflags = cmmBits width
  where
    platformConstants = sPlatformConstants (settings dflags)
    width = widthFromBytes
              (pc_REP_StgEntCounter_allocd platformConstants)
360
361 -------------------------------------------------------------------------
362 {- Note [Signed vs unsigned]
363 ~~~~~~~~~~~~~~~~~~~~~~~~~
364 Should a CmmType include a signed vs. unsigned distinction?
365
366 This is very much like a "hint" in C-- terminology: it isn't necessary
367 in order to generate correct code, but it might be useful in that the
368 compiler can generate better code if it has access to higher-level
369 hints about data. This is important at call boundaries, because the
370 definition of a function is not visible at all of its call sites, so
371 the compiler cannot infer the hints.
372
373 Here in Cmm, we're taking a slightly different approach. We include
374 the int vs. float hint in the CmmType, because (a) the majority of
375 platforms have a strong distinction between float and int registers,
376 and (b) we don't want to do any heavyweight hint-inference in the
377 native code backend in order to get good code. We're treating the
378 hint more like a type: our Cmm is always completely consistent with
379 respect to hints. All coercions between float and int are explicit.
380
381 What about the signed vs. unsigned hint? This information might be
382 useful if we want to keep sub-word-sized values in word-size
383 registers, which we must do if we only have word-sized registers.
384
385 On such a system, there are two straightforward conventions for
386 representing sub-word-sized values:
387
388 (a) Leave the upper bits undefined. Comparison operations must
389 sign- or zero-extend both operands before comparing them,
390 depending on whether the comparison is signed or unsigned.
391
392 (b) Always keep the values sign- or zero-extended as appropriate.
393 Arithmetic operations must narrow the result to the appropriate
394 size.
395
396 A clever compiler might not use either (a) or (b) exclusively, instead
397 it would attempt to minimize the coercions by analysis: the same kind
398 of analysis that propagates hints around. In Cmm we don't want to
399 have to do this, so we plump for having richer types and keeping the
400 type information consistent.
401
402 If signed/unsigned hints are missing from CmmType, then the only
403 choice we have is (a), because we don't know whether the result of an
404 operation should be sign- or zero-extended.
405
406 Many architectures have extending load operations, which work well
407 with (b). To make use of them with (a), you need to know whether the
408 value is going to be sign- or zero-extended by an enclosing comparison
409 (for example), which involves knowing about the context. This is
410 doable but more complex.
411
412 Further complicating the issue is foreign calls: a foreign calling
413 convention can specify that signed 8-bit quantities are passed as
414 sign-extended 32 bit quantities, for example (this is the case on the
415 PowerPC). So we *do* need sign information on foreign call arguments.
416
417 Pros for adding signed vs. unsigned to CmmType:
418
419 - It would let us use convention (b) above, and get easier
420 code generation for extending loads.
421
422 - Less information required on foreign calls.
423
424 - MachOp type would be simpler
425
426 Cons:
427
428 - More complexity
429
430 - What is the CmmType for a VanillaReg? Currently it is
431 always wordRep, but now we have to decide whether it is
432 signed or unsigned. The same VanillaReg can thus have
433 different CmmType in different parts of the program.
434
435 - Extra coercions cluttering up expressions.
436
437 Currently for GHC, the foreign call point is moot, because we do our
438 own promotion of sub-word-sized values to word-sized values. The Int8
439 type is represented by an Int# which is kept sign-extended at all times
440 (this is slightly naughty, because we're making assumptions about the
441 C calling convention rather early on in the compiler). However, given
442 this, the cons outweigh the pros.
443
444 -}
445