Add support for producing position-independent executables
[ghc.git] / compiler / cmm / CmmType.hs
1 {-# LANGUAGE CPP #-}
2
3 module CmmType
4 ( CmmType -- Abstract
5 , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord
6 , cInt
7 , cmmBits, cmmFloat
8 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
9 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
10
11 , Width(..)
12 , widthInBits, widthInBytes, widthInLog, widthFromBytes
13 , wordWidth, halfWordWidth, cIntWidth
14 , halfWordMask
15 , narrowU, narrowS
16 , rEP_CostCentreStack_mem_alloc
17 , rEP_CostCentreStack_scc_count
18 , rEP_StgEntCounter_allocs
19 , rEP_StgEntCounter_allocd
20
21 , ForeignHint(..)
22
23 , Length
24 , vec, vec2, vec4, vec8, vec16
25 , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8
26 , cmmVec
27 , vecLength, vecElemType
28 , isVecType
29 )
30 where
31
32 #include "HsVersions.h"
33
34 import DynFlags
35 import FastString
36 import Outputable
37
38 import Data.Word
39 import Data.Int
40
41 -----------------------------------------------------------------------------
42 -- CmmType
43 -----------------------------------------------------------------------------
44
45 -- NOTE: CmmType is an abstract type: its constructor is not exported
46 -- from this module, so you can easily change its representation
47 --
48 -- However Width is exported in a concrete way,
49 -- and is used extensively in pattern-matching
50
-- The important one!  A Cmm type is a category paired with a width.
-- The export list mentions only the type name, so the constructor
-- stays private to this module.
data CmmType
  = CmmType CmmCat Width
53
-- The "category" half of a CmmType (not exported).
-- See Note [Signed vs unsigned] at the end of this file.
data CmmCat
  = GcPtrCat              -- GC pointer
  | BitsCat               -- Non-pointer
  | FloatCat              -- Float
  | VecCat Length CmmCat  -- Vector: element count and element category
  deriving (Eq)
61
-- A CmmType prints as its category tag immediately followed by its
-- width in bits.
instance Outputable CmmType where
  ppr (CmmType category width) = ppr category <> ppr bits
    where
      bits = widthInBits width
64
-- One-letter tags for the scalar categories; a vector prints its
-- element category, then "x", the element count, and a trailing "V".
instance Outputable CmmCat where
  ppr category = case category of
    GcPtrCat     -> text "P"
    BitsCat      -> text "I"
    FloatCat     -> text "F"
    VecCat n elt -> ppr elt <> text "x" <> ppr n <> text "V"
70
71 -- Why is CmmType stratified? For native code generation,
72 -- most of the time you just want to know what sort of register
73 -- to put the thing in, and for this you need to know how
74 -- many bits the thing has and whether it goes in a floating-point
75 -- register. By contrast, the distinction between GcPtr and
76 -- GcNonPtr is of interest to only a few parts of the code generator.
77
78 -------- Equality on CmmType --------------
79 -- CmmType is *not* an instance of Eq; sometimes we care about the
80 -- Gc/NonGc distinction, and sometimes we don't
81 -- So we use an explicit function to force you to think about it
-- Exact equality: both the category and the width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType catA widA) (CmmType catB widB)
  | catA /= catB = False
  | otherwise    = widA == widB
84
-- Equality that does not distinguish GC pointers from plain bits.
-- This equality is temporary; it is used in CmmLint because the RTS
-- files are not yet well-typed with respect to pointers.
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType catA widA) (CmmType catB widB)
  = weakEq catA catB && widA == widB
  where
    -- Floats match only floats; vectors match only vectors of the same
    -- length whose element categories weakly match; any two word-like
    -- categories match each other.
    weakEq :: CmmCat -> CmmCat -> Bool
    weakEq FloatCat FloatCat = True
    weakEq (VecCat lenA eltA) (VecCat lenB eltB)
      = lenA == lenB && weakEq eltA eltB
    weakEq a b = wordLike a && wordLike b

    -- The categories whose pointer-hood is being ignored.
    wordLike :: CmmCat -> Bool
    wordLike GcPtrCat = True
    wordLike BitsCat  = True
    wordLike _        = False
100
101 --- Simple operations on CmmType -----
-- Project the width out of a CmmType, discarding the category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ width -> width
104
-- Build a non-pointer bits type, or a float type, of the given width.
cmmBits, cmmFloat :: Width -> CmmType
cmmBits  width = CmmType BitsCat  width
cmmFloat width = CmmType FloatCat width
108
109 -------- Common CmmTypes ------------
110 -- Floats and words of specific widths
-- Non-pointer bit types of fixed widths.
b8, b16, b32, b64, b128, b256, b512 :: CmmType
b8   = CmmType BitsCat W8
b16  = CmmType BitsCat W16
b32  = CmmType BitsCat W32
b64  = CmmType BitsCat W64
b128 = CmmType BitsCat W128
b256 = CmmType BitsCat W256
b512 = CmmType BitsCat W512

-- Single- and double-precision float types.
f32, f64 :: CmmType
f32 = CmmType FloatCat W32
f64 = CmmType FloatCat W64
121
122 -- CmmTypes of native word widths
-- Non-pointer bits type as wide as 'wordWidth' says for these flags.
bWord :: DynFlags -> CmmType
bWord = cmmBits . wordWidth
125
-- Non-pointer bits type as wide as 'halfWordWidth' says for these flags.
bHalfWord :: DynFlags -> CmmType
bHalfWord = cmmBits . halfWordWidth
128
-- GC-pointer type as wide as 'wordWidth' says for these flags.
gcWord :: DynFlags -> CmmType
gcWord = CmmType GcPtrCat . wordWidth
131
-- Non-pointer bits type as wide as 'cIntWidth' says for these flags.
cInt :: DynFlags -> CmmType
cInt = cmmBits . cIntWidth
134
135 ------------ Predicates ----------------
-- Category tests that ignore the width.  A vector type satisfies
-- neither predicate, whatever its element category.
isFloatType, isGcPtrType :: CmmType -> Bool
isFloatType (CmmType cat _) = case cat of
  FloatCat -> True
  _        -> False

isGcPtrType (CmmType cat _) = case cat of
  GcPtrCat -> True
  _        -> False
142
-- isWord32/isWord64 hold for 32-/64-bit non-floats, whether GC pointers
-- or plain bits.  isFloat32/isFloat64 hold only for floats of that width.
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool

isWord64 ty = case ty of
  CmmType BitsCat  W64 -> True
  CmmType GcPtrCat W64 -> True
  _                    -> False

isWord32 ty = case ty of
  CmmType BitsCat  W32 -> True
  CmmType GcPtrCat W32 -> True
  _                    -> False

isFloat32 ty = case ty of
  CmmType FloatCat W32 -> True
  _                    -> False

isFloat64 ty = case ty of
  CmmType FloatCat W64 -> True
  _                    -> False
160
161 -----------------------------------------------------------------------------
162 -- Width
163 -----------------------------------------------------------------------------
164
-- Bit widths of Cmm values.  The derived Ord instance is relied upon,
-- so the constructors must stay in increasing order of size.
data Width
  = W8
  | W16
  | W32
  | W64
  | W80   -- Extended double-precision float,
          -- used in x86 native codegen only.
  | W128
  | W256
  | W512
  deriving (Eq, Ord, Show)
173
-- A width prints as the name produced by 'mrStr'.
instance Outputable Width where
  ppr = ptext . mrStr
176
-- The printed name of each width; it matches the constructor name.
mrStr :: Width -> LitString
mrStr width = case width of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W80  -> sLit "W80"
  W128 -> sLit "W128"
  W256 -> sLit "W256"
  W512 -> sLit "W512"
186
187
188 -------- Common Widths ------------
-- The width corresponding to the word size (in bytes) reported by
-- wORD_SIZE for these flags: 4 gives W32, 8 gives W64.
-- Panics on any other word size, naming this function and reporting
-- the offending size.
wordWidth :: DynFlags -> Width
wordWidth dflags = case wORD_SIZE dflags of
  4 -> W32
  8 -> W64
  n -> pprPanic "wordWidth: Unknown word size" (ppr n)
194
-- The width of half a word: W16 when wORD_SIZE is 4, W32 when it is 8.
-- Panics on any other word size, naming this function and reporting
-- the offending size.
halfWordWidth :: DynFlags -> Width
halfWordWidth dflags = case wORD_SIZE dflags of
  4 -> W16
  8 -> W32
  n -> pprPanic "halfWordWidth: Unknown word size" (ppr n)
200
-- A mask with all bits of the low half-word set: 16 bits when
-- wORD_SIZE is 4, 32 bits when it is 8.
-- Panics on any other word size, naming this function and reporting
-- the offending size.
halfWordMask :: DynFlags -> Integer
halfWordMask dflags = case wORD_SIZE dflags of
  4 -> 0xFFFF
  8 -> 0xFFFFFFFF
  n -> pprPanic "halfWordMask: Unknown word size" (ppr n)
206
-- cIntWidth is the Width for a C-language 'int', chosen from the
-- byte size reported by cINT_SIZE.  Panics on sizes other than 4 or 8.
cIntWidth :: DynFlags -> Width
cIntWidth dflags
  | size == 4 = W32
  | size == 8 = W64
  | otherwise = panic ("cIntWidth: Unknown cINT_SIZE: " ++ show size)
  where
    size = cINT_SIZE dflags
213
-- The number of bits in each width.
widthInBits :: Width -> Int
widthInBits width = case width of
  W8   -> 8
  W16  -> 16
  W32  -> 32
  W64  -> 64
  W80  -> 80
  W128 -> 128
  W256 -> 256
  W512 -> 512
223
-- The number of bytes in each width.
widthInBytes :: Width -> Int
widthInBytes width = case width of
  W8   -> 1
  W16  -> 2
  W32  -> 4
  W64  -> 8
  W80  -> 10
  W128 -> 16
  W256 -> 32
  W512 -> 64
233
-- Inverse of 'widthInBytes'.  Panics, printing the byte count, when
-- no width has that many bytes.
widthFromBytes :: Int -> Width
widthFromBytes n = case n of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  10 -> W80
  16 -> W128
  32 -> W256
  64 -> W512
  _  -> pprPanic "no width for given number of bytes" (ppr n)
244
-- log_2 of the width in bytes, useful for generating shifts.
-- W80 is 10 bytes, which is not a power of two, so it panics.
widthInLog :: Width -> Int
widthInLog width = case width of
  W8   -> 0
  W16  -> 1
  W32  -> 2
  W64  -> 3
  W128 -> 4
  W256 -> 5
  W512 -> 6
  W80  -> panic "widthInLog: F80"
255
256 -- widening / narrowing
257
-- Narrow an Integer to the given width, reading the result as
-- unsigned: the value is converted to the fixed-size unsigned word
-- type of that width and back again.
-- Only W8, W16, W32 and W64 are supported; any other width panics,
-- naming this function and the offending width.
narrowU :: Width -> Integer -> Integer
narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
narrowU w   _ = pprPanic "narrowU: unsupported width" (ppr w)
264
-- Narrow an Integer to the given width, reading the result as
-- signed: the value is converted to the fixed-size signed integer
-- type of that width and back again.
-- Only W8, W16, W32 and W64 are supported; any other width panics,
-- naming this function and the offending width.
narrowS :: Width -> Integer -> Integer
narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
narrowS w   _ = pprPanic "narrowS: unsupported width" (ppr w)
271
272 -----------------------------------------------------------------------------
273 -- SIMD
274 -----------------------------------------------------------------------------
275
-- The number of elements in a vector type.
type Length = Int
277
-- Build the vector type holding the given number of elements of the
-- given type.  The total width is the element width in bytes times the
-- length; 'widthFromBytes' panics if no Width has that size.
vec :: Length -> CmmType -> CmmType
vec len (CmmType eltCat eltWidth) =
  let totalBytes = len * widthInBytes eltWidth
  in  CmmType (VecCat len eltCat) (widthFromBytes totalBytes)
283
-- Vector types of fixed length over an arbitrary element type.
vec2, vec4, vec8, vec16 :: CmmType -> CmmType
vec2  elt = vec 2  elt
vec4  elt = vec 4  elt
vec8  elt = vec 8  elt
vec16 elt = vec 16 elt
289
-- The 128-bit vector types: each is length * element width = 16 bytes.
vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType
vec2f64 = vec2 f64
vec2b64 = vec2 b64
vec4f32 = vec4 f32
vec4b32 = vec4 b32
vec8b16 = vec8 b16
vec16b8 = vec16 b8
297
-- Same construction as 'vec': the vector type with the given number of
-- elements of the given element type.
cmmVec :: Int -> CmmType -> CmmType
cmmVec n elt = vec n elt
301
-- The element count of a vector type; panics on a non-vector type.
vecLength :: CmmType -> Length
vecLength ty = case ty of
  CmmType (VecCat len _) _ -> len
  _                        -> panic "vecLength: not a vector"
305
-- The element type of a vector type: same element category, with the
-- vector's width in bytes divided by its length.  Panics on a
-- non-vector type.
vecElemType :: CmmType -> CmmType
vecElemType ty = case ty of
  CmmType (VecCat len eltCat) vecWidth ->
    let eltBytes = widthInBytes vecWidth `div` len
    in  CmmType eltCat (widthFromBytes eltBytes)
  _ -> panic "vecElemType: not a vector"
312
-- True exactly when the type's category is a vector.
isVecType :: CmmType -> Bool
isVecType (CmmType cat _) = case cat of
  VecCat {} -> True
  _         -> False
316
317 -------------------------------------------------------------------------
318 -- Hints
319
320 -- Hints are extra type information we attach to the arguments and
321 -- results of a foreign call, where more type information is sometimes
322 -- needed by the ABI to make the correct kind of call.
323
-- Extra per-argument or per-result information needed by foreign
-- calling conventions.
data ForeignHint
  = NoHint
  | AddrHint
  | SignedHint
  deriving (Eq)
329
330 -------------------------------------------------------------------------
331
332 -- These don't really belong here, but I don't know where is best to
333 -- put them.
334
-- Non-pointer bits type whose width in bytes is the platform constant
-- pc_REP_CostCentreStack_mem_alloc.
rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType
rEP_CostCentreStack_mem_alloc dflags = cmmBits width
  where
    constants = sPlatformConstants (settings dflags)
    width     = widthFromBytes (pc_REP_CostCentreStack_mem_alloc constants)
339
-- Non-pointer bits type whose width in bytes is the platform constant
-- pc_REP_CostCentreStack_scc_count.
rEP_CostCentreStack_scc_count :: DynFlags -> CmmType
rEP_CostCentreStack_scc_count dflags = cmmBits width
  where
    constants = sPlatformConstants (settings dflags)
    width     = widthFromBytes (pc_REP_CostCentreStack_scc_count constants)
344
-- Non-pointer bits type whose width in bytes is the platform constant
-- pc_REP_StgEntCounter_allocs.
rEP_StgEntCounter_allocs :: DynFlags -> CmmType
rEP_StgEntCounter_allocs dflags = cmmBits width
  where
    constants = sPlatformConstants (settings dflags)
    width     = widthFromBytes (pc_REP_StgEntCounter_allocs constants)
349
-- Non-pointer bits type whose width in bytes is the platform constant
-- pc_REP_StgEntCounter_allocd.
rEP_StgEntCounter_allocd :: DynFlags -> CmmType
rEP_StgEntCounter_allocd dflags = cmmBits width
  where
    constants = sPlatformConstants (settings dflags)
    width     = widthFromBytes (pc_REP_StgEntCounter_allocd constants)
354
355 -------------------------------------------------------------------------
356 {- Note [Signed vs unsigned]
357 ~~~~~~~~~~~~~~~~~~~~~~~~~
358 Should a CmmType include a signed vs. unsigned distinction?
359
360 This is very much like a "hint" in C-- terminology: it isn't necessary
361 in order to generate correct code, but it might be useful in that the
362 compiler can generate better code if it has access to higher-level
363 hints about data. This is important at call boundaries, because the
364 definition of a function is not visible at all of its call sites, so
365 the compiler cannot infer the hints.
366
367 Here in Cmm, we're taking a slightly different approach. We include
368 the int vs. float hint in the CmmType, because (a) the majority of
369 platforms have a strong distinction between float and int registers,
370 and (b) we don't want to do any heavyweight hint-inference in the
371 native code backend in order to get good code. We're treating the
372 hint more like a type: our Cmm is always completely consistent with
373 respect to hints. All coercions between float and int are explicit.
374
375 What about the signed vs. unsigned hint? This information might be
376 useful if we want to keep sub-word-sized values in word-size
377 registers, which we must do if we only have word-sized registers.
378
379 On such a system, there are two straightforward conventions for
380 representing sub-word-sized values:
381
382 (a) Leave the upper bits undefined. Comparison operations must
383 sign- or zero-extend both operands before comparing them,
384 depending on whether the comparison is signed or unsigned.
385
386 (b) Always keep the values sign- or zero-extended as appropriate.
387 Arithmetic operations must narrow the result to the appropriate
388 size.
389
390 A clever compiler might not use either (a) or (b) exclusively, instead
391 it would attempt to minimize the coercions by analysis: the same kind
392 of analysis that propagates hints around. In Cmm we don't want to
393 have to do this, so we plump for having richer types and keeping the
394 type information consistent.
395
396 If signed/unsigned hints are missing from CmmType, then the only
397 choice we have is (a), because we don't know whether the result of an
398 operation should be sign- or zero-extended.
399
400 Many architectures have extending load operations, which work well
401 with (b). To make use of them with (a), you need to know whether the
402 value is going to be sign- or zero-extended by an enclosing comparison
403 (for example), which involves knowing about the context. This is
404 doable but more complex.
405
406 Further complicating the issue is foreign calls: a foreign calling
407 convention can specify that signed 8-bit quantities are passed as
408 sign-extended 32 bit quantities, for example (this is the case on the
409 PowerPC). So we *do* need sign information on foreign call arguments.
410
411 Pros for adding signed vs. unsigned to CmmType:
412
413 - It would let us use convention (b) above, and get easier
414 code generation for extending loads.
415
416 - Less information required on foreign calls.
417
418 - MachOp type would be simpler
419
420 Cons:
421
422 - More complexity
423
424 - What is the CmmType for a VanillaReg? Currently it is
425 always wordRep, but now we have to decide whether it is
426 signed or unsigned. The same VanillaReg can thus have
427 different CmmType in different parts of the program.
428
429 - Extra coercions cluttering up expressions.
430
431 Currently for GHC, the foreign call point is moot, because we do our
432 own promotion of sub-word-sized values to word-sized values. The Int8
433 type is represented by an Int# which is kept sign-extended at all times
434 (this is slightly naughty, because we're making assumptions about the
435 C calling convention rather early on in the compiler). However, given
436 this, the cons outweigh the pros.
437
438 -}
439