Comments only, on Cmm data types
[ghc.git] / compiler / cmm / CmmExpr.hs
1
2 module CmmExpr
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13 , narrowU, narrowS
14
15 , CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr
16 , CmmReg(..), cmmRegType
17 , CmmLit(..), cmmLitType
18 , LocalReg(..), localRegType
19 , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node
20 , VGcPtr(..), vgcFlag -- Temporary!
21 , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed
22 , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
23 , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
24 , plusRegSet, minusRegSet, timesRegSet
25 , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf
26
27 -- MachOp
28 , MachOp(..)
29 , pprMachOp, isCommutableMachOp, isAssociativeMachOp
30 , isComparisonMachOp, machOpResultType
31 , machOpArgReps, maybeInvertComparison
32
33 -- MachOp builders
34 , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
35 , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
36 , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
37 , mo_wordULe, mo_wordUGt, mo_wordULt
38 , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
39 , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
40 , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
41 , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32
42 )
43 where
44
45 #include "HsVersions.h"
46
47 import BlockId
48 import CLabel
49 import Constants
50 import FastString
51 import FiniteMap
52 import Outputable
53 import Unique
54 import UniqSet
55
56 import Data.Word
57 import Data.Int
58
59 -----------------------------------------------------------------------------
60 -- CmmExpr
61 -- An expression. Expressions have no side effects.
62 -----------------------------------------------------------------------------
63
-- | A Cmm expression.  Expressions have no side effects.
data CmmExpr
  = CmmLit CmmLit               -- Literal
  | CmmLoad CmmExpr CmmType     -- Read memory location
  | CmmReg CmmReg               -- Contents of register
  | CmmMachOp MachOp [CmmExpr]  -- Machine operation (+, -, *, etc.)
  | CmmStackSlot Area Int       -- Addressing expression of a stack slot
  | CmmRegOff CmmReg Int
        -- CmmRegOff reg i
        --        ** is shorthand only, meaning **
        -- CmmMachOp (MO_Add rep)
        --           [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)]
        --      where rep = typeWidth (cmmRegType reg)
75
-- | Structural equality on expressions.  The 'CmmType' carried by a
-- 'CmmLoad' is not compared.
instance Eq CmmExpr where
  lhs == rhs = case (lhs, rhs) of
    (CmmLit a,            CmmLit b)            -> a == b
    (CmmLoad addrA _,     CmmLoad addrB _)     -> addrA == addrB
    (CmmReg a,            CmmReg b)            -> a == b
    (CmmRegOff a offA,    CmmRegOff b offB)    -> a == b && offA == offB
    (CmmMachOp opA asA,   CmmMachOp opB asB)   -> opA == opB && asA == asB
    (CmmStackSlot a offA, CmmStackSlot b offB) -> a == b && offA == offB
    _                                          -> False
84
-- | A register: either a 'LocalReg' or a 'GlobalReg'.
data CmmReg
  = CmmLocal  LocalReg
  | CmmGlobal GlobalReg
  deriving (Eq, Ord)
89
-- | A stack area: either the slot in which a variable is spilled
-- ('RegSlot'), or the stack space through which function arguments and
-- results are passed ('CallArea').
data Area
  = RegSlot  LocalReg
  | CallArea AreaId
  deriving (Eq, Ord)
96
-- | Identifies a call area.
data AreaId
  = Old            -- See Note [Old Area]
  | Young BlockId
  deriving (Eq, Ord)
101
102 {- Note [Old Area]
103 ~~~~~~~~~~~~~~~~~~
104 There is a single call area 'Old', allocated at the extreme old
105 end of the stack frame (ie just younger than the return address)
106 which holds:
107 * incoming (overflow) parameters,
108 * outgoing (overflow) parameters to tail calls,
109 * outgoing (overflow) result values
110 * the update frame (if any)
111
112 Its size is the max of all these requirements. On entry, the stack
113 pointer will point to the youngest incoming parameter, which is not
114 necessarily at the young end of the Old area.
115
116 End of note -}
117
-- | A piece of an 'Area': (area, offset, width).
type SubArea    = (Area, Int, Int)

-- | Maps an 'Area' to a list of 'SubArea's.
type SubAreaSet = FiniteMap Area [SubArea]

-- | Maps an 'Area' to an 'Int'.
type AreaMap    = FiniteMap Area Int
121
-- | Cmm literals.
data CmmLit
  = CmmInt Integer Width
        -- Interpretation: the 2's complement representation of the value
        -- is truncated to the specified size.  This is easier than trying
        -- to keep the value within range, because we don't know whether
        -- it will be used as a signed or unsigned value (the CmmType
        -- doesn't distinguish between signed & unsigned).
  | CmmFloat Rational Width
  | CmmLabel CLabel                     -- Address of label
  | CmmLabelOff CLabel Int              -- Address of label + byte offset

  | CmmLabelDiffOff CLabel CLabel Int   -- label1 - label2 + offset
        -- Due to limitations in the C backend, this MUST ONLY be used
        -- inside the info table indicated by label2 (label2 must be the
        -- info label), and label1 must be an SRT, a slow entrypoint or a
        -- large bitmap (see the Mangler).
        -- Don't use it at all unless tablesNextToCode.
        -- It is also used inside the NCG when generating
        -- position-independent code.

  | CmmBlock BlockId                    -- Code label
  | CmmHighStackMark
        -- Stands for the max stack space used during a procedure
  deriving Eq
144
-- | The 'CmmType' of an expression.
cmmExprType :: CmmExpr -> CmmType
cmmExprType expr = case expr of
    CmmLit lit        -> cmmLitType lit
    CmmLoad _ ty      -> ty
    CmmReg r          -> cmmRegType r
    CmmMachOp op args -> machOpResultType op (map cmmExprType args)
    CmmRegOff r _     -> cmmRegType r
    CmmStackSlot _ _  -> bWord            -- an address
152
-- | The 'CmmType' of a literal.
cmmLitType :: CmmLit -> CmmType
cmmLitType lit = case lit of
    CmmInt _ width     -> cmmBits width
    CmmFloat _ width   -> cmmFloat width
    CmmLabel lbl       -> cmmLabelType lbl
    CmmLabelOff lbl _  -> cmmLabelType lbl
    CmmLabelDiffOff {} -> bWord
    CmmBlock _         -> bWord
    CmmHighStackMark   -> bWord
161
-- | 'gcWord' for labels satisfying 'isGcPtrLabel', 'bWord' otherwise.
cmmLabelType :: CLabel -> CmmType
cmmLabelType lbl = if isGcPtrLabel lbl then gcWord else bWord
165
-- | The 'Width' of an expression's type.
cmmExprWidth :: CmmExpr -> Width
cmmExprWidth expr = typeWidth exprTy
  where
    exprTy = cmmExprType expr
168
169 --------
170 --- Negation for conditional branches
171
-- | Invert a conditional expression, if its top-level operator is a
-- 'MachOp' that 'maybeInvertComparison' knows how to invert.
maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr
maybeInvertCmmExpr expr = case expr of
    CmmMachOp op args -> fmap (\inverted -> CmmMachOp inverted args)
                              (maybeInvertComparison op)
    _                 -> Nothing
176
177 -----------------------------------------------------------------------------
178 -- Local registers
179 -----------------------------------------------------------------------------
180
-- | A local register: an identifying 'Unique' together with its type.
data LocalReg
  = LocalReg !Unique CmmType
    -- Fields:
    --   1. Identifier
    --   2. Type

-- Equality and ordering consult only the unique; the type is ignored.
instance Eq LocalReg where
  r1 == r2 = getUnique r1 == getUnique r2

instance Ord LocalReg where
  compare r1 r2 = compare (getUnique r1) (getUnique r2)

instance Uniquable LocalReg where
  getUnique (LocalReg u _) = u
195
-- | The 'CmmType' of a register.
cmmRegType :: CmmReg -> CmmType
cmmRegType r = case r of
    CmmLocal  local  -> localRegType local
    CmmGlobal global -> globalRegType global
199
-- | The type stored in a 'LocalReg'.
localRegType :: LocalReg -> CmmType
localRegType r = case r of
    LocalReg _ ty -> ty
202
203 -----------------------------------------------------------------------------
204 -- Register-use information for expressions and other types
205 -----------------------------------------------------------------------------
206
-- | Sets of local registers
type RegSet = UniqSet LocalReg

emptyRegSet :: RegSet
emptyRegSet = emptyUniqSet

elemRegSet :: LocalReg -> RegSet -> Bool
elemRegSet = elementOfUniqSet

extendRegSet :: RegSet -> LocalReg -> RegSet
extendRegSet = addOneToUniqSet

deleteFromRegSet :: RegSet -> LocalReg -> RegSet
deleteFromRegSet = delOneFromUniqSet

mkRegSet :: [LocalReg] -> RegSet
mkRegSet = mkUniqSet

-- Set difference, union and intersection respectively.
minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet
minusRegSet = minusUniqSet
plusRegSet  = unionUniqSets
timesRegSet = intersectUniqSets
224
-- | Things over whose used local registers one can fold.
class UserOfLocalRegs a where
  foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b

-- | Things over whose defined local registers one can fold.
class DefinerOfLocalRegs a where
  foldRegsDefd :: (b -> LocalReg -> b) -> b -> a -> b
230
-- | The set of registers used by @e@ that satisfy the predicate.
filterRegsUsed :: UserOfLocalRegs e => (LocalReg -> Bool) -> e -> RegSet
filterRegsUsed keep e = foldRegsUsed addIfKept emptyRegSet e
  where
    addIfKept acc reg
      | keep reg  = extendRegSet acc reg
      | otherwise = acc
235
-- Only local registers are reported; global registers contribute nothing.
instance UserOfLocalRegs CmmReg where
  foldRegsUsed step acc r = case r of
    CmmLocal local -> step acc local
    CmmGlobal _    -> acc

instance DefinerOfLocalRegs CmmReg where
  foldRegsDefd step acc r = case r of
    CmmLocal local -> step acc local
    CmmGlobal _    -> acc
243
-- A local register uses and defines exactly itself.
instance UserOfLocalRegs LocalReg where
  foldRegsUsed step acc reg = acc `step` reg

instance DefinerOfLocalRegs LocalReg where
  foldRegsDefd step acc reg = acc `step` reg

-- Folding over a set visits each member via 'foldUniqSet'.
instance UserOfLocalRegs RegSet where
  foldRegsUsed step = foldUniqSet (\reg acc -> step acc reg)
252
-- Fold over the local registers mentioned in an expression.
instance UserOfLocalRegs CmmExpr where
  foldRegsUsed f z e = go z e
    where
      go acc expr = case expr of
        CmmLit _         -> acc
        CmmLoad addr _   -> foldRegsUsed f acc addr
        CmmReg r         -> foldRegsUsed f acc r
        CmmMachOp _ args -> foldRegsUsed f acc args
        CmmRegOff r _    -> foldRegsUsed f acc r
        CmmStackSlot _ _ -> acc
261
-- Lists are folded left to right, threading the accumulator through.
instance UserOfLocalRegs a => UserOfLocalRegs [a] where
  foldRegsUsed f set xs = foldl (foldRegsUsed f) set xs

instance DefinerOfLocalRegs a => DefinerOfLocalRegs [a] where
  foldRegsDefd f set xs = foldl (foldRegsDefd f) set xs

-- 'Nothing' defines no registers.
instance DefinerOfLocalRegs a => DefinerOfLocalRegs (Maybe a) where
  foldRegsDefd f set mb = maybe set (foldRegsDefd f set) mb
273
274
275 -----------------------------------------------------------------------------
276 -- Stack slots
277 -----------------------------------------------------------------------------
278
-- | Is the expression a stack slot in the 'RegSlot' area of the given
-- register?
isStackSlotOf :: CmmExpr -> LocalReg -> Bool
isStackSlotOf expr reg = case expr of
    CmmStackSlot (RegSlot slotReg) _ -> slotReg == reg
    _                                -> False
282
283 -----------------------------------------------------------------------------
284 -- Stack slot use information for expressions and other types [_$_]
285 -----------------------------------------------------------------------------
286
-- Both classes fold over 'SubArea's: the area, the offset into the area,
-- and the width of the subarea.
class UserOfSlots a where
  foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b

class DefinerOfSlots a where
  foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b
293
-- A slot counts as used only when it is loaded from; a bare
-- 'CmmStackSlot' address contributes nothing.
instance UserOfSlots CmmExpr where
  foldSlotsUsed f z e = go z e
    where
      go acc expr = case expr of
        CmmLit _                           -> acc
        -- The direct-load case must come before the general load case.
        CmmLoad (CmmStackSlot area off) ty ->
            f acc (area, off, widthInBytes (typeWidth ty))
        CmmLoad addr _                     -> foldSlotsUsed f acc addr
        CmmReg _                           -> acc
        CmmMachOp _ args                   -> foldSlotsUsed f acc args
        CmmRegOff _ _                      -> acc
        CmmStackSlot _ _                   -> acc
303
-- Lists are folded left to right, threading the accumulator through.
instance UserOfSlots a => UserOfSlots [a] where
  foldSlotsUsed f set xs = foldl (foldSlotsUsed f) set xs
307
308
309 -----------------------------------------------------------------------------
310 -- Global STG registers
311 -----------------------------------------------------------------------------
312
-- TEMPORARY!!!
-- Flag recording whether a vanilla register holds a GC pointer.
data VGcPtr
  = VGcPtr
  | VNonGcPtr
  deriving (Eq, Show)
315
316 -----------------------------------------------------------------------------
317 -- Global STG registers
318 -----------------------------------------------------------------------------
-- | 'VGcPtr' for GC-pointer types, 'VNonGcPtr' for everything else.
vgcFlag :: CmmType -> VGcPtr
vgcFlag ty = if isGcPtrType ty then VGcPtr else VNonGcPtr
322
data GlobalReg
  -- Argument and return registers
  = VanillaReg                  -- pointers, unboxed ints and chars
        {-# UNPACK #-} !Int     -- its number
        VGcPtr

  | FloatReg                    -- single-precision floating-point registers
        {-# UNPACK #-} !Int     -- its number

  | DoubleReg                   -- double-precision floating-point registers
        {-# UNPACK #-} !Int     -- its number

  | LongReg                     -- long int registers (64-bit, really)
        {-# UNPACK #-} !Int     -- its number

  -- STG registers
  | Sp                  -- Stack ptr; points to last occupied stack location.
  | SpLim               -- Stack limit
  | Hp                  -- Heap ptr; points to last occupied heap location.
  | HpLim               -- Heap limit register
  | CurrentTSO          -- pointer to current thread's TSO
  | CurrentNursery      -- pointer to allocation area
  | HpAlloc             -- allocation count for heap check failure

  -- To keep code size down, the addresses of some commonly-called
  -- functions live in the register table:
  | EagerBlackholeInfo  -- stg_EAGER_BLACKHOLE_info
  | GCEnter1            -- stg_gc_enter_1
  | GCFun               -- stg_gc_fun

  -- Base offset for the register table, used for accessing registers
  -- which do not have real registers assigned to them.  This register
  -- will only appear after we have expanded GlobalReg into memory
  -- accesses (where necessary) in the native code generator.
  | BaseReg

  -- Base register for PIC (position-independent code) calculations.
  -- Only used inside the native code generator.  Its exact meaning
  -- differs from platform to platform (see module
  -- PositionIndependentCode).
  | PicBaseReg

  deriving (Show)
366
-- | Equality on global registers.  The 'VGcPtr' flag of a 'VanillaReg' is
-- ignored, so that registers with the same number clash regardless of the
-- type of value they hold.
--
-- Every constructor needs its own reflexive case here: one that is left
-- out falls through to the final equation and is then unequal to itself,
-- which disagrees with the 'Ord' instance (where it compares as 'EQ').
instance Eq GlobalReg where
  VanillaReg i _     == VanillaReg j _     = i == j  -- Ignore type when seeking clashes
  FloatReg i         == FloatReg j         = i == j
  DoubleReg i        == DoubleReg j        = i == j
  LongReg i          == LongReg j          = i == j
  Sp                 == Sp                 = True
  SpLim              == SpLim              = True
  Hp                 == Hp                 = True
  HpLim              == HpLim              = True
  CurrentTSO         == CurrentTSO         = True
  CurrentNursery     == CurrentNursery     = True
  HpAlloc            == HpAlloc            = True
  EagerBlackholeInfo == EagerBlackholeInfo = True
  GCEnter1           == GCEnter1           = True
  GCFun              == GCFun              = True
  BaseReg            == BaseReg            = True
  PicBaseReg         == PicBaseReg         = True
  _r1                == _r2                = False
384
-- | Total order on global registers.  Registers built from the same
-- numbered constructor are ordered by number (the 'VGcPtr' flag of a
-- 'VanillaReg' is ignored when seeking clashes); otherwise the
-- constructors themselves are ordered by 'rank'.
instance Ord GlobalReg where
  compare r1 r2 = case (r1, r2) of
      (VanillaReg i _, VanillaReg j _) -> compare i j
      (FloatReg i,     FloatReg j)     -> compare i j
      (DoubleReg i,    DoubleReg j)    -> compare i j
      (LongReg i,      LongReg j)      -> compare i j
      _                                -> compare (rank r1) (rank r2)
    where
      -- Position of each constructor in the ordering.  Note that this is
      -- not the declaration order: EagerBlackholeInfo sorts after BaseReg.
      rank :: GlobalReg -> Int
      rank reg = case reg of
        VanillaReg _ _     -> 0
        FloatReg _         -> 1
        DoubleReg _        -> 2
        LongReg _          -> 3
        Sp                 -> 4
        SpLim              -> 5
        Hp                 -> 6
        HpLim              -> 7
        CurrentTSO         -> 8
        CurrentNursery     -> 9
        HpAlloc            -> 10
        GCEnter1           -> 11
        GCFun              -> 12
        BaseReg            -> 13
        EagerBlackholeInfo -> 14
        PicBaseReg         -> 15
433
-- Convenient aliases for frequently used global registers.
spReg :: CmmReg
spReg = CmmGlobal Sp

hpReg :: CmmReg
hpReg = CmmGlobal Hp

spLimReg :: CmmReg
spLimReg = CmmGlobal SpLim

nodeReg :: CmmReg
nodeReg = CmmGlobal node

-- | Vanilla register 1, flagged as holding a GC pointer.
node :: GlobalReg
node = VanillaReg 1 VGcPtr
443
-- | The 'CmmType' of a global register.
globalRegType :: GlobalReg -> CmmType
globalRegType reg = case reg of
    VanillaReg _ VGcPtr    -> gcWord
    VanillaReg _ VNonGcPtr -> bWord
    FloatReg _             -> cmmFloat W32
    DoubleReg _            -> cmmFloat W64
    LongReg _              -> cmmBits W64
    Hp                     -> gcWord  -- The initialiser for all
                                      -- dynamically allocated closures
    _                      -> bWord
453
454
455 -----------------------------------------------------------------------------
456 -- CmmType
457 -----------------------------------------------------------------------------
458
459 -- NOTE: CmmType is an abstract type, not exported from this
460 -- module so you can easily change its representation
461 --
462 -- However Width is exported in a concrete way,
463 -- and is used extensively in pattern-matching
464
-- The important one!  A category paired with a width.
data CmmType
  = CmmType CmmCat Width

-- "Category" (not exported)
-- See Note [Signed vs unsigned] at the end
data CmmCat
  = GcPtrCat    -- GC pointer
  | BitsCat     -- Non-pointer
  | FloatCat    -- Float
  deriving (Eq)
474
-- A type prints as its category letter followed by its width in bits.
instance Outputable CmmType where
  ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid)

-- Floats print as "F"; both GC pointers and plain bits print as "I".
instance Outputable CmmCat where
  ppr cat = case cat of
    FloatCat -> ptext (sLit "F")
    _        -> ptext (sLit "I")
-- Temp Jan 08
--  ppr FloatCat = ptext $ sLit("float")
--  ppr BitsCat  = ptext $ sLit("bits")
--  ppr GcPtrCat = ptext $ sLit("gcptr")
485
486 -- Why is CmmType stratified? For native code generation,
487 -- most of the time you just want to know what sort of register
488 -- to put the thing in, and for this you need to know how
489 -- many bits thing has and whether it goes in a floating-point
490 -- register. By contrast, the distinction between GcPtr and
491 -- GcNonPtr is of interest to only a few parts of the code generator.
492
493 -------- Equality on CmmType --------------
494 -- CmmType is *not* an instance of Eq; sometimes we care about the
495 -- Gc/NonGc distinction, and sometimes we don't
496 -- So we use an explicit function to force you to think about it
-- | Exact equality: both the category and the width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType catA widA) (CmmType catB widB) =
    catA == catB && widA == widB
499
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers
cmmEqType_ignoring_ptrhood (CmmType catA widA) (CmmType catB widB)
  = sameFloatness && widA == widB
  where
    -- Categories agree when both are floats or neither is;
    -- the GcPtr/Bits distinction is ignored.
    sameFloatness = (catA == FloatCat) == (catB == FloatCat)
510
511 --- Simple operations on CmmType -----
-- | The width component of a 'CmmType'.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
    CmmType _ w -> w

-- | Non-pointer bits of the given width.
cmmBits :: Width -> CmmType
cmmBits = CmmType BitsCat

-- | A float of the given width.
cmmFloat :: Width -> CmmType
cmmFloat = CmmType FloatCat
518
519 -------- Common CmmTypes ------------
520 -- Floats and words of specific widths
-- Non-pointer words of specific widths
b8, b16, b32, b64 :: CmmType
b8  = cmmBits W8
b16 = cmmBits W16
b32 = cmmBits W32
b64 = cmmBits W64

-- Floats of specific widths
f32, f64 :: CmmType
f32 = cmmFloat W32
f64 = cmmFloat W64

-- CmmTypes of native word widths
bWord, bHalfWord :: CmmType
bWord     = cmmBits wordWidth
bHalfWord = cmmBits halfWordWidth

-- A GC pointer of native word width
gcWord :: CmmType
gcWord = CmmType GcPtrCat wordWidth

-- CmmTypes with the widths 'cIntWidth' and 'cLongWidth'
cInt, cLong :: CmmType
cInt  = cmmBits cIntWidth
cLong = cmmBits cLongWidth
538
539
540 ------------ Predicates ----------------
isFloatType, isGcPtrType :: CmmType -> Bool
isFloatType (CmmType cat _) = cat == FloatCat

isGcPtrType (CmmType cat _) = cat == GcPtrCat

isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
-- isWord32/isWord64 are true of 32/64-bit non-floats
--      (both gc-ptrs and otherwise)
-- isFloat32/isFloat64 are true of floats of exactly that width

isWord64 (CmmType cat w) = cat /= FloatCat && w == W64

isWord32 (CmmType cat w) = cat /= FloatCat && w == W32

isFloat32 (CmmType cat w) = cat == FloatCat && w == W32

isFloat64 (CmmType cat w) = cat == FloatCat && w == W64
565
566 -----------------------------------------------------------------------------
567 -- Width
568 -----------------------------------------------------------------------------
569
-- The derived Ord instance is relied upon, so the constructors must stay
-- in this order.
data Width
  = W8
  | W16
  | W32
  | W64
  | W80     -- Extended double-precision float,
            -- used in x86 native codegen only.
  | W128
  deriving (Eq, Ord, Show)
576
-- A width prints as its constructor name.
instance Outputable Width where
  ppr = ptext . mrStr

-- | The constructor name of a 'Width' as a literal string.
mrStr :: Width -> LitString
mrStr width = case width of
    W8   -> sLit "W8"
    W16  -> sLit "W16"
    W32  -> sLit "W32"
    W64  -> sLit "W64"
    W128 -> sLit "W128"
    W80  -> sLit "W80"
587
588
589 -------- Common Widths ------------
-- | Width of a word, selected by 'wORD_SIZE' (4 or 8 bytes).
wordWidth :: Width
wordWidth = case wORD_SIZE of
    4 -> W32
    8 -> W64
    _ -> panic "MachOp.wordRep: Unknown word size"

-- | Width of half a word, selected by 'wORD_SIZE' (4 or 8 bytes).
halfWordWidth :: Width
halfWordWidth = case wORD_SIZE of
    4 -> W16
    8 -> W32
    _ -> panic "MachOp.halfWordRep: Unknown word size"
598
-- cIntWidth is the Width for a C-language 'int', chosen at compile time
-- from SIZEOF_INT.
cIntWidth :: Width
#if SIZEOF_INT == 4
cIntWidth = W32
#elif SIZEOF_INT == 8
cIntWidth = W64
#endif

-- cLongWidth is chosen the same way from SIZEOF_LONG.
cLongWidth :: Width
#if SIZEOF_LONG == 4
cLongWidth = W32
#elif SIZEOF_LONG == 8
cLongWidth = W64
#endif
612
-- | Number of bits in a 'Width'.
widthInBits :: Width -> Int
widthInBits width = case width of
    W8   -> 8
    W16  -> 16
    W32  -> 32
    W64  -> 64
    W128 -> 128
    W80  -> 80
620
-- | Number of bytes in a 'Width'.
widthInBytes :: Width -> Int
widthInBytes width = case width of
    W8   -> 1
    W16  -> 2
    W32  -> 4
    W64  -> 8
    W128 -> 16
    W80  -> 10
628
-- | The 'Width' occupying the given number of bytes; panics when no
-- such width exists.
widthFromBytes :: Int -> Width
widthFromBytes n = case n of
    1  -> W8
    2  -> W16
    4  -> W32
    8  -> W64
    16 -> W128
    10 -> W80
    _  -> pprPanic "no width for given number of bytes" (ppr n)
637
-- log_2 of the width in bytes, useful for generating shifts.
-- Panics on W80.
widthInLog :: Width -> Int
widthInLog width = case width of
    W8   -> 0
    W16  -> 1
    W32  -> 2
    W64  -> 3
    W128 -> 4
    W80  -> panic "widthInLog: F80"
646
647 -- widening / narrowing
648
-- | Narrow an 'Integer' by converting it to the fixed-size unsigned word
-- type of the given width and back.  Only W8..W64 are supported; any
-- other width panics.
narrowU :: Width -> Integer -> Integer
narrowU width x = case width of
    W8  -> toInteger (fromInteger x :: Word8)
    W16 -> toInteger (fromInteger x :: Word16)
    W32 -> toInteger (fromInteger x :: Word32)
    W64 -> toInteger (fromInteger x :: Word64)
    _   -> panic "narrowTo"
655
-- | Narrow an 'Integer' by converting it to the fixed-size signed integer
-- type of the given width and back.  Only W8..W64 are supported; any
-- other width panics.
narrowS :: Width -> Integer -> Integer
narrowS width x = case width of
    W8  -> toInteger (fromInteger x :: Int8)
    W16 -> toInteger (fromInteger x :: Int16)
    W32 -> toInteger (fromInteger x :: Int32)
    W64 -> toInteger (fromInteger x :: Int64)
    _   -> panic "narrowTo"
662
663 -----------------------------------------------------------------------------
664 -- MachOp
665 -----------------------------------------------------------------------------
666
667 {-
668 Implementation notes:
669
670 It might suffice to keep just a width, without distinguishing between
671 floating and integer types. However, keeping the distinction will
672 help the native code generator to assign registers more easily.
673 -}
674
675
676 {- |
677 Machine-level primops; ones which we can reasonably delegate to the
678 native code generators to handle. Basically contains C's primops
679 and no others.
680
681 Nomenclature: all ops indicate width and signedness, where
682 appropriate. Widths: 8\/16\/32\/64 means the given size, obviously.
683 Nat means the operation works on STG word sized objects.
684 Signedness: S means signed, U means unsigned. For operations where
685 signedness is irrelevant or makes no difference (for example
686 integer add), the signedness component is omitted.
687
688 An exception: NatP is a ptr-typed native word. From the point of
689 view of the native code generators this distinction is irrelevant,
690 but the C code generator sometimes needs this info to emit the
691 right casts.
692 -}
693
data MachOp
  -- Integer operations (insensitive to signed/unsigned)
  = MO_Add Width
  | MO_Sub Width
  | MO_Eq  Width
  | MO_Ne  Width
  | MO_Mul Width                -- low word of multiply

  -- Signed multiply/divide
  | MO_S_MulMayOflo Width       -- nonzero if signed multiply overflows
  | MO_S_Quot Width             -- signed / (same semantics as IntQuotOp)
  | MO_S_Rem  Width             -- signed % (same semantics as IntRemOp)
  | MO_S_Neg  Width             -- unary -

  -- Unsigned multiply/divide
  | MO_U_MulMayOflo Width       -- nonzero if unsigned multiply overflows
  | MO_U_Quot Width             -- unsigned / (same semantics as WordQuotOp)
  | MO_U_Rem  Width             -- unsigned % (same semantics as WordRemOp)

  -- Signed comparisons
  | MO_S_Ge Width
  | MO_S_Le Width
  | MO_S_Gt Width
  | MO_S_Lt Width

  -- Unsigned comparisons
  | MO_U_Ge Width
  | MO_U_Le Width
  | MO_U_Gt Width
  | MO_U_Lt Width

  -- Floating point arithmetic
  | MO_F_Add  Width
  | MO_F_Sub  Width
  | MO_F_Neg  Width             -- unary -
  | MO_F_Mul  Width
  | MO_F_Quot Width

  -- Floating point comparison
  | MO_F_Eq Width
  | MO_F_Ne Width
  | MO_F_Ge Width
  | MO_F_Le Width
  | MO_F_Gt Width
  | MO_F_Lt Width

  -- Bitwise operations.  Not all of these may be supported
  -- at all sizes, and only integral Widths are valid.
  | MO_And   Width
  | MO_Or    Width
  | MO_Xor   Width
  | MO_Not   Width
  | MO_Shl   Width
  | MO_U_Shr Width              -- unsigned shift right
  | MO_S_Shr Width              -- signed shift right

  -- Conversions (source width first, then target width).
  -- Some of these will be NOPs.
  -- Floating-point conversions use the signed variant.
  | MO_SF_Conv Width Width      -- Signed int -> Float
  | MO_FS_Conv Width Width      -- Float -> Signed int
  | MO_SS_Conv Width Width      -- Signed int -> Signed int
  | MO_UU_Conv Width Width      -- unsigned int -> unsigned int
  | MO_FF_Conv Width Width      -- Float -> Float
  deriving (Eq, Show)
758
-- | Print a 'MachOp' using its derived 'Show' instance.
pprMachOp :: MachOp -> SDoc
pprMachOp = text . show
761
762
763
764 -- -----------------------------------------------------------------------------
765 -- Some common MachReps
766
767 -- A 'wordRep' is a machine word on the target architecture
768 -- Specifically, it is the size of an Int#, Word#, Addr#
769 -- and the unit of allocation on the stack and the heap
770 -- Any pointer is also guaranteed to be a wordRep.
771
-- Word-sized integer arithmetic
mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe, mo_wordMul :: MachOp
mo_wordAdd = MO_Add wordWidth
mo_wordSub = MO_Sub wordWidth
mo_wordEq  = MO_Eq wordWidth
mo_wordNe  = MO_Ne wordWidth
mo_wordMul = MO_Mul wordWidth

mo_wordSQuot, mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem :: MachOp
mo_wordSQuot = MO_S_Quot wordWidth
mo_wordSRem  = MO_S_Rem wordWidth
mo_wordSNeg  = MO_S_Neg wordWidth
mo_wordUQuot = MO_U_Quot wordWidth
mo_wordURem  = MO_U_Rem wordWidth

-- Word-sized signed comparisons
mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt :: MachOp
mo_wordSGe = MO_S_Ge wordWidth
mo_wordSLe = MO_S_Le wordWidth
mo_wordSGt = MO_S_Gt wordWidth
mo_wordSLt = MO_S_Lt wordWidth

-- Word-sized unsigned comparisons
mo_wordUGe, mo_wordULe, mo_wordUGt, mo_wordULt :: MachOp
mo_wordUGe = MO_U_Ge wordWidth
mo_wordULe = MO_U_Le wordWidth
mo_wordUGt = MO_U_Gt wordWidth
mo_wordULt = MO_U_Lt wordWidth

-- Word-sized bitwise operations and shifts
mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot :: MachOp
mo_wordAnd = MO_And wordWidth
mo_wordOr  = MO_Or wordWidth
mo_wordXor = MO_Xor wordWidth
mo_wordNot = MO_Not wordWidth

mo_wordShl, mo_wordSShr, mo_wordUShr :: MachOp
mo_wordShl  = MO_Shl wordWidth
mo_wordSShr = MO_S_Shr wordWidth
mo_wordUShr = MO_U_Shr wordWidth

-- Conversions from 8/16 bits to 32 bits
mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 :: MachOp
mo_u_8To32  = MO_UU_Conv W8 W32
mo_s_8To32  = MO_SS_Conv W8 W32
mo_u_16To32 = MO_UU_Conv W16 W32
mo_s_16To32 = MO_SS_Conv W16 W32

-- Conversions from 8/16/32 bits to word width
mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord
    , mo_u_32ToWord, mo_s_32ToWord
    :: MachOp
mo_u_8ToWord  = MO_UU_Conv W8 wordWidth
mo_s_8ToWord  = MO_SS_Conv W8 wordWidth
mo_u_16ToWord = MO_UU_Conv W16 wordWidth
mo_s_16ToWord = MO_SS_Conv W16 wordWidth
mo_s_32ToWord = MO_SS_Conv W32 wordWidth
mo_u_32ToWord = MO_UU_Conv W32 wordWidth

-- Conversions from word width or 32 bits down to smaller widths
mo_WordTo8, mo_WordTo16, mo_WordTo32 :: MachOp
mo_WordTo8  = MO_UU_Conv wordWidth W8
mo_WordTo16 = MO_UU_Conv wordWidth W16
mo_WordTo32 = MO_UU_Conv wordWidth W32

mo_32To8, mo_32To16 :: MachOp
mo_32To8  = MO_UU_Conv W32 W8
mo_32To16 = MO_UU_Conv W32 W16
829
830
831 -- ----------------------------------------------------------------------------
832 -- isCommutableMachOp
833
{- |
Returns 'True' if the arguments of the MachOp may be swapped.  This is
used in the platform-independent Cmm optimisations.

If in doubt, return 'False'.  This generates worse code on the
native routes, but is otherwise harmless.
-}
isCommutableMachOp :: MachOp -> Bool
isCommutableMachOp (MO_Add _)          = True
isCommutableMachOp (MO_Eq _)           = True
isCommutableMachOp (MO_Ne _)           = True
isCommutableMachOp (MO_Mul _)          = True
isCommutableMachOp (MO_S_MulMayOflo _) = True
isCommutableMachOp (MO_U_MulMayOflo _) = True
isCommutableMachOp (MO_And _)          = True
isCommutableMachOp (MO_Or _)           = True
isCommutableMachOp (MO_Xor _)          = True
isCommutableMachOp _other              = False
854
855 -- ----------------------------------------------------------------------------
856 -- isAssociativeMachOp
857
{- |
Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@).
This is used in the platform-independent Cmm optimisations.

If in doubt, return 'False'.  This generates worse code on the
native routes, but is otherwise harmless.
-}
isAssociativeMachOp :: MachOp -> Bool
-- NB: the floating point operations are not included
isAssociativeMachOp (MO_Add {}) = True
isAssociativeMachOp (MO_Mul {}) = True
isAssociativeMachOp (MO_And {}) = True
isAssociativeMachOp (MO_Or {})  = True
isAssociativeMachOp (MO_Xor {}) = True
isAssociativeMachOp _other      = False
874
875 -- ----------------------------------------------------------------------------
876 -- isComparisonMachOp
877
{- |
Returns 'True' if the MachOp is a comparison (integer equality, signed,
unsigned or floating point).

If in doubt, return False.  This generates worse code on the
native routes, but is otherwise harmless.
-}
isComparisonMachOp :: MachOp -> Bool
isComparisonMachOp (MO_Eq _)    = True
isComparisonMachOp (MO_Ne _)    = True
isComparisonMachOp (MO_S_Ge _)  = True
isComparisonMachOp (MO_S_Le _)  = True
isComparisonMachOp (MO_S_Gt _)  = True
isComparisonMachOp (MO_S_Lt _)  = True
isComparisonMachOp (MO_U_Ge _)  = True
isComparisonMachOp (MO_U_Le _)  = True
isComparisonMachOp (MO_U_Gt _)  = True
isComparisonMachOp (MO_U_Lt _)  = True
isComparisonMachOp (MO_F_Eq {}) = True
isComparisonMachOp (MO_F_Ne {}) = True
isComparisonMachOp (MO_F_Ge {}) = True
isComparisonMachOp (MO_F_Le {}) = True
isComparisonMachOp (MO_F_Gt {}) = True
isComparisonMachOp (MO_F_Lt {}) = True
isComparisonMachOp _other       = False
904
905 -- -----------------------------------------------------------------------------
906 -- Inverting conditions
907
-- Sometimes it's useful to be able to invert the sense of a
-- condition.  Not all conditional tests are invertible: in
-- particular, floating point conditionals cannot be inverted, because
-- there exist floating-point values which return False for both senses
-- of a condition (eg. both (NaN > x) and (NaN <= x) are False).
--
-- Hence every MO_F_* comparison falls through to Nothing below.  Callers
-- must already cope with Nothing, since non-comparison MachOps yield it.

-- | The MachOp computing the logical negation of the given integer
-- comparison, or 'Nothing' if the op is not an invertible comparison.
maybeInvertComparison :: MachOp -> Maybe MachOp
maybeInvertComparison op
  = case op of  -- None of these Just cases include floating point
        MO_Eq r   -> Just (MO_Ne r)
        MO_Ne r   -> Just (MO_Eq r)
        MO_U_Lt r -> Just (MO_U_Ge r)
        MO_U_Gt r -> Just (MO_U_Le r)
        MO_U_Le r -> Just (MO_U_Gt r)
        MO_U_Ge r -> Just (MO_U_Lt r)
        MO_S_Lt r -> Just (MO_S_Ge r)
        MO_S_Gt r -> Just (MO_S_Le r)
        MO_S_Le r -> Just (MO_S_Gt r)
        MO_S_Ge r -> Just (MO_S_Lt r)
        _other    -> Nothing
934
935 -- ----------------------------------------------------------------------------
936 -- machOpResultType
937
{- |
Computes the 'CmmType' of the result of applying a 'MachOp', given the
types of its arguments.

The argument types are consulted only by the operations whose result takes
the type of the first argument ('MO_Add', 'MO_Sub', 'MO_And', 'MO_Or' and
'MO_Xor'); for those the list must be non-empty.  For every other
operation the result type is determined by the operator alone.
-}
machOpResultType :: MachOp -> [CmmType] -> CmmType
machOpResultType mop tys = resultOf mop
  where
    -- Type of the first argument.  This is a lazy pattern binding, so an
    -- empty argument list only fails for operations that actually use it.
    (firstArgTy : _) = tys

    resultOf :: MachOp -> CmmType
    -- Integer arithmetic
    resultOf (MO_Add {})           = firstArgTy   -- Preserve GC-ptr-hood
    resultOf (MO_Sub {})           = firstArgTy   -- of first arg
    resultOf (MO_Mul r)            = cmmBits r
    resultOf (MO_S_MulMayOflo r)   = cmmBits r
    resultOf (MO_S_Quot r)         = cmmBits r
    resultOf (MO_S_Rem r)          = cmmBits r
    resultOf (MO_S_Neg r)          = cmmBits r
    resultOf (MO_U_MulMayOflo r)   = cmmBits r
    resultOf (MO_U_Quot r)         = cmmBits r
    resultOf (MO_U_Rem r)          = cmmBits r

    -- Integer comparisons: equality, signed, unsigned
    resultOf (MO_Eq {})            = comparisonResultRep
    resultOf (MO_Ne {})            = comparisonResultRep
    resultOf (MO_S_Ge {})          = comparisonResultRep
    resultOf (MO_S_Le {})          = comparisonResultRep
    resultOf (MO_S_Gt {})          = comparisonResultRep
    resultOf (MO_S_Lt {})          = comparisonResultRep
    resultOf (MO_U_Ge {})          = comparisonResultRep
    resultOf (MO_U_Le {})          = comparisonResultRep
    resultOf (MO_U_Gt {})          = comparisonResultRep
    resultOf (MO_U_Lt {})          = comparisonResultRep

    -- Floating-point arithmetic and comparisons
    resultOf (MO_F_Add r)          = cmmFloat r
    resultOf (MO_F_Sub r)          = cmmFloat r
    resultOf (MO_F_Mul r)          = cmmFloat r
    resultOf (MO_F_Quot r)         = cmmFloat r
    resultOf (MO_F_Neg r)          = cmmFloat r
    resultOf (MO_F_Eq {})          = comparisonResultRep
    resultOf (MO_F_Ne {})          = comparisonResultRep
    resultOf (MO_F_Ge {})          = comparisonResultRep
    resultOf (MO_F_Le {})          = comparisonResultRep
    resultOf (MO_F_Gt {})          = comparisonResultRep
    resultOf (MO_F_Lt {})          = comparisonResultRep

    -- Bitwise operations and shifts
    resultOf (MO_And {})           = firstArgTy   -- Used for pointer masking
    resultOf (MO_Or {})            = firstArgTy
    resultOf (MO_Xor {})           = firstArgTy
    resultOf (MO_Not r)            = cmmBits r
    resultOf (MO_Shl r)            = cmmBits r
    resultOf (MO_U_Shr r)          = cmmBits r
    resultOf (MO_S_Shr r)          = cmmBits r

    -- Conversions: the result is described by the target width
    resultOf (MO_SS_Conv _ to)     = cmmBits to
    resultOf (MO_UU_Conv _ to)     = cmmBits to
    resultOf (MO_FS_Conv _ to)     = cmmBits to
    resultOf (MO_SF_Conv _ to)     = cmmFloat to
    resultOf (MO_FF_Conv _ to)     = cmmFloat to

-- | The type given to the result of every comparison 'MachOp'.
-- Currently this is 'bWord'.
comparisonResultRep :: CmmType
comparisonResultRep = bWord  -- is it?
997
998
999 -- -----------------------------------------------------------------------------
1000 -- machOpArgReps
1001
-- | The widths a 'MachOp' expects of its arguments, one list element per
-- argument, in argument order.
--
-- This function is used for debugging only: when linting a 'CmmExpr' we
-- can check that an application of a 'MachOp' is \"type-correct\" by
-- comparing the widths of the actual arguments with the list returned here.
machOpArgReps :: MachOp -> [Width]
machOpArgReps op = widthsFor op
  where
    -- One argument of the given width.
    unary w  = [w]
    -- Two arguments, both of the given width.
    binary w = [w, w]
    -- A value of the given width plus a word-sized shift amount.
    shift w  = [w, wordWidth]

    widthsFor :: MachOp -> [Width]
    -- Integer arithmetic and (in)equality
    widthsFor (MO_Add r)            = binary r
    widthsFor (MO_Sub r)            = binary r
    widthsFor (MO_Eq r)             = binary r
    widthsFor (MO_Ne r)             = binary r
    widthsFor (MO_Mul r)            = binary r
    widthsFor (MO_S_MulMayOflo r)   = binary r
    widthsFor (MO_S_Quot r)         = binary r
    widthsFor (MO_S_Rem r)          = binary r
    widthsFor (MO_S_Neg r)          = unary r
    widthsFor (MO_U_MulMayOflo r)   = binary r
    widthsFor (MO_U_Quot r)         = binary r
    widthsFor (MO_U_Rem r)          = binary r

    -- Signed comparisons
    widthsFor (MO_S_Ge r)           = binary r
    widthsFor (MO_S_Le r)           = binary r
    widthsFor (MO_S_Gt r)           = binary r
    widthsFor (MO_S_Lt r)           = binary r

    -- Unsigned comparisons
    widthsFor (MO_U_Ge r)           = binary r
    widthsFor (MO_U_Le r)           = binary r
    widthsFor (MO_U_Gt r)           = binary r
    widthsFor (MO_U_Lt r)           = binary r

    -- Floating-point arithmetic and comparisons
    widthsFor (MO_F_Add r)          = binary r
    widthsFor (MO_F_Sub r)          = binary r
    widthsFor (MO_F_Mul r)          = binary r
    widthsFor (MO_F_Quot r)         = binary r
    widthsFor (MO_F_Neg r)          = unary r
    widthsFor (MO_F_Eq r)           = binary r
    widthsFor (MO_F_Ne r)           = binary r
    widthsFor (MO_F_Ge r)           = binary r
    widthsFor (MO_F_Le r)           = binary r
    widthsFor (MO_F_Gt r)           = binary r
    widthsFor (MO_F_Lt r)           = binary r

    -- Bitwise operations and shifts
    widthsFor (MO_And r)            = binary r
    widthsFor (MO_Or r)             = binary r
    widthsFor (MO_Xor r)            = binary r
    widthsFor (MO_Not r)            = unary r
    widthsFor (MO_Shl r)            = shift r
    widthsFor (MO_U_Shr r)          = shift r
    widthsFor (MO_S_Shr r)          = shift r

    -- Conversions take a single argument of the source width
    widthsFor (MO_SS_Conv from _)   = unary from
    widthsFor (MO_UU_Conv from _)   = unary from
    widthsFor (MO_SF_Conv from _)   = unary from
    widthsFor (MO_FS_Conv from _)   = unary from
    widthsFor (MO_FF_Conv from _)   = unary from
1058
1059
1060 -------------------------------------------------------------------------
1061 {- Note [Signed vs unsigned]
1062 ~~~~~~~~~~~~~~~~~~~~~~~~~
1063 Should a CmmType include a signed vs. unsigned distinction?
1064
1065 This is very much like a "hint" in C-- terminology: it isn't necessary
1066 in order to generate correct code, but it might be useful in that the
1067 compiler can generate better code if it has access to higher-level
1068 hints about data. This is important at call boundaries, because the
1069 definition of a function is not visible at all of its call sites, so
1070 the compiler cannot infer the hints.
1071
1072 Here in Cmm, we're taking a slightly different approach. We include
1073 the int vs. float hint in the MachRep, because (a) the majority of
1074 platforms have a strong distinction between float and int registers,
1075 and (b) we don't want to do any heavyweight hint-inference in the
1076 native code backend in order to get good code. We're treating the
1077 hint more like a type: our Cmm is always completely consistent with
1078 respect to hints. All coercions between float and int are explicit.
1079
1080 What about the signed vs. unsigned hint? This information might be
1081 useful if we want to keep sub-word-sized values in word-size
1082 registers, which we must do if we only have word-sized registers.
1083
1084 On such a system, there are two straightforward conventions for
1085 representing sub-word-sized values:
1086
1087 (a) Leave the upper bits undefined. Comparison operations must
1088 sign- or zero-extend both operands before comparing them,
1089 depending on whether the comparison is signed or unsigned.
1090
1091 (b) Always keep the values sign- or zero-extended as appropriate.
1092 Arithmetic operations must narrow the result to the appropriate
1093 size.
1094
1095 A clever compiler might not use either (a) or (b) exclusively, instead
1096 it would attempt to minimize the coercions by analysis: the same kind
1097 of analysis that propagates hints around. In Cmm we don't want to
1098 have to do this, so we plump for having richer types and keeping the
1099 type information consistent.
1100
1101 If signed/unsigned hints are missing from MachRep, then the only
1102 choice we have is (a), because we don't know whether the result of an
1103 operation should be sign- or zero-extended.
1104
1105 Many architectures have extending load operations, which work well
1106 with (b). To make use of them with (a), you need to know whether the
1107 value is going to be sign- or zero-extended by an enclosing comparison
1108 (for example), which involves knowing about the context. This is
1109 doable but more complex.
1110
1111 Further complicating the issue is foreign calls: a foreign calling
1112 convention can specify that signed 8-bit quantities are passed as
1113 sign-extended 32 bit quantities, for example (this is the case on the
1114 PowerPC). So we *do* need sign information on foreign call arguments.
1115
1116 Pros for adding signed vs. unsigned to MachRep:
1117
1118 - It would let us use convention (b) above, and get easier
1119 code generation for extending loads.
1120
1121 - Less information required on foreign calls.
1122
1123 - MachOp type would be simpler
1124
1125 Cons:
1126
1127 - More complexity
1128
1129 - What is the MachRep for a VanillaReg? Currently it is
1130 always wordRep, but now we have to decide whether it is
1131 signed or unsigned. The same VanillaReg can thus have
1132 different MachReps in different parts of the program.
1133
1134 - Extra coercions cluttering up expressions.
1135
1136 Currently for GHC, the foreign call point is moot, because we do our
1137 own promotion of sub-word-sized values to word-sized values. The Int8
1138 type is represented by an Int# which is kept sign-extended at all times
1139 (this is slightly naughty, because we're making assumptions about the
1140 C calling convention rather early on in the compiler). However, given
1141 this, the cons outweigh the pros.
1142
1143 -}
1144