Big collection of patches for the new codegen branch.
[ghc.git] / compiler / cmm / CmmExpr.hs
1
2 module CmmExpr
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13
14 , CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr
15 , CmmReg(..), cmmRegType
16 , CmmLit(..), cmmLitType
17 , LocalReg(..), localRegType
18 , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node
19 , VGcPtr(..), vgcFlag -- Temporary!
20 , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed
21 , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
22 , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
23 , plusRegSet, minusRegSet, timesRegSet
24 , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, StackSlotMap, getSlot
25
26 -- MachOp
27 , MachOp(..)
28 , pprMachOp, isCommutableMachOp, isAssociativeMachOp
29 , isComparisonMachOp, machOpResultType
30 , machOpArgReps, maybeInvertComparison
31
32 -- MachOp builders
33 , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
34 , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
35 , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
36 , mo_wordULe, mo_wordUGt, mo_wordULt
37 , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
38 , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
39 , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
40 , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32
41 )
42 where
43
44 #include "HsVersions.h"
45
46 import BlockId
47 import CLabel
48 import Constants
49 import FastString
50 import FiniteMap
51 import Maybes
52 import Monad
53 import Outputable
54 import Panic
55 import Unique
56 import UniqSet
57
58 -----------------------------------------------------------------------------
59 -- CmmExpr
60 -- An expression. Expressions have no side effects.
61 -----------------------------------------------------------------------------
62
63 data CmmExpr
64 = CmmLit CmmLit -- Literal
65 | CmmLoad CmmExpr CmmType -- Read memory location
66 | CmmReg CmmReg -- Contents of register
67 | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
68 | CmmStackSlot Area Int -- addressing expression of a stack slot
69 | CmmRegOff CmmReg Int
70 -- CmmRegOff reg i
71 -- ** is shorthand only, meaning **
72 -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep)))
73 -- where rep = cmmRegType reg
74
75 instance Eq CmmExpr where -- Equality ignores the types
76 CmmLit l1 == CmmLit l2 = l1==l2
77 CmmLoad e1 _ == CmmLoad e2 _ = e1==e2
78 CmmReg r1 == CmmReg r2 = r1==r2
79 CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2
80 CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2
81 CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2
82 _e1 == _e2 = False
83
84 data CmmReg
85 = CmmLocal LocalReg
86 | CmmGlobal GlobalReg
87 deriving( Eq, Ord )
88
89 -- | A stack area is either the stack slot where a variable is spilled
90 -- or the stack space where function arguments and results are passed.
91 data Area
92 = RegSlot LocalReg
93 | CallArea AreaId
94 deriving (Eq, Ord)
95
96 data AreaId
97 = Old -- entry parameters, jumps, and returns share one call area at old end of stack
98 | Young BlockId
99 deriving (Eq, Ord)
100
101 type SubArea = (Area, Int, Int) -- area, offset, width
102 type SubAreaSet = FiniteMap Area [SubArea]
103 type AreaMap = FiniteMap Area Int
104
105 data CmmLit
106 = CmmInt Integer Width
107 -- Interpretation: the 2's complement representation of the value
108 -- is truncated to the specified size. This is easier than trying
109 -- to keep the value within range, because we don't know whether
110 -- it will be used as a signed or unsigned value (the CmmType doesn't
111 -- distinguish between signed & unsigned).
112 | CmmFloat Rational Width
113 | CmmLabel CLabel -- Address of label
114 | CmmLabelOff CLabel Int -- Address of label + byte offset
115
116 -- Due to limitations in the C backend, the following
117 -- MUST ONLY be used inside the info table indicated by label2
118 -- (label2 must be the info label), and label1 must be an
119 -- SRT, a slow entrypoint or a large bitmap (see the Mangler)
120 -- Don't use it at all unless tablesNextToCode.
121 -- It is also used inside the NCG during when generating
122 -- position-independent code.
123 | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset
124 | CmmBlock BlockId -- Code label
125 | CmmHighStackMark -- stands for the max stack space used during a procedure
126 deriving Eq
127
128 cmmExprType :: CmmExpr -> CmmType
129 cmmExprType (CmmLit lit) = cmmLitType lit
130 cmmExprType (CmmLoad _ rep) = rep
131 cmmExprType (CmmReg reg) = cmmRegType reg
132 cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args)
133 cmmExprType (CmmRegOff reg _) = cmmRegType reg
134 cmmExprType (CmmStackSlot _ _) = bWord -- an address
135
136 cmmLitType :: CmmLit -> CmmType
137 cmmLitType (CmmInt _ width) = cmmBits width
138 cmmLitType (CmmFloat _ width) = cmmFloat width
139 cmmLitType (CmmLabel lbl) = cmmLabelType lbl
140 cmmLitType (CmmLabelOff lbl _) = cmmLabelType lbl
141 cmmLitType (CmmLabelDiffOff {}) = bWord
142 cmmLitType (CmmBlock _) = bWord
143 cmmLitType (CmmHighStackMark) = bWord
144
145 cmmLabelType :: CLabel -> CmmType
146 cmmLabelType lbl | isGcPtrLabel lbl = gcWord
147 | otherwise = bWord
148
149 cmmExprWidth :: CmmExpr -> Width
150 cmmExprWidth e = typeWidth (cmmExprType e)
151
152 --------
153 --- Negation for conditional branches
154
155 maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr
156 maybeInvertCmmExpr (CmmMachOp op args) = do op' <- maybeInvertComparison op
157 return (CmmMachOp op' args)
158 maybeInvertCmmExpr _ = Nothing
159
160 -----------------------------------------------------------------------------
161 -- Local registers
162 -----------------------------------------------------------------------------
163
164 data LocalReg
165 = LocalReg !Unique CmmType
166 -- ^ Parameters:
167 -- 1. Identifier
168 -- 2. Type
169
170 instance Eq LocalReg where
171 (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2
172
173 instance Ord LocalReg where
174 compare (LocalReg u1 _) (LocalReg u2 _) = compare u1 u2
175
176 instance Uniquable LocalReg where
177 getUnique (LocalReg uniq _) = uniq
178
179 cmmRegType :: CmmReg -> CmmType
180 cmmRegType (CmmLocal reg) = localRegType reg
181 cmmRegType (CmmGlobal reg) = globalRegType reg
182
183 localRegType :: LocalReg -> CmmType
184 localRegType (LocalReg _ rep) = rep
185
186 -----------------------------------------------------------------------------
187 -- Register-use information for expressions and other types
188 -----------------------------------------------------------------------------
189
190 -- | Sets of local registers
191 type RegSet = UniqSet LocalReg
192 emptyRegSet :: RegSet
193 elemRegSet :: LocalReg -> RegSet -> Bool
194 extendRegSet :: RegSet -> LocalReg -> RegSet
195 deleteFromRegSet :: RegSet -> LocalReg -> RegSet
196 mkRegSet :: [LocalReg] -> RegSet
197 minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet
198
199 emptyRegSet = emptyUniqSet
200 elemRegSet = elementOfUniqSet
201 extendRegSet = addOneToUniqSet
202 deleteFromRegSet = delOneFromUniqSet
203 mkRegSet = mkUniqSet
204 minusRegSet = minusUniqSet
205 plusRegSet = unionUniqSets
206 timesRegSet = intersectUniqSets
207
208 class UserOfLocalRegs a where
209 foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b
210
211 class DefinerOfLocalRegs a where
212 foldRegsDefd :: (b -> LocalReg -> b) -> b -> a -> b
213
214 filterRegsUsed :: UserOfLocalRegs e => (LocalReg -> Bool) -> e -> RegSet
215 filterRegsUsed p e =
216 foldRegsUsed (\regs r -> if p r then extendRegSet regs r else regs)
217 emptyRegSet e
218
219 instance UserOfLocalRegs CmmReg where
220 foldRegsUsed f z (CmmLocal reg) = f z reg
221 foldRegsUsed _ z (CmmGlobal _) = z
222
223 instance DefinerOfLocalRegs CmmReg where
224 foldRegsDefd f z (CmmLocal reg) = f z reg
225 foldRegsDefd _ z (CmmGlobal _) = z
226
227 instance UserOfLocalRegs LocalReg where
228 foldRegsUsed f z r = f z r
229
230 instance DefinerOfLocalRegs LocalReg where
231 foldRegsDefd f z r = f z r
232
233 instance UserOfLocalRegs RegSet where
234 foldRegsUsed f = foldUniqSet (flip f)
235
236 instance UserOfLocalRegs CmmExpr where
237 foldRegsUsed f z e = expr z e
238 where expr z (CmmLit _) = z
239 expr z (CmmLoad addr _) = foldRegsUsed f z addr
240 expr z (CmmReg r) = foldRegsUsed f z r
241 expr z (CmmMachOp _ exprs) = foldRegsUsed f z exprs
242 expr z (CmmRegOff r _) = foldRegsUsed f z r
243 expr z (CmmStackSlot _ _) = z
244
245 instance UserOfLocalRegs a => UserOfLocalRegs [a] where
246 foldRegsUsed _ set [] = set
247 foldRegsUsed f set (x:xs) = foldRegsUsed f (foldRegsUsed f set x) xs
248
249 instance DefinerOfLocalRegs a => DefinerOfLocalRegs [a] where
250 foldRegsDefd _ set [] = set
251 foldRegsDefd f set (x:xs) = foldRegsDefd f (foldRegsDefd f set x) xs
252
253 instance DefinerOfLocalRegs a => DefinerOfLocalRegs (Maybe a) where
254 foldRegsDefd _ set Nothing = set
255 foldRegsDefd f set (Just x) = foldRegsDefd f set x
256
257
258 -----------------------------------------------------------------------------
259 -- Stack slots
260 -----------------------------------------------------------------------------
261
262 mkVarSlot :: LocalReg -> CmmExpr
263 mkVarSlot r = CmmStackSlot (RegSlot r) 0
264
265 -- Usually, we either want to lookup a variable's spill slot in an environment
266 -- or else allocate it and add it to the environment.
267 -- For a variable, we just need a single area of the appropriate size.
268 type StackSlotMap = FiniteMap LocalReg CmmExpr
269 getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr)
270 getSlot map r = case lookupFM map r of
271 Just s -> (map, s)
272 Nothing -> (addToFM map r s, s) where s = mkVarSlot r
273
274 -----------------------------------------------------------------------------
275 -- Stack slot use information for expressions and other types [_$_]
276 -----------------------------------------------------------------------------
277
278
279 -- Fold over the area, the offset into the area, and the width of the subarea.
280 class UserOfSlots a where
281 foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b
282
283 class DefinerOfSlots a where
284 foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b
285
286 instance UserOfSlots CmmExpr where
287 foldSlotsUsed f z e = expr z e
288 where expr z (CmmLit _) = z
289 expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty)
290 expr z (CmmLoad addr _) = foldSlotsUsed f z addr
291 expr z (CmmReg _) = z
292 expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs
293 expr z (CmmRegOff _ _) = z
294 expr z (CmmStackSlot _ _) = z
295
296 instance UserOfSlots a => UserOfSlots [a] where
297 foldSlotsUsed _ set [] = set
298 foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs
299
300
301 -----------------------------------------------------------------------------
302 -- Global STG registers
303 -----------------------------------------------------------------------------
304
305 data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
306 -- TEMPORARY!!!
307
308 -----------------------------------------------------------------------------
309 -- Global STG registers
310 -----------------------------------------------------------------------------
311 vgcFlag :: CmmType -> VGcPtr
312 vgcFlag ty | isGcPtrType ty = VGcPtr
313 | otherwise = VNonGcPtr
314
315 data GlobalReg
316 -- Argument and return registers
317 = VanillaReg -- pointers, unboxed ints and chars
318 {-# UNPACK #-} !Int -- its number
319 VGcPtr
320
321 | FloatReg -- single-precision floating-point registers
322 {-# UNPACK #-} !Int -- its number
323
324 | DoubleReg -- double-precision floating-point registers
325 {-# UNPACK #-} !Int -- its number
326
327 | LongReg -- long int registers (64-bit, really)
328 {-# UNPACK #-} !Int -- its number
329
330 -- STG registers
331 | Sp -- Stack ptr; points to last occupied stack location.
332 | SpLim -- Stack limit
333 | Hp -- Heap ptr; points to last occupied heap location.
334 | HpLim -- Heap limit register
335 | CurrentTSO -- pointer to current thread's TSO
336 | CurrentNursery -- pointer to allocation area
337 | HpAlloc -- allocation count for heap check failure
338
339 -- We keep the address of some commonly-called
340 -- functions in the register table, to keep code
341 -- size down:
342 | EagerBlackholeInfo -- stg_EAGER_BLACKHOLE_info
343 | GCEnter1 -- stg_gc_enter_1
344 | GCFun -- stg_gc_fun
345
346 -- Base offset for the register table, used for accessing registers
347 -- which do not have real registers assigned to them. This register
348 -- will only appear after we have expanded GlobalReg into memory accesses
349 -- (where necessary) in the native code generator.
350 | BaseReg
351
352 -- Base Register for PIC (position-independent code) calculations
353 -- Only used inside the native code generator. It's exact meaning differs
354 -- from platform to platform (see module PositionIndependentCode).
355 | PicBaseReg
356
357 deriving( Show )
358
359 instance Eq GlobalReg where
360 VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes
361 FloatReg i == FloatReg j = i==j
362 DoubleReg i == DoubleReg j = i==j
363 LongReg i == LongReg j = i==j
364 Sp == Sp = True
365 SpLim == SpLim = True
366 Hp == Hp = True
367 HpLim == HpLim = True
368 CurrentTSO == CurrentTSO = True
369 CurrentNursery == CurrentNursery = True
370 HpAlloc == HpAlloc = True
371 GCEnter1 == GCEnter1 = True
372 GCFun == GCFun = True
373 BaseReg == BaseReg = True
374 PicBaseReg == PicBaseReg = True
375 _r1 == _r2 = False
376
377 instance Ord GlobalReg where
378 compare (VanillaReg i _) (VanillaReg j _) = compare i j
379 -- Ignore type when seeking clashes
380 compare (FloatReg i) (FloatReg j) = compare i j
381 compare (DoubleReg i) (DoubleReg j) = compare i j
382 compare (LongReg i) (LongReg j) = compare i j
383 compare Sp Sp = EQ
384 compare SpLim SpLim = EQ
385 compare Hp Hp = EQ
386 compare HpLim HpLim = EQ
387 compare CurrentTSO CurrentTSO = EQ
388 compare CurrentNursery CurrentNursery = EQ
389 compare HpAlloc HpAlloc = EQ
390 compare GCEnter1 GCEnter1 = EQ
391 compare GCFun GCFun = EQ
392 compare BaseReg BaseReg = EQ
393 compare PicBaseReg PicBaseReg = EQ
394 compare (VanillaReg _ _) _ = LT
395 compare _ (VanillaReg _ _) = GT
396 compare (FloatReg _) _ = LT
397 compare _ (FloatReg _) = GT
398 compare (DoubleReg _) _ = LT
399 compare _ (DoubleReg _) = GT
400 compare (LongReg _) _ = LT
401 compare _ (LongReg _) = GT
402 compare Sp _ = LT
403 compare _ Sp = GT
404 compare SpLim _ = LT
405 compare _ SpLim = GT
406 compare Hp _ = LT
407 compare _ Hp = GT
408 compare HpLim _ = LT
409 compare _ HpLim = GT
410 compare CurrentTSO _ = LT
411 compare _ CurrentTSO = GT
412 compare CurrentNursery _ = LT
413 compare _ CurrentNursery = GT
414 compare HpAlloc _ = LT
415 compare _ HpAlloc = GT
416 compare GCEnter1 _ = LT
417 compare _ GCEnter1 = GT
418 compare GCFun _ = LT
419 compare _ GCFun = GT
420 compare BaseReg _ = LT
421 compare _ BaseReg = GT
422
423 -- convenient aliases
424 spReg, hpReg, spLimReg, nodeReg :: CmmReg
425 spReg = CmmGlobal Sp
426 hpReg = CmmGlobal Hp
427 spLimReg = CmmGlobal SpLim
428 nodeReg = CmmGlobal node
429
430 node :: GlobalReg
431 node = VanillaReg 1 VGcPtr
432
433 globalRegType :: GlobalReg -> CmmType
434 globalRegType (VanillaReg _ VGcPtr) = gcWord
435 globalRegType (VanillaReg _ VNonGcPtr) = bWord
436 globalRegType (FloatReg _) = cmmFloat W32
437 globalRegType (DoubleReg _) = cmmFloat W64
438 globalRegType (LongReg _) = cmmBits W64
439 globalRegType Hp = gcWord -- The initialiser for all
440 -- dynamically allocated closures
441 globalRegType _ = bWord
442
443
444 -----------------------------------------------------------------------------
445 -- CmmType
446 -----------------------------------------------------------------------------
447
448 -- NOTE: CmmType is an abstract type, not exported from this
449 -- module so you can easily change its representation
450 --
451 -- However Width is exported in a concrete way,
452 -- and is used extensively in pattern-matching
453
454 data CmmType -- The important one!
455 = CmmType CmmCat Width
456
457 data CmmCat -- "Category" (not exported)
458 = GcPtrCat -- GC pointer
459 | BitsCat -- Non-pointer
460 | FloatCat -- Float
461 deriving( Eq )
462 -- See Note [Signed vs unsigned] at the end
463
464 instance Outputable CmmType where
465 ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid)
466
467 instance Outputable CmmCat where
468 ppr FloatCat = ptext $ sLit("F")
469 ppr _ = ptext $ sLit("I")
470 -- Temp Jan 08
471 -- ppr FloatCat = ptext $ sLit("float")
472 -- ppr BitsCat = ptext $ sLit("bits")
473 -- ppr GcPtrCat = ptext $ sLit("gcptr")
474
475 -- Why is CmmType stratified? For native code generation,
476 -- most of the time you just want to know what sort of register
477 -- to put the thing in, and for this you need to know how
478 -- many bits thing has and whether it goes in a floating-point
479 -- register. By contrast, the distinction between GcPtr and
480 -- GcNonPtr is of interest to only a few parts of the code generator.
481
482 -------- Equality on CmmType --------------
483 -- CmmType is *not* an instance of Eq; sometimes we care about the
484 -- Gc/NonGc distinction, and sometimes we don't
485 -- So we use an explicit function to force you to think about it
486 cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality
487 cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2
488
489 cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
490 -- This equality is temporary; used in CmmLint
491 -- but the RTS files are not yet well-typed wrt pointers
492 cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2)
493 = c1 `weak_eq` c2 && w1==w2
494 where
495 FloatCat `weak_eq` FloatCat = True
496 FloatCat `weak_eq` _other = False
497 _other `weak_eq` FloatCat = False
498 _word1 `weak_eq` _word2 = True -- Ignores GcPtr
499
500 --- Simple operations on CmmType -----
501 typeWidth :: CmmType -> Width
502 typeWidth (CmmType _ w) = w
503
504 cmmBits, cmmFloat :: Width -> CmmType
505 cmmBits = CmmType BitsCat
506 cmmFloat = CmmType FloatCat
507
508 -------- Common CmmTypes ------------
509 -- Floats and words of specific widths
510 b8, b16, b32, b64, f32, f64 :: CmmType
511 b8 = cmmBits W8
512 b16 = cmmBits W16
513 b32 = cmmBits W32
514 b64 = cmmBits W64
515 f32 = cmmFloat W32
516 f64 = cmmFloat W64
517
518 -- CmmTypes of native word widths
519 bWord, bHalfWord, gcWord :: CmmType
520 bWord = cmmBits wordWidth
521 bHalfWord = cmmBits halfWordWidth
522 gcWord = CmmType GcPtrCat wordWidth
523
524 cInt, cLong :: CmmType
525 cInt = cmmBits cIntWidth
526 cLong = cmmBits cLongWidth
527
528
529 ------------ Predicates ----------------
530 isFloatType, isGcPtrType :: CmmType -> Bool
531 isFloatType (CmmType FloatCat _) = True
532 isFloatType _other = False
533
534 isGcPtrType (CmmType GcPtrCat _) = True
535 isGcPtrType _other = False
536
537 isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
538 -- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
539 -- isFloat32 and 64 are obvious
540
541 isWord64 (CmmType BitsCat W64) = True
542 isWord64 (CmmType GcPtrCat W64) = True
543 isWord64 _other = False
544
545 isWord32 (CmmType BitsCat W32) = True
546 isWord32 (CmmType GcPtrCat W32) = True
547 isWord32 _other = False
548
549 isFloat32 (CmmType FloatCat W32) = True
550 isFloat32 _other = False
551
552 isFloat64 (CmmType FloatCat W64) = True
553 isFloat64 _other = False
554
555 -----------------------------------------------------------------------------
556 -- Width
557 -----------------------------------------------------------------------------
558
559 data Width = W8 | W16 | W32 | W64
560 | W80 -- Extended double-precision float,
561 -- used in x86 native codegen only.
562 -- (we use Ord, so it'd better be in this order)
563 | W128
564 deriving (Eq, Ord, Show)
565
566 instance Outputable Width where
567 ppr rep = ptext (mrStr rep)
568
569 mrStr :: Width -> LitString
570 mrStr W8 = sLit("W8")
571 mrStr W16 = sLit("W16")
572 mrStr W32 = sLit("W32")
573 mrStr W64 = sLit("W64")
574 mrStr W128 = sLit("W128")
575 mrStr W80 = sLit("W80")
576
577
578 -------- Common Widths ------------
579 wordWidth, halfWordWidth :: Width
580 wordWidth | wORD_SIZE == 4 = W32
581 | wORD_SIZE == 8 = W64
582 | otherwise = panic "MachOp.wordRep: Unknown word size"
583
584 halfWordWidth | wORD_SIZE == 4 = W16
585 | wORD_SIZE == 8 = W32
586 | otherwise = panic "MachOp.halfWordRep: Unknown word size"
587
588 -- cIntRep is the Width for a C-language 'int'
589 cIntWidth, cLongWidth :: Width
590 #if SIZEOF_INT == 4
591 cIntWidth = W32
592 #elif SIZEOF_INT == 8
593 cIntWidth = W64
594 #endif
595
596 #if SIZEOF_LONG == 4
597 cLongWidth = W32
598 #elif SIZEOF_LONG == 8
599 cLongWidth = W64
600 #endif
601
602 widthInBits :: Width -> Int
603 widthInBits W8 = 8
604 widthInBits W16 = 16
605 widthInBits W32 = 32
606 widthInBits W64 = 64
607 widthInBits W128 = 128
608 widthInBits W80 = 80
609
610 widthInBytes :: Width -> Int
611 widthInBytes W8 = 1
612 widthInBytes W16 = 2
613 widthInBytes W32 = 4
614 widthInBytes W64 = 8
615 widthInBytes W128 = 16
616 widthInBytes W80 = 10
617
618 widthFromBytes :: Int -> Width
619 widthFromBytes 1 = W8
620 widthFromBytes 2 = W16
621 widthFromBytes 4 = W32
622 widthFromBytes 8 = W64
623 widthFromBytes 16 = W128
624 widthFromBytes 10 = W80
625 widthFromBytes n = pprPanic "no width for given number of bytes" (ppr n)
626
627 -- log_2 of the width in bytes, useful for generating shifts.
628 widthInLog :: Width -> Int
629 widthInLog W8 = 0
630 widthInLog W16 = 1
631 widthInLog W32 = 2
632 widthInLog W64 = 3
633 widthInLog W128 = 4
634 widthInLog W80 = panic "widthInLog: F80"
635
636
637 -----------------------------------------------------------------------------
638 -- MachOp
639 -----------------------------------------------------------------------------
640
641 {-
642 Implementation notes:
643
644 It might suffice to keep just a width, without distinguishing between
645 floating and integer types. However, keeping the distinction will
646 help the native code generator to assign registers more easily.
647 -}
648
649
650 {- |
651 Machine-level primops; ones which we can reasonably delegate to the
652 native code generators to handle. Basically contains C's primops
653 and no others.
654
655 Nomenclature: all ops indicate width and signedness, where
656 appropriate. Widths: 8\/16\/32\/64 means the given size, obviously.
657 Nat means the operation works on STG word sized objects.
658 Signedness: S means signed, U means unsigned. For operations where
659 signedness is irrelevant or makes no difference (for example
660 integer add), the signedness component is omitted.
661
662 An exception: NatP is a ptr-typed native word. From the point of
663 view of the native code generators this distinction is irrelevant,
664 but the C code generator sometimes needs this info to emit the
665 right casts.
666 -}
667
668 data MachOp
669 -- Integer operations (insensitive to signed/unsigned)
670 = MO_Add Width
671 | MO_Sub Width
672 | MO_Eq Width
673 | MO_Ne Width
674 | MO_Mul Width -- low word of multiply
675
676 -- Signed multiply/divide
677 | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows
678 | MO_S_Quot Width -- signed / (same semantics as IntQuotOp)
679 | MO_S_Rem Width -- signed % (same semantics as IntRemOp)
680 | MO_S_Neg Width -- unary -
681
682 -- Unsigned multiply/divide
683 | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows
684 | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp)
685 | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp)
686
687 -- Signed comparisons
688 | MO_S_Ge Width
689 | MO_S_Le Width
690 | MO_S_Gt Width
691 | MO_S_Lt Width
692
693 -- Unsigned comparisons
694 | MO_U_Ge Width
695 | MO_U_Le Width
696 | MO_U_Gt Width
697 | MO_U_Lt Width
698
699 -- Floating point arithmetic
700 | MO_F_Add Width
701 | MO_F_Sub Width
702 | MO_F_Neg Width -- unary -
703 | MO_F_Mul Width
704 | MO_F_Quot Width
705
706 -- Floating point comparison
707 | MO_F_Eq Width
708 | MO_F_Ne Width
709 | MO_F_Ge Width
710 | MO_F_Le Width
711 | MO_F_Gt Width
712 | MO_F_Lt Width
713
714 -- Bitwise operations. Not all of these may be supported
715 -- at all sizes, and only integral Widths are valid.
716 | MO_And Width
717 | MO_Or Width
718 | MO_Xor Width
719 | MO_Not Width
720 | MO_Shl Width
721 | MO_U_Shr Width -- unsigned shift right
722 | MO_S_Shr Width -- signed shift right
723
724 -- Conversions. Some of these will be NOPs.
725 -- Floating-point conversions use the signed variant.
726 | MO_SF_Conv Width Width -- Signed int -> Float
727 | MO_FS_Conv Width Width -- Float -> Signed int
728 | MO_SS_Conv Width Width -- Signed int -> Signed int
729 | MO_UU_Conv Width Width -- unsigned int -> unsigned int
730 | MO_FF_Conv Width Width -- Float -> Float
731 deriving (Eq, Show)
732
733 pprMachOp :: MachOp -> SDoc
734 pprMachOp mo = text (show mo)
735
736
737
738 -- -----------------------------------------------------------------------------
739 -- Some common MachReps
740
741 -- A 'wordRep' is a machine word on the target architecture
742 -- Specifically, it is the size of an Int#, Word#, Addr#
743 -- and the unit of allocation on the stack and the heap
744 -- Any pointer is also guaranteed to be a wordRep.
745
746 mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
747 , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
748 , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
749 , mo_wordULe, mo_wordUGt, mo_wordULt
750 , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
751 , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
752 , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
753 , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32
754 :: MachOp
755
756 mo_wordAdd = MO_Add wordWidth
757 mo_wordSub = MO_Sub wordWidth
758 mo_wordEq = MO_Eq wordWidth
759 mo_wordNe = MO_Ne wordWidth
760 mo_wordMul = MO_Mul wordWidth
761 mo_wordSQuot = MO_S_Quot wordWidth
762 mo_wordSRem = MO_S_Rem wordWidth
763 mo_wordSNeg = MO_S_Neg wordWidth
764 mo_wordUQuot = MO_U_Quot wordWidth
765 mo_wordURem = MO_U_Rem wordWidth
766
767 mo_wordSGe = MO_S_Ge wordWidth
768 mo_wordSLe = MO_S_Le wordWidth
769 mo_wordSGt = MO_S_Gt wordWidth
770 mo_wordSLt = MO_S_Lt wordWidth
771
772 mo_wordUGe = MO_U_Ge wordWidth
773 mo_wordULe = MO_U_Le wordWidth
774 mo_wordUGt = MO_U_Gt wordWidth
775 mo_wordULt = MO_U_Lt wordWidth
776
777 mo_wordAnd = MO_And wordWidth
778 mo_wordOr = MO_Or wordWidth
779 mo_wordXor = MO_Xor wordWidth
780 mo_wordNot = MO_Not wordWidth
781 mo_wordShl = MO_Shl wordWidth
782 mo_wordSShr = MO_S_Shr wordWidth
783 mo_wordUShr = MO_U_Shr wordWidth
784
785 mo_u_8To32 = MO_UU_Conv W8 W32
786 mo_s_8To32 = MO_SS_Conv W8 W32
787 mo_u_16To32 = MO_UU_Conv W16 W32
788 mo_s_16To32 = MO_SS_Conv W16 W32
789
790 mo_u_8ToWord = MO_UU_Conv W8 wordWidth
791 mo_s_8ToWord = MO_SS_Conv W8 wordWidth
792 mo_u_16ToWord = MO_UU_Conv W16 wordWidth
793 mo_s_16ToWord = MO_SS_Conv W16 wordWidth
794 mo_s_32ToWord = MO_SS_Conv W32 wordWidth
795 mo_u_32ToWord = MO_UU_Conv W32 wordWidth
796
797 mo_WordTo8 = MO_UU_Conv wordWidth W8
798 mo_WordTo16 = MO_UU_Conv wordWidth W16
799 mo_WordTo32 = MO_UU_Conv wordWidth W32
800
801 mo_32To8 = MO_UU_Conv W32 W8
802 mo_32To16 = MO_UU_Conv W32 W16
803
804
805 -- ----------------------------------------------------------------------------
806 -- isCommutableMachOp
807
808 {- |
809 Returns 'True' if the MachOp has commutable arguments. This is used
810 in the platform-independent Cmm optimisations.
811
812 If in doubt, return 'False'. This generates worse code on the
813 native routes, but is otherwise harmless.
814 -}
815 isCommutableMachOp :: MachOp -> Bool
816 isCommutableMachOp mop =
817 case mop of
818 MO_Add _ -> True
819 MO_Eq _ -> True
820 MO_Ne _ -> True
821 MO_Mul _ -> True
822 MO_S_MulMayOflo _ -> True
823 MO_U_MulMayOflo _ -> True
824 MO_And _ -> True
825 MO_Or _ -> True
826 MO_Xor _ -> True
827 _other -> False
828
829 -- ----------------------------------------------------------------------------
830 -- isAssociativeMachOp
831
832 {- |
833 Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@)
834 This is used in the platform-independent Cmm optimisations.
835
836 If in doubt, return 'False'. This generates worse code on the
837 native routes, but is otherwise harmless.
838 -}
839 isAssociativeMachOp :: MachOp -> Bool
840 isAssociativeMachOp mop =
841 case mop of
842 MO_Add {} -> True -- NB: does not include
843 MO_Mul {} -> True -- floatint point!
844 MO_And {} -> True
845 MO_Or {} -> True
846 MO_Xor {} -> True
847 _other -> False
848
849 -- ----------------------------------------------------------------------------
850 -- isComparisonMachOp
851
852 {- |
853 Returns 'True' if the MachOp is a comparison.
854
855 If in doubt, return False. This generates worse code on the
856 native routes, but is otherwise harmless.
857 -}
858 isComparisonMachOp :: MachOp -> Bool
859 isComparisonMachOp mop =
860 case mop of
861 MO_Eq _ -> True
862 MO_Ne _ -> True
863 MO_S_Ge _ -> True
864 MO_S_Le _ -> True
865 MO_S_Gt _ -> True
866 MO_S_Lt _ -> True
867 MO_U_Ge _ -> True
868 MO_U_Le _ -> True
869 MO_U_Gt _ -> True
870 MO_U_Lt _ -> True
871 MO_F_Eq {} -> True
872 MO_F_Ne {} -> True
873 MO_F_Ge {} -> True
874 MO_F_Le {} -> True
875 MO_F_Gt {} -> True
876 MO_F_Lt {} -> True
877 _other -> False
878
879 -- -----------------------------------------------------------------------------
880 -- Inverting conditions
881
882 -- Sometimes it's useful to be able to invert the sense of a
883 -- condition. Not all conditional tests are invertible: in
884 -- particular, floating point conditionals cannot be inverted, because
885 -- there exist floating-point values which return False for both senses
886 -- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)).
887
888 maybeInvertComparison :: MachOp -> Maybe MachOp
889 maybeInvertComparison op
890 = case op of -- None of these Just cases include floating point
891 MO_Eq r -> Just (MO_Ne r)
892 MO_Ne r -> Just (MO_Eq r)
893 MO_U_Lt r -> Just (MO_U_Ge r)
894 MO_U_Gt r -> Just (MO_U_Le r)
895 MO_U_Le r -> Just (MO_U_Gt r)
896 MO_U_Ge r -> Just (MO_U_Lt r)
897 MO_S_Lt r -> Just (MO_S_Ge r)
898 MO_S_Gt r -> Just (MO_S_Le r)
899 MO_S_Le r -> Just (MO_S_Gt r)
900 MO_S_Ge r -> Just (MO_S_Lt r)
901 MO_F_Eq r -> Just (MO_F_Ne r)
902 MO_F_Ne r -> Just (MO_F_Eq r)
903 MO_F_Ge r -> Just (MO_F_Le r)
904 MO_F_Le r -> Just (MO_F_Ge r)
905 MO_F_Gt r -> Just (MO_F_Lt r)
906 MO_F_Lt r -> Just (MO_F_Gt r)
907 _other -> Nothing
908
909 -- ----------------------------------------------------------------------------
910 -- machOpResultType
911
912 {- |
913 Returns the MachRep of the result of a MachOp.
914 -}
915 machOpResultType :: MachOp -> [CmmType] -> CmmType
916 machOpResultType mop tys =
917 case mop of
918 MO_Add {} -> ty1 -- Preserve GC-ptr-hood
919 MO_Sub {} -> ty1 -- of first arg
920 MO_Mul r -> cmmBits r
921 MO_S_MulMayOflo r -> cmmBits r
922 MO_S_Quot r -> cmmBits r
923 MO_S_Rem r -> cmmBits r
924 MO_S_Neg r -> cmmBits r
925 MO_U_MulMayOflo r -> cmmBits r
926 MO_U_Quot r -> cmmBits r
927 MO_U_Rem r -> cmmBits r
928
929 MO_Eq {} -> comparisonResultRep
930 MO_Ne {} -> comparisonResultRep
931 MO_S_Ge {} -> comparisonResultRep
932 MO_S_Le {} -> comparisonResultRep
933 MO_S_Gt {} -> comparisonResultRep
934 MO_S_Lt {} -> comparisonResultRep
935
936 MO_U_Ge {} -> comparisonResultRep
937 MO_U_Le {} -> comparisonResultRep
938 MO_U_Gt {} -> comparisonResultRep
939 MO_U_Lt {} -> comparisonResultRep
940
941 MO_F_Add r -> cmmFloat r
942 MO_F_Sub r -> cmmFloat r
943 MO_F_Mul r -> cmmFloat r
944 MO_F_Quot r -> cmmFloat r
945 MO_F_Neg r -> cmmFloat r
946 MO_F_Eq {} -> comparisonResultRep
947 MO_F_Ne {} -> comparisonResultRep
948 MO_F_Ge {} -> comparisonResultRep
949 MO_F_Le {} -> comparisonResultRep
950 MO_F_Gt {} -> comparisonResultRep
951 MO_F_Lt {} -> comparisonResultRep
952
953 MO_And {} -> ty1 -- Used for pointer masking
954 MO_Or {} -> ty1
955 MO_Xor {} -> ty1
956 MO_Not r -> cmmBits r
957 MO_Shl r -> cmmBits r
958 MO_U_Shr r -> cmmBits r
959 MO_S_Shr r -> cmmBits r
960
961 MO_SS_Conv _ to -> cmmBits to
962 MO_UU_Conv _ to -> cmmBits to
963 MO_FS_Conv _ to -> cmmBits to
964 MO_SF_Conv _ to -> cmmFloat to
965 MO_FF_Conv _ to -> cmmFloat to
966 where
967 (ty1:_) = tys
968
969 comparisonResultRep :: CmmType
970 comparisonResultRep = bWord -- is it?
971
972
973 -- -----------------------------------------------------------------------------
974 -- machOpArgReps
975
976 -- | This function is used for debugging only: we can check whether an
977 -- application of a MachOp is "type-correct" by checking that the MachReps of
978 -- its arguments are the same as the MachOp expects. This is used when
979 -- linting a CmmExpr.
980
981 machOpArgReps :: MachOp -> [Width]
982 machOpArgReps op =
983 case op of
984 MO_Add r -> [r,r]
985 MO_Sub r -> [r,r]
986 MO_Eq r -> [r,r]
987 MO_Ne r -> [r,r]
988 MO_Mul r -> [r,r]
989 MO_S_MulMayOflo r -> [r,r]
990 MO_S_Quot r -> [r,r]
991 MO_S_Rem r -> [r,r]
992 MO_S_Neg r -> [r]
993 MO_U_MulMayOflo r -> [r,r]
994 MO_U_Quot r -> [r,r]
995 MO_U_Rem r -> [r,r]
996
997 MO_S_Ge r -> [r,r]
998 MO_S_Le r -> [r,r]
999 MO_S_Gt r -> [r,r]
1000 MO_S_Lt r -> [r,r]
1001
1002 MO_U_Ge r -> [r,r]
1003 MO_U_Le r -> [r,r]
1004 MO_U_Gt r -> [r,r]
1005 MO_U_Lt r -> [r,r]
1006
1007 MO_F_Add r -> [r,r]
1008 MO_F_Sub r -> [r,r]
1009 MO_F_Mul r -> [r,r]
1010 MO_F_Quot r -> [r,r]
1011 MO_F_Neg r -> [r]
1012 MO_F_Eq r -> [r,r]
1013 MO_F_Ne r -> [r,r]
1014 MO_F_Ge r -> [r,r]
1015 MO_F_Le r -> [r,r]
1016 MO_F_Gt r -> [r,r]
1017 MO_F_Lt r -> [r,r]
1018
1019 MO_And r -> [r,r]
1020 MO_Or r -> [r,r]
1021 MO_Xor r -> [r,r]
1022 MO_Not r -> [r]
1023 MO_Shl r -> [r,wordWidth]
1024 MO_U_Shr r -> [r,wordWidth]
1025 MO_S_Shr r -> [r,wordWidth]
1026
1027 MO_SS_Conv from _ -> [from]
1028 MO_UU_Conv from _ -> [from]
1029 MO_SF_Conv from _ -> [from]
1030 MO_FS_Conv from _ -> [from]
1031 MO_FF_Conv from _ -> [from]
1032
1033
1034 -------------------------------------------------------------------------
1035 {- Note [Signed vs unsigned]
1036 ~~~~~~~~~~~~~~~~~~~~~~~~~
1037 Should a CmmType include a signed vs. unsigned distinction?
1038
1039 This is very much like a "hint" in C-- terminology: it isn't necessary
1040 in order to generate correct code, but it might be useful in that the
1041 compiler can generate better code if it has access to higher-level
1042 hints about data. This is important at call boundaries, because the
1043 definition of a function is not visible at all of its call sites, so
1044 the compiler cannot infer the hints.
1045
1046 Here in Cmm, we're taking a slightly different approach. We include
1047 the int vs. float hint in the MachRep, because (a) the majority of
1048 platforms have a strong distinction between float and int registers,
1049 and (b) we don't want to do any heavyweight hint-inference in the
1050 native code backend in order to get good code. We're treating the
1051 hint more like a type: our Cmm is always completely consistent with
1052 respect to hints. All coercions between float and int are explicit.
1053
1054 What about the signed vs. unsigned hint? This information might be
1055 useful if we want to keep sub-word-sized values in word-size
1056 registers, which we must do if we only have word-sized registers.
1057
1058 On such a system, there are two straightforward conventions for
1059 representing sub-word-sized values:
1060
1061 (a) Leave the upper bits undefined. Comparison operations must
1062 sign- or zero-extend both operands before comparing them,
1063 depending on whether the comparison is signed or unsigned.
1064
1065 (b) Always keep the values sign- or zero-extended as appropriate.
1066 Arithmetic operations must narrow the result to the appropriate
1067 size.
1068
1069 A clever compiler might not use either (a) or (b) exclusively, instead
1070 it would attempt to minimize the coercions by analysis: the same kind
1071 of analysis that propagates hints around. In Cmm we don't want to
1072 have to do this, so we plump for having richer types and keeping the
1073 type information consistent.
1074
1075 If signed/unsigned hints are missing from MachRep, then the only
1076 choice we have is (a), because we don't know whether the result of an
1077 operation should be sign- or zero-extended.
1078
1079 Many architectures have extending load operations, which work well
1080 with (b). To make use of them with (a), you need to know whether the
1081 value is going to be sign- or zero-extended by an enclosing comparison
1082 (for example), which involves knowing above the context. This is
1083 doable but more complex.
1084
1085 Further complicating the issue is foreign calls: a foreign calling
1086 convention can specify that signed 8-bit quantities are passed as
1087 sign-extended 32 bit quantities, for example (this is the case on the
1088 PowerPC). So we *do* need sign information on foreign call arguments.
1089
1090 Pros for adding signed vs. unsigned to MachRep:
1091
1092 - It would let us use convention (b) above, and get easier
1093 code generation for extending loads.
1094
1095 - Less information required on foreign calls.
1096
1097 - MachOp type would be simpler
1098
1099 Cons:
1100
1101 - More complexity
1102
1103 - What is the MachRep for a VanillaReg? Currently it is
1104 always wordRep, but now we have to decide whether it is
1105 signed or unsigned. The same VanillaReg can thus have
1106 different MachReps in different parts of the program.
1107
1108 - Extra coercions cluttering up expressions.
1109
1110 Currently for GHC, the foreign call point is moot, because we do our
1111 own promotion of sub-word-sized values to word-sized values. The Int8
1112 type is represnted by an Int# which is kept sign-extended at all times
1113 (this is slightly naughty, because we're making assumptions about the
1114 C calling convention rather early on in the compiler). However, given
1115 this, the cons outweigh the pros.
1116
1117 -}
1118