Merging in the new codegen branch
[ghc.git] / compiler / cmm / CmmExpr.hs
1
2 module CmmExpr
3 ( CmmType -- Abstract
4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
5 , cInt, cLong
6 , cmmBits, cmmFloat
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
9
10 , Width(..)
11 , widthInBits, widthInBytes, widthInLog
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
13
14 , CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr
15 , CmmReg(..), cmmRegType
16 , CmmLit(..), cmmLitType
17 , LocalReg(..), localRegType
18 , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node
19 , VGcPtr(..), vgcFlag -- Temporary!
20 , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed
21 , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
22 , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
23 , plusRegSet, minusRegSet, timesRegSet
24 , Area(..), AreaId(..), SubArea, StackSlotMap, getSlot
25
26 -- MachOp
27 , MachOp(..)
28 , pprMachOp, isCommutableMachOp, isAssociativeMachOp
29 , isComparisonMachOp, machOpResultType
30 , machOpArgReps, maybeInvertComparison
31
32 -- MachOp builders
33 , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
34 , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
35 , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
36 , mo_wordULe, mo_wordUGt, mo_wordULt
37 , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
38 , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
39 , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
40 , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32
41 )
42 where
43
44 #include "HsVersions.h"
45
46 import BlockId
47 import CLabel
48 import Constants
49 import FastString
50 import FiniteMap
51 import Maybes
52 import Monad
53 import Outputable
54 import Panic
55 import Unique
56 import UniqSet
57
58 -----------------------------------------------------------------------------
59 -- CmmExpr
60 -- An expression. Expressions have no side effects.
61 -----------------------------------------------------------------------------
62
63 data CmmExpr
64 = CmmLit CmmLit -- Literal
65 | CmmLoad CmmExpr CmmType -- Read memory location
66 | CmmReg CmmReg -- Contents of register
67 | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
68 | CmmStackSlot Area Int -- addressing expression of a stack slot
69 | CmmRegOff CmmReg Int
70 -- CmmRegOff reg i
71 -- ** is shorthand only, meaning **
72 -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep)))
73 -- where rep = cmmRegType reg
74
75 instance Eq CmmExpr where -- Equality ignores the types
76 CmmLit l1 == CmmLit l2 = l1==l2
77 CmmLoad e1 _ == CmmLoad e2 _ = e1==e2
78 CmmReg r1 == CmmReg r2 = r1==r2
79 CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2
80 CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2
81 CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2
82 _e1 == _e2 = False
83
84 data CmmReg
85 = CmmLocal LocalReg
86 | CmmGlobal GlobalReg
87 deriving( Eq, Ord )
88
89 -- | A stack area is either the stack slot where a variable is spilled
90 -- or the stack space where function arguments and results are passed.
91 data Area
92 = RegSlot LocalReg
93 | CallArea AreaId
94 deriving (Eq, Ord)
95
96 data AreaId
97 = Old -- entry parameters, jumps, and returns share one call area at old end of stack
98 | Young BlockId
99 deriving (Eq, Ord)
100
101 type SubArea = (Area, Int, Int) -- area, offset, width
102
103 data CmmLit
104 = CmmInt Integer Width
105 -- Interpretation: the 2's complement representation of the value
106 -- is truncated to the specified size. This is easier than trying
107 -- to keep the value within range, because we don't know whether
108 -- it will be used as a signed or unsigned value (the CmmType doesn't
109 -- distinguish between signed & unsigned).
110 | CmmFloat Rational Width
111 | CmmLabel CLabel -- Address of label
112 | CmmLabelOff CLabel Int -- Address of label + byte offset
113
114 -- Due to limitations in the C backend, the following
115 -- MUST ONLY be used inside the info table indicated by label2
116 -- (label2 must be the info label), and label1 must be an
117 -- SRT, a slow entrypoint or a large bitmap (see the Mangler)
118 -- Don't use it at all unless tablesNextToCode.
119 -- It is also used inside the NCG during when generating
120 -- position-independent code.
121 | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset
122 deriving Eq
123
124 cmmExprType :: CmmExpr -> CmmType
125 cmmExprType (CmmLit lit) = cmmLitType lit
126 cmmExprType (CmmLoad _ rep) = rep
127 cmmExprType (CmmReg reg) = cmmRegType reg
128 cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args)
129 cmmExprType (CmmRegOff reg _) = cmmRegType reg
130 cmmExprType (CmmStackSlot _ _) = bWord -- an address
131
132 cmmLitType :: CmmLit -> CmmType
133 cmmLitType (CmmInt _ width) = cmmBits width
134 cmmLitType (CmmFloat _ width) = cmmFloat width
135 cmmLitType (CmmLabel lbl) = cmmLabelType lbl
136 cmmLitType (CmmLabelOff lbl _) = cmmLabelType lbl
137 cmmLitType (CmmLabelDiffOff {}) = bWord
138
139 cmmLabelType :: CLabel -> CmmType
140 cmmLabelType lbl | isGcPtrLabel lbl = gcWord
141 | otherwise = bWord
142
143 cmmExprWidth :: CmmExpr -> Width
144 cmmExprWidth e = typeWidth (cmmExprType e)
145
146 --------
147 --- Negation for conditional branches
148
149 maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr
150 maybeInvertCmmExpr (CmmMachOp op args) = do op' <- maybeInvertComparison op
151 return (CmmMachOp op' args)
152 maybeInvertCmmExpr _ = Nothing
153
154 -----------------------------------------------------------------------------
155 -- Local registers
156 -----------------------------------------------------------------------------
157
158 data LocalReg
159 = LocalReg !Unique CmmType
160 -- ^ Parameters:
161 -- 1. Identifier
162 -- 2. Type
163
164 instance Eq LocalReg where
165 (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2
166
167 instance Ord LocalReg where
168 compare (LocalReg u1 _) (LocalReg u2 _) = compare u1 u2
169
170 instance Uniquable LocalReg where
171 getUnique (LocalReg uniq _) = uniq
172
173 cmmRegType :: CmmReg -> CmmType
174 cmmRegType (CmmLocal reg) = localRegType reg
175 cmmRegType (CmmGlobal reg) = globalRegType reg
176
177 localRegType :: LocalReg -> CmmType
178 localRegType (LocalReg _ rep) = rep
179
180 -----------------------------------------------------------------------------
181 -- Register-use information for expressions and other types
182 -----------------------------------------------------------------------------
183
184 -- | Sets of local registers
185 type RegSet = UniqSet LocalReg
186 emptyRegSet :: RegSet
187 elemRegSet :: LocalReg -> RegSet -> Bool
188 extendRegSet :: RegSet -> LocalReg -> RegSet
189 deleteFromRegSet :: RegSet -> LocalReg -> RegSet
190 mkRegSet :: [LocalReg] -> RegSet
191 minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet
192
193 emptyRegSet = emptyUniqSet
194 elemRegSet = elementOfUniqSet
195 extendRegSet = addOneToUniqSet
196 deleteFromRegSet = delOneFromUniqSet
197 mkRegSet = mkUniqSet
198 minusRegSet = minusUniqSet
199 plusRegSet = unionUniqSets
200 timesRegSet = intersectUniqSets
201
202 class UserOfLocalRegs a where
203 foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b
204
205 class DefinerOfLocalRegs a where
206 foldRegsDefd :: (b -> LocalReg -> b) -> b -> a -> b
207
208 filterRegsUsed :: UserOfLocalRegs e => (LocalReg -> Bool) -> e -> RegSet
209 filterRegsUsed p e =
210 foldRegsUsed (\regs r -> if p r then extendRegSet regs r else regs)
211 emptyRegSet e
212
213 instance UserOfLocalRegs CmmReg where
214 foldRegsUsed f z (CmmLocal reg) = f z reg
215 foldRegsUsed _ z (CmmGlobal _) = z
216
217 instance DefinerOfLocalRegs CmmReg where
218 foldRegsDefd f z (CmmLocal reg) = f z reg
219 foldRegsDefd _ z (CmmGlobal _) = z
220
221 instance UserOfLocalRegs LocalReg where
222 foldRegsUsed f z r = f z r
223
224 instance DefinerOfLocalRegs LocalReg where
225 foldRegsDefd f z r = f z r
226
227 instance UserOfLocalRegs RegSet where
228 foldRegsUsed f = foldUniqSet (flip f)
229
230 instance UserOfLocalRegs CmmExpr where
231 foldRegsUsed f z e = expr z e
232 where expr z (CmmLit _) = z
233 expr z (CmmLoad addr _) = foldRegsUsed f z addr
234 expr z (CmmReg r) = foldRegsUsed f z r
235 expr z (CmmMachOp _ exprs) = foldRegsUsed f z exprs
236 expr z (CmmRegOff r _) = foldRegsUsed f z r
237 expr z (CmmStackSlot _ _) = z
238
239 instance UserOfLocalRegs a => UserOfLocalRegs [a] where
240 foldRegsUsed _ set [] = set
241 foldRegsUsed f set (x:xs) = foldRegsUsed f (foldRegsUsed f set x) xs
242
243 instance DefinerOfLocalRegs a => DefinerOfLocalRegs [a] where
244 foldRegsDefd _ set [] = set
245 foldRegsDefd f set (x:xs) = foldRegsDefd f (foldRegsDefd f set x) xs
246
247
248 -----------------------------------------------------------------------------
249 -- Stack slots
250 -----------------------------------------------------------------------------
251
252 mkVarSlot :: LocalReg -> CmmExpr
253 mkVarSlot r = CmmStackSlot (RegSlot r) 0
254
255 -- Usually, we either want to lookup a variable's spill slot in an environment
256 -- or else allocate it and add it to the environment.
257 -- For a variable, we just need a single area of the appropriate size.
258 type StackSlotMap = FiniteMap LocalReg CmmExpr
259 getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr)
260 getSlot map r = case lookupFM map r of
261 Just s -> (map, s)
262 Nothing -> (addToFM map r s, s) where s = mkVarSlot r
263
264 -----------------------------------------------------------------------------
265 -- Stack slot use information for expressions and other types [_$_]
266 -----------------------------------------------------------------------------
267
268
269 -- Fold over the area, the offset into the area, and the width of the subarea.
270 class UserOfSlots a where
271 foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b
272
273 class DefinerOfSlots a where
274 foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b
275
276 instance UserOfSlots CmmExpr where
277 foldSlotsUsed f z e = expr z e
278 where expr z (CmmLit _) = z
279 expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty)
280 expr z (CmmLoad addr _) = foldSlotsUsed f z addr
281 expr z (CmmReg _) = z
282 expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs
283 expr z (CmmRegOff _ _) = z
284 expr z (CmmStackSlot _ _) = z
285
286 instance UserOfSlots a => UserOfSlots [a] where
287 foldSlotsUsed _ set [] = set
288 foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs
289
290
291 -----------------------------------------------------------------------------
292 -- Global STG registers
293 -----------------------------------------------------------------------------
294
295 data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
296 -- TEMPORARY!!!
297
298 -----------------------------------------------------------------------------
299 -- Global STG registers
300 -----------------------------------------------------------------------------
301 vgcFlag :: CmmType -> VGcPtr
302 vgcFlag ty | isGcPtrType ty = VGcPtr
303 | otherwise = VNonGcPtr
304
305 data GlobalReg
306 -- Argument and return registers
307 = VanillaReg -- pointers, unboxed ints and chars
308 {-# UNPACK #-} !Int -- its number
309 VGcPtr
310
311 | FloatReg -- single-precision floating-point registers
312 {-# UNPACK #-} !Int -- its number
313
314 | DoubleReg -- double-precision floating-point registers
315 {-# UNPACK #-} !Int -- its number
316
317 | LongReg -- long int registers (64-bit, really)
318 {-# UNPACK #-} !Int -- its number
319
320 -- STG registers
321 | Sp -- Stack ptr; points to last occupied stack location.
322 | SpLim -- Stack limit
323 | Hp -- Heap ptr; points to last occupied heap location.
324 | HpLim -- Heap limit register
325 | CurrentTSO -- pointer to current thread's TSO
326 | CurrentNursery -- pointer to allocation area
327 | HpAlloc -- allocation count for heap check failure
328
329 -- We keep the address of some commonly-called
330 -- functions in the register table, to keep code
331 -- size down:
332 | EagerBlackholeInfo -- stg_EAGER_BLACKHOLE_info
333 | GCEnter1 -- stg_gc_enter_1
334 | GCFun -- stg_gc_fun
335
336 -- Base offset for the register table, used for accessing registers
337 -- which do not have real registers assigned to them. This register
338 -- will only appear after we have expanded GlobalReg into memory accesses
339 -- (where necessary) in the native code generator.
340 | BaseReg
341
342 -- Base Register for PIC (position-independent code) calculations
343 -- Only used inside the native code generator. It's exact meaning differs
344 -- from platform to platform (see module PositionIndependentCode).
345 | PicBaseReg
346
347 deriving( Show )
348
349 instance Eq GlobalReg where
350 VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes
351 FloatReg i == FloatReg j = i==j
352 DoubleReg i == DoubleReg j = i==j
353 LongReg i == LongReg j = i==j
354 Sp == Sp = True
355 SpLim == SpLim = True
356 Hp == Hp = True
357 HpLim == HpLim = True
358 CurrentTSO == CurrentTSO = True
359 CurrentNursery == CurrentNursery = True
360 HpAlloc == HpAlloc = True
361 GCEnter1 == GCEnter1 = True
362 GCFun == GCFun = True
363 BaseReg == BaseReg = True
364 PicBaseReg == PicBaseReg = True
365 _r1 == _r2 = False
366
367 instance Ord GlobalReg where
368 compare (VanillaReg i _) (VanillaReg j _) = compare i j
369 -- Ignore type when seeking clashes
370 compare (FloatReg i) (FloatReg j) = compare i j
371 compare (DoubleReg i) (DoubleReg j) = compare i j
372 compare (LongReg i) (LongReg j) = compare i j
373 compare Sp Sp = EQ
374 compare SpLim SpLim = EQ
375 compare Hp Hp = EQ
376 compare HpLim HpLim = EQ
377 compare CurrentTSO CurrentTSO = EQ
378 compare CurrentNursery CurrentNursery = EQ
379 compare HpAlloc HpAlloc = EQ
380 compare GCEnter1 GCEnter1 = EQ
381 compare GCFun GCFun = EQ
382 compare BaseReg BaseReg = EQ
383 compare PicBaseReg PicBaseReg = EQ
384 compare (VanillaReg _ _) _ = LT
385 compare _ (VanillaReg _ _) = GT
386 compare (FloatReg _) _ = LT
387 compare _ (FloatReg _) = GT
388 compare (DoubleReg _) _ = LT
389 compare _ (DoubleReg _) = GT
390 compare (LongReg _) _ = LT
391 compare _ (LongReg _) = GT
392 compare Sp _ = LT
393 compare _ Sp = GT
394 compare SpLim _ = LT
395 compare _ SpLim = GT
396 compare Hp _ = LT
397 compare _ Hp = GT
398 compare HpLim _ = LT
399 compare _ HpLim = GT
400 compare CurrentTSO _ = LT
401 compare _ CurrentTSO = GT
402 compare CurrentNursery _ = LT
403 compare _ CurrentNursery = GT
404 compare HpAlloc _ = LT
405 compare _ HpAlloc = GT
406 compare GCEnter1 _ = LT
407 compare _ GCEnter1 = GT
408 compare GCFun _ = LT
409 compare _ GCFun = GT
410 compare BaseReg _ = LT
411 compare _ BaseReg = GT
412
413 -- convenient aliases
414 spReg, hpReg, spLimReg, nodeReg :: CmmReg
415 spReg = CmmGlobal Sp
416 hpReg = CmmGlobal Hp
417 spLimReg = CmmGlobal SpLim
418 nodeReg = CmmGlobal node
419
420 node :: GlobalReg
421 node = VanillaReg 1 VGcPtr
422
423 globalRegType :: GlobalReg -> CmmType
424 globalRegType (VanillaReg _ VGcPtr) = gcWord
425 globalRegType (VanillaReg _ VNonGcPtr) = bWord
426 globalRegType (FloatReg _) = cmmFloat W32
427 globalRegType (DoubleReg _) = cmmFloat W64
428 globalRegType (LongReg _) = cmmBits W64
429 globalRegType Hp = gcWord -- The initialiser for all
430 -- dynamically allocated closures
431 globalRegType _ = bWord
432
433
434 -----------------------------------------------------------------------------
435 -- CmmType
436 -----------------------------------------------------------------------------
437
438 -- NOTE: CmmType is an abstract type, not exported from this
439 -- module so you can easily change its representation
440 --
441 -- However Width is exported in a concrete way,
442 -- and is used extensively in pattern-matching
443
444 data CmmType -- The important one!
445 = CmmType CmmCat Width
446
447 data CmmCat -- "Category" (not exported)
448 = GcPtrCat -- GC pointer
449 | BitsCat -- Non-pointer
450 | FloatCat -- Float
451 deriving( Eq )
452 -- See Note [Signed vs unsigned] at the end
453
454 instance Outputable CmmType where
455 ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid)
456
457 instance Outputable CmmCat where
458 ppr FloatCat = ptext $ sLit("F")
459 ppr _ = ptext $ sLit("I")
460 -- Temp Jan 08
461 -- ppr FloatCat = ptext $ sLit("float")
462 -- ppr BitsCat = ptext $ sLit("bits")
463 -- ppr GcPtrCat = ptext $ sLit("gcptr")
464
465 -- Why is CmmType stratified? For native code generation,
466 -- most of the time you just want to know what sort of register
467 -- to put the thing in, and for this you need to know how
468 -- many bits thing has and whether it goes in a floating-point
469 -- register. By contrast, the distinction between GcPtr and
470 -- GcNonPtr is of interest to only a few parts of the code generator.
471
472 -------- Equality on CmmType --------------
473 -- CmmType is *not* an instance of Eq; sometimes we care about the
474 -- Gc/NonGc distinction, and sometimes we don't
475 -- So we use an explicit function to force you to think about it
476 cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality
477 cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2
478
479 cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
480 -- This equality is temporary; used in CmmLint
481 -- but the RTS files are not yet well-typed wrt pointers
482 cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2)
483 = c1 `weak_eq` c2 && w1==w2
484 where
485 FloatCat `weak_eq` FloatCat = True
486 FloatCat `weak_eq` _other = False
487 _other `weak_eq` FloatCat = False
488 _word1 `weak_eq` _word2 = True -- Ignores GcPtr
489
490 --- Simple operations on CmmType -----
491 typeWidth :: CmmType -> Width
492 typeWidth (CmmType _ w) = w
493
494 cmmBits, cmmFloat :: Width -> CmmType
495 cmmBits = CmmType BitsCat
496 cmmFloat = CmmType FloatCat
497
498 -------- Common CmmTypes ------------
499 -- Floats and words of specific widths
500 b8, b16, b32, b64, f32, f64 :: CmmType
501 b8 = cmmBits W8
502 b16 = cmmBits W16
503 b32 = cmmBits W32
504 b64 = cmmBits W64
505 f32 = cmmFloat W32
506 f64 = cmmFloat W64
507
508 -- CmmTypes of native word widths
509 bWord, bHalfWord, gcWord :: CmmType
510 bWord = cmmBits wordWidth
511 bHalfWord = cmmBits halfWordWidth
512 gcWord = CmmType GcPtrCat wordWidth
513
514 cInt, cLong :: CmmType
515 cInt = cmmBits cIntWidth
516 cLong = cmmBits cLongWidth
517
518
519 ------------ Predicates ----------------
520 isFloatType, isGcPtrType :: CmmType -> Bool
521 isFloatType (CmmType FloatCat _) = True
522 isFloatType _other = False
523
524 isGcPtrType (CmmType GcPtrCat _) = True
525 isGcPtrType _other = False
526
527 isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
528 -- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
529 -- isFloat32 and 64 are obvious
530
531 isWord64 (CmmType BitsCat W64) = True
532 isWord64 (CmmType GcPtrCat W64) = True
533 isWord64 _other = False
534
535 isWord32 (CmmType BitsCat W32) = True
536 isWord32 (CmmType GcPtrCat W32) = True
537 isWord32 _other = False
538
539 isFloat32 (CmmType FloatCat W32) = True
540 isFloat32 _other = False
541
542 isFloat64 (CmmType FloatCat W64) = True
543 isFloat64 _other = False
544
545 -----------------------------------------------------------------------------
546 -- Width
547 -----------------------------------------------------------------------------
548
549 data Width = W8 | W16 | W32 | W64
550 | W80 -- Extended double-precision float,
551 -- used in x86 native codegen only.
552 -- (we use Ord, so it'd better be in this order)
553 | W128
554 deriving (Eq, Ord, Show)
555
556 instance Outputable Width where
557 ppr rep = ptext (mrStr rep)
558
559 mrStr :: Width -> LitString
560 mrStr W8 = sLit("W8")
561 mrStr W16 = sLit("W16")
562 mrStr W32 = sLit("W32")
563 mrStr W64 = sLit("W64")
564 mrStr W128 = sLit("W128")
565 mrStr W80 = sLit("W80")
566
567
568 -------- Common Widths ------------
569 wordWidth, halfWordWidth :: Width
570 wordWidth | wORD_SIZE == 4 = W32
571 | wORD_SIZE == 8 = W64
572 | otherwise = panic "MachOp.wordRep: Unknown word size"
573
574 halfWordWidth | wORD_SIZE == 4 = W16
575 | wORD_SIZE == 8 = W32
576 | otherwise = panic "MachOp.halfWordRep: Unknown word size"
577
578 -- cIntRep is the Width for a C-language 'int'
579 cIntWidth, cLongWidth :: Width
580 #if SIZEOF_INT == 4
581 cIntWidth = W32
582 #elif SIZEOF_INT == 8
583 cIntWidth = W64
584 #endif
585
586 #if SIZEOF_LONG == 4
587 cLongWidth = W32
588 #elif SIZEOF_LONG == 8
589 cLongWidth = W64
590 #endif
591
592 widthInBits :: Width -> Int
593 widthInBits W8 = 8
594 widthInBits W16 = 16
595 widthInBits W32 = 32
596 widthInBits W64 = 64
597 widthInBits W128 = 128
598 widthInBits W80 = 80
599
600 widthInBytes :: Width -> Int
601 widthInBytes W8 = 1
602 widthInBytes W16 = 2
603 widthInBytes W32 = 4
604 widthInBytes W64 = 8
605 widthInBytes W128 = 16
606 widthInBytes W80 = 10
607
608 -- log_2 of the width in bytes, useful for generating shifts.
609 widthInLog :: Width -> Int
610 widthInLog W8 = 0
611 widthInLog W16 = 1
612 widthInLog W32 = 2
613 widthInLog W64 = 3
614 widthInLog W128 = 4
615 widthInLog W80 = panic "widthInLog: F80"
616
617
618 -----------------------------------------------------------------------------
619 -- MachOp
620 -----------------------------------------------------------------------------
621
622 {-
623 Implementation notes:
624
625 It might suffice to keep just a width, without distinguishing between
626 floating and integer types. However, keeping the distinction will
627 help the native code generator to assign registers more easily.
628 -}
629
630
631 {- |
632 Machine-level primops; ones which we can reasonably delegate to the
633 native code generators to handle. Basically contains C's primops
634 and no others.
635
636 Nomenclature: all ops indicate width and signedness, where
637 appropriate. Widths: 8\/16\/32\/64 means the given size, obviously.
638 Nat means the operation works on STG word sized objects.
639 Signedness: S means signed, U means unsigned. For operations where
640 signedness is irrelevant or makes no difference (for example
641 integer add), the signedness component is omitted.
642
643 An exception: NatP is a ptr-typed native word. From the point of
644 view of the native code generators this distinction is irrelevant,
645 but the C code generator sometimes needs this info to emit the
646 right casts.
647 -}
648
649 data MachOp
650 -- Integer operations (insensitive to signed/unsigned)
651 = MO_Add Width
652 | MO_Sub Width
653 | MO_Eq Width
654 | MO_Ne Width
655 | MO_Mul Width -- low word of multiply
656
657 -- Signed multiply/divide
658 | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows
659 | MO_S_Quot Width -- signed / (same semantics as IntQuotOp)
660 | MO_S_Rem Width -- signed % (same semantics as IntRemOp)
661 | MO_S_Neg Width -- unary -
662
663 -- Unsigned multiply/divide
664 | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows
665 | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp)
666 | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp)
667
668 -- Signed comparisons
669 | MO_S_Ge Width
670 | MO_S_Le Width
671 | MO_S_Gt Width
672 | MO_S_Lt Width
673
674 -- Unsigned comparisons
675 | MO_U_Ge Width
676 | MO_U_Le Width
677 | MO_U_Gt Width
678 | MO_U_Lt Width
679
680 -- Floating point arithmetic
681 | MO_F_Add Width
682 | MO_F_Sub Width
683 | MO_F_Neg Width -- unary -
684 | MO_F_Mul Width
685 | MO_F_Quot Width
686
687 -- Floating point comparison
688 | MO_F_Eq Width
689 | MO_F_Ne Width
690 | MO_F_Ge Width
691 | MO_F_Le Width
692 | MO_F_Gt Width
693 | MO_F_Lt Width
694
695 -- Bitwise operations. Not all of these may be supported
696 -- at all sizes, and only integral Widths are valid.
697 | MO_And Width
698 | MO_Or Width
699 | MO_Xor Width
700 | MO_Not Width
701 | MO_Shl Width
702 | MO_U_Shr Width -- unsigned shift right
703 | MO_S_Shr Width -- signed shift right
704
705 -- Conversions. Some of these will be NOPs.
706 -- Floating-point conversions use the signed variant.
707 | MO_SF_Conv Width Width -- Signed int -> Float
708 | MO_FS_Conv Width Width -- Float -> Signed int
709 | MO_SS_Conv Width Width -- Signed int -> Signed int
710 | MO_UU_Conv Width Width -- unsigned int -> unsigned int
711 | MO_FF_Conv Width Width -- Float -> Float
712 deriving (Eq, Show)
713
714 pprMachOp :: MachOp -> SDoc
715 pprMachOp mo = text (show mo)
716
717
718
719 -- -----------------------------------------------------------------------------
720 -- Some common MachReps
721
722 -- A 'wordRep' is a machine word on the target architecture
723 -- Specifically, it is the size of an Int#, Word#, Addr#
724 -- and the unit of allocation on the stack and the heap
725 -- Any pointer is also guaranteed to be a wordRep.
726
727 mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
728 , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
729 , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
730 , mo_wordULe, mo_wordUGt, mo_wordULt
731 , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
732 , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
733 , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
734 , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32
735 :: MachOp
736
737 mo_wordAdd = MO_Add wordWidth
738 mo_wordSub = MO_Sub wordWidth
739 mo_wordEq = MO_Eq wordWidth
740 mo_wordNe = MO_Ne wordWidth
741 mo_wordMul = MO_Mul wordWidth
742 mo_wordSQuot = MO_S_Quot wordWidth
743 mo_wordSRem = MO_S_Rem wordWidth
744 mo_wordSNeg = MO_S_Neg wordWidth
745 mo_wordUQuot = MO_U_Quot wordWidth
746 mo_wordURem = MO_U_Rem wordWidth
747
748 mo_wordSGe = MO_S_Ge wordWidth
749 mo_wordSLe = MO_S_Le wordWidth
750 mo_wordSGt = MO_S_Gt wordWidth
751 mo_wordSLt = MO_S_Lt wordWidth
752
753 mo_wordUGe = MO_U_Ge wordWidth
754 mo_wordULe = MO_U_Le wordWidth
755 mo_wordUGt = MO_U_Gt wordWidth
756 mo_wordULt = MO_U_Lt wordWidth
757
758 mo_wordAnd = MO_And wordWidth
759 mo_wordOr = MO_Or wordWidth
760 mo_wordXor = MO_Xor wordWidth
761 mo_wordNot = MO_Not wordWidth
762 mo_wordShl = MO_Shl wordWidth
763 mo_wordSShr = MO_S_Shr wordWidth
764 mo_wordUShr = MO_U_Shr wordWidth
765
766 mo_u_8To32 = MO_UU_Conv W8 W32
767 mo_s_8To32 = MO_SS_Conv W8 W32
768 mo_u_16To32 = MO_UU_Conv W16 W32
769 mo_s_16To32 = MO_SS_Conv W16 W32
770
771 mo_u_8ToWord = MO_UU_Conv W8 wordWidth
772 mo_s_8ToWord = MO_SS_Conv W8 wordWidth
773 mo_u_16ToWord = MO_UU_Conv W16 wordWidth
774 mo_s_16ToWord = MO_SS_Conv W16 wordWidth
775 mo_s_32ToWord = MO_SS_Conv W32 wordWidth
776 mo_u_32ToWord = MO_UU_Conv W32 wordWidth
777
778 mo_WordTo8 = MO_UU_Conv wordWidth W8
779 mo_WordTo16 = MO_UU_Conv wordWidth W16
780 mo_WordTo32 = MO_UU_Conv wordWidth W32
781
782 mo_32To8 = MO_UU_Conv W32 W8
783 mo_32To16 = MO_UU_Conv W32 W16
784
785
786 -- ----------------------------------------------------------------------------
787 -- isCommutableMachOp
788
789 {- |
790 Returns 'True' if the MachOp has commutable arguments. This is used
791 in the platform-independent Cmm optimisations.
792
793 If in doubt, return 'False'. This generates worse code on the
794 native routes, but is otherwise harmless.
795 -}
796 isCommutableMachOp :: MachOp -> Bool
797 isCommutableMachOp mop =
798 case mop of
799 MO_Add _ -> True
800 MO_Eq _ -> True
801 MO_Ne _ -> True
802 MO_Mul _ -> True
803 MO_S_MulMayOflo _ -> True
804 MO_U_MulMayOflo _ -> True
805 MO_And _ -> True
806 MO_Or _ -> True
807 MO_Xor _ -> True
808 _other -> False
809
810 -- ----------------------------------------------------------------------------
811 -- isAssociativeMachOp
812
813 {- |
814 Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@)
815 This is used in the platform-independent Cmm optimisations.
816
817 If in doubt, return 'False'. This generates worse code on the
818 native routes, but is otherwise harmless.
819 -}
820 isAssociativeMachOp :: MachOp -> Bool
821 isAssociativeMachOp mop =
822 case mop of
823 MO_Add {} -> True -- NB: does not include
824 MO_Mul {} -> True -- floatint point!
825 MO_And {} -> True
826 MO_Or {} -> True
827 MO_Xor {} -> True
828 _other -> False
829
830 -- ----------------------------------------------------------------------------
831 -- isComparisonMachOp
832
833 {- |
834 Returns 'True' if the MachOp is a comparison.
835
836 If in doubt, return False. This generates worse code on the
837 native routes, but is otherwise harmless.
838 -}
839 isComparisonMachOp :: MachOp -> Bool
840 isComparisonMachOp mop =
841 case mop of
842 MO_Eq _ -> True
843 MO_Ne _ -> True
844 MO_S_Ge _ -> True
845 MO_S_Le _ -> True
846 MO_S_Gt _ -> True
847 MO_S_Lt _ -> True
848 MO_U_Ge _ -> True
849 MO_U_Le _ -> True
850 MO_U_Gt _ -> True
851 MO_U_Lt _ -> True
852 MO_F_Eq {} -> True
853 MO_F_Ne {} -> True
854 MO_F_Ge {} -> True
855 MO_F_Le {} -> True
856 MO_F_Gt {} -> True
857 MO_F_Lt {} -> True
858 _other -> False
859
860 -- -----------------------------------------------------------------------------
861 -- Inverting conditions
862
863 -- Sometimes it's useful to be able to invert the sense of a
864 -- condition. Not all conditional tests are invertible: in
865 -- particular, floating point conditionals cannot be inverted, because
866 -- there exist floating-point values which return False for both senses
867 -- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)).
868
869 maybeInvertComparison :: MachOp -> Maybe MachOp
870 maybeInvertComparison op
871 = case op of -- None of these Just cases include floating point
872 MO_Eq r -> Just (MO_Ne r)
873 MO_Ne r -> Just (MO_Eq r)
874 MO_U_Lt r -> Just (MO_U_Ge r)
875 MO_U_Gt r -> Just (MO_U_Le r)
876 MO_U_Le r -> Just (MO_U_Gt r)
877 MO_U_Ge r -> Just (MO_U_Lt r)
878 MO_S_Lt r -> Just (MO_S_Ge r)
879 MO_S_Gt r -> Just (MO_S_Le r)
880 MO_S_Le r -> Just (MO_S_Gt r)
881 MO_S_Ge r -> Just (MO_S_Lt r)
882 MO_F_Eq r -> Just (MO_F_Ne r)
883 MO_F_Ne r -> Just (MO_F_Eq r)
884 MO_F_Ge r -> Just (MO_F_Le r)
885 MO_F_Le r -> Just (MO_F_Ge r)
886 MO_F_Gt r -> Just (MO_F_Lt r)
887 MO_F_Lt r -> Just (MO_F_Gt r)
888 _other -> Nothing
889
890 -- ----------------------------------------------------------------------------
891 -- machOpResultType
892
893 {- |
894 Returns the MachRep of the result of a MachOp.
895 -}
896 machOpResultType :: MachOp -> [CmmType] -> CmmType
897 machOpResultType mop tys =
898 case mop of
899 MO_Add {} -> ty1 -- Preserve GC-ptr-hood
900 MO_Sub {} -> ty1 -- of first arg
901 MO_Mul r -> cmmBits r
902 MO_S_MulMayOflo r -> cmmBits r
903 MO_S_Quot r -> cmmBits r
904 MO_S_Rem r -> cmmBits r
905 MO_S_Neg r -> cmmBits r
906 MO_U_MulMayOflo r -> cmmBits r
907 MO_U_Quot r -> cmmBits r
908 MO_U_Rem r -> cmmBits r
909
910 MO_Eq {} -> comparisonResultRep
911 MO_Ne {} -> comparisonResultRep
912 MO_S_Ge {} -> comparisonResultRep
913 MO_S_Le {} -> comparisonResultRep
914 MO_S_Gt {} -> comparisonResultRep
915 MO_S_Lt {} -> comparisonResultRep
916
917 MO_U_Ge {} -> comparisonResultRep
918 MO_U_Le {} -> comparisonResultRep
919 MO_U_Gt {} -> comparisonResultRep
920 MO_U_Lt {} -> comparisonResultRep
921
922 MO_F_Add r -> cmmFloat r
923 MO_F_Sub r -> cmmFloat r
924 MO_F_Mul r -> cmmFloat r
925 MO_F_Quot r -> cmmFloat r
926 MO_F_Neg r -> cmmFloat r
927 MO_F_Eq {} -> comparisonResultRep
928 MO_F_Ne {} -> comparisonResultRep
929 MO_F_Ge {} -> comparisonResultRep
930 MO_F_Le {} -> comparisonResultRep
931 MO_F_Gt {} -> comparisonResultRep
932 MO_F_Lt {} -> comparisonResultRep
933
934 MO_And {} -> ty1 -- Used for pointer masking
935 MO_Or {} -> ty1
936 MO_Xor {} -> ty1
937 MO_Not r -> cmmBits r
938 MO_Shl r -> cmmBits r
939 MO_U_Shr r -> cmmBits r
940 MO_S_Shr r -> cmmBits r
941
942 MO_SS_Conv _ to -> cmmBits to
943 MO_UU_Conv _ to -> cmmBits to
944 MO_FS_Conv _ to -> cmmBits to
945 MO_SF_Conv _ to -> cmmFloat to
946 MO_FF_Conv _ to -> cmmFloat to
947 where
948 (ty1:_) = tys
949
950 comparisonResultRep :: CmmType
951 comparisonResultRep = bWord -- is it?
952
953
954 -- -----------------------------------------------------------------------------
955 -- machOpArgReps
956
957 -- | This function is used for debugging only: we can check whether an
958 -- application of a MachOp is "type-correct" by checking that the MachReps of
959 -- its arguments are the same as the MachOp expects. This is used when
960 -- linting a CmmExpr.
961
962 machOpArgReps :: MachOp -> [Width]
963 machOpArgReps op =
964 case op of
965 MO_Add r -> [r,r]
966 MO_Sub r -> [r,r]
967 MO_Eq r -> [r,r]
968 MO_Ne r -> [r,r]
969 MO_Mul r -> [r,r]
970 MO_S_MulMayOflo r -> [r,r]
971 MO_S_Quot r -> [r,r]
972 MO_S_Rem r -> [r,r]
973 MO_S_Neg r -> [r]
974 MO_U_MulMayOflo r -> [r,r]
975 MO_U_Quot r -> [r,r]
976 MO_U_Rem r -> [r,r]
977
978 MO_S_Ge r -> [r,r]
979 MO_S_Le r -> [r,r]
980 MO_S_Gt r -> [r,r]
981 MO_S_Lt r -> [r,r]
982
983 MO_U_Ge r -> [r,r]
984 MO_U_Le r -> [r,r]
985 MO_U_Gt r -> [r,r]
986 MO_U_Lt r -> [r,r]
987
988 MO_F_Add r -> [r,r]
989 MO_F_Sub r -> [r,r]
990 MO_F_Mul r -> [r,r]
991 MO_F_Quot r -> [r,r]
992 MO_F_Neg r -> [r]
993 MO_F_Eq r -> [r,r]
994 MO_F_Ne r -> [r,r]
995 MO_F_Ge r -> [r,r]
996 MO_F_Le r -> [r,r]
997 MO_F_Gt r -> [r,r]
998 MO_F_Lt r -> [r,r]
999
1000 MO_And r -> [r,r]
1001 MO_Or r -> [r,r]
1002 MO_Xor r -> [r,r]
1003 MO_Not r -> [r]
1004 MO_Shl r -> [r,wordWidth]
1005 MO_U_Shr r -> [r,wordWidth]
1006 MO_S_Shr r -> [r,wordWidth]
1007
1008 MO_SS_Conv from _ -> [from]
1009 MO_UU_Conv from _ -> [from]
1010 MO_SF_Conv from _ -> [from]
1011 MO_FS_Conv from _ -> [from]
1012 MO_FF_Conv from _ -> [from]
1013
1014
1015 -------------------------------------------------------------------------
1016 {- Note [Signed vs unsigned]
1017 ~~~~~~~~~~~~~~~~~~~~~~~~~
1018 Should a CmmType include a signed vs. unsigned distinction?
1019
1020 This is very much like a "hint" in C-- terminology: it isn't necessary
1021 in order to generate correct code, but it might be useful in that the
1022 compiler can generate better code if it has access to higher-level
1023 hints about data. This is important at call boundaries, because the
1024 definition of a function is not visible at all of its call sites, so
1025 the compiler cannot infer the hints.
1026
1027 Here in Cmm, we're taking a slightly different approach. We include
1028 the int vs. float hint in the MachRep, because (a) the majority of
1029 platforms have a strong distinction between float and int registers,
1030 and (b) we don't want to do any heavyweight hint-inference in the
1031 native code backend in order to get good code. We're treating the
1032 hint more like a type: our Cmm is always completely consistent with
1033 respect to hints. All coercions between float and int are explicit.
1034
1035 What about the signed vs. unsigned hint? This information might be
1036 useful if we want to keep sub-word-sized values in word-size
1037 registers, which we must do if we only have word-sized registers.
1038
1039 On such a system, there are two straightforward conventions for
1040 representing sub-word-sized values:
1041
1042 (a) Leave the upper bits undefined. Comparison operations must
1043 sign- or zero-extend both operands before comparing them,
1044 depending on whether the comparison is signed or unsigned.
1045
1046 (b) Always keep the values sign- or zero-extended as appropriate.
1047 Arithmetic operations must narrow the result to the appropriate
1048 size.
1049
1050 A clever compiler might not use either (a) or (b) exclusively, instead
1051 it would attempt to minimize the coercions by analysis: the same kind
1052 of analysis that propagates hints around. In Cmm we don't want to
1053 have to do this, so we plump for having richer types and keeping the
1054 type information consistent.
1055
1056 If signed/unsigned hints are missing from MachRep, then the only
1057 choice we have is (a), because we don't know whether the result of an
1058 operation should be sign- or zero-extended.
1059
1060 Many architectures have extending load operations, which work well
1061 with (b). To make use of them with (a), you need to know whether the
1062 value is going to be sign- or zero-extended by an enclosing comparison
1063 (for example), which involves knowing above the context. This is
1064 doable but more complex.
1065
1066 Further complicating the issue is foreign calls: a foreign calling
1067 convention can specify that signed 8-bit quantities are passed as
1068 sign-extended 32 bit quantities, for example (this is the case on the
1069 PowerPC). So we *do* need sign information on foreign call arguments.
1070
1071 Pros for adding signed vs. unsigned to MachRep:
1072
1073 - It would let us use convention (b) above, and get easier
1074 code generation for extending loads.
1075
1076 - Less information required on foreign calls.
1077
1078 - MachOp type would be simpler
1079
1080 Cons:
1081
1082 - More complexity
1083
1084 - What is the MachRep for a VanillaReg? Currently it is
1085 always wordRep, but now we have to decide whether it is
1086 signed or unsigned. The same VanillaReg can thus have
1087 different MachReps in different parts of the program.
1088
1089 - Extra coercions cluttering up expressions.
1090
1091 Currently for GHC, the foreign call point is moot, because we do our
1092 own promotion of sub-word-sized values to word-sized values. The Int8
1093 type is represnted by an Int# which is kept sign-extended at all times
1094 (this is slightly naughty, because we're making assumptions about the
1095 C calling convention rather early on in the compiler). However, given
1096 this, the cons outweigh the pros.
1097
1098 -}
1099