Implement SSE2 floating-point support in the x86 native code generator (#594)
[ghc.git] / compiler / nativeGen / PPC / Regs.hs
1 -- -----------------------------------------------------------------------------
2 --
3 -- (c) The University of Glasgow 1994-2004
4 --
5 -- -----------------------------------------------------------------------------
6
7 module PPC.Regs (
8 -- squeeze functions
9 virtualRegSqueeze,
10 realRegSqueeze,
11
12 mkVirtualReg,
13 regDotColor,
14
15 -- immediates
16 Imm(..),
17 strImmLit,
18 litToImm,
19
20 -- addressing modes
21 AddrMode(..),
22 addrOffset,
23
24 -- registers
25 spRel,
26 argRegs,
27 allArgRegs,
28 callClobberedRegs,
29 allMachRegNos,
30 classOfRealReg,
31 showReg,
32
33 -- machine specific
34 allFPArgRegs,
35 fits16Bits,
36 makeImmediate,
37 fReg,
38 sp, r3, r4, r27, r28, f1, f20, f21,
39
40 -- horrow show
41 freeReg,
42 globalRegMaybe,
43 get_GlobalReg_reg_or_addr,
44 allocatableRegs
45
46 )
47
48 where
49
50 #include "nativeGen/NCG.h"
51 #include "HsVersions.h"
52 #include "../includes/stg/MachRegs.h"
53
54 import Reg
55 import RegClass
56 import Size
57
58 import CgUtils ( get_GlobalReg_addr )
59 import BlockId
60 import Cmm
61 import CLabel ( CLabel )
62 import Unique
63
64 import Pretty
65 import Outputable ( panic, SDoc )
66 import qualified Outputable
67 import Constants
68 import FastBool
69 import FastTypes
70
71 import Data.Word ( Word8, Word16, Word32 )
72 import Data.Int ( Int8, Int16, Int32 )
73
74
75 -- squeese functions for the graph allocator -----------------------------------
76
77 -- | regSqueeze_class reg
78 -- Calculuate the maximum number of register colors that could be
79 -- denied to a node of this class due to having this reg
80 -- as a neighbour.
81 --
82 {-# INLINE virtualRegSqueeze #-}
83 virtualRegSqueeze :: RegClass -> VirtualReg -> FastInt
84 virtualRegSqueeze cls vr
85 = case cls of
86 RcInteger
87 -> case vr of
88 VirtualRegI{} -> _ILIT(1)
89 VirtualRegHi{} -> _ILIT(1)
90 _other -> _ILIT(0)
91
92 RcDouble
93 -> case vr of
94 VirtualRegD{} -> _ILIT(1)
95 VirtualRegF{} -> _ILIT(0)
96 _other -> _ILIT(0)
97
98 _other -> _ILIT(0)
99
100 {-# INLINE realRegSqueeze #-}
101 realRegSqueeze :: RegClass -> RealReg -> FastInt
102 realRegSqueeze cls rr
103 = case cls of
104 RcInteger
105 -> case rr of
106 RealRegSingle regNo
107 | regNo < 32 -> _ILIT(1) -- first fp reg is 32
108 | otherwise -> _ILIT(0)
109
110 RealRegPair{} -> _ILIT(0)
111
112 RcDouble
113 -> case rr of
114 RealRegSingle regNo
115 | regNo < 32 -> _ILIT(0)
116 | otherwise -> _ILIT(1)
117
118 RealRegPair{} -> _ILIT(0)
119
120 _other -> _ILIT(0)
121
122 mkVirtualReg :: Unique -> Size -> VirtualReg
123 mkVirtualReg u size
124 | not (isFloatSize size) = VirtualRegI u
125 | otherwise
126 = case size of
127 FF32 -> VirtualRegD u
128 FF64 -> VirtualRegD u
129 _ -> panic "mkVirtualReg"
130
131 regDotColor :: RealReg -> SDoc
132 regDotColor reg
133 = case classOfRealReg reg of
134 RcInteger -> Outputable.text "blue"
135 RcFloat -> Outputable.text "red"
136 RcDouble -> Outputable.text "green"
137 RcDoubleSSE -> Outputable.text "yellow"
138
139
140 -- immediates ------------------------------------------------------------------
141 data Imm
142 = ImmInt Int
143 | ImmInteger Integer -- Sigh.
144 | ImmCLbl CLabel -- AbstractC Label (with baggage)
145 | ImmLit Doc -- Simple string
146 | ImmIndex CLabel Int
147 | ImmFloat Rational
148 | ImmDouble Rational
149 | ImmConstantSum Imm Imm
150 | ImmConstantDiff Imm Imm
151 | LO Imm
152 | HI Imm
153 | HA Imm {- high halfword adjusted -}
154
155
156 strImmLit :: String -> Imm
157 strImmLit s = ImmLit (text s)
158
159
160 litToImm :: CmmLit -> Imm
161 litToImm (CmmInt i w) = ImmInteger (narrowS w i)
162 -- narrow to the width: a CmmInt might be out of
163 -- range, but we assume that ImmInteger only contains
164 -- in-range values. A signed value should be fine here.
165 litToImm (CmmFloat f W32) = ImmFloat f
166 litToImm (CmmFloat f W64) = ImmDouble f
167 litToImm (CmmLabel l) = ImmCLbl l
168 litToImm (CmmLabelOff l off) = ImmIndex l off
169 litToImm (CmmLabelDiffOff l1 l2 off)
170 = ImmConstantSum
171 (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
172 (ImmInt off)
173 litToImm (CmmBlock id) = ImmCLbl (infoTblLbl id)
174 litToImm _ = panic "PPC.Regs.litToImm: no match"
175
176
177 -- addressing modes ------------------------------------------------------------
178
179 data AddrMode
180 = AddrRegReg Reg Reg
181 | AddrRegImm Reg Imm
182
183
184 addrOffset :: AddrMode -> Int -> Maybe AddrMode
185 addrOffset addr off
186 = case addr of
187 AddrRegImm r (ImmInt n)
188 | fits16Bits n2 -> Just (AddrRegImm r (ImmInt n2))
189 | otherwise -> Nothing
190 where n2 = n + off
191
192 AddrRegImm r (ImmInteger n)
193 | fits16Bits n2 -> Just (AddrRegImm r (ImmInt (fromInteger n2)))
194 | otherwise -> Nothing
195 where n2 = n + toInteger off
196
197 _ -> Nothing
198
199
200 -- registers -------------------------------------------------------------------
201 -- @spRel@ gives us a stack relative addressing mode for volatile
202 -- temporaries and for excess call arguments. @fpRel@, where
203 -- applicable, is the same but for the frame pointer.
204
205 spRel :: Int -- desired stack offset in words, positive or negative
206 -> AddrMode
207
208 spRel n = AddrRegImm sp (ImmInt (n * wORD_SIZE))
209
210
211 -- argRegs is the set of regs which are read for an n-argument call to C.
212 -- For archs which pass all args on the stack (x86), is empty.
213 -- Sparc passes up to the first 6 args in regs.
214 -- Dunno about Alpha.
215 argRegs :: RegNo -> [Reg]
216 argRegs 0 = []
217 argRegs 1 = map regSingle [3]
218 argRegs 2 = map regSingle [3,4]
219 argRegs 3 = map regSingle [3..5]
220 argRegs 4 = map regSingle [3..6]
221 argRegs 5 = map regSingle [3..7]
222 argRegs 6 = map regSingle [3..8]
223 argRegs 7 = map regSingle [3..9]
224 argRegs 8 = map regSingle [3..10]
225 argRegs _ = panic "MachRegs.argRegs(powerpc): don't know about >8 arguments!"
226
227
228 allArgRegs :: [Reg]
229 allArgRegs = map regSingle [3..10]
230
231
232 -- these are the regs which we cannot assume stay alive over a C call.
233 callClobberedRegs :: [Reg]
234 #if defined(darwin_TARGET_OS)
235 callClobberedRegs
236 = map regSingle (0:[2..12] ++ map fReg [0..13])
237
238 #elif defined(linux_TARGET_OS)
239 callClobberedRegs
240 = map regSingle (0:[2..13] ++ map fReg [0..13])
241
242 #else
243 callClobberedRegs
244 = panic "PPC.Regs.callClobberedRegs: not defined for this architecture"
245 #endif
246
247
248 allMachRegNos :: [RegNo]
249 allMachRegNos = [0..63]
250
251
252 {-# INLINE classOfRealReg #-}
253 classOfRealReg :: RealReg -> RegClass
254 classOfRealReg (RealRegSingle i)
255 | i < 32 = RcInteger
256 | otherwise = RcDouble
257
258 classOfRealReg (RealRegPair{})
259 = panic "regClass(ppr): no reg pairs on this architecture"
260
261 showReg :: RegNo -> String
262 showReg n
263 | n >= 0 && n <= 31 = "%r" ++ show n
264 | n >= 32 && n <= 63 = "%f" ++ show (n - 32)
265 | otherwise = "%unknown_powerpc_real_reg_" ++ show n
266
267
268
269 -- machine specific ------------------------------------------------------------
270
271 allFPArgRegs :: [Reg]
272 #if defined(darwin_TARGET_OS)
273 allFPArgRegs = map (regSingle . fReg) [1..13]
274
275 #elif defined(linux_TARGET_OS)
276 allFPArgRegs = map (regSingle . fReg) [1..8]
277
278 #else
279 allFPArgRegs = panic "PPC.Regs.allFPArgRegs: not defined for this architecture"
280
281 #endif
282
283 fits16Bits :: Integral a => a -> Bool
284 fits16Bits x = x >= -32768 && x < 32768
285
286 makeImmediate :: Integral a => Width -> Bool -> a -> Maybe Imm
287 makeImmediate rep signed x = fmap ImmInt (toI16 rep signed)
288 where
289 narrow W32 False = fromIntegral (fromIntegral x :: Word32)
290 narrow W16 False = fromIntegral (fromIntegral x :: Word16)
291 narrow W8 False = fromIntegral (fromIntegral x :: Word8)
292 narrow W32 True = fromIntegral (fromIntegral x :: Int32)
293 narrow W16 True = fromIntegral (fromIntegral x :: Int16)
294 narrow W8 True = fromIntegral (fromIntegral x :: Int8)
295 narrow _ _ = panic "PPC.Regs.narrow: no match"
296
297 narrowed = narrow rep signed
298
299 toI16 W32 True
300 | narrowed >= -32768 && narrowed < 32768 = Just narrowed
301 | otherwise = Nothing
302 toI16 W32 False
303 | narrowed >= 0 && narrowed < 65536 = Just narrowed
304 | otherwise = Nothing
305 toI16 _ _ = Just narrowed
306
307
308 {-
309 The PowerPC has 64 registers of interest; 32 integer registers and 32 floating
310 point registers.
311 -}
312
313 fReg :: Int -> RegNo
314 fReg x = (32 + x)
315
316 sp, r3, r4, r27, r28, f1, f20, f21 :: Reg
317 sp = regSingle 1
318 r3 = regSingle 3
319 r4 = regSingle 4
320 r27 = regSingle 27
321 r28 = regSingle 28
322 f1 = regSingle $ fReg 1
323 f20 = regSingle $ fReg 20
324 f21 = regSingle $ fReg 21
325
326
327
328 -- horror show -----------------------------------------------------------------
329 freeReg :: RegNo -> FastBool
330 globalRegMaybe :: GlobalReg -> Maybe Reg
331
332
333 #if powerpc_TARGET_ARCH
334 #define r0 0
335 #define r1 1
336 #define r2 2
337 #define r3 3
338 #define r4 4
339 #define r5 5
340 #define r6 6
341 #define r7 7
342 #define r8 8
343 #define r9 9
344 #define r10 10
345 #define r11 11
346 #define r12 12
347 #define r13 13
348 #define r14 14
349 #define r15 15
350 #define r16 16
351 #define r17 17
352 #define r18 18
353 #define r19 19
354 #define r20 20
355 #define r21 21
356 #define r22 22
357 #define r23 23
358 #define r24 24
359 #define r25 25
360 #define r26 26
361 #define r27 27
362 #define r28 28
363 #define r29 29
364 #define r30 30
365 #define r31 31
366
367 #ifdef darwin_TARGET_OS
368 #define f0 32
369 #define f1 33
370 #define f2 34
371 #define f3 35
372 #define f4 36
373 #define f5 37
374 #define f6 38
375 #define f7 39
376 #define f8 40
377 #define f9 41
378 #define f10 42
379 #define f11 43
380 #define f12 44
381 #define f13 45
382 #define f14 46
383 #define f15 47
384 #define f16 48
385 #define f17 49
386 #define f18 50
387 #define f19 51
388 #define f20 52
389 #define f21 53
390 #define f22 54
391 #define f23 55
392 #define f24 56
393 #define f25 57
394 #define f26 58
395 #define f27 59
396 #define f28 60
397 #define f29 61
398 #define f30 62
399 #define f31 63
400 #else
401 #define fr0 32
402 #define fr1 33
403 #define fr2 34
404 #define fr3 35
405 #define fr4 36
406 #define fr5 37
407 #define fr6 38
408 #define fr7 39
409 #define fr8 40
410 #define fr9 41
411 #define fr10 42
412 #define fr11 43
413 #define fr12 44
414 #define fr13 45
415 #define fr14 46
416 #define fr15 47
417 #define fr16 48
418 #define fr17 49
419 #define fr18 50
420 #define fr19 51
421 #define fr20 52
422 #define fr21 53
423 #define fr22 54
424 #define fr23 55
425 #define fr24 56
426 #define fr25 57
427 #define fr26 58
428 #define fr27 59
429 #define fr28 60
430 #define fr29 61
431 #define fr30 62
432 #define fr31 63
433 #endif
434
435
436
437 freeReg 0 = fastBool False -- Hack: r0 can't be used in all insns, but it's actually free
438 freeReg 1 = fastBool False -- The Stack Pointer
439 #if !darwin_TARGET_OS
440 -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
441 freeReg 2 = fastBool False
442 #endif
443
444 #ifdef REG_Base
445 freeReg REG_Base = fastBool False
446 #endif
447 #ifdef REG_R1
448 freeReg REG_R1 = fastBool False
449 #endif
450 #ifdef REG_R2
451 freeReg REG_R2 = fastBool False
452 #endif
453 #ifdef REG_R3
454 freeReg REG_R3 = fastBool False
455 #endif
456 #ifdef REG_R4
457 freeReg REG_R4 = fastBool False
458 #endif
459 #ifdef REG_R5
460 freeReg REG_R5 = fastBool False
461 #endif
462 #ifdef REG_R6
463 freeReg REG_R6 = fastBool False
464 #endif
465 #ifdef REG_R7
466 freeReg REG_R7 = fastBool False
467 #endif
468 #ifdef REG_R8
469 freeReg REG_R8 = fastBool False
470 #endif
471 #ifdef REG_F1
472 freeReg REG_F1 = fastBool False
473 #endif
474 #ifdef REG_F2
475 freeReg REG_F2 = fastBool False
476 #endif
477 #ifdef REG_F3
478 freeReg REG_F3 = fastBool False
479 #endif
480 #ifdef REG_F4
481 freeReg REG_F4 = fastBool False
482 #endif
483 #ifdef REG_D1
484 freeReg REG_D1 = fastBool False
485 #endif
486 #ifdef REG_D2
487 freeReg REG_D2 = fastBool False
488 #endif
489 #ifdef REG_Sp
490 freeReg REG_Sp = fastBool False
491 #endif
492 #ifdef REG_Su
493 freeReg REG_Su = fastBool False
494 #endif
495 #ifdef REG_SpLim
496 freeReg REG_SpLim = fastBool False
497 #endif
498 #ifdef REG_Hp
499 freeReg REG_Hp = fastBool False
500 #endif
501 #ifdef REG_HpLim
502 freeReg REG_HpLim = fastBool False
503 #endif
504 freeReg _ = fastBool True
505
506
507 -- | Returns 'Nothing' if this global register is not stored
508 -- in a real machine register, otherwise returns @'Just' reg@, where
509 -- reg is the machine register it is stored in.
510
511
512 #ifdef REG_Base
513 globalRegMaybe BaseReg = Just (regSingle REG_Base)
514 #endif
515 #ifdef REG_R1
516 globalRegMaybe (VanillaReg 1 _) = Just (regSingle REG_R1)
517 #endif
518 #ifdef REG_R2
519 globalRegMaybe (VanillaReg 2 _) = Just (regSingle REG_R2)
520 #endif
521 #ifdef REG_R3
522 globalRegMaybe (VanillaReg 3 _) = Just (regSingle REG_R3)
523 #endif
524 #ifdef REG_R4
525 globalRegMaybe (VanillaReg 4 _) = Just (regSingle REG_R4)
526 #endif
527 #ifdef REG_R5
528 globalRegMaybe (VanillaReg 5 _) = Just (regSingle REG_R5)
529 #endif
530 #ifdef REG_R6
531 globalRegMaybe (VanillaReg 6 _) = Just (regSingle REG_R6)
532 #endif
533 #ifdef REG_R7
534 globalRegMaybe (VanillaReg 7 _) = Just (regSingle REG_R7)
535 #endif
536 #ifdef REG_R8
537 globalRegMaybe (VanillaReg 8 _) = Just (regSingle REG_R8)
538 #endif
539 #ifdef REG_R9
540 globalRegMaybe (VanillaReg 9 _) = Just (regSingle REG_R9)
541 #endif
542 #ifdef REG_R10
543 globalRegMaybe (VanillaReg 10 _) = Just (regSingle REG_R10)
544 #endif
545 #ifdef REG_F1
546 globalRegMaybe (FloatReg 1) = Just (regSingle REG_F1)
547 #endif
548 #ifdef REG_F2
549 globalRegMaybe (FloatReg 2) = Just (regSingle REG_F2)
550 #endif
551 #ifdef REG_F3
552 globalRegMaybe (FloatReg 3) = Just (regSingle REG_F3)
553 #endif
554 #ifdef REG_F4
555 globalRegMaybe (FloatReg 4) = Just (regSingle REG_F4)
556 #endif
557 #ifdef REG_D1
558 globalRegMaybe (DoubleReg 1) = Just (regSingle REG_D1)
559 #endif
560 #ifdef REG_D2
561 globalRegMaybe (DoubleReg 2) = Just (regSingle REG_D2)
562 #endif
563 #ifdef REG_Sp
564 globalRegMaybe Sp = Just (regSingle REG_Sp)
565 #endif
566 #ifdef REG_Lng1
567 globalRegMaybe (LongReg 1) = Just (regSingle REG_Lng1)
568 #endif
569 #ifdef REG_Lng2
570 globalRegMaybe (LongReg 2) = Just (regSingle REG_Lng2)
571 #endif
572 #ifdef REG_SpLim
573 globalRegMaybe SpLim = Just (regSingle REG_SpLim)
574 #endif
575 #ifdef REG_Hp
576 globalRegMaybe Hp = Just (regSingle REG_Hp)
577 #endif
578 #ifdef REG_HpLim
579 globalRegMaybe HpLim = Just (regSingle REG_HpLim)
580 #endif
581 #ifdef REG_CurrentTSO
582 globalRegMaybe CurrentTSO = Just (regSingle REG_CurrentTSO)
583 #endif
584 #ifdef REG_CurrentNursery
585 globalRegMaybe CurrentNursery = Just (regSingle REG_CurrentNursery)
586 #endif
587 globalRegMaybe _ = Nothing
588
589
590 #else /* powerpc_TARGET_ARCH */
591
592 freeReg _ = 0#
593 globalRegMaybe _ = panic "PPC.Regs.globalRegMaybe: not defined"
594
595 #endif /* powerpc_TARGET_ARCH */
596
597
598 -- We map STG registers onto appropriate CmmExprs. Either they map
599 -- to real machine registers or stored as offsets from BaseReg. Given
600 -- a GlobalReg, get_GlobalReg_reg_or_addr produces either the real
601 -- register it is in, on this platform, or a CmmExpr denoting the
602 -- address in the register table holding it.
603 -- (See also get_GlobalReg_addr in CgUtils.)
604
605 get_GlobalReg_reg_or_addr :: GlobalReg -> Either Reg CmmExpr
606 get_GlobalReg_reg_or_addr mid
607 = case globalRegMaybe mid of
608 Just rr -> Left rr
609 Nothing -> Right (get_GlobalReg_addr mid)
610
611
612 -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
613 -- i.e., these are the regs for which we are prepared to allow the
614 -- register allocator to attempt to map VRegs to.
615 allocatableRegs :: [RealReg]
616 allocatableRegs
617 = let isFree i = isFastTrue (freeReg i)
618 in map RealRegSingle $ filter isFree allMachRegNos