X-Git-Url: http://git.haskell.org/ghc.git/blobdiff_plain/648673209429c0f4bf1e77aa5e69fb97f91933a7..159a1a2b7501b149fadfc0cd1940fab6bf030691:/includes/stg/MachRegs.h diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index 34685a3..b1a0ef0 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team, 1998-2011 + * (c) The GHC Team, 1998-2014 * * Registers used in STG code. Might or might not correspond to * actual machine registers. @@ -8,7 +8,7 @@ * Do not #include this file directly: #include "Rts.h" instead. * * To understand the structure of the RTS headers, see the wiki: - * http://hackage.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes + * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes * * ---------------------------------------------------------------------------*/ @@ -19,43 +19,41 @@ * only in here please. */ -/* - * Defining NO_REGS causes no global registers to be used. NO_REGS is +/* + * Undefine these as a precaution: some of them were found to be + * defined by system headers on ARM/Linux. + */ +#undef REG_R1 +#undef REG_R2 +#undef REG_R3 +#undef REG_R4 +#undef REG_R5 +#undef REG_R6 +#undef REG_R7 +#undef REG_R8 +#undef REG_R9 +#undef REG_R10 + +/* + * Defining MACHREGS_NO_REGS to 1 causes no global registers to be used. + * MACHREGS_NO_REGS is typically controlled by NO_REGS, which is * typically defined by GHC, via a command-line option passed to gcc, * when the -funregisterised flag is given. * - * NB. When NO_REGS is on, calling & return conventions may be + * NB. When MACHREGS_NO_REGS is set to 1, calling & return conventions may be * different. For example, all function arguments will be passed on * the stack, and components of an unboxed tuple will be returned on * the stack rather than in registers. 
*/ -#ifndef NO_REGS +#if MACHREGS_NO_REGS == 1 -/* NOTE: when testing the platform in this file we must test either - * *_HOST_ARCH and *_TARGET_ARCH, depending on whether COMPILING_GHC - * is set. This is because when we're compiling the RTS and HC code, - * the platform we're running on is the HOST, but when compiling GHC - * we want to know about the register mapping on the TARGET platform. - */ -#ifdef COMPILING_GHC -#define i386_REGS i386_TARGET_ARCH -#define x86_64_REGS x86_64_TARGET_ARCH -#define powerpc_REGS (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH) -#define sparc_REGS sparc_TARGET_ARCH -#define arm_REGS arm_TARGET_ARCH -#define darwin_REGS darwin_TARGET_OS -#else -#define i386_REGS i386_HOST_ARCH -#define x86_64_REGS x86_64_HOST_ARCH -#define powerpc_REGS (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH) -#define sparc_REGS sparc_HOST_ARCH -#define arm_REGS arm_HOST_ARCH -#define darwin_REGS darwin_HOST_OS -#endif +/* Nothing */ + +#elif MACHREGS_NO_REGS == 0 /* ---------------------------------------------------------------------------- Caller saves and callee-saves regs. - + Caller-saves regs have to be saved around C-calls made from STG land, so this file defines CALLER_SAVES_ for each that is designated caller-saves in that machine's C calling convention. @@ -84,53 +82,70 @@ Leaving SpLim out of the picture. 
-------------------------------------------------------------------------- */ -#if i386_REGS +#if MACHREGS_i386 #define REG(x) __asm__("%" #x) #ifndef not_doing_dynamic_linking #define REG_Base ebx #endif -#define REG_Sp ebp +#define REG_Sp ebp #ifndef STOLEN_X86_REGS #define STOLEN_X86_REGS 4 #endif #if STOLEN_X86_REGS >= 3 -# define REG_R1 esi +# define REG_R1 esi #endif #if STOLEN_X86_REGS >= 4 # define REG_Hp edi #endif +#define REG_MachSp esp -#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */ +#define REG_XMM1 xmm0 +#define REG_XMM2 xmm1 +#define REG_XMM3 xmm2 +#define REG_XMM4 xmm3 + +#define REG_YMM1 ymm0 +#define REG_YMM2 ymm1 +#define REG_YMM3 ymm2 +#define REG_YMM4 ymm3 + +#define REG_ZMM1 zmm0 +#define REG_ZMM2 zmm1 +#define REG_ZMM3 zmm2 +#define REG_ZMM4 zmm3 + +#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */ #define MAX_REAL_FLOAT_REG 0 #define MAX_REAL_DOUBLE_REG 0 #define MAX_REAL_LONG_REG 0 - -#endif /* iX86 */ +#define MAX_REAL_XMM_REG 4 +#define MAX_REAL_YMM_REG 4 +#define MAX_REAL_ZMM_REG 4 /* ----------------------------------------------------------------------------- The x86-64 register mapping - %rax caller-saves, don't steal this one - %rbx YES + %rax caller-saves, don't steal this one + %rbx YES %rcx arg reg, caller-saves - %rdx arg reg, caller-saves - %rsi arg reg, caller-saves - %rdi arg reg, caller-saves - %rbp YES (our *prime* register) - %rsp (unavailable - stack pointer) + %rdx arg reg, caller-saves + %rsi arg reg, caller-saves + %rdi arg reg, caller-saves + %rbp YES (our *prime* register) + %rsp (unavailable - stack pointer) %r8 arg reg, caller-saves - %r9 arg reg, caller-saves + %r9 arg reg, caller-saves %r10 caller-saves - %r11 caller-saves - %r12 YES - %r13 YES - %r14 YES - %r15 YES + %r11 caller-saves + %r12 YES + %r13 YES + %r14 YES + %r15 YES %xmm0-7 arg regs, caller-saves %xmm8-15 caller-saves @@ -141,7 +156,7 @@ 
--------------------------------------------------------------------------- */ -#if x86_64_REGS +#elif MACHREGS_x86_64 #define REG(x) __asm__("%" #x) @@ -155,17 +170,53 @@ #define REG_R5 r8 #define REG_R6 r9 #define REG_SpLim r15 +#define REG_MachSp rsp + +/* +Map both Fn and Dn to register xmmn so that we can pass a function any +combination of up to six Float# or Double# arguments without touching +the stack. See Note [Overlapping global registers] for implications. +*/ #define REG_F1 xmm1 #define REG_F2 xmm2 #define REG_F3 xmm3 #define REG_F4 xmm4 - -#define REG_D1 xmm5 -#define REG_D2 xmm6 - +#define REG_F5 xmm5 +#define REG_F6 xmm6 + +#define REG_D1 xmm1 +#define REG_D2 xmm2 +#define REG_D3 xmm3 +#define REG_D4 xmm4 +#define REG_D5 xmm5 +#define REG_D6 xmm6 + +#define REG_XMM1 xmm1 +#define REG_XMM2 xmm2 +#define REG_XMM3 xmm3 +#define REG_XMM4 xmm4 +#define REG_XMM5 xmm5 +#define REG_XMM6 xmm6 + +#define REG_YMM1 ymm1 +#define REG_YMM2 ymm2 +#define REG_YMM3 ymm3 +#define REG_YMM4 ymm4 +#define REG_YMM5 ymm5 +#define REG_YMM6 ymm6 + +#define REG_ZMM1 zmm1 +#define REG_ZMM2 zmm2 +#define REG_ZMM3 zmm3 +#define REG_ZMM4 zmm4 +#define REG_ZMM5 zmm5 +#define REG_ZMM6 zmm6 + +#if !defined(mingw32_HOST_OS) #define CALLER_SAVES_R3 #define CALLER_SAVES_R4 +#endif #define CALLER_SAVES_R5 #define CALLER_SAVES_R6 @@ -173,91 +224,127 @@ #define CALLER_SAVES_F2 #define CALLER_SAVES_F3 #define CALLER_SAVES_F4 +#define CALLER_SAVES_F5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_F6 +#endif #define CALLER_SAVES_D1 #define CALLER_SAVES_D2 +#define CALLER_SAVES_D3 +#define CALLER_SAVES_D4 +#define CALLER_SAVES_D5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_D6 +#endif + +#define CALLER_SAVES_XMM1 +#define CALLER_SAVES_XMM2 +#define CALLER_SAVES_XMM3 +#define CALLER_SAVES_XMM4 +#define CALLER_SAVES_XMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_XMM6 +#endif + +#define CALLER_SAVES_YMM1 +#define CALLER_SAVES_YMM2 +#define CALLER_SAVES_YMM3 +#define 
CALLER_SAVES_YMM4 +#define CALLER_SAVES_YMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_YMM6 +#endif + +#define CALLER_SAVES_ZMM1 +#define CALLER_SAVES_ZMM2 +#define CALLER_SAVES_ZMM3 +#define CALLER_SAVES_ZMM4 +#define CALLER_SAVES_ZMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_ZMM6 +#endif #define MAX_REAL_VANILLA_REG 6 -#define MAX_REAL_FLOAT_REG 4 -#define MAX_REAL_DOUBLE_REG 2 +#define MAX_REAL_FLOAT_REG 6 +#define MAX_REAL_DOUBLE_REG 6 #define MAX_REAL_LONG_REG 0 - -#endif /* x86_64 */ +#define MAX_REAL_XMM_REG 6 +#define MAX_REAL_YMM_REG 6 +#define MAX_REAL_ZMM_REG 6 /* ----------------------------------------------------------------------------- The PowerPC register mapping - 0 system glue? (caller-save, volatile) - 1 SP (callee-save, non-volatile) + 0 system glue? (caller-save, volatile) + 1 SP (callee-save, non-volatile) 2 AIX, powerpc64-linux: RTOC (a strange special case) - darwin: + darwin: (caller-save, volatile) powerpc32-linux: reserved for use by system - - 3-10 args/return (caller-save, volatile) - 11,12 system glue? (caller-save, volatile) + + 3-10 args/return (caller-save, volatile) + 11,12 system glue? (caller-save, volatile) 13 on 64-bit: reserved for thread state pointer on 32-bit: (callee-save, non-volatile) - 14-31 (callee-save, non-volatile) - - f0 (caller-save, volatile) - f1-f13 args/return (caller-save, volatile) - f14-f31 (callee-save, non-volatile) - + 14-31 (callee-save, non-volatile) + + f0 (caller-save, volatile) + f1-f13 args/return (caller-save, volatile) + f14-f31 (callee-save, non-volatile) + \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes. \tr{0}--\tr{12} are caller-save registers. - + \tr{%f14}--\tr{%f31} are callee-save floating-point registers. - + We can do the Whole Business with callee-save registers only! 
-------------------------------------------------------------------------- */ -#if powerpc_REGS +#elif MACHREGS_powerpc #define REG(x) __asm__(#x) -#define REG_R1 r14 -#define REG_R2 r15 -#define REG_R3 r16 -#define REG_R4 r17 -#define REG_R5 r18 -#define REG_R6 r19 -#define REG_R7 r20 -#define REG_R8 r21 +#define REG_R1 r14 +#define REG_R2 r15 +#define REG_R3 r16 +#define REG_R4 r17 +#define REG_R5 r18 +#define REG_R6 r19 +#define REG_R7 r20 +#define REG_R8 r21 -#if darwin_REGS +#if MACHREGS_darwin -#define REG_F1 f14 -#define REG_F2 f15 -#define REG_F3 f16 -#define REG_F4 f17 +#define REG_F1 f14 +#define REG_F2 f15 +#define REG_F3 f16 +#define REG_F4 f17 -#define REG_D1 f18 -#define REG_D2 f19 +#define REG_D1 f18 +#define REG_D2 f19 #else -#define REG_F1 fr14 -#define REG_F2 fr15 -#define REG_F3 fr16 -#define REG_F4 fr17 +#define REG_F1 fr14 +#define REG_F2 fr15 +#define REG_F3 fr16 +#define REG_F4 fr17 -#define REG_D1 fr18 -#define REG_D2 fr19 +#define REG_D1 fr18 +#define REG_D2 fr19 #endif -#define REG_Sp r22 -#define REG_SpLim r24 +#define REG_Sp r22 +#define REG_SpLim r24 -#define REG_Hp r25 +#define REG_Hp r25 #define REG_Base r27 -#endif /* powerpc */ - /* ----------------------------------------------------------------------------- The Sun SPARC register mapping @@ -269,46 +356,46 @@ Threaded World, we essentially ``shut down'' the register-window mechanism---the window doesn't move at all while in this World. It *does* move, of course, if we call out to arbitrary~C... - + The %i, %l, and %o registers (8 each) are the input, local, and output registers visible in one register window. The 8 %g (global) registers are visible all the time. - + zero: always zero scratch: volatile across C-fn calls. used by linker. 
app: usable by application system: reserved for system - + alloc: allocated to in the register allocator, intra-closure only - + GHC usage v8 ABI v9 ABI Global - %g0 zero zero zero - %g1 alloc scratch scrach - %g2 alloc app app - %g3 alloc app app - %g4 alloc app scratch - %g5 system scratch + %g0 zero zero zero + %g1 alloc scratch scratch + %g2 alloc app app + %g3 alloc app app + %g4 alloc app scratch + %g5 system scratch %g6 system system %g7 system system Output: can be zapped by callee - %o0-o5 alloc caller saves + %o0-o5 alloc caller saves %o6 C stack ptr %o7 C ret addr - + Local: maintained by register windowing mechanism - %l0 alloc - %l1 R1 - %l2 R2 - %l3 R3 - %l4 R4 - %l5 R5 - %l6 alloc - %l7 alloc + %l0 alloc + %l1 R1 + %l2 R2 + %l3 R3 + %l4 R4 + %l5 R5 + %l6 alloc + %l7 alloc Input - %i0 Sp + %i0 Sp %i1 Base %i2 SpLim %i3 Hp @@ -316,11 +403,11 @@ %i5 R6 %i6 C frame ptr %i7 C ret addr - + The paired nature of the floating point registers causes complications for the native code generator. For convenience, we pretend that the first 22 fp regs %f0 .. %f21 are actually 11 double regs, and the remaining 10 are - float (single) regs. The NCG acts accordingly. That means that the + float (single) regs. The NCG acts accordingly. That means that the following FP assignment is rather fragile, and should only be changed with extreme care. 
The current scheme is: @@ -349,7 +436,7 @@ -------------------------------------------------------------------------- */ -#if sparc_REGS +#elif MACHREGS_sparc #define REG(x) __asm__("%" #x) @@ -362,47 +449,36 @@ #define CALLER_SAVES_D1 #define CALLER_SAVES_D2 -#define REG_R1 l1 -#define REG_R2 l2 -#define REG_R3 l3 -#define REG_R4 l4 -#define REG_R5 l5 -#define REG_R6 i5 +#define REG_R1 l1 +#define REG_R2 l2 +#define REG_R3 l3 +#define REG_R4 l4 +#define REG_R5 l5 +#define REG_R6 i5 -#define REG_F1 f22 -#define REG_F2 f23 -#define REG_F3 f24 -#define REG_F4 f25 +#define REG_F1 f22 +#define REG_F2 f23 +#define REG_F3 f24 +#define REG_F4 f25 -/* for each of the double arg regs, +/* for each of the double arg regs, Dn_2 is the high half. */ - -#define REG_D1 f2 -#define REG_D1_2 f3 -#define REG_D2 f4 -#define REG_D2_2 f5 +#define REG_D1 f2 +#define REG_D1_2 f3 -#define REG_Sp i0 -#define REG_SpLim i2 +#define REG_D2 f4 +#define REG_D2_2 f5 -#define REG_Hp i3 +#define REG_Sp i0 +#define REG_SpLim i2 -#define REG_Base i1 +#define REG_Hp i3 -/* -#define NCG_SpillTmp_I1 g1 -#define NCG_SpillTmp_I2 g2 -#define NCG_SpillTmp_F1 f26 -#define NCG_SpillTmp_F2 f27 -#define NCG_SpillTmp_D1 f6 -#define NCG_SpillTmp_D2 f8 -*/ +#define REG_Base i1 #define NCG_FirstFloatReg f22 -#endif /* sparc */ - /* ----------------------------------------------------------------------------- The ARM EABI register mapping @@ -438,8 +514,7 @@ d16-d31/q8-q15 Argument / result/ scratch registers ----------------------------------------------------------------------------- */ - -#if arm_REGS +#elif MACHREGS_arm #define REG(x) __asm__(#x) @@ -464,9 +539,74 @@ #define REG_D2 d11 #endif -#endif /* arm */ +/* ----------------------------------------------------------------------------- + The ARMv8/AArch64 ABI register mapping + + The AArch64 provides 31 64-bit general purpose registers + and 32 128-bit SIMD/floating point registers. 
+ + General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B) + + Register | Special | Role in the procedure call standard + ---------+---------+------------------------------------ + SP | | The Stack Pointer + r30 | LR | The Link Register + r29 | FP | The Frame Pointer + r19-r28 | | Callee-saved registers + r18 | | The Platform Register, if needed; + | | or temporary register + r17 | IP1 | The second intra-procedure-call temporary register + r16 | IP0 | The first intra-procedure-call scratch register + r9-r15 | | Temporary registers + r8 | | Indirect result location register + r0-r7 | | Parameter/result registers + + + FPU/SIMD registers + + s/d/q/v0-v7 Argument / result/ scratch registers + s/d/q/v8-v15 callee-saved registers (must be preserved across subroutine calls, + but only bottom 64-bit value needs to be preserved) + s/d/q/v16-v31 temporary registers + + ----------------------------------------------------------------------------- */ -#endif /* NO_REGS */ +#elif MACHREGS_aarch64 + +#define REG(x) __asm__(#x) + +#define REG_Base r19 +#define REG_Sp r20 +#define REG_Hp r21 +#define REG_R1 r22 +#define REG_R2 r23 +#define REG_R3 r24 +#define REG_R4 r25 +#define REG_R5 r26 +#define REG_R6 r27 +#define REG_SpLim r28 + +#define REG_F1 s8 +#define REG_F2 s9 +#define REG_F3 s10 +#define REG_F4 s11 + +#define REG_D1 d12 +#define REG_D2 d13 +#define REG_D3 d14 +#define REG_D4 d15 + +#else + +#error Cannot find platform to give register info for + +#endif + +#else + +#error Bad MACHREGS_NO_REGS value + +#endif /* ----------------------------------------------------------------------------- * These constants define how many stg registers will be used for @@ -541,11 +681,29 @@ # endif #endif +#ifndef MAX_REAL_XMM_REG +# if defined(REG_XMM6) +# define MAX_REAL_XMM_REG 6 +# elif defined(REG_XMM5) +# define MAX_REAL_XMM_REG 5 +# elif defined(REG_XMM4) +# define MAX_REAL_XMM_REG 4 +# elif defined(REG_XMM3) +# define MAX_REAL_XMM_REG 3 +# elif defined(REG_XMM2) +# 
define MAX_REAL_XMM_REG 2 +# elif defined(REG_XMM1) +# define MAX_REAL_XMM_REG 1 +# else +# define MAX_REAL_XMM_REG 0 +# endif +#endif + /* define NO_ARG_REGS if we have no argument registers at all (we can * optimise certain code paths using this predicate). */ #if MAX_REAL_VANILLA_REG < 2 -#define NO_ARG_REGS +#define NO_ARG_REGS #else #undef NO_ARG_REGS #endif