Fix caller/callee register saving on Win64
[ghc.git] / includes / stg / MachRegs.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2011
4 *
5 * Registers used in STG code. Might or might not correspond to
6 * actual machine registers.
7 *
8 * Do not #include this file directly: #include "Rts.h" instead.
9 *
10 * To understand the structure of the RTS headers, see the wiki:
11 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
12 *
13 * ---------------------------------------------------------------------------*/
14
15 #ifndef MACHREGS_H
16 #define MACHREGS_H
17
18 /* This file is #included into Haskell code in the compiler: #defines
19 * only in here please.
20 */
21
22 /*
23 * Defining NO_REGS causes no global registers to be used. NO_REGS is
24 * typically defined by GHC, via a command-line option passed to gcc,
25 * when the -funregisterised flag is given.
26 *
27 * NB. When NO_REGS is on, calling & return conventions may be
28 * different. For example, all function arguments will be passed on
29 * the stack, and components of an unboxed tuple will be returned on
30 * the stack rather than in registers.
31 */
32 #ifndef NO_REGS
33
34 /* NOTE: when testing the platform in this file we must test either
35 * *_HOST_ARCH or *_TARGET_ARCH, depending on whether COMPILING_GHC
36 * is set. This is because when we're compiling the RTS and HC code,
37 * the platform we're running on is the HOST, but when compiling GHC
38 * we want to know about the register mapping on the TARGET platform.
39 */
40 #ifdef COMPILING_GHC /* building the compiler: the *_REGS tests follow the TARGET platform */
41 #define i386_REGS i386_TARGET_ARCH
42 #define x86_64_REGS x86_64_TARGET_ARCH
43 #define powerpc_REGS (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH) /* one test covers all three PowerPC-family arch macros */
44 #define sparc_REGS sparc_TARGET_ARCH
45 #define arm_REGS arm_TARGET_ARCH
46 #define darwin_REGS darwin_TARGET_OS /* an OS test, unlike the architecture tests above */
47 #else /* building the RTS or HC code: the *_REGS tests follow the HOST platform */
48 #define i386_REGS i386_HOST_ARCH
49 #define x86_64_REGS x86_64_HOST_ARCH
50 #define powerpc_REGS (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH) /* one test covers all three PowerPC-family arch macros */
51 #define sparc_REGS sparc_HOST_ARCH
52 #define arm_REGS arm_HOST_ARCH
53 #define darwin_REGS darwin_HOST_OS /* an OS test, unlike the architecture tests above */
54 #endif /* COMPILING_GHC */
55
56 /* ----------------------------------------------------------------------------
57 Caller saves and callee-saves regs.
58
59 Caller-saves regs have to be saved around C-calls made from STG
60 land, so this file defines CALLER_SAVES_<reg> for each <reg> that
61 is designated caller-saves in that machine's C calling convention.
62
63 As it stands, the only registers that are ever marked caller saves
64 are the RX, FX, DX and USER registers; as a result, if you
65 decide to caller save a system register (e.g. SP, HP, etc), note that
66 this code path is completely untested! -- EZY
67 -------------------------------------------------------------------------- */
68
69 /* -----------------------------------------------------------------------------
70 The x86 register mapping
71
72 Ok, we've only got 6 general purpose registers, a frame pointer and a
73 stack pointer. \tr{%eax} and \tr{%edx} are return values from C functions,
74 hence they get trashed across ccalls and are caller saves. \tr{%ebx},
75 \tr{%esi}, \tr{%edi}, \tr{%ebp} are all callee-saves.
76
77 Reg STG-Reg
78 ---------------
79 ebx Base
80 ebp Sp
81 esi R1
82 edi Hp
83
84 Leaving SpLim out of the picture.
85 -------------------------------------------------------------------------- */
86
87 #if i386_REGS /* 32-bit x86: this section maps only Base, Sp, R1 and Hp to machine registers */
88
89 #define REG(x) __asm__("%" #x) /* builds the asm register name: "%" followed by the stringised x */
90
91 #ifndef not_doing_dynamic_linking /* Base is given %ebx only while this macro is left undefined */
92 #define REG_Base ebx
93 #endif
94 #define REG_Sp ebp
95
96 #ifndef STOLEN_X86_REGS /* defaults to 4 unless a value was already supplied */
97 #define STOLEN_X86_REGS 4
98 #endif
99
100 #if STOLEN_X86_REGS >= 3 /* R1 is given %esi only when at least 3 registers are stolen */
101 # define REG_R1 esi
102 #endif
103
104 #if STOLEN_X86_REGS >= 4 /* Hp is given %edi only when at least 4 registers are stolen */
105 # define REG_Hp edi
106 #endif
107
108 #define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */
109 #define MAX_REAL_FLOAT_REG 0 /* this section defines no REG_Fn, REG_Dn or REG_Ln, hence the three zeros */
110 #define MAX_REAL_DOUBLE_REG 0
111 #define MAX_REAL_LONG_REG 0
112
113 #endif /* iX86 */
114
115 /* -----------------------------------------------------------------------------
116 The x86-64 register mapping
117
118 %rax caller-saves, don't steal this one
119 %rbx YES
120 %rcx arg reg, caller-saves
121 %rdx arg reg, caller-saves
122 %rsi arg reg, caller-saves
123 %rdi arg reg, caller-saves
124 %rbp YES (our *prime* register)
125 %rsp (unavailable - stack pointer)
126 %r8 arg reg, caller-saves
127 %r9 arg reg, caller-saves
128 %r10 caller-saves
129 %r11 caller-saves
130 %r12 YES
131 %r13 YES
132 %r14 YES
133 %r15 YES
134
135 %xmm0-7 arg regs, caller-saves
136 %xmm8-15 caller-saves
137
138 Use the caller-saves regs for Rn, because we don't always have to
139 save those (as opposed to Sp/Hp/SpLim etc. which always have to be
140 saved).
141
142 --------------------------------------------------------------------------- */
143
144 #if x86_64_REGS /* x86-64: Base, Sp, Hp, SpLim, R1-R6, F1-F4 and D1-D2 all get machine registers */
145
146 #define REG(x) __asm__("%" #x) /* builds the asm register name: "%" followed by the stringised x */
147
148 #define REG_Base r13
149 #define REG_Sp rbp
150 #define REG_Hp r12
151 #define REG_R1 rbx
152 #define REG_R2 r14
153 #define REG_R3 rsi
154 #define REG_R4 rdi
155 #define REG_R5 r8
156 #define REG_R6 r9
157 #define REG_SpLim r15
158
159 #define REG_F1 xmm1
160 #define REG_F2 xmm2
161 #define REG_F3 xmm3
162 #define REG_F4 xmm4
163
164 #define REG_D1 xmm5
165 #define REG_D2 xmm6
166
167 #if !defined(mingw32_HOST_OS) /* R3/R4 (rsi/rdi) are NOT marked caller-saves on mingw32 (Win64); presumably callee-saves in that C convention -- confirm */
168 #define CALLER_SAVES_R3
169 #define CALLER_SAVES_R4
170 #endif /* NOTE(review): mingw32_HOST_OS is tested even under COMPILING_GHC, unlike the *_REGS macros -- confirm this is intended when cross-compiling */
171 #define CALLER_SAVES_R5 /* R1/R2 (rbx/r14) get no CALLER_SAVES_ marker on any platform */
172 #define CALLER_SAVES_R6
173
174 #define CALLER_SAVES_F1
175 #define CALLER_SAVES_F2
176 #define CALLER_SAVES_F3
177 #define CALLER_SAVES_F4
178
179 #define CALLER_SAVES_D1
180 #if !defined(mingw32_HOST_OS) /* likewise D2 (xmm6) is NOT marked caller-saves on mingw32 (Win64) */
181 #define CALLER_SAVES_D2
182 #endif
183
184 #define MAX_REAL_VANILLA_REG 6 /* explicit values; they equal the highest REG_Rn/Fn/Dn defined above */
185 #define MAX_REAL_FLOAT_REG 4
186 #define MAX_REAL_DOUBLE_REG 2
187 #define MAX_REAL_LONG_REG 0 /* no REG_Ln is defined in this section */
188
189 #endif /* x86_64 */
190
191 /* -----------------------------------------------------------------------------
192 The PowerPC register mapping
193
194 0 system glue? (caller-save, volatile)
195 1 SP (callee-save, non-volatile)
196 2 AIX, powerpc64-linux:
197 RTOC (a strange special case)
198 darwin:
199 (caller-save, volatile)
200 powerpc32-linux:
201 reserved for use by system
202
203 3-10 args/return (caller-save, volatile)
204 11,12 system glue? (caller-save, volatile)
205 13 on 64-bit: reserved for thread state pointer
206 on 32-bit: (callee-save, non-volatile)
207 14-31 (callee-save, non-volatile)
208
209 f0 (caller-save, volatile)
210 f1-f13 args/return (caller-save, volatile)
211 f14-f31 (callee-save, non-volatile)
212
213 \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes.
214 \tr{0}--\tr{12} are caller-save registers.
215
216 \tr{%f14}--\tr{%f31} are callee-save floating-point registers.
217
218 We can do the Whole Business with callee-save registers only!
219 -------------------------------------------------------------------------- */
220
221 #if powerpc_REGS /* PowerPC family: no CALLER_SAVES_* markers are defined anywhere in this section */
222
223 #define REG(x) __asm__(#x) /* builds the asm register name: the stringised x, with no "%" prefix */
224
225 #define REG_R1 r14
226 #define REG_R2 r15
227 #define REG_R3 r16
228 #define REG_R4 r17
229 #define REG_R5 r18
230 #define REG_R6 r19
231 #define REG_R7 r20
232 #define REG_R8 r21
233
234 #if darwin_REGS /* Darwin spells the FP registers fN; the other branch uses frN with the same numbers */
235
236 #define REG_F1 f14
237 #define REG_F2 f15
238 #define REG_F3 f16
239 #define REG_F4 f17
240
241 #define REG_D1 f18
242 #define REG_D2 f19
243
244 #else /* not Darwin */
245
246 #define REG_F1 fr14
247 #define REG_F2 fr15
248 #define REG_F3 fr16
249 #define REG_F4 fr17
250
251 #define REG_D1 fr18
252 #define REG_D2 fr19
253
254 #endif /* darwin_REGS */
255
256 #define REG_Sp r22
257 #define REG_SpLim r24
258
259 #define REG_Hp r25
260
261 #define REG_Base r27
262
263 #endif /* powerpc */
264
265 /* -----------------------------------------------------------------------------
266 The Sun SPARC register mapping
267
268 !! IMPORTANT: if you change this register mapping you must also update
269 compiler/nativeGen/SPARC/Regs.hs. That file handles the
270 mapping for the NCG. This one only affects via-c code.
271
272 The SPARC register (window) story: Remember, within the Haskell
273 Threaded World, we essentially ``shut down'' the register-window
274 mechanism---the window doesn't move at all while in this World. It
275 *does* move, of course, if we call out to arbitrary~C...
276
277 The %i, %l, and %o registers (8 each) are the input, local, and
278 output registers visible in one register window. The 8 %g (global)
279 registers are visible all the time.
280
281 zero: always zero
282 scratch: volatile across C-fn calls. used by linker.
283 app: usable by application
284 system: reserved for system
285
286 alloc: allocated to in the register allocator, intra-closure only
287
288 GHC usage v8 ABI v9 ABI
289 Global
290 %g0 zero zero zero
291 %g1 alloc scratch scratch
292 %g2 alloc app app
293 %g3 alloc app app
294 %g4 alloc app scratch
295 %g5 system scratch
296 %g6 system system
297 %g7 system system
298
299 Output: can be zapped by callee
300 %o0-o5 alloc caller saves
301 %o6 C stack ptr
302 %o7 C ret addr
303
304 Local: maintained by register windowing mechanism
305 %l0 alloc
306 %l1 R1
307 %l2 R2
308 %l3 R3
309 %l4 R4
310 %l5 R5
311 %l6 alloc
312 %l7 alloc
313
314 Input
315 %i0 Sp
316 %i1 Base
317 %i2 SpLim
318 %i3 Hp
319 %i4 alloc
320 %i5 R6
321 %i6 C frame ptr
322 %i7 C ret addr
323
324 The paired nature of the floating point registers causes complications for
325 the native code generator. For convenience, we pretend that the first 22
326 fp regs %f0 .. %f21 are actually 11 double regs, and the remaining 10 are
327 float (single) regs. The NCG acts accordingly. That means that the
328 following FP assignment is rather fragile, and should only be changed
329 with extreme care. The current scheme is:
330
331 %f0 /%f1 FP return from C
332 %f2 /%f3 D1
333 %f4 /%f5 D2
334 %f6 /%f7 ncg double spill tmp #1
335 %f8 /%f9 ncg double spill tmp #2
336 %f10/%f11 allocatable
337 %f12/%f13 allocatable
338 %f14/%f15 allocatable
339 %f16/%f17 allocatable
340 %f18/%f19 allocatable
341 %f20/%f21 allocatable
342
343 %f22 F1
344 %f23 F2
345 %f24 F3
346 %f25 F4
347 %f26 ncg single spill tmp #1
348 %f27 ncg single spill tmp #2
349 %f28 allocatable
350 %f29 allocatable
351 %f30 allocatable
352 %f31 allocatable
353
354 -------------------------------------------------------------------------- */
355
356 #if sparc_REGS /* SPARC: mapping used by via-C code; keep in step with the table in the comment above */
357
358 #define REG(x) __asm__("%" #x) /* builds the asm register name: "%" followed by the stringised x */
359
360 #define CALLER_SAVES_USER /* the USER registers and every F/D register below are marked caller-saves */
361
362 #define CALLER_SAVES_F1
363 #define CALLER_SAVES_F2
364 #define CALLER_SAVES_F3
365 #define CALLER_SAVES_F4
366 #define CALLER_SAVES_D1
367 #define CALLER_SAVES_D2
368
369 #define REG_R1 l1
370 #define REG_R2 l2
371 #define REG_R3 l3
372 #define REG_R4 l4
373 #define REG_R5 l5
374 #define REG_R6 i5 /* R6 is the only Rn held in an input register rather than a local one */
375
376 #define REG_F1 f22
377 #define REG_F2 f23
378 #define REG_F3 f24
379 #define REG_F4 f25
380
381 /* for each of the double arg regs,
382 Dn_2 is the high half. */
383
384 #define REG_D1 f2
385 #define REG_D1_2 f3
386
387 #define REG_D2 f4
388 #define REG_D2_2 f5
389
390 #define REG_Sp i0
391 #define REG_SpLim i2
392
393 #define REG_Hp i3
394
395 #define REG_Base i1
396
397 /*
398 #define NCG_SpillTmp_I1 g1
399 #define NCG_SpillTmp_I2 g2
400 #define NCG_SpillTmp_F1 f26
401 #define NCG_SpillTmp_F2 f27
402 #define NCG_SpillTmp_D1 f6
403 #define NCG_SpillTmp_D2 f8
404 */
405
406 #define NCG_FirstFloatReg f22 /* same register as REG_F1 */
407
408 #endif /* sparc */
409
410 /* -----------------------------------------------------------------------------
411 The ARM EABI register mapping
412
413 Here we consider ARM mode (i.e. 32-bit instructions)
414 and also CPU with full VFPv3 implementation
415
416 ARM registers (see Chapter 5.1 in ARM IHI 0042D)
417
418 r15 PC The Program Counter.
419 r14 LR The Link Register.
420 r13 SP The Stack Pointer.
421 r12 IP The Intra-Procedure-call scratch register.
422 r11 v8 Variable-register 8.
423 r10 v7 Variable-register 7.
424 r9 v6/SB/TR Platform register. The meaning of this register is
425 defined by the platform standard.
426 r8 v5 Variable-register 5.
427 r7 v4 Variable register 4.
428 r6 v3 Variable register 3.
429 r5 v2 Variable register 2.
430 r4 v1 Variable register 1.
431 r3 a4 Argument / scratch register 4.
432 r2 a3 Argument / scratch register 3.
433 r1 a2 Argument / result / scratch register 2.
434 r0 a1 Argument / result / scratch register 1.
435
436 VFPv2/VFPv3/NEON registers
437 s0-s15/d0-d7/q0-q3 Argument / result / scratch registers
438 s16-s31/d8-d15/q4-q7 callee-saved registers (must be preserved across
439 subroutine calls)
440
441 VFPv3/NEON registers (added to the VFPv2 registers set)
442 d16-d31/q8-q15 Argument / result / scratch registers
443 ----------------------------------------------------------------------------- */
444
445
446 #if arm_REGS /* ARM: STG registers occupy r4-r11; no CALLER_SAVES_* markers are defined in this section */
447
448 #define REG(x) __asm__(#x) /* builds the asm register name: the stringised x, with no "%" prefix */
449
450 #define REG_Base r4
451 #define REG_Sp r5
452 #define REG_Hp r6
453 #define REG_R1 r7
454 #define REG_R2 r8
455 #define REG_R3 r9
456 #define REG_R4 r10
457 #define REG_SpLim r11
458
459 #if !defined(arm_HOST_ARCH_PRE_ARMv6) /* F/D registers are mapped only when this macro is undefined; NOTE(review): a HOST macro is tested even under COMPILING_GHC -- confirm */
460 /* d8 */
461 #define REG_F1 s16
462 #define REG_F2 s17
463 /* d9 */
464 #define REG_F3 s18
465 #define REG_F4 s19
466
467 #define REG_D1 d10
468 #define REG_D2 d11
469 #endif /* !arm_HOST_ARCH_PRE_ARMv6 */
470
471 #endif /* arm */
472
473 #endif /* NO_REGS */
474
475 /* -----------------------------------------------------------------------------
476 * These constants define how many stg registers will be used for
477 * passing arguments (and results, in the case of an unboxed-tuple
478 * return).
479 *
480 * We usually set MAX_REAL_VANILLA_REG and co. to be the number of the
481 * highest STG register to occupy a real machine register, otherwise
482 * the calling conventions will needlessly shuffle data between the
483 * stack and memory-resident STG registers. We might occasionally
484 * set these macros to other values for testing, though.
485 *
486 * Registers above these values might still be used, for instance to
487 * communicate with PrimOps and RTS functions.
488 */
489
490 #ifndef MAX_REAL_VANILLA_REG /* fallback when no arch section set it: the first REG_Rn found defined, testing R10 down to R1, else 0 */
491 # if defined(REG_R10)
492 # define MAX_REAL_VANILLA_REG 10
493 # elif defined(REG_R9)
494 # define MAX_REAL_VANILLA_REG 9
495 # elif defined(REG_R8)
496 # define MAX_REAL_VANILLA_REG 8
497 # elif defined(REG_R7)
498 # define MAX_REAL_VANILLA_REG 7
499 # elif defined(REG_R6)
500 # define MAX_REAL_VANILLA_REG 6
501 # elif defined(REG_R5)
502 # define MAX_REAL_VANILLA_REG 5
503 # elif defined(REG_R4)
504 # define MAX_REAL_VANILLA_REG 4
505 # elif defined(REG_R3)
506 # define MAX_REAL_VANILLA_REG 3
507 # elif defined(REG_R2)
508 # define MAX_REAL_VANILLA_REG 2
509 # elif defined(REG_R1)
510 # define MAX_REAL_VANILLA_REG 1
511 # else /* no REG_Rn defined at all, e.g. when NO_REGS skipped every mapping above */
512 # define MAX_REAL_VANILLA_REG 0
513 # endif
514 #endif /* MAX_REAL_VANILLA_REG */
515
516 #ifndef MAX_REAL_FLOAT_REG /* fallback when no arch section set it: the first REG_Fn found defined, testing F4 down to F1, else 0 */
517 # if defined(REG_F4)
518 # define MAX_REAL_FLOAT_REG 4
519 # elif defined(REG_F3)
520 # define MAX_REAL_FLOAT_REG 3
521 # elif defined(REG_F2)
522 # define MAX_REAL_FLOAT_REG 2
523 # elif defined(REG_F1)
524 # define MAX_REAL_FLOAT_REG 1
525 # else /* no REG_Fn defined */
526 # define MAX_REAL_FLOAT_REG 0
527 # endif
528 #endif /* MAX_REAL_FLOAT_REG */
529
530 #ifndef MAX_REAL_DOUBLE_REG /* fallback when no arch section set it: 2 if REG_D2 is defined, else 1 if REG_D1 is, else 0 */
531 # if defined(REG_D2)
532 # define MAX_REAL_DOUBLE_REG 2
533 # elif defined(REG_D1)
534 # define MAX_REAL_DOUBLE_REG 1
535 # else /* no REG_Dn defined */
536 # define MAX_REAL_DOUBLE_REG 0
537 # endif
538 #endif /* MAX_REAL_DOUBLE_REG */
539
540 #ifndef MAX_REAL_LONG_REG /* fallback when no arch section set it: 1 if REG_L1 is defined, else 0 */
541 # if defined(REG_L1)
542 # define MAX_REAL_LONG_REG 1
543 # else /* none of the mappings visible in this file defines REG_L1 */
544 # define MAX_REAL_LONG_REG 0
545 # endif
546 #endif /* MAX_REAL_LONG_REG */
547
548 /* define NO_ARG_REGS if we have no argument registers at all (we can
549 * optimise certain code paths using this predicate).
550 */
551 #if MAX_REAL_VANILLA_REG < 2 /* i.e. at most R1 is in a real machine register */
552 #define NO_ARG_REGS
553 #else /* R2 or higher is in a real register: make sure the flag is not left set */
554 #undef NO_ARG_REGS
555 #endif
556
557 #endif /* MACHREGS_H */