PPC NCG: Use liveness information in CmmCall
[ghc.git] / includes / stg / MachRegs.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2014
4 *
5 * Registers used in STG code. Might or might not correspond to
6 * actual machine registers.
7 *
8 * Do not #include this file directly: #include "Rts.h" instead.
9 *
10 * To understand the structure of the RTS headers, see the wiki:
11 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
12 *
13 * ---------------------------------------------------------------------------*/
14
15 #pragma once
16
17 /* This file is #included into Haskell code in the compiler: #defines
18 * only in here please.
19 */
20
21 /*
22 * Undefine these as a precaution: some of them were found to be
23 * defined by system headers on ARM/Linux.
24 */
25 #undef REG_R1
26 #undef REG_R2
27 #undef REG_R3
28 #undef REG_R4
29 #undef REG_R5
30 #undef REG_R6
31 #undef REG_R7
32 #undef REG_R8
33 #undef REG_R9
34 #undef REG_R10
35
36 /*
37 * Defining MACHREGS_NO_REGS to 1 causes no global registers to be used.
38 * MACHREGS_NO_REGS is typically controlled by NO_REGS, which is
39 * typically defined by GHC, via a command-line option passed to gcc,
40 * when the -funregisterised flag is given.
41 *
42 * NB. When MACHREGS_NO_REGS to 1, calling & return conventions may be
43 * different. For example, all function arguments will be passed on
44 * the stack, and components of an unboxed tuple will be returned on
45 * the stack rather than in registers.
46 */
47 #if MACHREGS_NO_REGS == 1
48
49 /* Nothing */
50
51 #elif MACHREGS_NO_REGS == 0
52
53 /* ----------------------------------------------------------------------------
54 Caller saves and callee-saves regs.
55
56 Caller-saves regs have to be saved around C-calls made from STG
57 land, so this file defines CALLER_SAVES_<reg> for each <reg> that
58 is designated caller-saves in that machine's C calling convention.
59
60 As it stands, the only registers that are ever marked caller saves
61 are the RX, FX, DX and USER registers; as a result, if you
62 decide to caller save a system register (e.g. SP, HP, etc), note that
63 this code path is completely untested! -- EZY
64 -------------------------------------------------------------------------- */
65
66 /* -----------------------------------------------------------------------------
67 The x86 register mapping
68
69 Ok, we've only got 6 general purpose registers, a frame pointer and a
70 stack pointer. \tr{%eax} and \tr{%edx} are return values from C functions,
71 hence they get trashed across ccalls and are caller saves. \tr{%ebx},
72 \tr{%esi}, \tr{%edi}, \tr{%ebp} are all callee-saves.
73
74 Reg STG-Reg
75 ---------------
76 ebx Base
77 ebp Sp
78 esi R1
79 edi Hp
80
81 Leaving SpLim out of the picture.
82 -------------------------------------------------------------------------- */
83
84 #if defined(MACHREGS_i386)
85
86 #define REG(x) __asm__("%" #x)
87
88 #if !defined(not_doing_dynamic_linking)
89 #define REG_Base ebx
90 #endif
91 #define REG_Sp ebp
92
93 #if !defined(STOLEN_X86_REGS)
94 #define STOLEN_X86_REGS 4
95 #endif
96
97 #if STOLEN_X86_REGS >= 3
98 # define REG_R1 esi
99 #endif
100
101 #if STOLEN_X86_REGS >= 4
102 # define REG_Hp edi
103 #endif
104 #define REG_MachSp esp
105
106 #define REG_XMM1 xmm0
107 #define REG_XMM2 xmm1
108 #define REG_XMM3 xmm2
109 #define REG_XMM4 xmm3
110
111 #define REG_YMM1 ymm0
112 #define REG_YMM2 ymm1
113 #define REG_YMM3 ymm2
114 #define REG_YMM4 ymm3
115
116 #define REG_ZMM1 zmm0
117 #define REG_ZMM2 zmm1
118 #define REG_ZMM3 zmm2
119 #define REG_ZMM4 zmm3
120
121 #define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */
122 #define MAX_REAL_FLOAT_REG 0
123 #define MAX_REAL_DOUBLE_REG 0
124 #define MAX_REAL_LONG_REG 0
125 #define MAX_REAL_XMM_REG 4
126 #define MAX_REAL_YMM_REG 4
127 #define MAX_REAL_ZMM_REG 4
128
129 /* -----------------------------------------------------------------------------
130 The x86-64 register mapping
131
132 %rax caller-saves, don't steal this one
133 %rbx YES
134 %rcx arg reg, caller-saves
135 %rdx arg reg, caller-saves
136 %rsi arg reg, caller-saves
137 %rdi arg reg, caller-saves
138 %rbp YES (our *prime* register)
139 %rsp (unavailable - stack pointer)
140 %r8 arg reg, caller-saves
141 %r9 arg reg, caller-saves
142 %r10 caller-saves
143 %r11 caller-saves
144 %r12 YES
145 %r13 YES
146 %r14 YES
147 %r15 YES
148
149 %xmm0-7 arg regs, caller-saves
150 %xmm8-15 caller-saves
151
152 Use the caller-saves regs for Rn, because we don't always have to
153 save those (as opposed to Sp/Hp/SpLim etc. which always have to be
154 saved).
155
156 --------------------------------------------------------------------------- */
157
158 #elif defined(MACHREGS_x86_64)
159
160 #define REG(x) __asm__("%" #x)
161
162 #define REG_Base r13
163 #define REG_Sp rbp
164 #define REG_Hp r12
165 #define REG_R1 rbx
166 #define REG_R2 r14
167 #define REG_R3 rsi
168 #define REG_R4 rdi
169 #define REG_R5 r8
170 #define REG_R6 r9
171 #define REG_SpLim r15
172 #define REG_MachSp rsp
173
174 /*
175 Map both Fn and Dn to register xmmn so that we can pass a function any
176 combination of up to six Float# or Double# arguments without touching
177 the stack. See Note [Overlapping global registers] for implications.
178 */
179
180 #define REG_F1 xmm1
181 #define REG_F2 xmm2
182 #define REG_F3 xmm3
183 #define REG_F4 xmm4
184 #define REG_F5 xmm5
185 #define REG_F6 xmm6
186
187 #define REG_D1 xmm1
188 #define REG_D2 xmm2
189 #define REG_D3 xmm3
190 #define REG_D4 xmm4
191 #define REG_D5 xmm5
192 #define REG_D6 xmm6
193
194 #define REG_XMM1 xmm1
195 #define REG_XMM2 xmm2
196 #define REG_XMM3 xmm3
197 #define REG_XMM4 xmm4
198 #define REG_XMM5 xmm5
199 #define REG_XMM6 xmm6
200
201 #define REG_YMM1 ymm1
202 #define REG_YMM2 ymm2
203 #define REG_YMM3 ymm3
204 #define REG_YMM4 ymm4
205 #define REG_YMM5 ymm5
206 #define REG_YMM6 ymm6
207
208 #define REG_ZMM1 zmm1
209 #define REG_ZMM2 zmm2
210 #define REG_ZMM3 zmm3
211 #define REG_ZMM4 zmm4
212 #define REG_ZMM5 zmm5
213 #define REG_ZMM6 zmm6
214
215 #if !defined(mingw32_HOST_OS)
216 #define CALLER_SAVES_R3
217 #define CALLER_SAVES_R4
218 #endif
219 #define CALLER_SAVES_R5
220 #define CALLER_SAVES_R6
221
222 #define CALLER_SAVES_F1
223 #define CALLER_SAVES_F2
224 #define CALLER_SAVES_F3
225 #define CALLER_SAVES_F4
226 #define CALLER_SAVES_F5
227 #if !defined(mingw32_HOST_OS)
228 #define CALLER_SAVES_F6
229 #endif
230
231 #define CALLER_SAVES_D1
232 #define CALLER_SAVES_D2
233 #define CALLER_SAVES_D3
234 #define CALLER_SAVES_D4
235 #define CALLER_SAVES_D5
236 #if !defined(mingw32_HOST_OS)
237 #define CALLER_SAVES_D6
238 #endif
239
240 #define CALLER_SAVES_XMM1
241 #define CALLER_SAVES_XMM2
242 #define CALLER_SAVES_XMM3
243 #define CALLER_SAVES_XMM4
244 #define CALLER_SAVES_XMM5
245 #if !defined(mingw32_HOST_OS)
246 #define CALLER_SAVES_XMM6
247 #endif
248
249 #define CALLER_SAVES_YMM1
250 #define CALLER_SAVES_YMM2
251 #define CALLER_SAVES_YMM3
252 #define CALLER_SAVES_YMM4
253 #define CALLER_SAVES_YMM5
254 #if !defined(mingw32_HOST_OS)
255 #define CALLER_SAVES_YMM6
256 #endif
257
258 #define CALLER_SAVES_ZMM1
259 #define CALLER_SAVES_ZMM2
260 #define CALLER_SAVES_ZMM3
261 #define CALLER_SAVES_ZMM4
262 #define CALLER_SAVES_ZMM5
263 #if !defined(mingw32_HOST_OS)
264 #define CALLER_SAVES_ZMM6
265 #endif
266
267 #define MAX_REAL_VANILLA_REG 6
268 #define MAX_REAL_FLOAT_REG 6
269 #define MAX_REAL_DOUBLE_REG 6
270 #define MAX_REAL_LONG_REG 0
271 #define MAX_REAL_XMM_REG 6
272 #define MAX_REAL_YMM_REG 6
273 #define MAX_REAL_ZMM_REG 6
274
275 /* -----------------------------------------------------------------------------
276 The PowerPC register mapping
277
278 0 system glue? (caller-save, volatile)
279 1 SP (callee-save, non-volatile)
280 2 AIX, powerpc64-linux:
281 RTOC (a strange special case)
282 powerpc32-linux:
283 reserved for use by system
284
285 3-10 args/return (caller-save, volatile)
286 11,12 system glue? (caller-save, volatile)
287 13 on 64-bit: reserved for thread state pointer
288 on 32-bit: (callee-save, non-volatile)
289 14-31 (callee-save, non-volatile)
290
291 f0 (caller-save, volatile)
292 f1-f13 args/return (caller-save, volatile)
293 f14-f31 (callee-save, non-volatile)
294
295 \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes.
296 \tr{0}--\tr{12} are caller-save registers.
297
298 \tr{%f14}--\tr{%f31} are callee-save floating-point registers.
299
300 We can do the Whole Business with callee-save registers only!
301 -------------------------------------------------------------------------- */
302
303 #elif defined(MACHREGS_powerpc)
304
305 #define REG(x) __asm__(#x)
306
307 #define REG_R1 r14
308 #define REG_R2 r15
309 #define REG_R3 r16
310 #define REG_R4 r17
311 #define REG_R5 r18
312 #define REG_R6 r19
313 #define REG_R7 r20
314 #define REG_R8 r21
315 #define REG_R9 r22
316 #define REG_R10 r23
317
318 #define REG_F1 fr14
319 #define REG_F2 fr15
320 #define REG_F3 fr16
321 #define REG_F4 fr17
322 #define REG_F5 fr18
323 #define REG_F6 fr19
324
325 #define REG_D1 fr20
326 #define REG_D2 fr21
327 #define REG_D3 fr22
328 #define REG_D4 fr23
329 #define REG_D5 fr24
330 #define REG_D6 fr25
331
332 #define REG_Sp r24
333 #define REG_SpLim r25
334 #define REG_Hp r26
335 #define REG_Base r27
336
337 #define MAX_REAL_FLOAT_REG 6
338 #define MAX_REAL_DOUBLE_REG 6
339
340 /* -----------------------------------------------------------------------------
341 The Sun SPARC register mapping
342
343 !! IMPORTANT: if you change this register mapping you must also update
344 compiler/nativeGen/SPARC/Regs.hs. That file handles the
345 mapping for the NCG. This one only affects via-c code.
346
347 The SPARC register (window) story: Remember, within the Haskell
348 Threaded World, we essentially ``shut down'' the register-window
349 mechanism---the window doesn't move at all while in this World. It
350 *does* move, of course, if we call out to arbitrary~C...
351
352 The %i, %l, and %o registers (8 each) are the input, local, and
353 output registers visible in one register window. The 8 %g (global)
354 registers are visible all the time.
355
356 zero: always zero
357 scratch: volatile across C-fn calls. used by linker.
358 app: usable by application
359 system: reserved for system
360
361 alloc: allocated to in the register allocator, intra-closure only
362
363 GHC usage v8 ABI v9 ABI
364 Global
365 %g0 zero zero zero
366 %g1 alloc scratch scrach
367 %g2 alloc app app
368 %g3 alloc app app
369 %g4 alloc app scratch
370 %g5 system scratch
371 %g6 system system
372 %g7 system system
373
374 Output: can be zapped by callee
375 %o0-o5 alloc caller saves
376 %o6 C stack ptr
377 %o7 C ret addr
378
379 Local: maintained by register windowing mechanism
380 %l0 alloc
381 %l1 R1
382 %l2 R2
383 %l3 R3
384 %l4 R4
385 %l5 R5
386 %l6 alloc
387 %l7 alloc
388
389 Input
390 %i0 Sp
391 %i1 Base
392 %i2 SpLim
393 %i3 Hp
394 %i4 alloc
395 %i5 R6
396 %i6 C frame ptr
397 %i7 C ret addr
398
399 The paired nature of the floating point registers causes complications for
400 the native code generator. For convenience, we pretend that the first 22
401 fp regs %f0 .. %f21 are actually 11 double regs, and the remaining 10 are
402 float (single) regs. The NCG acts accordingly. That means that the
403 following FP assignment is rather fragile, and should only be changed
404 with extreme care. The current scheme is:
405
406 %f0 /%f1 FP return from C
407 %f2 /%f3 D1
408 %f4 /%f5 D2
409 %f6 /%f7 ncg double spill tmp #1
410 %f8 /%f9 ncg double spill tmp #2
411 %f10/%f11 allocatable
412 %f12/%f13 allocatable
413 %f14/%f15 allocatable
414 %f16/%f17 allocatable
415 %f18/%f19 allocatable
416 %f20/%f21 allocatable
417
418 %f22 F1
419 %f23 F2
420 %f24 F3
421 %f25 F4
422 %f26 ncg single spill tmp #1
423 %f27 ncg single spill tmp #2
424 %f28 allocatable
425 %f29 allocatable
426 %f30 allocatable
427 %f31 allocatable
428
429 -------------------------------------------------------------------------- */
430
431 #elif defined(MACHREGS_sparc)
432
433 #define REG(x) __asm__("%" #x)
434
435 #define CALLER_SAVES_USER
436
437 #define CALLER_SAVES_F1
438 #define CALLER_SAVES_F2
439 #define CALLER_SAVES_F3
440 #define CALLER_SAVES_F4
441 #define CALLER_SAVES_D1
442 #define CALLER_SAVES_D2
443
444 #define REG_R1 l1
445 #define REG_R2 l2
446 #define REG_R3 l3
447 #define REG_R4 l4
448 #define REG_R5 l5
449 #define REG_R6 i5
450
451 #define REG_F1 f22
452 #define REG_F2 f23
453 #define REG_F3 f24
454 #define REG_F4 f25
455
456 /* for each of the double arg regs,
457 Dn_2 is the high half. */
458
459 #define REG_D1 f2
460 #define REG_D1_2 f3
461
462 #define REG_D2 f4
463 #define REG_D2_2 f5
464
465 #define REG_Sp i0
466 #define REG_SpLim i2
467
468 #define REG_Hp i3
469
470 #define REG_Base i1
471
472 #define NCG_FirstFloatReg f22
473
474 /* -----------------------------------------------------------------------------
475 The ARM EABI register mapping
476
477 Here we consider ARM mode (i.e. 32bit isns)
478 and also CPU with full VFPv3 implementation
479
480 ARM registers (see Chapter 5.1 in ARM IHI 0042D and
481 Section 9.2.2 in ARM Software Development Toolkit Reference Guide)
482
483 r15 PC The Program Counter.
484 r14 LR The Link Register.
485 r13 SP The Stack Pointer.
486 r12 IP The Intra-Procedure-call scratch register.
487 r11 v8/fp Variable-register 8.
488 r10 v7/sl Variable-register 7.
489 r9 v6/SB/TR Platform register. The meaning of this register is
490 defined by the platform standard.
491 r8 v5 Variable-register 5.
492 r7 v4 Variable register 4.
493 r6 v3 Variable register 3.
494 r5 v2 Variable register 2.
495 r4 v1 Variable register 1.
496 r3 a4 Argument / scratch register 4.
497 r2 a3 Argument / scratch register 3.
498 r1 a2 Argument / result / scratch register 2.
499 r0 a1 Argument / result / scratch register 1.
500
501 VFPv2/VFPv3/NEON registers
502 s0-s15/d0-d7/q0-q3 Argument / result/ scratch registers
503 s16-s31/d8-d15/q4-q7 callee-saved registers (must be preserved across
504 subroutine calls)
505
506 VFPv3/NEON registers (added to the VFPv2 registers set)
507 d16-d31/q8-q15 Argument / result/ scratch registers
508 ----------------------------------------------------------------------------- */
509
510 #elif defined(MACHREGS_arm)
511
512 #define REG(x) __asm__(#x)
513
514 #define REG_Base r4
515 #define REG_Sp r5
516 #define REG_Hp r6
517 #define REG_R1 r7
518 #define REG_R2 r8
519 #define REG_R3 r9
520 #define REG_R4 r10
521 #define REG_SpLim r11
522
523 #if !defined(arm_HOST_ARCH_PRE_ARMv6)
524 /* d8 */
525 #define REG_F1 s16
526 #define REG_F2 s17
527 /* d9 */
528 #define REG_F3 s18
529 #define REG_F4 s19
530
531 #define REG_D1 d10
532 #define REG_D2 d11
533 #endif
534
535 /* -----------------------------------------------------------------------------
536 The ARMv8/AArch64 ABI register mapping
537
538 The AArch64 provides 31 64-bit general purpose registers
539 and 32 128-bit SIMD/floating point registers.
540
541 General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B)
542
543 Register | Special | Role in the procedure call standard
544 ---------+---------+------------------------------------
545 SP | | The Stack Pointer
546 r30 | LR | The Link Register
547 r29 | FP | The Frame Pointer
548 r19-r28 | | Callee-saved registers
549 r18 | | The Platform Register, if needed;
550 | | or temporary register
551 r17 | IP1 | The second intra-procedure-call temporary register
552 r16 | IP0 | The first intra-procedure-call scratch register
553 r9-r15 | | Temporary registers
554 r8 | | Indirect result location register
555 r0-r7 | | Parameter/result registers
556
557
558 FPU/SIMD registers
559
560 s/d/q/v0-v7 Argument / result/ scratch registers
561 s/d/q/v8-v15 callee-saved registers (must be preserved across subroutine calls,
562 but only bottom 64-bit value needs to be preserved)
563 s/d/q/v16-v31 temporary registers
564
565 ----------------------------------------------------------------------------- */
566
567 #elif defined(MACHREGS_aarch64)
568
569 #define REG(x) __asm__(#x)
570
571 #define REG_Base r19
572 #define REG_Sp r20
573 #define REG_Hp r21
574 #define REG_R1 r22
575 #define REG_R2 r23
576 #define REG_R3 r24
577 #define REG_R4 r25
578 #define REG_R5 r26
579 #define REG_R6 r27
580 #define REG_SpLim r28
581
582 #define REG_F1 s8
583 #define REG_F2 s9
584 #define REG_F3 s10
585 #define REG_F4 s11
586
587 #define REG_D1 d12
588 #define REG_D2 d13
589 #define REG_D3 d14
590 #define REG_D4 d15
591
592 #else
593
594 #error Cannot find platform to give register info for
595
596 #endif
597
598 #else
599
600 #error Bad MACHREGS_NO_REGS value
601
602 #endif
603
604 /* -----------------------------------------------------------------------------
605 * These constants define how many stg registers will be used for
606 * passing arguments (and results, in the case of an unboxed-tuple
607 * return).
608 *
609 * We usually set MAX_REAL_VANILLA_REG and co. to be the number of the
610 * highest STG register to occupy a real machine register, otherwise
611 * the calling conventions will needlessly shuffle data between the
612 * stack and memory-resident STG registers. We might occasionally
613 * set these macros to other values for testing, though.
614 *
615 * Registers above these values might still be used, for instance to
616 * communicate with PrimOps and RTS functions.
617 */
618
619 #if !defined(MAX_REAL_VANILLA_REG)
620 # if defined(REG_R10)
621 # define MAX_REAL_VANILLA_REG 10
622 # elif defined(REG_R9)
623 # define MAX_REAL_VANILLA_REG 9
624 # elif defined(REG_R8)
625 # define MAX_REAL_VANILLA_REG 8
626 # elif defined(REG_R7)
627 # define MAX_REAL_VANILLA_REG 7
628 # elif defined(REG_R6)
629 # define MAX_REAL_VANILLA_REG 6
630 # elif defined(REG_R5)
631 # define MAX_REAL_VANILLA_REG 5
632 # elif defined(REG_R4)
633 # define MAX_REAL_VANILLA_REG 4
634 # elif defined(REG_R3)
635 # define MAX_REAL_VANILLA_REG 3
636 # elif defined(REG_R2)
637 # define MAX_REAL_VANILLA_REG 2
638 # elif defined(REG_R1)
639 # define MAX_REAL_VANILLA_REG 1
640 # else
641 # define MAX_REAL_VANILLA_REG 0
642 # endif
643 #endif
644
645 #if !defined(MAX_REAL_FLOAT_REG)
646 # if defined(REG_F4)
647 # define MAX_REAL_FLOAT_REG 4
648 # elif defined(REG_F3)
649 # define MAX_REAL_FLOAT_REG 3
650 # elif defined(REG_F2)
651 # define MAX_REAL_FLOAT_REG 2
652 # elif defined(REG_F1)
653 # define MAX_REAL_FLOAT_REG 1
654 # else
655 # define MAX_REAL_FLOAT_REG 0
656 # endif
657 #endif
658
659 #if !defined(MAX_REAL_DOUBLE_REG)
660 # if defined(REG_D2)
661 # define MAX_REAL_DOUBLE_REG 2
662 # elif defined(REG_D1)
663 # define MAX_REAL_DOUBLE_REG 1
664 # else
665 # define MAX_REAL_DOUBLE_REG 0
666 # endif
667 #endif
668
669 #if !defined(MAX_REAL_LONG_REG)
670 # if defined(REG_L1)
671 # define MAX_REAL_LONG_REG 1
672 # else
673 # define MAX_REAL_LONG_REG 0
674 # endif
675 #endif
676
677 #if !defined(MAX_REAL_XMM_REG)
678 # if defined(REG_XMM6)
679 # define MAX_REAL_XMM_REG 6
680 # elif defined(REG_XMM5)
681 # define MAX_REAL_XMM_REG 5
682 # elif defined(REG_XMM4)
683 # define MAX_REAL_XMM_REG 4
684 # elif defined(REG_XMM3)
685 # define MAX_REAL_XMM_REG 3
686 # elif defined(REG_XMM2)
687 # define MAX_REAL_XMM_REG 2
688 # elif defined(REG_XMM1)
689 # define MAX_REAL_XMM_REG 1
690 # else
691 # define MAX_REAL_XMM_REG 0
692 # endif
693 #endif
694
695 /* define NO_ARG_REGS if we have no argument registers at all (we can
696 * optimise certain code paths using this predicate).
697 */
698 #if MAX_REAL_VANILLA_REG < 2
699 #define NO_ARG_REGS
700 #else
701 #undef NO_ARG_REGS
702 #endif