Sparc fix: work around gcc optimising away the reserved stack chunk
[ghc.git] / rts / StgCRun.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2003
4 *
5 * STG-to-C glue.
6 *
7 * To run an STG function from C land, call
8 *
9 * rv = StgRun(f,BaseReg);
10 *
11 * where "f" is the STG function to call, and BaseReg is the address of the
12 * RegTable for this run (we might have separate RegTables if we're running
13 * multiple threads on an SMP machine).
14 *
15 * In the end, "f" must JMP to StgReturn (defined below),
16 * passing the return-value "rv" in R1,
17 * to return to the caller of StgRun returning "rv" in
18 * whatever way C returns a value.
19 *
20 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
21 * other registers (other than saving the C callee-saves
22 * registers). Instead, the called function "f" must do that
23 * in STG land.
24 *
25 * GCC will have assumed that pushing/popping of C-stack frames is
26 * going on when it generated its code, and used stack space
27 * accordingly. However, we actually {\em post-process away} all
28 * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
29 * be OK however, if we initially make sure there are
30 * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
31 * variables.
32 *
33 * -------------------------------------------------------------------------- */
34
35 #include "PosixSource.h"
36
37
38 /*
39 * We define the following (unused) global register variables, because for
40 * some reason gcc generates sub-optimal code for StgRun() on the Alpha
41 * (unnecessarily saving extra registers on the stack) if we don't.
42 *
43 * Why do it at the top of this file, rather than near StgRun() below? Because
44 * gcc doesn't let us define global register variables after any function
45 * definition has been read. Any point after #include "Stg.h" would be too
46 * late.
47 *
48 * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
49 * that we don't use but which are callee-save registers. The __divq() routine
50 * in libc.a clobbers $s6.
51 */
52 #include "ghcconfig.h"
/*
 * Alpha hosts only: global register variables bound to $26 and $29 (and,
 * under alpha_EXTRA_CAREFUL, to $15, $f8 and $f9). None of them is read or
 * written in this file; they exist purely for their effect on gcc's code
 * generation for StgRun().
 */
53 #ifdef alpha_HOST_ARCH
/* Also gates the $15/$f8/$f9 save and restore code in the Alpha StgRun(). */
54 #define alpha_EXTRA_CAREFUL
55 register long fake_ra __asm__("$26");
56 register long fake_gp __asm__("$29");
57 #ifdef alpha_EXTRA_CAREFUL
58 register long fake_s6 __asm__("$15");
59 register double fake_f8 __asm__("$f8");
60 register double fake_f9 __asm__("$f9");
61 #endif
62 #endif
63
64 /* include Stg.h first because we want real machine regs in here: we
65 * have to get the value of R1 back from Stg land to C land intact.
66 */
67 #include "Stg.h"
68 #include "Rts.h"
69 #include "StgRun.h"
70 #include "RtsFlags.h"
71 #include "OSThreads.h"
72 #include "Capability.h"
73
74 #ifdef DEBUG
75 #include "RtsUtils.h"
76 #include "Printer.h"
77 #endif
78
79 #ifdef USE_MINIINTERPRETER
80
81 /* -----------------------------------------------------------------------------
82 any architecture (using miniinterpreter)
83 -------------------------------------------------------------------------- */
84
/*
 * Mini-interpreter version of StgRun.
 *
 * Calls the code pointer f, then keeps calling whatever pointer each call
 * returns, until a null pointer comes back. basereg is not used in this
 * variant (it is marked STG_UNUSED). The result is R1.p cast to
 * StgRegTable *.
 */
85 StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
86 {
87 while (f) {
/* debug trace of the next jump target, wrapped in IF_DEBUG(interpreter, ...) */
88 IF_DEBUG(interpreter,
89 debugBelch("Jumping to ");
90 printPtr((P_)f); fflush(stdout);
91 debugBelch("\n");
92 );
/* run the current code block; its return value is the next one to run */
93 f = (StgFunPtr) (f)();
94 }
95 return (StgRegTable *)R1.p;
96 }
97
/*
 * Mini-interpreter version of StgReturn: returns 0, so that when the
 * `while (f)` loop in the mini-interpreter StgRun() calls it, the loop ends.
 */
98 StgFunPtr StgReturn(void)
99 {
100 return 0;
101 }
102
103 #else /* !USE_MINIINTERPRETER */
104
/*
 * Assembler-level spelling of the StgReturn symbol, for pasting into the
 * inline-asm strings in this file. It gets a leading '_' when
 * LEADING_UNDERSCORE is defined.
 */
105 #ifdef LEADING_UNDERSCORE
106 #define STG_RETURN "_StgReturn"
107 #else
108 #define STG_RETURN "StgReturn"
109 #endif
110
111 /* -----------------------------------------------------------------------------
112 x86 architecture
113 -------------------------------------------------------------------------- */
114
115 #ifdef i386_HOST_ARCH
116
/*
 * Assembler directive (with its trailing space) that the i386 StgRun() puts
 * in front of STG_RETURN to export the label: ".globl " when
 * darwin_TARGET_OS is defined, ".global " otherwise.
 */
117 #ifdef darwin_TARGET_OS
118 #define STG_GLOBAL ".globl "
119 #else
120 #define STG_GLOBAL ".global "
121 #endif
122
/*
 * i386 StgRun: enter STG code at f with basereg as BaseReg, and return the
 * value that the STG code leaves in %esi when it jumps to StgReturn.
 *
 * f       - STG code to jump to.
 * basereg - loaded into %ebx before the jump.
 * returns - the contents of %esi at the StgReturn label, as a StgRegTable *.
 *
 * `space` is declared with RESERVED_C_STACK_BYTES plus four pointer-sized
 * slots. The asm stores %ebx, %esi, %edi and %ebp at
 * %esp + RESERVED_C_STACK_BYTES and reloads them from the same place after
 * the StgReturn label; this presumably relies on gcc placing `space` at the
 * bottom of the frame (not checked here).
 *
 * The StgReturn label is defined in the middle of the asm statement, so
 * control re-enters this function there by a jump rather than a return.
 */
123 StgRegTable *
124 StgRun(StgFunPtr f, StgRegTable *basereg) {
125
126 unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
127 StgRegTable * r;
128
129 __asm__ volatile (
130 /*
131 * save callee-saves registers on behalf of the STG code.
132 */
133 "movl %%esp, %%eax\n\t"
134 "addl %4, %%eax\n\t"
135 "movl %%ebx,0(%%eax)\n\t"
136 "movl %%esi,4(%%eax)\n\t"
137 "movl %%edi,8(%%eax)\n\t"
138 "movl %%ebp,12(%%eax)\n\t"
139 /*
140 * Set BaseReg
141 */
142 "movl %3,%%ebx\n\t"
143 /*
144 * grab the function argument from the stack
145 */
146 "movl %2,%%eax\n\t"
147
148 /*
149 * Darwin note:
150 * The stack pointer has to be aligned to a multiple of 16 bytes at
151 * this point. This works out correctly with gcc 4.0.1, but it might
152 * break at any time in the future. TODO: Make this future-proof.
153 */
154
155 /*
156 * jump to it
157 */
158 "jmp *%%eax\n\t"
159
160 STG_GLOBAL STG_RETURN "\n"
161 STG_RETURN ":\n\t"
162
163 "movl %%esi, %%eax\n\t" /* Return value in R1 */
164
165 /*
166 * restore callee-saves registers. (Don't stomp on %%eax!)
167 */
168 "movl %%esp, %%edx\n\t"
169 "addl %4, %%edx\n\t"
170 "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
171 "movl 4(%%edx),%%esi\n\t"
172 "movl 8(%%edx),%%edi\n\t"
173 "movl 12(%%edx),%%ebp\n\t"
174
175 : "=&a" (r), "=m" (space)
176 : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
/*
 * Clobbers: %edx is used as scratch above. Between the jmp and the
 * StgReturn label arbitrary STG code runs, which may also overwrite the
 * other caller-saves register %ecx and write to memory, so declare both
 * rather than letting gcc assume they survive the asm.
 */
177 : "edx", "ecx", "memory"
178 );
179
180 return r;
181 }
182
183 #endif
184
185 /* ----------------------------------------------------------------------------
186 x86-64 is almost the same as plain x86.
187
188 I've done it using entirely inline assembler, because I couldn't
189 get gcc to generate the correct subtraction from %rsp by using
190 the local array variable trick. It didn't seem to reserve
191 enough space. Oh well, it's not much harder this way.
192
193 ------------------------------------------------------------------------- */
194
195 #ifdef x86_64_HOST_ARCH
196
197 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
198
/*
 * x86-64: container for an asm blob that defines the global symbols StgRun
 * and StgReturn. Nothing in this file calls this function itself; the
 * GNUC3_ATTRIBUTE(used) marker is presumably there so that the compiler
 * still emits it.
 *
 * StgRun:    lowers %rsp by the frame size (RESERVED_C_STACK_BYTES+48+8),
 *            stores %rbx, %rbp and %r12-%r15 in the top 48 bytes of that
 *            frame, copies %rsi into %r13 (BaseReg) and jumps to the address
 *            in %rdi.
 * StgReturn: copies %rbx into %rax as the result, reloads the six saved
 *            registers, raises %rsp by the frame size and executes retq.
 */
199 static void GNUC3_ATTRIBUTE(used)
200 StgRunIsImplementedInAssembler(void)
201 {
202 __asm__ volatile (
203 /*
204 * save callee-saves registers on behalf of the STG code.
205 */
206 ".globl StgRun\n"
207 "StgRun:\n\t"
208 "subq %0, %%rsp\n\t"
209 "movq %%rsp, %%rax\n\t"
210 "addq %0-48, %%rax\n\t"
211 "movq %%rbx,0(%%rax)\n\t"
212 "movq %%rbp,8(%%rax)\n\t"
213 "movq %%r12,16(%%rax)\n\t"
214 "movq %%r13,24(%%rax)\n\t"
215 "movq %%r14,32(%%rax)\n\t"
216 "movq %%r15,40(%%rax)\n\t"
217 /*
218 * Set BaseReg
219 */
220 "movq %%rsi,%%r13\n\t"
221 /*
222 * the function pointer is in %rdi (not on the stack): copy it to %rax and jump to it.
223 */
224 "movq %%rdi,%%rax\n\t"
225 "jmp *%%rax\n\t"
226
227 ".global " STG_RETURN "\n"
228 STG_RETURN ":\n\t"
229
230 "movq %%rbx, %%rax\n\t" /* Return value in R1 */
231
232 /*
233 * restore callee-saves registers. (Don't stomp on %%rax!)
234 */
235 "movq %%rsp, %%rdx\n\t"
236 "addq %0-48, %%rdx\n\t"
237 "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
238 "movq 8(%%rdx),%%rbp\n\t"
239 "movq 16(%%rdx),%%r12\n\t"
240 "movq 24(%%rdx),%%r13\n\t"
241 "movq 32(%%rdx),%%r14\n\t"
242 "movq 40(%%rdx),%%r15\n\t"
243 "addq %0, %%rsp\n\t"
244 "retq"
245
246 : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
247 /*
248 HACK alert!
249
250 The x86_64 ABI specifies that on a procedure call, %rsp is
251 aligned on a 16-byte boundary + 8. That is, the first
252 argument on the stack after the return address will be
253 16-byte aligned.
254
255 Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
256 of 16 bytes.
257
258 BUT... when we do a C-call from STG land, gcc likes to put the
259 stack alignment adjustment in the prolog. eg. if we're calling
260 a function with arguments in regs, gcc will insert 'subq $8,%rsp'
261 in the prolog, to keep %rsp aligned (the return address is 8
262 bytes, remember). The mangler throws away the prolog, so we
263 lose the stack alignment.
264
265 The hack is to add this extra 8 bytes to our %rsp adjustment
266 here, so that throughout STG code, %rsp is 16-byte aligned,
267 ready for a C-call.
268
269 A quick way to see if this is wrong is to compile this code:
270
271 main = System.Exit.exitWith ExitSuccess
272
273 And run it with +RTS -sstderr. The stats code in the RTS, in
274 particular statsPrintf(), relies on the stack alignment because
275 it saves the %xmm regs on the stack, so it'll fall over if the
276 stack isn't aligned, and calling exitWith from Haskell invokes
277 shutdownHaskellAndExit using a C call.
278
279 Future gcc releases will almost certainly break this hack...
280 */
281 }
282
283 #endif /* x86-64 */
284
285 /* -----------------------------------------------------------------------------
286 Sparc architecture
287
288 --
289 OLD COMMENT from GHC-3.02:
290
291 We want tailjumps to be calls, because `call xxx' is the only Sparc
292 branch that allows an arbitrary label as a target. (Gcc's ``goto
293 *target'' construct ends up loading the label into a register and
294 then jumping, at the cost of two extra instructions for the 32-bit
295 load.)
296
297 When entering the threaded world, we stash our return address in a
298 known location so that \tr{%i7} is available as an extra
299 callee-saves register. Of course, we have to restore this when
300 coming out of the threaded world.
301
302 I hate this god-forsaken architecture. Since the top of the
303 reserved stack space is used for globals and the bottom is reserved
304 for outgoing arguments, we have to stick our return address
305 somewhere in the middle. Currently, I'm allowing 100 extra
306 outgoing arguments beyond the first 6. --JSM
307
308 Updated info (GHC 4.06): we don't appear to use %i7 any more, so
309 I'm not sure whether we still need to save it. Incidentally, what
310 does the last paragraph above mean when it says "the top of the
311 stack is used for globals"? What globals? --SDM
312
313 Updated info (GHC 4.08.2): not saving %i7 any more (see below).
314 -------------------------------------------------------------------------- */
315
316 #ifdef sparc_HOST_ARCH
317
/*
 * SPARC StgRun: declares RESERVED_C_STACK_BYTES of local `space`, calls f()
 * as an ordinary C call, and defines the global StgReturn label directly
 * after that call. basereg is not referenced anywhere in this body. The
 * result is R1.i cast to StgRegTable *.
 *
 * `space` is passed to the asm as a "p" input only so that gcc regards it as
 * used; the asm text itself never refers to the operand.
 */
318 StgRegTable *
319 StgRun(StgFunPtr f, StgRegTable *basereg) {
320
321 unsigned char space[RESERVED_C_STACK_BYTES];
322 #if 0
323 register void *i7 __asm__("%i7");
324 ((void **)(space))[100] = i7;
325 #endif
326 f();
327 __asm__ volatile (
328 ".align 4\n"
329 ".global " STG_RETURN "\n"
330 STG_RETURN ":"
331 : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
332 /* we tell the C compiler that l0-l7 are clobbered on return to
333 * StgReturn, otherwise it tries to use these to save eg. the
334 * address of space[100] across the call. The correct thing
335 * to do would be to save all the callee-saves regs, but we
336 * can't be bothered to do that.
337 *
338 * We also explicitly mark space as used since gcc eliminates it
339 * otherwise.
340 *
341 * The code that gcc generates for this little fragment is now
342 * terrible. We could do much better by coding it directly in
343 * assembler.
344 */
345 #if 0
346 /* updated 4.08.2: we don't save %i7 in the middle of the reserved
347 * space any more, since gcc tries to save its address across the
348 * call to f(), this gets clobbered in STG land and we end up
349 * dereferencing a bogus pointer in StgReturn.
350 */
351 __asm__ volatile ("ld %1,%0"
352 : "=r" (i7) : "m" (((void **)(space))[100]));
353 #endif
354 return (StgRegTable *)R1.i;
355 }
356
357 #endif
358
359 /* -----------------------------------------------------------------------------
360 alpha architecture
361
362 "The stack pointer (SP) must at all times denote an address that has octaword
363 alignment. (This restriction has the side effect that the in-memory portion
364 of the argument list, if any, will start on an octaword boundary.) Note that
365 the stack grows toward lower addresses. During a procedure invocation, SP
366 can never be set to a value that is higher than the value of SP at entry to
367 that procedure invocation.
368
369 "The contents of the stack, located above the portion of the argument list
370 (if any) that is passed in memory, belong to the calling procedure. Because
371 they are part of the calling procedure, they should not be read or written
372 by the called procedure, except as specified by indirect arguments or
373 language-controlled up-level references.
374
375 "The SP value might be used by the hardware when raising exceptions and
376 asynchronous interrupts. It must be assumed that the contents of the stack
377 below the current SP value and within the stack for the current thread are
378 continually and unpredictably modified, as specified in the _Alpha
379 Architecture Reference Manual_, and as a result of asynchronous software
380 actions."
381
382 -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
383 Alpha Systems, 5.1 edition, August 2000, section 3.2.1. http://www.
384 tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
385 -------------------------------------------------------------------------- */
386
387 #ifdef alpha_HOST_ARCH
388
/*
 * Alpha StgRun.
 *
 * Each real_* variable is a local register variable bound to the machine
 * register named in its __asm__; each save_* is a volatile local that holds
 * a copy of it. The function:
 *   1. copies $26, $29, $9-$14 and $f2-$f7 (plus $15, $f8, $f9 when
 *      alpha_EXTRA_CAREFUL is defined) into the save_* locals;
 *   2. puts f in $27, lowers $30 by RESERVED_C_STACK_BYTES and jumps
 *      through $27;
 *   3. at the StgReturn label, raises $30 by the same amount again;
 *   4. takes the result from $14 (real_s5) and writes every saved value
 *      back to its register before returning.
 * basereg is not referenced in this body.
 */
389 StgRegTable *
390 StgRun(StgFunPtr f, StgRegTable *basereg)
391 {
392 register long real_ra __asm__("$26"); volatile long save_ra;
393 register long real_gp __asm__("$29"); volatile long save_gp;
394
395 register long real_s0 __asm__("$9" ); volatile long save_s0;
396 register long real_s1 __asm__("$10"); volatile long save_s1;
397 register long real_s2 __asm__("$11"); volatile long save_s2;
398 register long real_s3 __asm__("$12"); volatile long save_s3;
399 register long real_s4 __asm__("$13"); volatile long save_s4;
400 register long real_s5 __asm__("$14"); volatile long save_s5;
401 #ifdef alpha_EXTRA_CAREFUL
402 register long real_s6 __asm__("$15"); volatile long save_s6;
403 #endif
404
405 register double real_f2 __asm__("$f2"); volatile double save_f2;
406 register double real_f3 __asm__("$f3"); volatile double save_f3;
407 register double real_f4 __asm__("$f4"); volatile double save_f4;
408 register double real_f5 __asm__("$f5"); volatile double save_f5;
409 register double real_f6 __asm__("$f6"); volatile double save_f6;
410 register double real_f7 __asm__("$f7"); volatile double save_f7;
411 #ifdef alpha_EXTRA_CAREFUL
412 register double real_f8 __asm__("$f8"); volatile double save_f8;
413 register double real_f9 __asm__("$f9"); volatile double save_f9;
414 #endif
415
416 register StgFunPtr real_pv __asm__("$27");
417
418 StgRegTable * ret;
419
/* step 1: copy the registers into the volatile locals */
420 save_ra = real_ra;
421 save_gp = real_gp;
422
423 save_s0 = real_s0;
424 save_s1 = real_s1;
425 save_s2 = real_s2;
426 save_s3 = real_s3;
427 save_s4 = real_s4;
428 save_s5 = real_s5;
429 #ifdef alpha_EXTRA_CAREFUL
430 save_s6 = real_s6;
431 #endif
432
433 save_f2 = real_f2;
434 save_f3 = real_f3;
435 save_f4 = real_f4;
436 save_f5 = real_f5;
437 save_f6 = real_f6;
438 save_f7 = real_f7;
439 #ifdef alpha_EXTRA_CAREFUL
440 save_f8 = real_f8;
441 save_f9 = real_f9;
442 #endif
443
/* step 2: the jump target goes in $27, which the asm jumps through */
444 real_pv = f;
445
/* steps 2-3: adjust $30 down, jump, and undo the adjustment at StgReturn */
446 __asm__ volatile( "lda $30,-%0($30)" "\n"
447 "\t" "jmp ($27)" "\n"
448 "\t" ".align 3" "\n"
449 ".globl " STG_RETURN "\n"
450 STG_RETURN ":" "\n"
451 "\t" "lda $30,%0($30)" "\n"
452 : : "K" (RESERVED_C_STACK_BYTES));
453
/* step 4: read the result out of $14 before that register is restored */
454 ret = real_s5;
455
456 real_s0 = save_s0;
457 real_s1 = save_s1;
458 real_s2 = save_s2;
459 real_s3 = save_s3;
460 real_s4 = save_s4;
461 real_s5 = save_s5;
462 #ifdef alpha_EXTRA_CAREFUL
463 real_s6 = save_s6;
464 #endif
465
466 real_f2 = save_f2;
467 real_f3 = save_f3;
468 real_f4 = save_f4;
469 real_f5 = save_f5;
470 real_f6 = save_f6;
471 real_f7 = save_f7;
472 #ifdef alpha_EXTRA_CAREFUL
473 real_f8 = save_f8;
474 real_f9 = save_f9;
475 #endif
476
477 real_ra = save_ra;
478 real_gp = save_gp;
479
480 return ret;
481 }
482
483 #endif /* alpha_HOST_ARCH */
484
485 /* -----------------------------------------------------------------------------
486 HP-PA architecture
487 -------------------------------------------------------------------------- */
488
489 #ifdef hppa1_1_HOST_ARCH
490
/*
 * HP-PA StgRun.
 *
 * The first asm computes a save-area address in %r19 as %r30 plus a negative
 * constant, then stores %r3-%r18 (16 words) followed by %fr12-%fr21
 * (10 doubles) there. f() is then called as an ordinary C call. The second
 * asm defines and exports the StgReturn label, recomputes the same address,
 * copies %r11 into `ret`, and reloads the same registers in the same order.
 * basereg is not referenced in this body.
 *
 * NOTE(review): the save-area offset is written as
 * -(116 * sizeof(long) + 10 * sizeof(double)), whereas `space` is sized with
 * 16*sizeof(long) + 10*sizeof(double) on top of RESERVED_C_STACK_BYTES.
 * Confirm that the 116 is intentional.
 */
491 StgRegTable *
492 StgRun(StgFunPtr f, StgRegTable *basereg)
493 {
494 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
495 StgRegTable * ret;
496
/* save %r3-%r18 at offsets 0..60, then %fr12-%fr21, stepping %r19 along */
497 __asm__ volatile ("ldo %0(%%r30),%%r19\n"
498 "\tstw %%r3, 0(0,%%r19)\n"
499 "\tstw %%r4, 4(0,%%r19)\n"
500 "\tstw %%r5, 8(0,%%r19)\n"
501 "\tstw %%r6,12(0,%%r19)\n"
502 "\tstw %%r7,16(0,%%r19)\n"
503 "\tstw %%r8,20(0,%%r19)\n"
504 "\tstw %%r9,24(0,%%r19)\n"
505 "\tstw %%r10,28(0,%%r19)\n"
506 "\tstw %%r11,32(0,%%r19)\n"
507 "\tstw %%r12,36(0,%%r19)\n"
508 "\tstw %%r13,40(0,%%r19)\n"
509 "\tstw %%r14,44(0,%%r19)\n"
510 "\tstw %%r15,48(0,%%r19)\n"
511 "\tstw %%r16,52(0,%%r19)\n"
512 "\tstw %%r17,56(0,%%r19)\n"
513 "\tstw %%r18,60(0,%%r19)\n"
514 "\tldo 80(%%r19),%%r19\n"
515 "\tfstds %%fr12,-16(0,%%r19)\n"
516 "\tfstds %%fr13, -8(0,%%r19)\n"
517 "\tfstds %%fr14, 0(0,%%r19)\n"
518 "\tfstds %%fr15, 8(0,%%r19)\n"
519 "\tldo 32(%%r19),%%r19\n"
520 "\tfstds %%fr16,-16(0,%%r19)\n"
521 "\tfstds %%fr17, -8(0,%%r19)\n"
522 "\tfstds %%fr18, 0(0,%%r19)\n"
523 "\tfstds %%fr19, 8(0,%%r19)\n"
524 "\tldo 32(%%r19),%%r19\n"
525 "\tfstds %%fr20,-16(0,%%r19)\n"
526 "\tfstds %%fr21, -8(0,%%r19)\n" : :
527 "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
528 );
529
530 f();
531
/* StgReturn entry point: fetch the result from %r11, then undo the saves */
532 __asm__ volatile (".align 4\n"
533 "\t.EXPORT " STG_RETURN ",CODE\n"
534 "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
535 STG_RETURN "\n"
536 /* "\tldo %0(%%r3),%%r19\n" */
537 "\tldo %1(%%r30),%%r19\n"
538 "\tcopy %%r11, %0\n" /* save R1 */
539 "\tldw 0(0,%%r19),%%r3\n"
540 "\tldw 4(0,%%r19),%%r4\n"
541 "\tldw 8(0,%%r19),%%r5\n"
542 "\tldw 12(0,%%r19),%%r6\n"
543 "\tldw 16(0,%%r19),%%r7\n"
544 "\tldw 20(0,%%r19),%%r8\n"
545 "\tldw 24(0,%%r19),%%r9\n"
546 "\tldw 28(0,%%r19),%%r10\n"
547 "\tldw 32(0,%%r19),%%r11\n"
548 "\tldw 36(0,%%r19),%%r12\n"
549 "\tldw 40(0,%%r19),%%r13\n"
550 "\tldw 44(0,%%r19),%%r14\n"
551 "\tldw 48(0,%%r19),%%r15\n"
552 "\tldw 52(0,%%r19),%%r16\n"
553 "\tldw 56(0,%%r19),%%r17\n"
554 "\tldw 60(0,%%r19),%%r18\n"
555 "\tldo 80(%%r19),%%r19\n"
556 "\tfldds -16(0,%%r19),%%fr12\n"
557 "\tfldds -8(0,%%r19),%%fr13\n"
558 "\tfldds 0(0,%%r19),%%fr14\n"
559 "\tfldds 8(0,%%r19),%%fr15\n"
560 "\tldo 32(%%r19),%%r19\n"
561 "\tfldds -16(0,%%r19),%%fr16\n"
562 "\tfldds -8(0,%%r19),%%fr17\n"
563 "\tfldds 0(0,%%r19),%%fr18\n"
564 "\tfldds 8(0,%%r19),%%fr19\n"
565 "\tldo 32(%%r19),%%r19\n"
566 "\tfldds -16(0,%%r19),%%fr20\n"
567 "\tfldds -8(0,%%r19),%%fr21\n"
568 : "=r" (ret)
569 : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
570 : "%r19"
571 );
572
573 return ret;
574 }
575
576 #endif /* hppa1_1_HOST_ARCH */
577
578 /* -----------------------------------------------------------------------------
579 PowerPC architecture
580
581 Everything is in assembler, so we don't have to deal with GCC...
582
583 -------------------------------------------------------------------------- */
584
585 #ifdef powerpc_HOST_ARCH
586
587 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
588
589 #ifdef darwin_HOST_OS
/*
 * PowerPC / Darwin: container for an asm blob that defines the global
 * symbols _StgRun and _StgReturn.
 *
 * _StgRun:    copies LR into r0, calls saveFP, stores r13 and up with stmw
 *             at -220(r1), pushes a frame of RESERVED_C_STACK_BYTES+224
 *             bytes with stwu, copies r4 into r27 (BaseReg), and branches
 *             to the address in r3 via CTR (r3 is also copied to r12).
 * _StgReturn: copies r14 into r3 as the result, pops the frame, reloads the
 *             registers with lmw and branches to restFP.
 *
 * saveFP and restFP are not defined in this file.
 */
590 void StgRunIsImplementedInAssembler(void)
591 {
592 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
593 // if the toolchain supports deadstripping, we have to
594 // prevent it here (it tends to get confused here).
595 __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
596 #endif
597 __asm__ volatile (
598 "\n.globl _StgRun\n"
599 "_StgRun:\n"
600 "\tmflr r0\n"
601 "\tbl saveFP # f14\n"
602 "\tstmw r13,-220(r1)\n"
603 "\tstwu r1,-%0(r1)\n"
604 "\tmr r27,r4\n" // BaseReg == r27
605 "\tmtctr r3\n"
606 "\tmr r12,r3\n"
607 "\tbctr\n"
608 ".globl _StgReturn\n"
609 "_StgReturn:\n"
610 "\tmr r3,r14\n"
611 "\tla r1,%0(r1)\n"
612 "\tlmw r13,-220(r1)\n"
613 "\tb restFP # f14\n"
614 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
615 }
616 #else
617
618 // This version is for PowerPC Linux.
619
620 // Differences from the Darwin/Mac OS X version:
621 // *) Different Assembler Syntax
622 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
623 // *) We may not access positive stack offsets
624 // (no "Red Zone" as in the Darwin ABI)
625 // *) The Link Register is saved to a different offset in the caller's stack frame
626 // (Linux: 4(r1), Darwin 8(r1))
627
/*
 * PowerPC / Linux: container for an asm blob that defines the global
 * symbols StgRun and StgReturn.
 *
 * StgRun:    stores LR at 4(r1), keeps the entry stack pointer in r5,
 *            pushes a frame of RESERVED_C_STACK_BYTES+224 bytes with stwu,
 *            then saves r13 and up (stmw at -220) and f14-f31 at negative
 *            offsets from r5. It copies r4 into r27 (BaseReg) and branches
 *            to the address in r3 via CTR (r3 is also copied to r12).
 * StgReturn: copies r14 into r3 as the result, recomputes the entry stack
 *            pointer in r5, reloads the same registers from the same
 *            offsets, restores r1 and LR and returns with blr.
 */
628 static void GNUC3_ATTRIBUTE(used)
629 StgRunIsImplementedInAssembler(void)
630 {
631 __asm__ volatile (
632 "\t.globl StgRun\n"
633 "\t.type StgRun,@function\n"
634 "StgRun:\n"
635 "\tmflr 0\n"
636 "\tstw 0,4(1)\n"
637 "\tmr 5,1\n"
638 "\tstwu 1,-%0(1)\n"
639 "\tstmw 13,-220(5)\n"
640 "\tstfd 14,-144(5)\n"
641 "\tstfd 15,-136(5)\n"
642 "\tstfd 16,-128(5)\n"
643 "\tstfd 17,-120(5)\n"
644 "\tstfd 18,-112(5)\n"
645 "\tstfd 19,-104(5)\n"
646 "\tstfd 20,-96(5)\n"
647 "\tstfd 21,-88(5)\n"
648 "\tstfd 22,-80(5)\n"
649 "\tstfd 23,-72(5)\n"
650 "\tstfd 24,-64(5)\n"
651 "\tstfd 25,-56(5)\n"
652 "\tstfd 26,-48(5)\n"
653 "\tstfd 27,-40(5)\n"
654 "\tstfd 28,-32(5)\n"
655 "\tstfd 29,-24(5)\n"
656 "\tstfd 30,-16(5)\n"
657 "\tstfd 31,-8(5)\n"
658 "\tmr 27,4\n" // BaseReg == r27
659 "\tmtctr 3\n"
660 "\tmr 12,3\n"
661 "\tbctr\n"
662 ".globl StgReturn\n"
663 "\t.type StgReturn,@function\n"
664 "StgReturn:\n"
665 "\tmr 3,14\n"
666 "\tla 5,%0(1)\n"
667 "\tlmw 13,-220(5)\n"
668 "\tlfd 14,-144(5)\n"
669 "\tlfd 15,-136(5)\n"
670 "\tlfd 16,-128(5)\n"
671 "\tlfd 17,-120(5)\n"
672 "\tlfd 18,-112(5)\n"
673 "\tlfd 19,-104(5)\n"
674 "\tlfd 20,-96(5)\n"
675 "\tlfd 21,-88(5)\n"
676 "\tlfd 22,-80(5)\n"
677 "\tlfd 23,-72(5)\n"
678 "\tlfd 24,-64(5)\n"
679 "\tlfd 25,-56(5)\n"
680 "\tlfd 26,-48(5)\n"
681 "\tlfd 27,-40(5)\n"
682 "\tlfd 28,-32(5)\n"
683 "\tlfd 29,-24(5)\n"
684 "\tlfd 30,-16(5)\n"
685 "\tlfd 31,-8(5)\n"
686 "\tmr 1,5\n"
687 "\tlwz 0,4(1)\n"
688 "\tmtlr 0\n"
689 "\tblr\n"
690 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
691 }
692 #endif
693
694 #endif
695
696 /* -----------------------------------------------------------------------------
697 PowerPC 64 architecture
698
699 Everything is in assembler, so we don't have to deal with GCC...
700
701 -------------------------------------------------------------------------- */
702
703 #ifdef powerpc64_HOST_ARCH
704
705 #ifdef linux_HOST_OS
706 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
707
/*
 * PowerPC64 / Linux: container for an asm blob that defines StgRun and
 * StgReturn. Each name is emitted twice: as a 24-byte entry in the ".opd"
 * section (StgRun / StgReturn) that refers to the code symbol, and as the
 * code symbol itself (.StgRun / .StgReturn).
 *
 * .StgRun:    stores LR at 16(r1), keeps the entry stack pointer in r5,
 *             pushes a frame of RESERVED_C_STACK_BYTES+304 bytes with stdu,
 *             then saves r2, r14-r31 and f14-f31 at negative offsets from
 *             r5. It copies r4 into r27 (BaseReg), loads r2 from 8(r3) and
 *             the branch target from 0(r3), and branches via CTR.
 * .StgReturn: copies r14 into r3 as the result, recomputes the entry stack
 *             pointer in r5, reloads the same registers from the same
 *             offsets, restores r1 and LR and returns with blr.
 */
708 static void GNUC3_ATTRIBUTE(used)
709 StgRunIsImplementedInAssembler(void)
710 {
711 // r0 volatile
712 // r1 stack pointer
713 // r2 toc - needs to be saved
714 // r3-r10 argument passing, volatile
715 // r11, r12 very volatile (not saved across cross-module calls)
716 // r13 thread local state (never modified, don't need to save)
717 // r14-r31 callee-save
718 __asm__ volatile (
719 ".section \".opd\",\"aw\"\n"
720 ".align 3\n"
721 ".globl StgRun\n"
722 "StgRun:\n"
723 "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
724 "\t.size StgRun,24\n"
725 ".globl StgReturn\n"
726 "StgReturn:\n"
727 "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
728 "\t.size StgReturn,24\n"
729 ".previous\n"
730 ".globl .StgRun\n"
731 ".type .StgRun,@function\n"
732 ".StgRun:\n"
733 "\tmflr 0\n"
734 "\tmr 5, 1\n"
735 "\tstd 0, 16(1)\n"
736 "\tstdu 1, -%0(1)\n"
737 "\tstd 2, -296(5)\n"
738 "\tstd 14, -288(5)\n"
739 "\tstd 15, -280(5)\n"
740 "\tstd 16, -272(5)\n"
741 "\tstd 17, -264(5)\n"
742 "\tstd 18, -256(5)\n"
743 "\tstd 19, -248(5)\n"
744 "\tstd 20, -240(5)\n"
745 "\tstd 21, -232(5)\n"
746 "\tstd 22, -224(5)\n"
747 "\tstd 23, -216(5)\n"
748 "\tstd 24, -208(5)\n"
749 "\tstd 25, -200(5)\n"
750 "\tstd 26, -192(5)\n"
751 "\tstd 27, -184(5)\n"
752 "\tstd 28, -176(5)\n"
753 "\tstd 29, -168(5)\n"
754 "\tstd 30, -160(5)\n"
755 "\tstd 31, -152(5)\n"
756 "\tstfd 14, -144(5)\n"
757 "\tstfd 15, -136(5)\n"
758 "\tstfd 16, -128(5)\n"
759 "\tstfd 17, -120(5)\n"
760 "\tstfd 18, -112(5)\n"
761 "\tstfd 19, -104(5)\n"
762 "\tstfd 20, -96(5)\n"
763 "\tstfd 21, -88(5)\n"
764 "\tstfd 22, -80(5)\n"
765 "\tstfd 23, -72(5)\n"
766 "\tstfd 24, -64(5)\n"
767 "\tstfd 25, -56(5)\n"
768 "\tstfd 26, -48(5)\n"
769 "\tstfd 27, -40(5)\n"
770 "\tstfd 28, -32(5)\n"
771 "\tstfd 29, -24(5)\n"
772 "\tstfd 30, -16(5)\n"
773 "\tstfd 31, -8(5)\n"
774 "\tmr 27, 4\n" // BaseReg == r27
/* r3 is dereferenced here: second doubleword goes to r2, first is the jump target */
775 "\tld 2, 8(3)\n"
776 "\tld 3, 0(3)\n"
777 "\tmtctr 3\n"
778 "\tbctr\n"
779 ".globl .StgReturn\n"
780 ".type .StgReturn,@function\n"
781 ".StgReturn:\n"
782 "\tmr 3,14\n"
783 "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
784 "\tld 2, -296(5)\n"
785 "\tld 14, -288(5)\n"
786 "\tld 15, -280(5)\n"
787 "\tld 16, -272(5)\n"
788 "\tld 17, -264(5)\n"
789 "\tld 18, -256(5)\n"
790 "\tld 19, -248(5)\n"
791 "\tld 20, -240(5)\n"
792 "\tld 21, -232(5)\n"
793 "\tld 22, -224(5)\n"
794 "\tld 23, -216(5)\n"
795 "\tld 24, -208(5)\n"
796 "\tld 25, -200(5)\n"
797 "\tld 26, -192(5)\n"
798 "\tld 27, -184(5)\n"
799 "\tld 28, -176(5)\n"
800 "\tld 29, -168(5)\n"
801 "\tld 30, -160(5)\n"
802 "\tld 31, -152(5)\n"
803 "\tlfd 14, -144(5)\n"
804 "\tlfd 15, -136(5)\n"
805 "\tlfd 16, -128(5)\n"
806 "\tlfd 17, -120(5)\n"
807 "\tlfd 18, -112(5)\n"
808 "\tlfd 19, -104(5)\n"
809 "\tlfd 20, -96(5)\n"
810 "\tlfd 21, -88(5)\n"
811 "\tlfd 22, -80(5)\n"
812 "\tlfd 23, -72(5)\n"
813 "\tlfd 24, -64(5)\n"
814 "\tlfd 25, -56(5)\n"
815 "\tlfd 26, -48(5)\n"
816 "\tlfd 27, -40(5)\n"
817 "\tlfd 28, -32(5)\n"
818 "\tlfd 29, -24(5)\n"
819 "\tlfd 30, -16(5)\n"
820 "\tlfd 31, -8(5)\n"
821 "\tmr 1, 5\n"
822 "\tld 0, 16(1)\n"
823 "\tmtlr 0\n"
824 "\tblr\n"
825 : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
826 }
827 #else // linux_HOST_OS
828 #error Only linux support for power64 right now.
829 #endif
830
831 #endif
832
833 /* -----------------------------------------------------------------------------
834 IA64 architecture
835
836 Again, in assembler - so we can fiddle with the register stack, and because
837 gcc doesn't handle asm-clobbered callee-saves correctly.
838
839 loc0 - loc15: preserved locals
840 loc16 - loc28: STG registers
841 loc29: saved ar.pfs
842 loc30: saved b0
843 loc31: saved gp (gcc 3.3 uses this slot)
844 -------------------------------------------------------------------------- */
845
846 #ifdef ia64_HOST_ARCH
847
848 /* the memory stack is rarely used, so 16K is excessive */
849 #undef RESERVED_C_STACK_BYTES
850 #define RESERVED_C_STACK_BYTES 1024
851
852 #if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
853 /* gcc 3.3+: leave an extra slot for gp saves */
854 #define LOCALS 32
855 #else
856 #define LOCALS 31
857 #endif
858
/*
 * IA64: container for an asm blob that defines the global symbols StgRun
 * and StgReturn.
 *
 * Operand %0 is the memory-stack frame size (RESERVED_C_STACK_BYTES + 6*16);
 * operand %1 is LOCALS, the number of local registers requested by alloc.
 *
 * StgRun:    allocates the register frame (previous ar.pfs kept in loc29),
 *            loads the procedure address and gp through r32, lowers sp by
 *            %0, spills f16-f21 into the top 6*16 bytes of the new frame,
 *            saves b0 in loc30 and branches to the procedure.
 * StgReturn: puts loc16 in r8 as the result, refills f16-f21, restores
 *            ar.pfs and b0, raises sp by %0 and returns.
 */
859 static void GNUC3_ATTRIBUTE(used)
860 StgRunIsImplementedInAssembler(void)
861 {
862 __asm__ volatile(
863 ".global StgRun\n"
864 "StgRun:\n"
865 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
866 "\tld8 r18 = [r32],8\n" /* get procedure address */
867 "\tadds sp = -%0, sp ;;\n" /* setup stack */
868 "\tld8 gp = [r32]\n" /* get procedure GP */
869 "\tadds r16 = %0-(6*16), sp\n"
870 "\tadds r17 = %0-(5*16), sp ;;\n"
871 "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
872 "\tstf.spill [r17] = f17,32\n"
873 "\tmov b6 = r18 ;;\n" /* set target address */
874 "\tstf.spill [r16] = f18,32\n"
875 "\tstf.spill [r17] = f19,32\n"
876 "\tmov loc30 = b0 ;;\n" /* save return address */
877 "\tstf.spill [r16] = f20,32\n"
878 "\tstf.spill [r17] = f21,32\n"
879 "\tbr.few b6 ;;\n" /* branch to function */
880 ".global StgReturn\n"
881 "StgReturn:\n"
882 "\tmov r8 = loc16\n" /* return value in r8 */
883 "\tadds r16 = %0-(6*16), sp\n"
884 "\tadds r17 = %0-(5*16), sp ;;\n"
885 "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
886 "\tldf.fill f17 = [r17],32\n"
887 "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
888 "\tldf.fill f18 = [r16],32\n"
889 "\tldf.fill f19 = [r17],32\n"
890 "\tmov b0 = loc30 ;;\n" /* restore return address */
891 "\tldf.fill f20 = [r16],32\n"
892 "\tldf.fill f21 = [r17],32\n"
893 "\tadds sp = %0, sp\n" /* restore stack */
894 "\tbr.ret.sptk.many b0 ;;\n" /* return */
895 : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
896 }
897
898 #endif
899
900 #endif /* !USE_MINIINTERPRETER */