Merge branch 'master' of http://darcs.haskell.org/ghc
[ghc.git] / rts / StgCRun.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2003
4 *
5 * STG-to-C glue.
6 *
7 * To run an STG function from C land, call
8 *
9 * rv = StgRun(f,BaseReg);
10 *
11 * where "f" is the STG function to call, and BaseReg is the address of the
12 * RegTable for this run (we might have separate RegTables if we're running
13 * multiple threads on an SMP machine).
14 *
15 * In the end, "f" must JMP to StgReturn (defined below),
16 * passing the return-value "rv" in R1,
17 * to return to the caller of StgRun, returning "rv" in
18 * whatever way C returns a value.
19 *
20 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
21 * other registers (other than saving the C callee-saves
22 * registers). Instead, the called function "f" must do that
23 * in STG land.
24 *
25 * GCC will have assumed that pushing/popping of C-stack frames is
26 * going on when it generated its code, and used stack space
27 * accordingly. However, we actually {\em post-process away} all
28 * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
29 * be OK however, if we initially make sure there are
30 * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
31 * variables.
32 *
33 * -------------------------------------------------------------------------- */
34
35 #include "PosixSource.h"
36
37 /*
38 * We define the following (unused) global register variables, because for
39 * some reason gcc generates sub-optimal code for StgRun() on the Alpha
40 * (unnecessarily saving extra registers on the stack) if we don't.
41 *
42 * Why do it at the top of this file, rather than near StgRun() below? Because
43 * gcc doesn't let us define global register variables after any function
44 * definition has been read. Any point after #include "Stg.h" would be too
45 * late.
46 *
47 * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
48 * that we don't use but which are callee-save registers. The __divq() routine
49 * in libc.a clobbers $s6.
50 */
51 #include "ghcconfig.h"
52 #ifndef USE_MINIINTERPRETER
53 #ifdef alpha_HOST_ARCH
54 #define alpha_EXTRA_CAREFUL
55 register long fake_ra __asm__("$26");
56 register long fake_gp __asm__("$29");
57 #ifdef alpha_EXTRA_CAREFUL
58 register long fake_s6 __asm__("$15");
59 register double fake_f8 __asm__("$f8");
60 register double fake_f9 __asm__("$f9");
61 #endif
62 #endif
63 #endif
64
65 /* include Stg.h first because we want real machine regs in here: we
66 * have to get the value of R1 back from Stg land to C land intact.
67 */
68 // yeuch
69 #define IN_STGCRUN 1
70 #include "Stg.h"
71 #include "Rts.h"
72
73 #include "StgRun.h"
74 #include "Capability.h"
75
76 #ifdef DEBUG
77 #include "RtsUtils.h"
78 #include "Printer.h"
79 #endif
80
81 #ifdef USE_MINIINTERPRETER
82
83 /* -----------------------------------------------------------------------------
84 any architecture (using miniinterpreter)
85 -------------------------------------------------------------------------- */
86
87 StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
88 {
89 while (f) {
90 IF_DEBUG(interpreter,
91 debugBelch("Jumping to ");
92 printPtr((P_)f); fflush(stdout);
93 debugBelch("\n");
94 );
95 f = (StgFunPtr) (f)();
96 }
97 return (StgRegTable *)R1.p;
98 }
99
100 StgFunPtr StgReturn(void)
101 {
102 return 0;
103 }
104
105 #else /* !USE_MINIINTERPRETER */
106
107 #ifdef LEADING_UNDERSCORE
108 #define STG_RUN "_StgRun"
109 #else
110 #define STG_RUN "StgRun"
111 #endif
112
113 #ifdef LEADING_UNDERSCORE
114 #define STG_RETURN "_StgReturn"
115 #else
116 #define STG_RETURN "StgReturn"
117 #endif
118
119 /* -----------------------------------------------------------------------------
120 x86 architecture
121 -------------------------------------------------------------------------- */
122
123 #ifdef i386_HOST_ARCH
124
125 #ifdef darwin_HOST_OS
126 #define STG_GLOBAL ".globl "
127 #else
128 #define STG_GLOBAL ".global "
129 #endif
130
/*
 * i386: container for the StgRun / StgReturn entry points.
 *
 * Nothing in this file calls this C function; its body is a single asm
 * statement that itself defines the global labels STG_RUN and STG_RETURN.
 * GNUC3_ATTRIBUTE(used) presumably keeps the compiler from discarding the
 * otherwise unreferenced static function -- TODO confirm.
 *
 * Frame arithmetic, writing N for the "i" operand %0
 * (RESERVED_C_STACK_BYTES + 16 + 12):
 *   - STG_RUN lowers %esp by N;
 *   - %eax = %esp + N - 16, i.e. 16 bytes below the %esp seen on entry;
 *     %ebx, %esi, %edi and %ebp are stored at 0, 4, 8 and 12 from there;
 *   - 20(%eax) and 24(%eax) are therefore 4 and 8 bytes above the entry
 *     %esp -- presumably the stack-passed arguments f and basereg, with
 *     the return address at offset 0 -- TODO confirm against the ABI.
 * STG_RETURN mirrors this: it copies %esi into %eax as the return value,
 * reloads the four saved registers, raises %esp by N and executes ret.
 */
131 static void GNUC3_ATTRIBUTE(used)
132 StgRunIsImplementedInAssembler(void)
133 {
134 __asm__ volatile (
135 STG_GLOBAL STG_RUN "\n"
136 STG_RUN ":\n\t"
137
138 /*
139 * move %esp down to reserve an area for temporary storage
140 * during the execution of STG code.
141 *
142 * The stack pointer has to be aligned to a multiple of 16
143 * bytes from here - this is a requirement of the C ABI, so
144 * that C code can assign SSE2 registers directly to/from
145 * stack locations.
146 */
147 "subl %0, %%esp\n\t"
148
149 /*
150 * save callee-saves registers on behalf of the STG code.
151 */
152 "movl %%esp, %%eax\n\t"
/* %eax now points at the 16-byte save area just below the entry %esp */
153 "addl %0-16, %%eax\n\t"
154 "movl %%ebx,0(%%eax)\n\t"
155 "movl %%esi,4(%%eax)\n\t"
156 "movl %%edi,8(%%eax)\n\t"
157 "movl %%ebp,12(%%eax)\n\t"
158 /*
159 * Set BaseReg
160 */
161 "movl 24(%%eax),%%ebx\n\t"
162 /*
163 * grab the function argument from the stack
164 */
165 "movl 20(%%eax),%%eax\n\t"
166 /*
167 * jump to it
168 */
169 "jmp *%%eax\n\t"
170
171 STG_GLOBAL STG_RETURN "\n"
172 STG_RETURN ":\n\t"
173
174 "movl %%esi, %%eax\n\t" /* Return value in R1 */
175
176 /*
177 * restore callee-saves registers. (Don't stomp on %%eax!)
178 */
179 "movl %%esp, %%edx\n\t"
/* same address computation as in STG_RUN, but using %edx as scratch */
180 "addl %0-16, %%edx\n\t"
181 "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
182 "movl 4(%%edx),%%esi\n\t"
183 "movl 8(%%edx),%%edi\n\t"
184 "movl 12(%%edx),%%ebp\n\t"
185
186 "addl %0, %%esp\n\t"
187 "ret"
188
189 : : "i" (RESERVED_C_STACK_BYTES + 16 + 12)
190 // + 16 to make room for the 4 registers we have to save
191 // + 12 because we need to align %esp to a 16-byte boundary (#5250)
192 );
193 }
194
195 #if defined(mingw32_HOST_OS)
196 // On windows the stack has to be allocated 4k at a time, otherwise
197 // we get a segfault. The C compiler knows how to do this (it calls
198 // _alloca()), so we make sure that we can allocate as much stack as
199 // we need:
// Declares a local array as large as the frame reserved by the i386 StgRun
// assembly (RESERVED_C_STACK_BYTES + 16 + 12) and returns its address.
//
// NOTE: the returned pointer refers to the local array `stack`, whose
// lifetime ends when this function returns, so the result must not be
// dereferenced.  Presumably callers invoke this only so that the compiler
// emits the stack allocation for an array of this size -- verify against
// the call site.
200 StgWord8 *win32AllocStack(void)
201 {
202 StgWord8 stack[RESERVED_C_STACK_BYTES + 16 + 12];
203 return stack;
204 }
205 #endif
206
207 #endif
208
209 /* ----------------------------------------------------------------------------
210 x86-64 is almost the same as plain x86.
211
212 I've done it using entirely inline assembler, because I couldn't
213 get gcc to generate the correct subtraction from %rsp by using
214 the local array variable trick. It didn't seem to reserve
215 enough space. Oh well, it's not much harder this way.
216
217 ------------------------------------------------------------------------- */
218
219 #ifdef x86_64_HOST_ARCH
220
221 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
222
/*
 * x86-64: container for the StgRun / StgReturn entry points.
 *
 * Nothing in this file calls this C function; its body is a single asm
 * statement that itself defines the global labels STG_RUN and STG_RETURN.
 *
 * Frame arithmetic, writing N for the "i" operand %0
 * (RESERVED_C_STACK_BYTES+48+8):
 *   - STG_RUN lowers %rsp by N;
 *   - %rax = %rsp + N - 48, i.e. 48 bytes below the %rsp seen on entry;
 *     %rbx, %rbp, %r12, %r13, %r14 and %r15 are stored at offsets 0..40;
 *   - %rsi is copied to %r13 (BaseReg) and %rdi is the jump target.
 * STG_RETURN mirrors this: it copies %rbx into %rax as the return value,
 * reloads the six saved registers, raises %rsp by N and executes retq.
 */
223 static void GNUC3_ATTRIBUTE(used)
224 StgRunIsImplementedInAssembler(void)
225 {
226 __asm__ volatile (
227 /*
228 * save callee-saves registers on behalf of the STG code.
229 */
230 ".globl " STG_RUN "\n"
231 STG_RUN ":\n\t"
232 "subq %0, %%rsp\n\t"
233 "movq %%rsp, %%rax\n\t"
/* %rax now points at the 48-byte save area just below the entry %rsp */
234 "addq %0-48, %%rax\n\t"
235 "movq %%rbx,0(%%rax)\n\t"
236 "movq %%rbp,8(%%rax)\n\t"
237 "movq %%r12,16(%%rax)\n\t"
238 "movq %%r13,24(%%rax)\n\t"
239 "movq %%r14,32(%%rax)\n\t"
240 "movq %%r15,40(%%rax)\n\t"
241 /*
242 * Set BaseReg
243 */
244 "movq %%rsi,%%r13\n\t"
245 /*
246 * grab the function argument (it arrives in %rdi, not on the stack),
247 * and jump to it.
 */
248 "movq %%rdi,%%rax\n\t"
249 "jmp *%%rax\n\t"
250
251 ".globl " STG_RETURN "\n"
252 STG_RETURN ":\n\t"
253
254 "movq %%rbx, %%rax\n\t" /* Return value in R1 */
255
256 /*
257 * restore callee-saves registers. (Don't stomp on %%rax!)
258 */
259 "movq %%rsp, %%rdx\n\t"
260 "addq %0-48, %%rdx\n\t"
261 "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
262 "movq 8(%%rdx),%%rbp\n\t"
263 "movq 16(%%rdx),%%r12\n\t"
264 "movq 24(%%rdx),%%r13\n\t"
265 "movq 32(%%rdx),%%r14\n\t"
266 "movq 40(%%rdx),%%r15\n\t"
267 "addq %0, %%rsp\n\t"
268 "retq"
269
270 : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
271 /*
272 HACK alert!
273
274 The x86_64 ABI specifies that on a procedure call, %rsp is
275 aligned on a 16-byte boundary + 8. That is, the first
276 argument on the stack after the return address will be
277 16-byte aligned.
278
279 Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
280 of 16 bytes.
281
282 BUT... when we do a C-call from STG land, gcc likes to put the
283 stack alignment adjustment in the prolog. eg. if we're calling
284 a function with arguments in regs, gcc will insert 'subq $8,%rsp'
285 in the prolog, to keep %rsp aligned (the return address is 8
286 bytes, remember). The mangler throws away the prolog, so we
287 lose the stack alignment.
288
289 The hack is to add this extra 8 bytes to our %rsp adjustment
290 here, so that throughout STG code, %rsp is 16-byte aligned,
291 ready for a C-call.
292
293 A quick way to see if this is wrong is to compile this code:
294
295 main = System.Exit.exitWith ExitSuccess
296
297 And run it with +RTS -sstderr. The stats code in the RTS, in
298 particular statsPrintf(), relies on the stack alignment because
299 it saves the %xmm regs on the stack, so it'll fall over if the
300 stack isn't aligned, and calling exitWith from Haskell invokes
301 shutdownHaskellAndExit using a C call.
302
303 Future gcc releases will almost certainly break this hack...
304 */
305 }
306
307 #endif /* x86-64 */
308
309 /* -----------------------------------------------------------------------------
310 Sparc architecture
311
312 --
313 OLD COMMENT from GHC-3.02:
314
315 We want tailjumps to be calls, because `call xxx' is the only Sparc
316 branch that allows an arbitrary label as a target. (Gcc's ``goto
317 *target'' construct ends up loading the label into a register and
318 then jumping, at the cost of two extra instructions for the 32-bit
319 load.)
320
321 When entering the threaded world, we stash our return address in a
322 known location so that \tr{%i7} is available as an extra
323 callee-saves register. Of course, we have to restore this when
324 coming out of the threaded world.
325
326 I hate this god-forsaken architecture. Since the top of the
327 reserved stack space is used for globals and the bottom is reserved
328 for outgoing arguments, we have to stick our return address
329 somewhere in the middle. Currently, I'm allowing 100 extra
330 outgoing arguments beyond the first 6. --JSM
331
332 Updated info (GHC 4.06): we don't appear to use %i7 any more, so
333 I'm not sure whether we still need to save it. Incidentally, what
334 does the last paragraph above mean when it says "the top of the
335 stack is used for globals"? What globals? --SDM
336
337 Updated info (GHC 4.08.2): not saving %i7 any more (see below).
338 -------------------------------------------------------------------------- */
339
340 #ifdef sparc_HOST_ARCH
341
/*
 * Sparc StgRun.
 *
 * Declares a RESERVED_C_STACK_BYTES-sized local array, calls f(), and
 * emits the global label STG_RETURN in the middle of this function, right
 * after the call.  The value returned to the C caller is the integer field
 * of R1 cast to a StgRegTable pointer.
 *
 * "basereg" is not referenced anywhere in this body.
 * Both "#if 0" regions are disabled remnants of the old %i7 save/restore.
 */
342 StgRegTable *
343 StgRun(StgFunPtr f, StgRegTable *basereg) {
344
345 unsigned char space[RESERVED_C_STACK_BYTES];
346 #if 0
347 register void *i7 __asm__("%i7");
348 ((void **)(space))[100] = i7;
349 #endif
350 f();
351 __asm__ volatile (
352 ".align 4\n"
353 ".global " STG_RETURN "\n"
354 STG_RETURN ":"
355 : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
356 /* we tell the C compiler that l0-l7 are clobbered on return to
357 * StgReturn, otherwise it tries to use these to save eg. the
358 * address of space[100] across the call. The correct thing
359 * to do would be to save all the callee-saves regs, but we
360 * can't be bothered to do that.
361 *
362 * We also explicitly mark space as used since gcc eliminates it
363 * otherwise.
364 *
365 * The code that gcc generates for this little fragment is now
366 * terrible. We could do much better by coding it directly in
367 * assembler.
368 */
369 #if 0
370 /* updated 4.08.2: we don't save %i7 in the middle of the reserved
371 * space any more, since gcc tries to save its address across the
372 * call to f(), this gets clobbered in STG land and we end up
373 * dereferencing a bogus pointer in StgReturn.
374 */
375 __asm__ volatile ("ld %1,%0"
376 : "=r" (i7) : "m" (((void **)(space))[100]));
377 #endif
378 return (StgRegTable *)R1.i;
379 }
380
381 #endif
382
383 /* -----------------------------------------------------------------------------
384 alpha architecture
385
386 "The stack pointer (SP) must at all times denote an address that has octaword
387 alignment. (This restriction has the side effect that the in-memory portion
388 of the argument list, if any, will start on an octaword boundary.) Note that
389 the stack grows toward lower addresses. During a procedure invocation, SP
390 can never be set to a value that is higher than the value of SP at entry to
391 that procedure invocation.
392
393 "The contents of the stack, located above the portion of the argument list
394 (if any) that is passed in memory, belong to the calling procedure. Because
395 they are part of the calling procedure, they should not be read or written
396 by the called procedure, except as specified by indirect arguments or
397 language-controlled up-level references.
398
399 "The SP value might be used by the hardware when raising exceptions and
400 asynchronous interrupts. It must be assumed that the contents of the stack
401 below the current SP value and within the stack for the current thread are
402 continually and unpredictably modified, as specified in the _Alpha
403 Architecture Reference Manual_, and as a result of asynchronous software
404 actions."
405
406 -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
407 Alpha Systems, 5.1 edition, August 2000, section 3.2.1. http://www.
408 tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
409 -------------------------------------------------------------------------- */
410
411 #ifdef alpha_HOST_ARCH
412
/*
 * Alpha StgRun.
 *
 * Each real_* variable is pinned to one machine register with __asm__;
 * each save_* variable is a volatile local that receives a copy of it.
 * The sequence is:
 *   1. copy $26 (ra), $29 (gp), $9-$14 (s0-s5) and $f2-$f7 into the
 *      save_* locals; with alpha_EXTRA_CAREFUL also $15, $f8 and $f9;
 *   2. put f into $27 and, in one asm statement, lower $30 by
 *      RESERVED_C_STACK_BYTES and jump through $27;
 *   3. the same asm statement defines the global label STG_RETURN, where
 *      $30 is raised again by RESERVED_C_STACK_BYTES;
 *   4. take the result from real_s5 ($14), then copy every save_* value
 *      back into its register and return the result.
 *
 * "basereg" is not referenced anywhere in this body.
 */
413 StgRegTable *
414 StgRun(StgFunPtr f, StgRegTable *basereg)
415 {
416 register long real_ra __asm__("$26"); volatile long save_ra;
417 register long real_gp __asm__("$29"); volatile long save_gp;
418
419 register long real_s0 __asm__("$9" ); volatile long save_s0;
420 register long real_s1 __asm__("$10"); volatile long save_s1;
421 register long real_s2 __asm__("$11"); volatile long save_s2;
422 register long real_s3 __asm__("$12"); volatile long save_s3;
423 register long real_s4 __asm__("$13"); volatile long save_s4;
424 register long real_s5 __asm__("$14"); volatile long save_s5;
425 #ifdef alpha_EXTRA_CAREFUL
426 register long real_s6 __asm__("$15"); volatile long save_s6;
427 #endif
428
429 register double real_f2 __asm__("$f2"); volatile double save_f2;
430 register double real_f3 __asm__("$f3"); volatile double save_f3;
431 register double real_f4 __asm__("$f4"); volatile double save_f4;
432 register double real_f5 __asm__("$f5"); volatile double save_f5;
433 register double real_f6 __asm__("$f6"); volatile double save_f6;
434 register double real_f7 __asm__("$f7"); volatile double save_f7;
435 #ifdef alpha_EXTRA_CAREFUL
436 register double real_f8 __asm__("$f8"); volatile double save_f8;
437 register double real_f9 __asm__("$f9"); volatile double save_f9;
438 #endif
439
440 register StgFunPtr real_pv __asm__("$27");
441
442 StgRegTable * ret;
443
/* Step 1: stash the registers in the volatile locals. */
444 save_ra = real_ra;
445 save_gp = real_gp;
446
447 save_s0 = real_s0;
448 save_s1 = real_s1;
449 save_s2 = real_s2;
450 save_s3 = real_s3;
451 save_s4 = real_s4;
452 save_s5 = real_s5;
453 #ifdef alpha_EXTRA_CAREFUL
454 save_s6 = real_s6;
455 #endif
456
457 save_f2 = real_f2;
458 save_f3 = real_f3;
459 save_f4 = real_f4;
460 save_f5 = real_f5;
461 save_f6 = real_f6;
462 save_f7 = real_f7;
463 #ifdef alpha_EXTRA_CAREFUL
464 save_f8 = real_f8;
465 save_f9 = real_f9;
466 #endif
467
/* Step 2: the jump target must be in $27 before the asm below runs. */
468 real_pv = f;
469
470 __asm__ volatile( "lda $30,-%0($30)" "\n"
471 "\t" "jmp ($27)" "\n"
472 "\t" ".align 3" "\n"
473 ".globl " STG_RETURN "\n"
474 STG_RETURN ":" "\n"
475 "\t" "lda $30,%0($30)" "\n"
476 : : "K" (RESERVED_C_STACK_BYTES));
477
/* Step 4: read the result out of $14 before that register is restored. */
478 ret = real_s5;
479
480 real_s0 = save_s0;
481 real_s1 = save_s1;
482 real_s2 = save_s2;
483 real_s3 = save_s3;
484 real_s4 = save_s4;
485 real_s5 = save_s5;
486 #ifdef alpha_EXTRA_CAREFUL
487 real_s6 = save_s6;
488 #endif
489
490 real_f2 = save_f2;
491 real_f3 = save_f3;
492 real_f4 = save_f4;
493 real_f5 = save_f5;
494 real_f6 = save_f6;
495 real_f7 = save_f7;
496 #ifdef alpha_EXTRA_CAREFUL
497 real_f8 = save_f8;
498 real_f9 = save_f9;
499 #endif
500
501 real_ra = save_ra;
502 real_gp = save_gp;
503
504 return ret;
505 }
506
507 #endif /* alpha_HOST_ARCH */
508
509 /* -----------------------------------------------------------------------------
510 HP-PA architecture
511 -------------------------------------------------------------------------- */
512
513 #ifdef hppa1_1_HOST_ARCH
514
/*
 * HP-PA StgRun.
 *
 * The first asm statement computes a base address in %r19 as %r30 plus the
 * (negative) constant operand, then stores %r3..%r18 at offsets 0..60 from
 * it and %fr12..%fr21 in the following ten doubleword slots (offsets
 * 64..136 from the same base).  f() is then called.  The second asm
 * statement defines the exported label STG_RETURN, recomputes the same
 * base, copies %r11 into "ret", and reloads every register stored above.
 *
 * "basereg" is not referenced anywhere in this body.
 *
 * NOTE(review): "space" is sized as RESERVED_C_STACK_BYTES + 16 longs +
 * 10 doubles, but the save-area offset is written with 116 longs + 10
 * doubles.  The two only describe the same layout if
 * RESERVED_C_STACK_BYTES == 100 * sizeof(long) -- confirm, since the value
 * of RESERVED_C_STACK_BYTES is not visible in this file.
 */
515 StgRegTable *
516 StgRun(StgFunPtr f, StgRegTable *basereg)
517 {
518 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
519 StgRegTable * ret;
520
521 __asm__ volatile ("ldo %0(%%r30),%%r19\n"
522 "\tstw %%r3, 0(0,%%r19)\n"
523 "\tstw %%r4, 4(0,%%r19)\n"
524 "\tstw %%r5, 8(0,%%r19)\n"
525 "\tstw %%r6,12(0,%%r19)\n"
526 "\tstw %%r7,16(0,%%r19)\n"
527 "\tstw %%r8,20(0,%%r19)\n"
528 "\tstw %%r9,24(0,%%r19)\n"
529 "\tstw %%r10,28(0,%%r19)\n"
530 "\tstw %%r11,32(0,%%r19)\n"
531 "\tstw %%r12,36(0,%%r19)\n"
532 "\tstw %%r13,40(0,%%r19)\n"
533 "\tstw %%r14,44(0,%%r19)\n"
534 "\tstw %%r15,48(0,%%r19)\n"
535 "\tstw %%r16,52(0,%%r19)\n"
536 "\tstw %%r17,56(0,%%r19)\n"
537 "\tstw %%r18,60(0,%%r19)\n"
/* %r19 is advanced so the doubles can be addressed at -16, -8, 0 and 8 */
538 "\tldo 80(%%r19),%%r19\n"
539 "\tfstds %%fr12,-16(0,%%r19)\n"
540 "\tfstds %%fr13, -8(0,%%r19)\n"
541 "\tfstds %%fr14, 0(0,%%r19)\n"
542 "\tfstds %%fr15, 8(0,%%r19)\n"
543 "\tldo 32(%%r19),%%r19\n"
544 "\tfstds %%fr16,-16(0,%%r19)\n"
545 "\tfstds %%fr17, -8(0,%%r19)\n"
546 "\tfstds %%fr18, 0(0,%%r19)\n"
547 "\tfstds %%fr19, 8(0,%%r19)\n"
548 "\tldo 32(%%r19),%%r19\n"
549 "\tfstds %%fr20,-16(0,%%r19)\n"
550 "\tfstds %%fr21, -8(0,%%r19)\n" : :
551 "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
552 );
553
554 f();
555
556 __asm__ volatile (".align 4\n"
557 "\t.EXPORT " STG_RETURN ",CODE\n"
558 "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
559 STG_RETURN "\n"
560 /* "\tldo %0(%%r3),%%r19\n" */
561 "\tldo %1(%%r30),%%r19\n"
562 "\tcopy %%r11, %0\n" /* save R1 */
563 "\tldw 0(0,%%r19),%%r3\n"
564 "\tldw 4(0,%%r19),%%r4\n"
565 "\tldw 8(0,%%r19),%%r5\n"
566 "\tldw 12(0,%%r19),%%r6\n"
567 "\tldw 16(0,%%r19),%%r7\n"
568 "\tldw 20(0,%%r19),%%r8\n"
569 "\tldw 24(0,%%r19),%%r9\n"
570 "\tldw 28(0,%%r19),%%r10\n"
571 "\tldw 32(0,%%r19),%%r11\n"
572 "\tldw 36(0,%%r19),%%r12\n"
573 "\tldw 40(0,%%r19),%%r13\n"
574 "\tldw 44(0,%%r19),%%r14\n"
575 "\tldw 48(0,%%r19),%%r15\n"
576 "\tldw 52(0,%%r19),%%r16\n"
577 "\tldw 56(0,%%r19),%%r17\n"
578 "\tldw 60(0,%%r19),%%r18\n"
579 "\tldo 80(%%r19),%%r19\n"
580 "\tfldds -16(0,%%r19),%%fr12\n"
581 "\tfldds -8(0,%%r19),%%fr13\n"
582 "\tfldds 0(0,%%r19),%%fr14\n"
583 "\tfldds 8(0,%%r19),%%fr15\n"
584 "\tldo 32(%%r19),%%r19\n"
585 "\tfldds -16(0,%%r19),%%fr16\n"
586 "\tfldds -8(0,%%r19),%%fr17\n"
587 "\tfldds 0(0,%%r19),%%fr18\n"
588 "\tfldds 8(0,%%r19),%%fr19\n"
589 "\tldo 32(%%r19),%%r19\n"
590 "\tfldds -16(0,%%r19),%%fr20\n"
591 "\tfldds -8(0,%%r19),%%fr21\n"
592 : "=r" (ret)
593 : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
594 : "%r19"
595 );
596
597 return ret;
598 }
599
600 #endif /* hppa1_1_HOST_ARCH */
601
602 /* -----------------------------------------------------------------------------
603 PowerPC architecture
604
605 Everything is in assembler, so we don't have to deal with GCC...
606
607 -------------------------------------------------------------------------- */
608
609 #ifdef powerpc_HOST_ARCH
610
611 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
612
613 #ifdef darwin_HOST_OS
/*
 * PowerPC / Darwin: container for the _StgRun / _StgReturn entry points.
 *
 * The asm statement defines both global labels itself.
 *
 * _StgRun:    copies LR to r0, calls the saveFP helper, stores r13..r31
 *             with stmw at -220(r1), pushes a frame of
 *             RESERVED_C_STACK_BYTES+224 bytes with stwu, copies r4 into
 *             r27 (BaseReg), and branches to the address in r3 via CTR
 *             (r3 is also copied to r12).
 * _StgReturn: copies r14 into r3, pops the frame, reloads r13..r31 with
 *             lmw and branches to the restFP helper.
 *
 * saveFP / restFP are not defined in this file; presumably they are the
 * system-provided FP register save/restore helpers and restFP performs the
 * final return to the caller -- TODO confirm.
 */
614 void StgRunIsImplementedInAssembler(void)
615 {
616 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
617 // if the toolchain supports deadstripping, we have to
618 // prevent it here (it tends to get confused here).
619 __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
620 #endif
621 __asm__ volatile (
622 "\n.globl _StgRun\n"
623 "_StgRun:\n"
624 "\tmflr r0\n"
625 "\tbl saveFP # f14\n"
626 "\tstmw r13,-220(r1)\n"
627 "\tstwu r1,-%0(r1)\n"
628 "\tmr r27,r4\n" // BaseReg == r27
629 "\tmtctr r3\n"
630 "\tmr r12,r3\n"
631 "\tbctr\n"
632 ".globl _StgReturn\n"
633 "_StgReturn:\n"
634 "\tmr r3,r14\n"
635 "\tla r1,%0(r1)\n"
636 "\tlmw r13,-220(r1)\n"
637 "\tb restFP # f14\n"
638 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
639 }
640 #else
641
642 // This version is for PowerPC Linux.
643
644 // Differences from the Darwin/Mac OS X version:
645 // *) Different Assembler Syntax
646 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
647 // *) We may not access positive stack offsets
648 // (no "Red Zone" as in the Darwin ABI)
649 // *) The Link Register is saved to a different offset in the caller's stack frame
650 // (Linux: 4(r1), Darwin 8(r1))
651
/*
 * PowerPC / Linux: container for the StgRun / StgReturn entry points.
 *
 * Nothing in this file calls this C function; the asm statement defines
 * both global labels itself.
 *
 * StgRun:    stores LR at 4(r1), keeps the entry stack pointer in r5,
 *            pushes a frame of RESERVED_C_STACK_BYTES+224 bytes with stwu,
 *            stores r13..r31 (stmw at -220(r5)) and f14..f31 (stfd at
 *            -144(r5)..-8(r5)) below the entry stack pointer, copies r4
 *            into r27 (BaseReg) and branches to the address in r3 via CTR
 *            (r3 is also copied to r12).
 * StgReturn: copies r14 into r3, recomputes the entry stack pointer in r5,
 *            reloads the same registers, makes r5 the stack pointer again,
 *            reloads LR from 4(r1) and returns with blr.
 */
652 static void GNUC3_ATTRIBUTE(used)
653 StgRunIsImplementedInAssembler(void)
654 {
655 __asm__ volatile (
656 "\t.globl StgRun\n"
657 "\t.type StgRun,@function\n"
658 "StgRun:\n"
659 "\tmflr 0\n"
660 "\tstw 0,4(1)\n"
661 "\tmr 5,1\n"
662 "\tstwu 1,-%0(1)\n"
663 "\tstmw 13,-220(5)\n"
664 "\tstfd 14,-144(5)\n"
665 "\tstfd 15,-136(5)\n"
666 "\tstfd 16,-128(5)\n"
667 "\tstfd 17,-120(5)\n"
668 "\tstfd 18,-112(5)\n"
669 "\tstfd 19,-104(5)\n"
670 "\tstfd 20,-96(5)\n"
671 "\tstfd 21,-88(5)\n"
672 "\tstfd 22,-80(5)\n"
673 "\tstfd 23,-72(5)\n"
674 "\tstfd 24,-64(5)\n"
675 "\tstfd 25,-56(5)\n"
676 "\tstfd 26,-48(5)\n"
677 "\tstfd 27,-40(5)\n"
678 "\tstfd 28,-32(5)\n"
679 "\tstfd 29,-24(5)\n"
680 "\tstfd 30,-16(5)\n"
681 "\tstfd 31,-8(5)\n"
682 "\tmr 27,4\n" // BaseReg == r27
683 "\tmtctr 3\n"
684 "\tmr 12,3\n"
685 "\tbctr\n"
686 ".globl StgReturn\n"
687 "\t.type StgReturn,@function\n"
688 "StgReturn:\n"
689 "\tmr 3,14\n"
690 "\tla 5,%0(1)\n"
691 "\tlmw 13,-220(5)\n"
692 "\tlfd 14,-144(5)\n"
693 "\tlfd 15,-136(5)\n"
694 "\tlfd 16,-128(5)\n"
695 "\tlfd 17,-120(5)\n"
696 "\tlfd 18,-112(5)\n"
697 "\tlfd 19,-104(5)\n"
698 "\tlfd 20,-96(5)\n"
699 "\tlfd 21,-88(5)\n"
700 "\tlfd 22,-80(5)\n"
701 "\tlfd 23,-72(5)\n"
702 "\tlfd 24,-64(5)\n"
703 "\tlfd 25,-56(5)\n"
704 "\tlfd 26,-48(5)\n"
705 "\tlfd 27,-40(5)\n"
706 "\tlfd 28,-32(5)\n"
707 "\tlfd 29,-24(5)\n"
708 "\tlfd 30,-16(5)\n"
709 "\tlfd 31,-8(5)\n"
710 "\tmr 1,5\n"
711 "\tlwz 0,4(1)\n"
712 "\tmtlr 0\n"
713 "\tblr\n"
714 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
715 }
716 #endif
717
718 #endif
719
720 /* -----------------------------------------------------------------------------
721 PowerPC 64 architecture
722
723 Everything is in assembler, so we don't have to deal with GCC...
724
725 -------------------------------------------------------------------------- */
726
727 #ifdef powerpc64_HOST_ARCH
728
729 #ifdef linux_HOST_OS
730 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
731
/*
 * PowerPC 64 / Linux: container for the StgRun / StgReturn entry points.
 *
 * Nothing in this file calls this C function.  The asm statement first
 * emits, in the ".opd" section, one 24-byte entry for each of StgRun and
 * StgReturn (code address .StgRun / .StgReturn, .TOC.@tocbase, 0), then
 * switches back to the previous section for the code itself.
 *
 * .StgRun:    stores LR at 16(r1), keeps the entry stack pointer in r5,
 *             pushes a frame of RESERVED_C_STACK_BYTES+304 bytes with
 *             stdu, stores r2 and r14..r31 (std at -296(r5)..-152(r5)) and
 *             f14..f31 (stfd at -144(r5)..-8(r5)), copies r4 into r27
 *             (BaseReg), then loads r2 from 8(r3) and the branch target
 *             from 0(r3) and branches via CTR.
 * .StgReturn: copies r14 into r3, recomputes the entry stack pointer in
 *             r5, reloads the same registers, makes r5 the stack pointer
 *             again, reloads LR from 16(r1) and returns with blr.
 */
732 static void GNUC3_ATTRIBUTE(used)
733 StgRunIsImplementedInAssembler(void)
734 {
735 // r0 volatile
736 // r1 stack pointer
737 // r2 toc - needs to be saved
738 // r3-r10 argument passing, volatile
739 // r11, r12 very volatile (not saved across cross-module calls)
740 // r13 thread local state (never modified, don't need to save)
741 // r14-r31 callee-save
742 __asm__ volatile (
743 ".section \".opd\",\"aw\"\n"
744 ".align 3\n"
745 ".globl StgRun\n"
746 "StgRun:\n"
747 "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
748 "\t.size StgRun,24\n"
749 ".globl StgReturn\n"
750 "StgReturn:\n"
751 "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
752 "\t.size StgReturn,24\n"
753 ".previous\n"
754 ".globl .StgRun\n"
755 ".type .StgRun,@function\n"
756 ".StgRun:\n"
757 "\tmflr 0\n"
758 "\tmr 5, 1\n"
759 "\tstd 0, 16(1)\n"
760 "\tstdu 1, -%0(1)\n"
761 "\tstd 2, -296(5)\n"
762 "\tstd 14, -288(5)\n"
763 "\tstd 15, -280(5)\n"
764 "\tstd 16, -272(5)\n"
765 "\tstd 17, -264(5)\n"
766 "\tstd 18, -256(5)\n"
767 "\tstd 19, -248(5)\n"
768 "\tstd 20, -240(5)\n"
769 "\tstd 21, -232(5)\n"
770 "\tstd 22, -224(5)\n"
771 "\tstd 23, -216(5)\n"
772 "\tstd 24, -208(5)\n"
773 "\tstd 25, -200(5)\n"
774 "\tstd 26, -192(5)\n"
775 "\tstd 27, -184(5)\n"
776 "\tstd 28, -176(5)\n"
777 "\tstd 29, -168(5)\n"
778 "\tstd 30, -160(5)\n"
779 "\tstd 31, -152(5)\n"
780 "\tstfd 14, -144(5)\n"
781 "\tstfd 15, -136(5)\n"
782 "\tstfd 16, -128(5)\n"
783 "\tstfd 17, -120(5)\n"
784 "\tstfd 18, -112(5)\n"
785 "\tstfd 19, -104(5)\n"
786 "\tstfd 20, -96(5)\n"
787 "\tstfd 21, -88(5)\n"
788 "\tstfd 22, -80(5)\n"
789 "\tstfd 23, -72(5)\n"
790 "\tstfd 24, -64(5)\n"
791 "\tstfd 25, -56(5)\n"
792 "\tstfd 26, -48(5)\n"
793 "\tstfd 27, -40(5)\n"
794 "\tstfd 28, -32(5)\n"
795 "\tstfd 29, -24(5)\n"
796 "\tstfd 30, -16(5)\n"
797 "\tstfd 31, -8(5)\n"
798 "\tmr 27, 4\n" // BaseReg == r27
/* r3 is dereferenced here: r2 comes from 8(r3), the jump target from 0(r3) */
799 "\tld 2, 8(3)\n"
800 "\tld 3, 0(3)\n"
801 "\tmtctr 3\n"
802 "\tbctr\n"
803 ".globl .StgReturn\n"
804 ".type .StgReturn,@function\n"
805 ".StgReturn:\n"
806 "\tmr 3,14\n"
807 "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
808 "\tld 2, -296(5)\n"
809 "\tld 14, -288(5)\n"
810 "\tld 15, -280(5)\n"
811 "\tld 16, -272(5)\n"
812 "\tld 17, -264(5)\n"
813 "\tld 18, -256(5)\n"
814 "\tld 19, -248(5)\n"
815 "\tld 20, -240(5)\n"
816 "\tld 21, -232(5)\n"
817 "\tld 22, -224(5)\n"
818 "\tld 23, -216(5)\n"
819 "\tld 24, -208(5)\n"
820 "\tld 25, -200(5)\n"
821 "\tld 26, -192(5)\n"
822 "\tld 27, -184(5)\n"
823 "\tld 28, -176(5)\n"
824 "\tld 29, -168(5)\n"
825 "\tld 30, -160(5)\n"
826 "\tld 31, -152(5)\n"
827 "\tlfd 14, -144(5)\n"
828 "\tlfd 15, -136(5)\n"
829 "\tlfd 16, -128(5)\n"
830 "\tlfd 17, -120(5)\n"
831 "\tlfd 18, -112(5)\n"
832 "\tlfd 19, -104(5)\n"
833 "\tlfd 20, -96(5)\n"
834 "\tlfd 21, -88(5)\n"
835 "\tlfd 22, -80(5)\n"
836 "\tlfd 23, -72(5)\n"
837 "\tlfd 24, -64(5)\n"
838 "\tlfd 25, -56(5)\n"
839 "\tlfd 26, -48(5)\n"
840 "\tlfd 27, -40(5)\n"
841 "\tlfd 28, -32(5)\n"
842 "\tlfd 29, -24(5)\n"
843 "\tlfd 30, -16(5)\n"
844 "\tlfd 31, -8(5)\n"
845 "\tmr 1, 5\n"
846 "\tld 0, 16(1)\n"
847 "\tmtlr 0\n"
848 "\tblr\n"
849 : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
850 }
851 #else // linux_HOST_OS
852 #error Only linux support for power64 right now.
853 #endif
854
855 #endif
856
857 /* -----------------------------------------------------------------------------
858 IA64 architecture
859
860 Again, in assembler - so we can fiddle with the register stack, and because
861 gcc doesn't handle asm-clobbered callee-saves correctly.
862
863 loc0 - loc15: preserved locals
864 loc16 - loc28: STG registers
865 loc29: saved ar.pfs
866 loc30: saved b0
867 loc31: saved gp (gcc 3.3 uses this slot)
868 loc32: saved ar.lc
869 loc33: saved pr
870 f2 - f5: preserved floating-point registers
871 f16 - f23: preserved floating-point registers
872 -------------------------------------------------------------------------- */
873
874 #ifdef ia64_HOST_ARCH
875
876 /* the memory stack is rarely used, so 16K is excessive */
877 #undef RESERVED_C_STACK_BYTES
878 #define RESERVED_C_STACK_BYTES 1024
879
880 /* We don't spill all the callee-save FP registers, only the ones that
881 * gcc has been observed to use */
882 #define PRESERVED_FP_REGISTERS 12
883
884 /* We always allocate 34 local and 8 output registers. As long as gcc used
885 * fewer than 32 locals, the mangler will adjust the stack frame accordingly. */
886 #define LOCALS 34
887
/*
 * IA64: container for the StgRun / StgReturn entry points.
 *
 * Nothing in this file calls this C function; the asm statement defines
 * both global labels itself.  Its three constant operands are:
 *   %0 = RESERVED_C_STACK_BYTES + PRESERVED_FP_REGISTERS*16
 *        (amount by which sp is lowered in StgRun and raised in StgReturn)
 *   %1 = LOCALS (number of local registers requested by alloc)
 *   %2 = PRESERVED_FP_REGISTERS
 *
 * StgRun:    allocates the register frame, loads the target address and gp
 *            through the pointer in r32, lowers sp, spills f16-f23 and
 *            f2-f5 through the two cursors r16/r17 (each advancing by 32),
 *            saves b0, ar.lc and pr in loc30, loc32 and loc33, and
 *            branches through b6.
 * StgReturn: moves loc16 into r8, refills the same FP registers, restores
 *            ar.pfs, b0, ar.lc and pr, raises sp and returns through b0.
 */
888 static void GNUC3_ATTRIBUTE(used)
889 StgRunIsImplementedInAssembler(void)
890 {
891 __asm__ volatile(
892 ".global StgRun\n"
893 "StgRun:\n"
894 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
895 "\tld8 r18 = [r32],8\n" /* get procedure address */
896 "\tadds sp = -%0, sp ;;\n" /* setup stack */
897 "\tld8 gp = [r32]\n" /* get procedure GP */
898 "\tadds r16 = %0-(%2*16), sp\n"
899 "\tadds r17 = %0-((%2-1)*16), sp ;;\n"
900 "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
901 "\tstf.spill [r17] = f17,32\n"
902 "\tmov b6 = r18 ;;\n" /* set target address */
903 "\tstf.spill [r16] = f18,32\n"
904 "\tstf.spill [r17] = f19,32\n"
905 "\tmov loc30 = b0 ;;\n" /* save return address */
906 "\tstf.spill [r16] = f20,32\n"
907 "\tstf.spill [r17] = f21,32 ;;\n"
908 "\tstf.spill [r16] = f22,32\n"
909 "\tstf.spill [r17] = f23,32\n"
910 "\tmov loc32 = ar.lc ;;\n" /* save loop counter */
911 "\tstf.spill [r16] = f2,32\n"
912 "\tstf.spill [r17] = f3,32\n"
913 "\tmov loc33 = pr ;;\n" /* save predicate registers */
914 "\tstf.spill [r16] = f4,32\n"
915 "\tstf.spill [r17] = f5,32\n"
916 "\tbr.few b6 ;;\n" /* branch to function */
917 ".global StgReturn\n"
918 "StgReturn:\n"
919 "\tmov r8 = loc16\n" /* return value in r8 */
920 "\tadds r16 = %0-(%2*16), sp\n"
921 "\tadds r17 = %0-((%2-1)*16), sp ;;\n"
922 "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
923 "\tldf.fill f17 = [r17],32\n"
924 "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
925 "\tldf.fill f18 = [r16],32\n"
926 "\tldf.fill f19 = [r17],32\n"
927 "\tmov b0 = loc30 ;;\n" /* restore return address */
928 "\tldf.fill f20 = [r16],32\n"
929 "\tldf.fill f21 = [r17],32\n"
930 "\tmov ar.lc = loc32 ;;\n" /* restore loop counter */
931 "\tldf.fill f22 = [r16],32\n"
932 "\tldf.fill f23 = [r17],32\n"
933 "\tmov pr = loc33 ;;\n" /* restore predicate registers */
934 "\tldf.fill f2 = [r16],32\n"
935 "\tldf.fill f3 = [r17],32\n"
936 "\tadds sp = %0, sp ;;\n" /* restore stack */
937 "\tldf.fill f4 = [r16],32\n"
938 "\tldf.fill f5 = [r17],32\n"
939 "\tbr.ret.sptk.many b0 ;;\n" /* return */
940 : : "i"(RESERVED_C_STACK_BYTES + PRESERVED_FP_REGISTERS*16),
941 "i"(LOCALS),
942 "i"(PRESERVED_FP_REGISTERS));
943 }
944
945 #endif
946
947 /* -----------------------------------------------------------------------------
948 MIPS architecture
949 -------------------------------------------------------------------------- */
950
951 #ifdef mips_HOST_ARCH
952
953 StgThreadReturnCode
954 StgRun(StgFunPtr f, StgRegTable *basereg)
955 {
956 register StgThreadReturnCode __v0 __asm__("$2");
957
958 __asm__ __volatile__(
959 " la $25, %1 \n"
960 " move $30, %2 \n"
961 " jr %1 \n"
962 " .align 3 \n"
963 " .globl " STG_RETURN " \n"
964 " .aent " STG_RETURN " \n"
965 STG_RETURN ": \n"
966 " move %0, $16 \n"
967 " move $3, $17 \n"
968 : "=r" (__v0),
969 : "r" (f), "r" (basereg)
970 "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
971 "$25", "$28", "$30",
972 "$f20", "$f22", "$f24", "$f26", "$f28", "$f30",
973 "memory");
974
975 return __v0;
976 }
977
978 #endif /* mips_HOST_ARCH */
979
980 #endif /* !USE_MINIINTERPRETER */