Tabs -> Spaces + formatting fixes
[ghc.git] / rts / StgCRun.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2011
4 *
5 * STG-to-C glue.
6 *
7 * To run an STG function from C land, call
8 *
9 * rv = StgRun(f,BaseReg);
10 *
11 * where "f" is the STG function to call, and BaseReg is the address of the
12 * RegTable for this run (we might have separate RegTables if we're running
13 * multiple threads on an SMP machine).
14 *
15 * In the end, "f" must JMP to StgReturn (defined below), passing the
16 * return-value "rv" in R1, to return to the caller of StgRun returning "rv" in
17 * whatever way C returns a value.
18 *
19 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any other registers
20 * (other than saving the C callee-saves registers). Instead, the called
21 * function "f" must do that in STG land.
22 *
23 * GCC will have assumed that pushing/popping of C-stack frames is going on
24 * when it generated its code, and used stack space accordingly. However, we
25 * actually {\em post-process away} all such stack-framery (see
26 * \tr{ghc/driver/ghc-asm.lprl}). Things will be OK however, if we initially
27 * make sure there are @RESERVED_C_STACK_BYTES@ on the C-stack to begin with,
28 * for local variables.
29 *
30 * -------------------------------------------------------------------------- */
31
32 #include "PosixSource.h"
33
34 /*
35 * We define the following (unused) global register variables, because for
36 * some reason gcc generates sub-optimal code for StgRun() on the Alpha
37 * (unnecessarily saving extra registers on the stack) if we don't.
38 *
39 * Why do it at the top of this file, rather than near StgRun() below? Because
40 * gcc doesn't let us define global register variables after any function
41 * definition has been read. Any point after #include "Stg.h" would be too
42 * late.
43 *
44 * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
45 * that we don't use but which are callee-save registers. The __divq() routine
46 * in libc.a clobbers $s6.
47 */
48 #include "ghcconfig.h"
/* These declarations only exist for the real (non-mini-interpreter) Alpha build. */
49 #ifndef USE_MINIINTERPRETER
50 #ifdef alpha_HOST_ARCH
/* Defined unconditionally here, so every alpha_EXTRA_CAREFUL section in this
 * file is compiled whenever this Alpha branch is. */
51 #define alpha_EXTRA_CAREFUL
/* Unused globals pinned to $26 and $29, the same registers the Alpha StgRun()
 * saves and restores through real_ra and real_gp. */
52 register long fake_ra __asm__("$26");
53 register long fake_gp __asm__("$29");
54 #ifdef alpha_EXTRA_CAREFUL
/* Unused globals pinned to $15 ($s6), $f8 and $f9. */
55 register long fake_s6 __asm__("$15");
56 register double fake_f8 __asm__("$f8");
57 register double fake_f9 __asm__("$f9");
58 #endif
59 #endif
60 #endif
61
62 /* include Stg.h first because we want real machine regs in here: we
63 * have to get the value of R1 back from Stg land to C land intact.
64 */
65 #define IN_STGCRUN 1
66 #include "Stg.h"
67 #include "Rts.h"
68
69 #include "StgRun.h"
70 #include "Capability.h"
71
72 #ifdef DEBUG
73 #include "RtsUtils.h"
74 #include "Printer.h"
75 #endif
76
77 #ifdef USE_MINIINTERPRETER
78
79 /* -----------------------------------------------------------------------------
80 any architecture (using miniinterpreter)
81 -------------------------------------------------------------------------- */
82
/*
 * StgRun for the mini-interpreter build: a plain C trampoline.
 *
 * Calls f, then calls whatever function pointer that call returned, and so
 * on until a call returns a null pointer. basereg is not used (it is marked
 * STG_UNUSED). The result is R1.p cast to StgRegTable *.
 */
83 StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
84 {
85 while (f) {
/* Trace the jump target; the statements are passed through
 * IF_DEBUG(interpreter, ...). */
86 IF_DEBUG(interpreter,
87 debugBelch("Jumping to ");
88 printPtr((P_)f); fflush(stdout);
89 debugBelch("\n");
90 );
/* Call the current function; its result is the next function to call. */
91 f = (StgFunPtr) (f)();
92 }
93 return (StgRegTable *)R1.p;
94 }
95
/*
 * StgReturn for the mini-interpreter build. It returns a null function
 * pointer, which is the value that ends the while (f) loop in StgRun().
 */
96 StgFunPtr StgReturn(void)
97 {
98 return 0;
99 }
100
101 #else /* !USE_MINIINTERPRETER */
102
103 #ifdef LEADING_UNDERSCORE
104 #define STG_RUN "_StgRun"
105 #define STG_RETURN "_StgReturn"
106 #else
107 #define STG_RUN "StgRun"
108 #define STG_RETURN "StgReturn"
109 #endif
110
111 /* -----------------------------------------------------------------------------
112 x86 architecture
113 -------------------------------------------------------------------------- */
114
115 #ifdef i386_HOST_ARCH
116
117 #ifdef darwin_HOST_OS
118 #define STG_GLOBAL ".globl "
119 #else
120 #define STG_GLOBAL ".global "
121 #endif
122
/*
 * i386: container for the assembly that defines the global symbols STG_RUN
 * and STG_RETURN. The C function itself has no other statements; presumably
 * GNUC3_ATTRIBUTE(used) is what keeps the compiler from discarding it.
 *
 * The single "i" operand (%0) is the size of the frame that STG_RUN reserves
 * and STG_RETURN releases. The four saved registers live in the top 16 bytes
 * of that frame (at %esp + %0 - 16).
 */
123 static void GNUC3_ATTRIBUTE(used)
124 StgRunIsImplementedInAssembler(void)
125 {
126 __asm__ volatile (
127 STG_GLOBAL STG_RUN "\n"
128 STG_RUN ":\n\t"
129
130 /*
131 * move %esp down to reserve an area for temporary storage
132 * during the execution of STG code.
133 *
134 * The stack pointer has to be aligned to a multiple of 16
135 * bytes from here - this is a requirement of the C ABI, so
136 * that C code can assign SSE2 registers directly to/from
137 * stack locations.
138 */
139 "subl %0, %%esp\n\t"
140
141 /*
142 * save callee-saves registers on behalf of the STG code.
143 * %eax = %esp + %0 - 16, i.e. 16 bytes below the %esp seen on entry.
144 */
144 "movl %%esp, %%eax\n\t"
145 "addl %0-16, %%eax\n\t"
146 "movl %%ebx,0(%%eax)\n\t"
147 "movl %%esi,4(%%eax)\n\t"
148 "movl %%edi,8(%%eax)\n\t"
149 "movl %%ebp,12(%%eax)\n\t"
150 /*
151 * Set BaseReg (%ebx) from 24(%eax), which is 8 bytes above the entry %esp
152 */
153 "movl 24(%%eax),%%ebx\n\t"
154 /*
155 * grab the function argument from the stack (4 bytes above the entry %esp)
156 */
157 "movl 20(%%eax),%%eax\n\t"
158 /*
159 * jump to it
160 */
161 "jmp *%%eax\n\t"
162
163 STG_GLOBAL STG_RETURN "\n"
164 STG_RETURN ":\n\t"
165
166 "movl %%esi, %%eax\n\t" /* Return value in R1 */
167
168 /*
169 * restore callee-saves registers. (Don't stomp on %%eax!)
170 */
171 "movl %%esp, %%edx\n\t"
172 "addl %0-16, %%edx\n\t"
173 "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
174 "movl 4(%%edx),%%esi\n\t"
175 "movl 8(%%edx),%%edi\n\t"
176 "movl 12(%%edx),%%ebp\n\t"
177
178 "addl %0, %%esp\n\t"
179 "ret"
180
181 : : "i" (RESERVED_C_STACK_BYTES + 16)
182 // + 16 to make room for the 4 registers we have to save
183 // NOTE(review): an extra "+ 12" for 16-byte %esp alignment (#5250) was documented here and is still present in win32AllocStack(), but it is not part of this operand - confirm which is intended.
184 );
185 }
186
187 #if defined(mingw32_HOST_OS)
188 // On windows the stack has to be allocated 4k at a time, otherwise
189 // we get a segfault. The C compiler knows how to do this (it calls
190 // _alloca()), so we make sure that we can allocate as much stack as
191 // we need:
// NOTE(review): the returned pointer refers to a local array and is dangling
// once the function returns; presumably callers rely only on the stack being
// touched and never dereference the result - confirm.
// NOTE(review): the array size includes "+ 12", which the "i" operand of the
// i386 StgRun assembly (RESERVED_C_STACK_BYTES + 16) does not - confirm.
192 StgWord8 *win32AllocStack(void)
193 {
194 StgWord8 stack[RESERVED_C_STACK_BYTES + 16 + 12];
195 return stack;
196 }
197 #endif
198
199 #endif
200
201 /* ----------------------------------------------------------------------------
202 x86-64 is almost the same as plain x86.
203
204 I've done it using entirely inline assembler, because I couldn't
205 get gcc to generate the correct subtraction from %rsp by using
206 the local array variable trick. It didn't seem to reserve
207 enough space. Oh well, it's not much harder this way.
208
209 ------------------------------------------------------------------------- */
210
211 #ifdef x86_64_HOST_ARCH
212
213 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
214
/*
 * x86-64: container for the assembly that defines the global symbols STG_RUN
 * and STG_RETURN. The C function has no other statements.
 *
 * The "i" operand (%0) is the frame size. STG_RUN lowers %rsp by %0 and
 * stores %rbx, %rbp and %r12-%r15 in the top 48 bytes of that frame;
 * STG_RETURN reloads them from the same place and raises %rsp by %0.
 */
215 static void GNUC3_ATTRIBUTE(used)
216 StgRunIsImplementedInAssembler(void)
217 {
218 __asm__ volatile (
219 /*
220 * save callee-saves registers on behalf of the STG code.
221 */
222 ".globl " STG_RUN "\n"
223 STG_RUN ":\n\t"
224 "subq %0, %%rsp\n\t"
225 "movq %%rsp, %%rax\n\t"
226 "addq %0-48, %%rax\n\t"
227 "movq %%rbx,0(%%rax)\n\t"
228 "movq %%rbp,8(%%rax)\n\t"
229 "movq %%r12,16(%%rax)\n\t"
230 "movq %%r13,24(%%rax)\n\t"
231 "movq %%r14,32(%%rax)\n\t"
232 "movq %%r15,40(%%rax)\n\t"
233 /*
234 * Set BaseReg (%r13) from %rsi
235 */
236 "movq %%rsi,%%r13\n\t"
237 /*
238 * take the function argument from %rdi, and jump to it.
239 */
240 "movq %%rdi,%%rax\n\t"
241 "jmp *%%rax\n\t"
242
243 ".globl " STG_RETURN "\n"
244 STG_RETURN ":\n\t"
245
246 "movq %%rbx, %%rax\n\t" /* Return value in R1 */
247
248 /*
249 * restore callee-saves registers. (Don't stomp on %%rax!)
250 */
251 "movq %%rsp, %%rdx\n\t"
252 "addq %0-48, %%rdx\n\t"
253 "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
254 "movq 8(%%rdx),%%rbp\n\t"
255 "movq 16(%%rdx),%%r12\n\t"
256 "movq 24(%%rdx),%%r13\n\t"
257 "movq 32(%%rdx),%%r14\n\t"
258 "movq 40(%%rdx),%%r15\n\t"
259 "addq %0, %%rsp\n\t"
260 "retq"
261
262 : : "i"(RESERVED_C_STACK_BYTES + 48 /*stack frame size*/));
263 /*
264 The x86_64 ABI specifies that on entry to a procedure, %rsp is
265 aligned on a 16-byte boundary + 8. That is, the first
266 argument on the stack after the return address will be
267 16-byte aligned.
268
269 We maintain the 16+8 stack alignment throughout the STG code.
270
271 When we call STG_RUN the stack will be aligned to 16+8. We used
272 to subtract an extra 8 bytes so that %rsp would be 16 byte
273 aligned at all times in STG land. This worked fine for the
274 native code generator which knew that the stack was already
275 aligned on 16 bytes when it generated calls to C functions.
276
277 This arrangement caused problems for the LLVM backend. The LLVM
278 code generator would assume that on entry to each function the
279 stack is aligned to 16+8 as required by the ABI. However, since
280 we only enter STG functions by jumping to them with tail calls,
281 the stack was actually aligned to a 16-byte boundary. The LLVM
282 backend had its own mangler that would post-process the
283 assembly code to fixup the stack manipulation code to maintain
284 the correct alignment (see #4211).
285
286 Therefore, we now keep the stack aligned to 16+8 while in
287 STG land so that LLVM generates correct code without any
288 mangling. The native code generator can handle this alignment
289 just fine by making sure the stack is aligned to a 16-byte
290 boundary before it makes a C-call.
291
292 A quick way to see if this is wrong is to compile this code:
293
294 main = System.Exit.exitWith ExitSuccess
295
296 And run it with +RTS -sstderr. The stats code in the RTS, in
297 particular statsPrintf(), relies on the stack alignment because
298 it saves the %xmm regs on the stack, so it'll fall over if the
299 stack isn't aligned, and calling exitWith from Haskell invokes
300 shutdownHaskellAndExit using a C call.
301
302 */
303 }
304
305 #endif /* x86-64 */
306
307 /* -----------------------------------------------------------------------------
308 Sparc architecture
309
310 --
311 OLD COMMENT from GHC-3.02:
312
313 We want tailjumps to be calls, because `call xxx' is the only Sparc
314 branch that allows an arbitrary label as a target. (Gcc's ``goto
315 *target'' construct ends up loading the label into a register and
316 then jumping, at the cost of two extra instructions for the 32-bit
317 load.)
318
319 When entering the threaded world, we stash our return address in a
320 known location so that \tr{%i7} is available as an extra
321 callee-saves register. Of course, we have to restore this when
322 coming out of the threaded world.
323
324 I hate this god-forsaken architecture. Since the top of the
325 reserved stack space is used for globals and the bottom is reserved
326 for outgoing arguments, we have to stick our return address
327 somewhere in the middle. Currently, I'm allowing 100 extra
328 outgoing arguments beyond the first 6. --JSM
329
330 Updated info (GHC 4.06): we don't appear to use %i7 any more, so
331 I'm not sure whether we still need to save it. Incidentally, what
332 does the last paragraph above mean when it says "the top of the
333 stack is used for globals"? What globals? --SDM
334
335 Updated info (GHC 4.08.2): not saving %i7 any more (see below).
336 -------------------------------------------------------------------------- */
337
338 #ifdef sparc_HOST_ARCH
339
/*
 * Sparc StgRun: reserves RESERVED_C_STACK_BYTES of C stack in a local array,
 * calls f, and places the global STG_RETURN label directly after that call.
 * The value returned is R1.i cast to StgRegTable *.
 *
 * basereg is not referenced anywhere in the body. The two "#if 0" regions
 * are the disabled %i7 save/restore described in the comments below.
 */
340 StgRegTable *
341 StgRun(StgFunPtr f, StgRegTable *basereg) {
342
343 unsigned char space[RESERVED_C_STACK_BYTES];
344 #if 0
345 register void *i7 __asm__("%i7");
346 ((void **)(space))[100] = i7;
347 #endif
348 f();
349 __asm__ volatile (
350 ".align 4\n"
351 ".global " STG_RETURN "\n"
352 STG_RETURN ":"
353 : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
354 /* we tell the C compiler that l0-l7 are clobbered on return to
355 * StgReturn, otherwise it tries to use these to save eg. the
356 * address of space[100] across the call. The correct thing
357 * to do would be to save all the callee-saves regs, but we
358 * can't be bothered to do that.
359 *
360 * We also explicitly mark space as used since gcc eliminates it
361 * otherwise.
362 *
363 * The code that gcc generates for this little fragment is now
364 * terrible. We could do much better by coding it directly in
365 * assembler.
366 */
367 #if 0
368 /* updated 4.08.2: we don't save %i7 in the middle of the reserved
369 * space any more, since gcc tries to save its address across the
370 * call to f(), this gets clobbered in STG land and we end up
371 * dereferencing a bogus pointer in StgReturn.
372 */
373 __asm__ volatile ("ld %1,%0"
374 : "=r" (i7) : "m" (((void **)(space))[100]));
375 #endif
376 return (StgRegTable *)R1.i;
377 }
378
379 #endif
380
381 /* -----------------------------------------------------------------------------
382 alpha architecture
383
384 "The stack pointer (SP) must at all times denote an address that has octaword
385 alignment. (This restriction has the side effect that the in-memory portion
386 of the argument list, if any, will start on an octaword boundary.) Note that
387 the stack grows toward lower addresses. During a procedure invocation, SP
388 can never be set to a value that is higher than the value of SP at entry to
389 that procedure invocation.
390
391 "The contents of the stack, located above the portion of the argument list
392 (if any) that is passed in memory, belong to the calling procedure. Because
393 they are part of the calling procedure, they should not be read or written
394 by the called procedure, except as specified by indirect arguments or
395 language-controlled up-level references.
396
397 "The SP value might be used by the hardware when raising exceptions and
398 asynchronous interrupts. It must be assumed that the contents of the stack
399 below the current SP value and within the stack for the current thread are
400 continually and unpredictably modified, as specified in the _Alpha
401 Architecture Reference Manual_, and as a result of asynchronous software
402 actions."
403
404 -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
405 Alpha Systems, 5.1 edition, August 2000, section 3.2.1. http://www.
406 tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
407 -------------------------------------------------------------------------- */
408
409 #ifdef alpha_HOST_ARCH
410
/*
 * Alpha StgRun, written in C with explicit register variables.
 *
 * Sequence:
 *   1. Copy $26, $29, $9-$14 (plus $15 when alpha_EXTRA_CAREFUL) and
 *      $f2-$f7 (plus $f8, $f9 when alpha_EXTRA_CAREFUL) into volatile locals.
 *   2. Put f in $27, lower $30 by RESERVED_C_STACK_BYTES and jump through $27.
 *   3. At the STG_RETURN label, raise $30 back by the same amount.
 *   4. Take the result from $14 (real_s5), then reload every saved register.
 *
 * basereg is not referenced anywhere in the body.
 */
411 StgRegTable *
412 StgRun(StgFunPtr f, StgRegTable *basereg)
413 {
414 register long real_ra __asm__("$26"); volatile long save_ra;
415 register long real_gp __asm__("$29"); volatile long save_gp;
416
417 register long real_s0 __asm__("$9" ); volatile long save_s0;
418 register long real_s1 __asm__("$10"); volatile long save_s1;
419 register long real_s2 __asm__("$11"); volatile long save_s2;
420 register long real_s3 __asm__("$12"); volatile long save_s3;
421 register long real_s4 __asm__("$13"); volatile long save_s4;
422 register long real_s5 __asm__("$14"); volatile long save_s5;
423 #ifdef alpha_EXTRA_CAREFUL
424 register long real_s6 __asm__("$15"); volatile long save_s6;
425 #endif
426
427 register double real_f2 __asm__("$f2"); volatile double save_f2;
428 register double real_f3 __asm__("$f3"); volatile double save_f3;
429 register double real_f4 __asm__("$f4"); volatile double save_f4;
430 register double real_f5 __asm__("$f5"); volatile double save_f5;
431 register double real_f6 __asm__("$f6"); volatile double save_f6;
432 register double real_f7 __asm__("$f7"); volatile double save_f7;
433 #ifdef alpha_EXTRA_CAREFUL
434 register double real_f8 __asm__("$f8"); volatile double save_f8;
435 register double real_f9 __asm__("$f9"); volatile double save_f9;
436 #endif
437
438 register StgFunPtr real_pv __asm__("$27");
439
440 StgRegTable * ret;
441
/* Step 1: save the registers into the volatile locals. */
442 save_ra = real_ra;
443 save_gp = real_gp;
444
445 save_s0 = real_s0;
446 save_s1 = real_s1;
447 save_s2 = real_s2;
448 save_s3 = real_s3;
449 save_s4 = real_s4;
450 save_s5 = real_s5;
451 #ifdef alpha_EXTRA_CAREFUL
452 save_s6 = real_s6;
453 #endif
454
455 save_f2 = real_f2;
456 save_f3 = real_f3;
457 save_f4 = real_f4;
458 save_f5 = real_f5;
459 save_f6 = real_f6;
460 save_f7 = real_f7;
461 #ifdef alpha_EXTRA_CAREFUL
462 save_f8 = real_f8;
463 save_f9 = real_f9;
464 #endif
465
/* Step 2: $27 holds the jump target used by "jmp ($27)" below. */
466 real_pv = f;
467
/* Steps 2-3: reserve the stack area, jump to f; STG_RETURN releases it. */
468 __asm__ volatile( "lda $30,-%0($30)" "\n"
469 "\t" "jmp ($27)" "\n"
470 "\t" ".align 3" "\n"
471 ".globl " STG_RETURN "\n"
472 STG_RETURN ":" "\n"
473 "\t" "lda $30,%0($30)" "\n"
474 : : "K" (RESERVED_C_STACK_BYTES));
475
/* Step 4: read the result out of $14 before $14 itself is restored. */
476 ret = real_s5;
477
478 real_s0 = save_s0;
479 real_s1 = save_s1;
480 real_s2 = save_s2;
481 real_s3 = save_s3;
482 real_s4 = save_s4;
483 real_s5 = save_s5;
484 #ifdef alpha_EXTRA_CAREFUL
485 real_s6 = save_s6;
486 #endif
487
488 real_f2 = save_f2;
489 real_f3 = save_f3;
490 real_f4 = save_f4;
491 real_f5 = save_f5;
492 real_f6 = save_f6;
493 real_f7 = save_f7;
494 #ifdef alpha_EXTRA_CAREFUL
495 real_f8 = save_f8;
496 real_f9 = save_f9;
497 #endif
498
499 real_ra = save_ra;
500 real_gp = save_gp;
501
502 return ret;
503 }
504
505 #endif /* alpha_HOST_ARCH */
506
507 /* -----------------------------------------------------------------------------
508 HP-PA architecture
509 -------------------------------------------------------------------------- */
510
511 #ifdef hppa1_1_HOST_ARCH
512
/*
 * HP-PA StgRun.
 *
 * The first asm statement stores %r3-%r18 and %fr12-%fr21 into a save area
 * addressed through %r19, which is computed as %r30 plus a constant offset.
 * f is then called from C. The second asm statement defines and exports
 * STG_RETURN, copies %r11 into ret, and reloads the same registers from the
 * same save area. ret is returned.
 *
 * basereg is not referenced in the body, and the local array "space" is not
 * an operand of either asm statement.
 *
 * NOTE(review): the array is sized with 16*sizeof(long) while both asm
 * offsets use 116*sizeof(long) - confirm that the two are meant to differ.
 */
513 StgRegTable *
514 StgRun(StgFunPtr f, StgRegTable *basereg)
515 {
516 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
517 StgRegTable * ret;
518
/* Save sixteen general registers (4 bytes apart), then ten FP registers. */
519 __asm__ volatile ("ldo %0(%%r30),%%r19\n"
520 "\tstw %%r3, 0(0,%%r19)\n"
521 "\tstw %%r4, 4(0,%%r19)\n"
522 "\tstw %%r5, 8(0,%%r19)\n"
523 "\tstw %%r6,12(0,%%r19)\n"
524 "\tstw %%r7,16(0,%%r19)\n"
525 "\tstw %%r8,20(0,%%r19)\n"
526 "\tstw %%r9,24(0,%%r19)\n"
527 "\tstw %%r10,28(0,%%r19)\n"
528 "\tstw %%r11,32(0,%%r19)\n"
529 "\tstw %%r12,36(0,%%r19)\n"
530 "\tstw %%r13,40(0,%%r19)\n"
531 "\tstw %%r14,44(0,%%r19)\n"
532 "\tstw %%r15,48(0,%%r19)\n"
533 "\tstw %%r16,52(0,%%r19)\n"
534 "\tstw %%r17,56(0,%%r19)\n"
535 "\tstw %%r18,60(0,%%r19)\n"
536 "\tldo 80(%%r19),%%r19\n"
537 "\tfstds %%fr12,-16(0,%%r19)\n"
538 "\tfstds %%fr13, -8(0,%%r19)\n"
539 "\tfstds %%fr14, 0(0,%%r19)\n"
540 "\tfstds %%fr15, 8(0,%%r19)\n"
541 "\tldo 32(%%r19),%%r19\n"
542 "\tfstds %%fr16,-16(0,%%r19)\n"
543 "\tfstds %%fr17, -8(0,%%r19)\n"
544 "\tfstds %%fr18, 0(0,%%r19)\n"
545 "\tfstds %%fr19, 8(0,%%r19)\n"
546 "\tldo 32(%%r19),%%r19\n"
547 "\tfstds %%fr20,-16(0,%%r19)\n"
548 "\tfstds %%fr21, -8(0,%%r19)\n" : :
549 "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
550 );
551
552 f();
553
/* STG_RETURN: %0 is ret, %1 is the same offset used by the save code above. */
554 __asm__ volatile (".align 4\n"
555 "\t.EXPORT " STG_RETURN ",CODE\n"
556 "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
557 STG_RETURN "\n"
558 /* "\tldo %0(%%r3),%%r19\n" */
559 "\tldo %1(%%r30),%%r19\n"
560 "\tcopy %%r11, %0\n" /* save R1 */
561 "\tldw 0(0,%%r19),%%r3\n"
562 "\tldw 4(0,%%r19),%%r4\n"
563 "\tldw 8(0,%%r19),%%r5\n"
564 "\tldw 12(0,%%r19),%%r6\n"
565 "\tldw 16(0,%%r19),%%r7\n"
566 "\tldw 20(0,%%r19),%%r8\n"
567 "\tldw 24(0,%%r19),%%r9\n"
568 "\tldw 28(0,%%r19),%%r10\n"
569 "\tldw 32(0,%%r19),%%r11\n"
570 "\tldw 36(0,%%r19),%%r12\n"
571 "\tldw 40(0,%%r19),%%r13\n"
572 "\tldw 44(0,%%r19),%%r14\n"
573 "\tldw 48(0,%%r19),%%r15\n"
574 "\tldw 52(0,%%r19),%%r16\n"
575 "\tldw 56(0,%%r19),%%r17\n"
576 "\tldw 60(0,%%r19),%%r18\n"
577 "\tldo 80(%%r19),%%r19\n"
578 "\tfldds -16(0,%%r19),%%fr12\n"
579 "\tfldds -8(0,%%r19),%%fr13\n"
580 "\tfldds 0(0,%%r19),%%fr14\n"
581 "\tfldds 8(0,%%r19),%%fr15\n"
582 "\tldo 32(%%r19),%%r19\n"
583 "\tfldds -16(0,%%r19),%%fr16\n"
584 "\tfldds -8(0,%%r19),%%fr17\n"
585 "\tfldds 0(0,%%r19),%%fr18\n"
586 "\tfldds 8(0,%%r19),%%fr19\n"
587 "\tldo 32(%%r19),%%r19\n"
588 "\tfldds -16(0,%%r19),%%fr20\n"
589 "\tfldds -8(0,%%r19),%%fr21\n"
590 : "=r" (ret)
591 : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
592 : "%r19"
593 );
594
595 return ret;
596 }
597
598 #endif /* hppa1_1_HOST_ARCH */
599
600 /* -----------------------------------------------------------------------------
601 PowerPC architecture
602
603 Everything is in assembler, so we don't have to deal with GCC...
604
605 -------------------------------------------------------------------------- */
606
607 #ifdef powerpc_HOST_ARCH
608
609 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
610
611 #ifdef darwin_HOST_OS
/*
 * PowerPC / Darwin: container for the assembly that defines _StgRun and
 * _StgReturn.
 *
 * _StgRun moves the link register into r0, calls saveFP, stores r13 and up
 * with stmw at -220(r1), pushes a frame of %0 bytes with stwu, copies r4 into
 * r27 (BaseReg) and jumps to the address in r3 through CTR (r3 is also copied
 * to r12). _StgReturn copies r14 into r3, pops the frame, reloads the
 * registers with lmw and branches to restFP.
 */
612 void StgRunIsImplementedInAssembler(void)
613 {
614 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
615 // if the toolchain supports deadstripping, we have to
616 // prevent it here (it tends to get confused here).
617 __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
618 #endif
619 __asm__ volatile (
620 "\n.globl _StgRun\n"
621 "_StgRun:\n"
622 "\tmflr r0\n"
623 "\tbl saveFP # f14\n"
624 "\tstmw r13,-220(r1)\n"
625 "\tstwu r1,-%0(r1)\n"
626 "\tmr r27,r4\n" // BaseReg == r27
627 "\tmtctr r3\n"
628 "\tmr r12,r3\n"
629 "\tbctr\n"
630 ".globl _StgReturn\n"
631 "_StgReturn:\n"
632 "\tmr r3,r14\n"
633 "\tla r1,%0(r1)\n"
634 "\tlmw r13,-220(r1)\n"
635 "\tb restFP # f14\n"
636 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
637 }
638 #else
639
640 // This version is for PowerPC Linux.
641
642 // Differences from the Darwin/Mac OS X version:
643 // *) Different Assembler Syntax
644 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
645 // *) We may not access positive stack offsets
646 // (no "Red Zone" as in the Darwin ABI)
647 // *) The Link Register is saved to a different offset in the caller's stack frame
648 // (Linux: 4(r1), Darwin 8(r1))
649
/*
 * PowerPC / Linux: container for the assembly that defines StgRun and
 * StgReturn.
 *
 * StgRun stores the link register at 4(r1), keeps the entry stack pointer in
 * r5, pushes a frame of %0 bytes, and saves r13-r31 (stmw) and f14-f31 (stfd)
 * at negative offsets from r5. It then copies r4 into r27 (BaseReg) and jumps
 * to the address in r3 through CTR (r3 is also copied to r12).
 *
 * StgReturn copies r14 into r3, recomputes r5 as r1 + %0, reloads the same
 * registers, restores r1 and the link register, and returns with blr.
 */
650 static void GNUC3_ATTRIBUTE(used)
651 StgRunIsImplementedInAssembler(void)
652 {
653 __asm__ volatile (
654 "\t.globl StgRun\n"
655 "\t.type StgRun,@function\n"
656 "StgRun:\n"
657 "\tmflr 0\n"
658 "\tstw 0,4(1)\n"
659 "\tmr 5,1\n"
660 "\tstwu 1,-%0(1)\n"
661 "\tstmw 13,-220(5)\n"
662 "\tstfd 14,-144(5)\n"
663 "\tstfd 15,-136(5)\n"
664 "\tstfd 16,-128(5)\n"
665 "\tstfd 17,-120(5)\n"
666 "\tstfd 18,-112(5)\n"
667 "\tstfd 19,-104(5)\n"
668 "\tstfd 20,-96(5)\n"
669 "\tstfd 21,-88(5)\n"
670 "\tstfd 22,-80(5)\n"
671 "\tstfd 23,-72(5)\n"
672 "\tstfd 24,-64(5)\n"
673 "\tstfd 25,-56(5)\n"
674 "\tstfd 26,-48(5)\n"
675 "\tstfd 27,-40(5)\n"
676 "\tstfd 28,-32(5)\n"
677 "\tstfd 29,-24(5)\n"
678 "\tstfd 30,-16(5)\n"
679 "\tstfd 31,-8(5)\n"
680 "\tmr 27,4\n" // BaseReg == r27
681 "\tmtctr 3\n"
682 "\tmr 12,3\n"
683 "\tbctr\n"
684 ".globl StgReturn\n"
685 "\t.type StgReturn,@function\n"
686 "StgReturn:\n"
687 "\tmr 3,14\n"
688 "\tla 5,%0(1)\n"
689 "\tlmw 13,-220(5)\n"
690 "\tlfd 14,-144(5)\n"
691 "\tlfd 15,-136(5)\n"
692 "\tlfd 16,-128(5)\n"
693 "\tlfd 17,-120(5)\n"
694 "\tlfd 18,-112(5)\n"
695 "\tlfd 19,-104(5)\n"
696 "\tlfd 20,-96(5)\n"
697 "\tlfd 21,-88(5)\n"
698 "\tlfd 22,-80(5)\n"
699 "\tlfd 23,-72(5)\n"
700 "\tlfd 24,-64(5)\n"
701 "\tlfd 25,-56(5)\n"
702 "\tlfd 26,-48(5)\n"
703 "\tlfd 27,-40(5)\n"
704 "\tlfd 28,-32(5)\n"
705 "\tlfd 29,-24(5)\n"
706 "\tlfd 30,-16(5)\n"
707 "\tlfd 31,-8(5)\n"
708 "\tmr 1,5\n"
709 "\tlwz 0,4(1)\n"
710 "\tmtlr 0\n"
711 "\tblr\n"
712 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
713 }
714 #endif
715
716 #endif
717
718 /* -----------------------------------------------------------------------------
719 PowerPC 64 architecture
720
721 Everything is in assembler, so we don't have to deal with GCC...
722
723 -------------------------------------------------------------------------- */
724
725 #ifdef powerpc64_HOST_ARCH
726
727 #ifdef linux_HOST_OS
728 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
729
/*
 * PowerPC64 / Linux: container for the assembly that defines StgRun and
 * StgReturn.
 *
 * The first part emits two 24-byte entries in the ".opd" section, each made
 * of three quads: the code address (.StgRun / .StgReturn), .TOC.@tocbase, 0.
 *
 * .StgRun stores the link register at 16(r1), keeps the entry stack pointer
 * in r5, pushes a frame of %0 bytes, and saves r2, r14-r31 and f14-f31 at
 * negative offsets from r5. It copies r4 into r27 (BaseReg), then treats r3
 * as a pointer to a two-word record: the word at 8(r3) is loaded into r2 and
 * the word at 0(r3) is the address jumped to through CTR.
 *
 * .StgReturn copies r14 into r3, recomputes r5 as r1 + %0, reloads the same
 * registers, restores r1 and the link register, and returns with blr.
 */
730 static void GNUC3_ATTRIBUTE(used)
731 StgRunIsImplementedInAssembler(void)
732 {
733 // r0 volatile
734 // r1 stack pointer
735 // r2 toc - needs to be saved
736 // r3-r10 argument passing, volatile
737 // r11, r12 very volatile (not saved across cross-module calls)
738 // r13 thread local state (never modified, don't need to save)
739 // r14-r31 callee-save
740 __asm__ volatile (
741 ".section \".opd\",\"aw\"\n"
742 ".align 3\n"
743 ".globl StgRun\n"
744 "StgRun:\n"
745 "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
746 "\t.size StgRun,24\n"
747 ".globl StgReturn\n"
748 "StgReturn:\n"
749 "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
750 "\t.size StgReturn,24\n"
751 ".previous\n"
752 ".globl .StgRun\n"
753 ".type .StgRun,@function\n"
754 ".StgRun:\n"
755 "\tmflr 0\n"
756 "\tmr 5, 1\n"
757 "\tstd 0, 16(1)\n"
758 "\tstdu 1, -%0(1)\n"
759 "\tstd 2, -296(5)\n"
760 "\tstd 14, -288(5)\n"
761 "\tstd 15, -280(5)\n"
762 "\tstd 16, -272(5)\n"
763 "\tstd 17, -264(5)\n"
764 "\tstd 18, -256(5)\n"
765 "\tstd 19, -248(5)\n"
766 "\tstd 20, -240(5)\n"
767 "\tstd 21, -232(5)\n"
768 "\tstd 22, -224(5)\n"
769 "\tstd 23, -216(5)\n"
770 "\tstd 24, -208(5)\n"
771 "\tstd 25, -200(5)\n"
772 "\tstd 26, -192(5)\n"
773 "\tstd 27, -184(5)\n"
774 "\tstd 28, -176(5)\n"
775 "\tstd 29, -168(5)\n"
776 "\tstd 30, -160(5)\n"
777 "\tstd 31, -152(5)\n"
778 "\tstfd 14, -144(5)\n"
779 "\tstfd 15, -136(5)\n"
780 "\tstfd 16, -128(5)\n"
781 "\tstfd 17, -120(5)\n"
782 "\tstfd 18, -112(5)\n"
783 "\tstfd 19, -104(5)\n"
784 "\tstfd 20, -96(5)\n"
785 "\tstfd 21, -88(5)\n"
786 "\tstfd 22, -80(5)\n"
787 "\tstfd 23, -72(5)\n"
788 "\tstfd 24, -64(5)\n"
789 "\tstfd 25, -56(5)\n"
790 "\tstfd 26, -48(5)\n"
791 "\tstfd 27, -40(5)\n"
792 "\tstfd 28, -32(5)\n"
793 "\tstfd 29, -24(5)\n"
794 "\tstfd 30, -16(5)\n"
795 "\tstfd 31, -8(5)\n"
796 "\tmr 27, 4\n" // BaseReg == r27
797 "\tld 2, 8(3)\n"
798 "\tld 3, 0(3)\n"
799 "\tmtctr 3\n"
800 "\tbctr\n"
801 ".globl .StgReturn\n"
802 ".type .StgReturn,@function\n"
803 ".StgReturn:\n"
804 "\tmr 3,14\n"
805 "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
806 "\tld 2, -296(5)\n"
807 "\tld 14, -288(5)\n"
808 "\tld 15, -280(5)\n"
809 "\tld 16, -272(5)\n"
810 "\tld 17, -264(5)\n"
811 "\tld 18, -256(5)\n"
812 "\tld 19, -248(5)\n"
813 "\tld 20, -240(5)\n"
814 "\tld 21, -232(5)\n"
815 "\tld 22, -224(5)\n"
816 "\tld 23, -216(5)\n"
817 "\tld 24, -208(5)\n"
818 "\tld 25, -200(5)\n"
819 "\tld 26, -192(5)\n"
820 "\tld 27, -184(5)\n"
821 "\tld 28, -176(5)\n"
822 "\tld 29, -168(5)\n"
823 "\tld 30, -160(5)\n"
824 "\tld 31, -152(5)\n"
825 "\tlfd 14, -144(5)\n"
826 "\tlfd 15, -136(5)\n"
827 "\tlfd 16, -128(5)\n"
828 "\tlfd 17, -120(5)\n"
829 "\tlfd 18, -112(5)\n"
830 "\tlfd 19, -104(5)\n"
831 "\tlfd 20, -96(5)\n"
832 "\tlfd 21, -88(5)\n"
833 "\tlfd 22, -80(5)\n"
834 "\tlfd 23, -72(5)\n"
835 "\tlfd 24, -64(5)\n"
836 "\tlfd 25, -56(5)\n"
837 "\tlfd 26, -48(5)\n"
838 "\tlfd 27, -40(5)\n"
839 "\tlfd 28, -32(5)\n"
840 "\tlfd 29, -24(5)\n"
841 "\tlfd 30, -16(5)\n"
842 "\tlfd 31, -8(5)\n"
843 "\tmr 1, 5\n"
844 "\tld 0, 16(1)\n"
845 "\tmtlr 0\n"
846 "\tblr\n"
847 : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
848 }
849 #else // linux_HOST_OS
850 #error Only linux support for power64 right now.
851 #endif
852
853 #endif
854
855 /* -----------------------------------------------------------------------------
856 IA64 architecture
857
858 Again, in assembler - so we can fiddle with the register stack, and because
859 gcc doesn't handle asm-clobbered callee-saves correctly.
860
861 loc0 - loc15: preserved locals
862 loc16 - loc28: STG registers
863 loc29: saved ar.pfs
864 loc30: saved b0
865 loc31: saved gp (gcc 3.3 uses this slot)
866 loc32: saved ar.lc
867 loc33: saved pr
868 f2 - f5: preserved floating-point registers
869 f16 - f23: preserved floating-point registers
870 -------------------------------------------------------------------------- */
871
872 #ifdef ia64_HOST_ARCH
873
874 /* the memory stack is rarely used, so 16K is excessive */
875 #undef RESERVED_C_STACK_BYTES
876 #define RESERVED_C_STACK_BYTES 1024
877
878 /* We don't spill all the callee-save FP registers, only the ones that
879 * gcc has been observed to use */
880 #define PRESERVED_FP_REGISTERS 12
881
882 /* We always allocate 34 local and 8 output registers. As long as gcc used
883 * fewer than 32 locals, the mangler will adjust the stack frame accordingly. */
884 #define LOCALS 34
885
/*
 * IA64: container for the assembly that defines StgRun and StgReturn.
 *
 * Operands: %0 = RESERVED_C_STACK_BYTES + PRESERVED_FP_REGISTERS*16 (the
 * memory-stack adjustment), %1 = LOCALS, %2 = PRESERVED_FP_REGISTERS.
 *
 * StgRun allocates the register frame, lowers sp by %0, spills f16-f23 and
 * f2-f5 through r16/r17 (each pointer advancing by 32 per store), saves
 * ar.pfs, b0, ar.lc and pr in loc29, loc30, loc32 and loc33, and branches to
 * the address loaded from [r32]. StgReturn puts loc16 in r8, refills the same
 * FP registers, restores the saved state and sp, and returns through b0.
 */
886 static void GNUC3_ATTRIBUTE(used)
887 StgRunIsImplementedInAssembler(void)
888 {
889 __asm__ volatile(
890 ".global StgRun\n"
891 "StgRun:\n"
892 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
893 "\tld8 r18 = [r32],8\n" /* get procedure address */
894 "\tadds sp = -%0, sp ;;\n" /* setup stack */
895 "\tld8 gp = [r32]\n" /* get procedure GP */
896 "\tadds r16 = %0-(%2*16), sp\n"
897 "\tadds r17 = %0-((%2-1)*16), sp ;;\n"
898 "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
899 "\tstf.spill [r17] = f17,32\n"
900 "\tmov b6 = r18 ;;\n" /* set target address */
901 "\tstf.spill [r16] = f18,32\n"
902 "\tstf.spill [r17] = f19,32\n"
903 "\tmov loc30 = b0 ;;\n" /* save return address */
904 "\tstf.spill [r16] = f20,32\n"
905 "\tstf.spill [r17] = f21,32 ;;\n"
906 "\tstf.spill [r16] = f22,32\n"
907 "\tstf.spill [r17] = f23,32\n"
908 "\tmov loc32 = ar.lc ;;\n" /* save loop counter */
909 "\tstf.spill [r16] = f2,32\n"
910 "\tstf.spill [r17] = f3,32\n"
911 "\tmov loc33 = pr ;;\n" /* save predicate registers */
912 "\tstf.spill [r16] = f4,32\n"
913 "\tstf.spill [r17] = f5,32\n"
914 "\tbr.few b6 ;;\n" /* branch to function */
915 ".global StgReturn\n"
916 "StgReturn:\n"
917 "\tmov r8 = loc16\n" /* return value in r8 */
918 "\tadds r16 = %0-(%2*16), sp\n"
919 "\tadds r17 = %0-((%2-1)*16), sp ;;\n"
920 "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
921 "\tldf.fill f17 = [r17],32\n"
922 "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
923 "\tldf.fill f18 = [r16],32\n"
924 "\tldf.fill f19 = [r17],32\n"
925 "\tmov b0 = loc30 ;;\n" /* restore return address */
926 "\tldf.fill f20 = [r16],32\n"
927 "\tldf.fill f21 = [r17],32\n"
928 "\tmov ar.lc = loc32 ;;\n" /* restore loop counter */
929 "\tldf.fill f22 = [r16],32\n"
930 "\tldf.fill f23 = [r17],32\n"
931 "\tmov pr = loc33 ;;\n" /* restore predicate registers */
932 "\tldf.fill f2 = [r16],32\n"
933 "\tldf.fill f3 = [r17],32\n"
934 "\tadds sp = %0, sp ;;\n" /* restore stack */
935 "\tldf.fill f4 = [r16],32\n"
936 "\tldf.fill f5 = [r17],32\n"
937 "\tbr.ret.sptk.many b0 ;;\n" /* return */
938 : : "i"(RESERVED_C_STACK_BYTES + PRESERVED_FP_REGISTERS*16),
939 "i"(LOCALS),
940 "i"(PRESERVED_FP_REGISTERS));
941 }
942
943 #endif
944
945 /* -----------------------------------------------------------------------------
946 MIPS architecture
947 -------------------------------------------------------------------------- */
948
949 #ifdef mips_HOST_ARCH
950
/*
 * MIPS StgRun.
 *
 * Loads f (%1) into $25 and basereg (%2) into $30, then jumps to f. At the
 * global STG_RETURN label it copies $16 into the output operand (__v0, pinned
 * to $2) and $17 into $3, and the function returns __v0.
 *
 * The operand list follows the GCC extended-asm layout
 *     asm ( template : outputs : inputs : clobbers )
 * The earlier text had a trailing comma after the single output operand and
 * no ':' before the clobber list, so it could not be parsed.
 *
 * NOTE(review): this variant returns StgThreadReturnCode whereas every other
 * StgRun in this file returns StgRegTable * - confirm against the declaration
 * in StgRun.h.
 */
951 StgThreadReturnCode
952 StgRun(StgFunPtr f, StgRegTable *basereg)
953 {
954 register StgThreadReturnCode __v0 __asm__("$2");
955
956 __asm__ __volatile__(
957 " la $25, %1 \n"
958 " move $30, %2 \n"
959 " jr %1 \n"
960 " .align 3 \n"
961 " .globl " STG_RETURN " \n"
962 " .aent " STG_RETURN " \n"
963 STG_RETURN ": \n"
964 " move %0, $16 \n"
965 " move $3, $17 \n"
966 : "=r" (__v0)
967 : "r" (f), "r" (basereg)
968 : "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
969 "$25", "$28", "$30",
970 "$f20", "$f22", "$f24", "$f26", "$f28", "$f30",
971 "memory");
972
973 return __v0;
974 }
975
976 #endif /* mips_HOST_ARCH */
977
978 /* -----------------------------------------------------------------------------
979 ARM architecture
980 -------------------------------------------------------------------------- */
981
982 #ifdef arm_HOST_ARCH
983
984 #if defined(__thumb__)
985 #define THUMB_FUNC ".thumb\n\t.thumb_func\n\t"
986 #else
987 #define THUMB_FUNC
988 #endif
989
/*
 * ARM StgRun.
 *
 * Operands: %0 = r (output), %1 = f, %2 = basereg, %3 = RESERVED_C_STACK_BYTES.
 *
 * Pushes r4-r10, fp, ip and lr (and d8-d11 unless arm_HOST_ARCH_PRE_ARMv6 is
 * defined), lowers sp by %3, copies basereg into r4 and branches to f. At the
 * global STG_RETURN label it raises sp by %3, copies r7 into r, pops the
 * saved registers in reverse order, and the function returns r.
 */
990 StgRegTable *
991 StgRun(StgFunPtr f, StgRegTable *basereg) {
992 StgRegTable * r;
993 __asm__ volatile (
994 /*
995 * save callee-saves registers on behalf of the STG code.
996 */
997 "stmfd sp!, {r4-r10, fp, ip, lr}\n\t"
998 #if !defined(arm_HOST_ARCH_PRE_ARMv6)
999 "vstmdb sp!, {d8-d11}\n\t"
1000 #endif
1001 /*
1002 * allocate some space for Stg machine's temporary storage.
1003 * Note: RESERVED_C_STACK_BYTES has to be a round number here or
1004 * the assembler can't assemble it.
1005 */
1006 "sub sp, sp, %3\n\t"
1007 /*
1008 * Set BaseReg
1009 */
1010 "mov r4, %2\n\t"
1011 /*
1012 * Jump to function argument.
1013 */
1014 "bx %1\n\t"
1015
1016 ".global " STG_RETURN "\n\t"
1017 THUMB_FUNC
1018 ".type " STG_RETURN ", %%function\n"
1019 STG_RETURN ":\n\t"
1020 /*
1021 * Free the space we allocated
1022 */
1023 "add sp, sp, %3\n\t"
1024 /*
1025 * Return the new register table, taking it from Stg's R1 (ARM's R7).
1026 */
1027 "mov %0, r7\n\t"
1028 /*
1029 * restore callee-saves registers.
1030 */
1031 #if !defined(arm_HOST_ARCH_PRE_ARMv6)
1032 "vldmia sp!, {d8-d11}\n\t"
1033 #endif
1034 "ldmfd sp!, {r4-r10, fp, ip, lr}\n\t"
1035 : "=r" (r)
1036 : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
/* NOTE(review): the clobber list below is empty although the template writes
 * r4 before it reads %1 and later reads r7 - confirm that the compiler cannot
 * place f or r in those registers. */
1037 :
1038 );
1039 return r;
1040 }
1041 #endif
1042
1043 #endif /* !USE_MINIINTERPRETER */