/* rts/StgCRun.c (from ghc.git) */
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2011
4 *
5 * STG-to-C glue.
6 *
7 * To run an STG function from C land, call
8 *
9 * rv = StgRun(f,BaseReg);
10 *
11 * where "f" is the STG function to call, and BaseReg is the address of the
12 * RegTable for this run (we might have separate RegTables if we're running
13 * multiple threads on an SMP machine).
14 *
15 * In the end, "f" must JMP to StgReturn (defined below), passing the
16 * return-value "rv" in R1, to return to the caller of StgRun returning "rv" in
17 * the whatever way C returns a value.
18 *
19 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any other registers
20 * (other than saving the C callee-saves registers). Instead, the called
21 * function "f" must do that in STG land.
22 *
23 * We also initially make sure that there are @RESERVED_C_STACK_BYTES@ on the
24 * C-stack. This is done to reserve some space for the allocation of
25 * temporaries in STG code.
26 *
27 * -------------------------------------------------------------------------- */
28
29 #include "PosixSource.h"
30 #include "ghcconfig.h"
31
32 #if defined(sparc_HOST_ARCH) || defined(USE_MINIINTERPRETER)
33 /* include Stg.h first because we want real machine regs in here: we
34 * have to get the value of R1 back from Stg land to C land intact.
35 */
36 #define IN_STGCRUN 1
37 #include "Stg.h"
38 #include "Rts.h"
39 #else
40 /* The other architectures do not require the actual register macro definitions
41 * here because they use hand written assembly to implement the StgRun
42 * function. Including Stg.h first will define the R1 values using GCC specific
43 * techniques, which we don't want for LLVM based C compilers. Since we don't
44 * actually need the real machine register definitions here, we include the
45 * headers in the opposite order to allow LLVM-based C compilers to work.
46 */
47 #include "Rts.h"
48 #include "Stg.h"
49 #endif
50
51 #include "StgRun.h"
52 #include "Capability.h"
53
54 #ifdef DEBUG
55 #include "RtsUtils.h"
56 #include "Printer.h"
57 #endif
58
59 #ifdef USE_MINIINTERPRETER
60
61 /* -----------------------------------------------------------------------------
62 any architecture (using miniinterpreter)
63 -------------------------------------------------------------------------- */
64
65 StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
66 {
67 while (f) {
68 IF_DEBUG(interpreter,
69 debugBelch("Jumping to ");
70 printPtr((P_)f); fflush(stdout);
71 debugBelch("\n");
72 );
73 f = (StgFunPtr) (f)();
74 }
75 return (StgRegTable *)R1.p;
76 }
77
/*
 * Mini-interpreter StgReturn: returning 0 (a NULL continuation)
 * terminates the dispatch loop in StgRun.
 */
StgFunPtr StgReturn(void)
{
    return 0;
}
82
83 #else /* !USE_MINIINTERPRETER */
84
85 #ifdef LEADING_UNDERSCORE
86 #define STG_RUN "_StgRun"
87 #define STG_RETURN "_StgReturn"
88 #else
89 #define STG_RUN "StgRun"
90 #define STG_RETURN "StgReturn"
91 #endif
92
93 /* -----------------------------------------------------------------------------
94 x86 architecture
95 -------------------------------------------------------------------------- */
96
97 #ifdef i386_HOST_ARCH
98
99 #ifdef darwin_HOST_OS
100 #define STG_GLOBAL ".globl "
101 #else
102 #define STG_GLOBAL ".global "
103 #endif
104
105 /*
106 * Note [Stack Alignment on X86]
107 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 *
109 * On X86 (both 32bit and 64bit) we keep the stack aligned on function calls at
110 * a 16-byte boundary. This is done because on a number of architectures the
111 * ABI requires this (x64, Mac OSX 32bit/64bit) as well as interfacing with
112 * other libraries through the FFI.
113 *
114 * As part of this arrangment we must maitain the stack at a 16-byte boundary
115 * - word_size-bytes (so 16n - 4 for i386 and 16n - 8 for x64) on entry to a
116 * procedure since both GCC and LLVM expect this. This is because the stack
117 * should have been 16-byte boundary aligned and then a call made which pushes
118 * a return address onto the stack (so word_size more space used). In STG code
119 * we only jump to other STG procedures, so we maintain the 16n - word_size
120 * alignment for these jumps.
121 *
122 * This gives us binary compatability with LLVM and GCC as well as dealing
123 * with the FFI. Previously we just maintianed a 16n byte alignment for
124 * procedure entry and calls, which led to bugs (see #4211 and #5250).
125 *
126 * To change this convention you need to change the code here, and in
127 * compiler/nativeGen/X86/CodeGen.hs::GenCCall, and maybe the adjustor
128 * code for thunks in rts/AdjustorAsm.s, rts/Adjustor.c.
129 *
130 * A quick way to see if this is wrong is to compile this code:
131 *
132 * main = System.Exit.exitWith ExitSuccess
133 *
134 * And run it with +RTS -sstderr. The stats code in the RTS, in
135 * particular statsPrintf(), relies on the stack alignment because
136 * it saves the %xmm regs on the stack, so it'll fall over if the
137 * stack isn't aligned, and calling exitWith from Haskell invokes
138 * shutdownHaskellAndExit using a C call.
139 *
140 */
141
/*
 * StgRun/StgReturn for i386, written as raw assembly inside a dummy C
 * function.  The C function itself is never called; the "used" attribute
 * just keeps the compiler from discarding the asm.  %0 expands to the
 * immediate RESERVED_C_STACK_BYTES + 16 (see the operand list below).
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
    __asm__ volatile (
        STG_GLOBAL STG_RUN "\n"
        STG_RUN ":\n\t"

        /*
         * move %esp down to reserve an area for temporary storage
         * during the execution of STG code.
         *
         * The stack pointer has to be aligned to a multiple of 16
         * bytes from here - this is a requirement of the C ABI, so
         * that C code can assign SSE2 registers directly to/from
         * stack locations.
         */
        "subl %0, %%esp\n\t"

        /*
         * save callee-saves registers on behalf of the STG code.
         * After the addl, %eax = %esp + (%0 - 16) = original %esp - 16,
         * so the four 4-byte saves below sit just under the stack
         * pointer we were entered with.
         */
        "movl %%esp, %%eax\n\t"
        "addl %0-16, %%eax\n\t"
        "movl %%ebx,0(%%eax)\n\t"
        "movl %%esi,4(%%eax)\n\t"
        "movl %%edi,8(%%eax)\n\t"
        "movl %%ebp,12(%%eax)\n\t"
        /*
         * Set BaseReg: 24(%eax) = original %esp + 8, i.e. the second
         * C (cdecl) argument, basereg.
         */
        "movl 24(%%eax),%%ebx\n\t"
        /*
         * grab the function argument from the stack:
         * 20(%eax) = original %esp + 4, the first C argument, f.
         */
        "movl 20(%%eax),%%eax\n\t"
        /*
         * jump to it
         */
        "jmp *%%eax\n\t"

        STG_GLOBAL STG_RETURN "\n"
        STG_RETURN ":\n\t"

        "movl %%esi, %%eax\n\t"   /* Return value in R1 */

        /*
         * restore callee-saves registers.  (Don't stomp on %%eax!)
         */
        "movl %%esp, %%edx\n\t"
        "addl %0-16, %%edx\n\t"
        "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
        "movl 4(%%edx),%%esi\n\t"
        "movl 8(%%edx),%%edi\n\t"
        "movl 12(%%edx),%%ebp\n\t"

        "addl %0, %%esp\n\t"
        "ret"

      : : "i" (RESERVED_C_STACK_BYTES + 16)
        // + 16 to make room for the 4 registers we have to save
        // See Note [Stack Alignment on X86]
    );
}
205
206 #if defined(mingw32_HOST_OS)
207 // On windows the stack has to be allocated 4k at a time, otherwise
208 // we get a segfault. The C compiler knows how to do this (it calls
209 // _alloca()), so we make sure that we can allocate as much stack as
210 // we need:
StgWord8 *win32AllocStack(void)
{
    /* Deliberate hack: this function exists only to make the C compiler
     * emit its 4k-at-a-time stack-probe code (_alloca) for a frame at
     * least as large as the area StgRun later claims with a raw "subl".
     * Returning the local array's address (normally a bug - the storage
     * is dead once we return) stops the compiler from optimising the
     * allocation away; the caller must never dereference the result. */
    StgWord8 stack[RESERVED_C_STACK_BYTES + 16 + 12];
    return stack;
}
216 #endif
217
218 #endif
219
220 /* ----------------------------------------------------------------------------
221 x86-64 is almost the same as plain x86.
222
223 I've done it using entirely inline assembler, because I couldn't
224 get gcc to generate the correct subtraction from %rsp by using
225 the local array variable trick. It didn't seem to reserve
226 enough space. Oh well, it's not much harder this way.
227 ------------------------------------------------------------------------- */
228
229 #ifdef x86_64_HOST_ARCH
230
231 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
232
/*
 * StgRun/StgReturn for x86-64, written as raw assembly inside a dummy
 * C function (never called; "used" keeps the asm alive).  %0 expands to
 * the immediate RESERVED_C_STACK_BYTES + 48.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
    __asm__ volatile (
        /*
         * save callee-saves registers on behalf of the STG code.
         * After the addq, %rax = %rsp + (%0 - 48), i.e. the six 8-byte
         * register saves below occupy the top 48 bytes of our frame.
         */
        ".globl " STG_RUN "\n"
        STG_RUN ":\n\t"
        "subq %0, %%rsp\n\t"
        "movq %%rsp, %%rax\n\t"
        "addq %0-48, %%rax\n\t"
        "movq %%rbx,0(%%rax)\n\t"
        "movq %%rbp,8(%%rax)\n\t"
        "movq %%r12,16(%%rax)\n\t"
        "movq %%r13,24(%%rax)\n\t"
        "movq %%r14,32(%%rax)\n\t"
        "movq %%r15,40(%%rax)\n\t"
        /*
         * Set BaseReg: %rsi holds the second C argument (basereg) in
         * the SysV AMD64 calling convention.
         */
        "movq %%rsi,%%r13\n\t"
        /*
         * grab the function argument (f, in %rdi) and jump to it.
         */
        "movq %%rdi,%%rax\n\t"
        "jmp *%%rax\n\t"

        ".globl " STG_RETURN "\n"
        STG_RETURN ":\n\t"

        "movq %%rbx, %%rax\n\t"   /* Return value in R1 */

        /*
         * restore callee-saves registers.  (Don't stomp on %%rax!)
         */
        "movq %%rsp, %%rdx\n\t"
        "addq %0-48, %%rdx\n\t"
        "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
        "movq 8(%%rdx),%%rbp\n\t"
        "movq 16(%%rdx),%%r12\n\t"
        "movq 24(%%rdx),%%r13\n\t"
        "movq 32(%%rdx),%%r14\n\t"
        "movq 40(%%rdx),%%r15\n\t"
        "addq %0, %%rsp\n\t"
        "retq"

      : : "i"(RESERVED_C_STACK_BYTES + 48 /*stack frame size*/));
    /*
     * See Note [Stack Alignment on X86]
     */
}
285
286 #endif /* x86-64 */
287
288 /* -----------------------------------------------------------------------------
289 Sparc architecture
290
291 --
292 OLD COMMENT from GHC-3.02:
293
294 We want tailjumps to be calls, because `call xxx' is the only Sparc
295 branch that allows an arbitrary label as a target. (Gcc's ``goto
296 *target'' construct ends up loading the label into a register and
297 then jumping, at the cost of two extra instructions for the 32-bit
298 load.)
299
300 When entering the threaded world, we stash our return address in a
301 known location so that \tr{%i7} is available as an extra
302 callee-saves register. Of course, we have to restore this when
303 coming out of the threaded world.
304
305 I hate this god-forsaken architecture. Since the top of the
306 reserved stack space is used for globals and the bottom is reserved
307 for outgoing arguments, we have to stick our return address
308 somewhere in the middle. Currently, I'm allowing 100 extra
309 outgoing arguments beyond the first 6. --JSM
310
311 Updated info (GHC 4.06): we don't appear to use %i7 any more, so
312 I'm not sure whether we still need to save it. Incedentally, what
313 does the last paragraph above mean when it says "the top of the
314 stack is used for globals"? What globals? --SDM
315
316 Updated info (GHC 4.08.2): not saving %i7 any more (see below).
317 -------------------------------------------------------------------------- */
318
319 #ifdef sparc_HOST_ARCH
320
StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg) {

    /* Reserve the temporary-storage area for STG code as a plain local
     * array; the StgReturn label is planted by the asm below so that
     * STG code can jump back into this frame. */
    unsigned char space[RESERVED_C_STACK_BYTES];
#if 0
    /* disabled: see the "updated 4.08.2" note below */
    register void *i7 __asm__("%i7");
    ((void **)(space))[100] = i7;
#endif
    f();
    __asm__ volatile (
            ".align 4\n"
            ".global " STG_RETURN "\n"
            STG_RETURN ":"
            : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
    /* we tell the C compiler that l0-l7 are clobbered on return to
     * StgReturn, otherwise it tries to use these to save eg. the
     * address of space[100] across the call. The correct thing
     * to do would be to save all the callee-saves regs, but we
     * can't be bothered to do that.
     *
     * We also explicitly mark space as used since gcc eliminates it
     * otherwise.
     *
     * The code that gcc generates for this little fragment is now
     * terrible. We could do much better by coding it directly in
     * assembler.
     */
#if 0
    /* updated 4.08.2: we don't save %i7 in the middle of the reserved
     * space any more, since gcc tries to save its address across the
     * call to f(), this gets clobbered in STG land and we end up
     * dereferencing a bogus pointer in StgReturn.
     */
    __asm__ volatile ("ld %1,%0"
                      : "=r" (i7) : "m" (((void **)(space))[100]));
#endif
    return (StgRegTable *)R1.i;
}
359
360 #endif
361
362 /* -----------------------------------------------------------------------------
363 PowerPC architecture
364
365 Everything is in assembler, so we don't have to deal with GCC...
366 -------------------------------------------------------------------------- */
367
368 #ifdef powerpc_HOST_ARCH
369
370 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
371
372 #ifdef darwin_HOST_OS
/*
 * StgRun/StgReturn for PowerPC on Darwin.  Uses the Darwin runtime's
 * saveFP/restFP helpers for the floating-point registers (from f14 up,
 * per the "# f14" suffix) and stmw/lmw for the GPRs r13-r31.
 */
void StgRunIsImplementedInAssembler(void)
{
#if HAVE_SUBSECTIONS_VIA_SYMBOLS
    // if the toolchain supports deadstripping, we have to
    // prevent it here (it tends to get confused here).
    __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
#endif
    __asm__ volatile (
        "\n.globl _StgRun\n"
        "_StgRun:\n"
        "\tmflr r0\n"             // fetch link register (saveFP stores it)
        "\tbl saveFP # f14\n"     // save FPRs via the Darwin helper
        "\tstmw r13,-220(r1)\n"   // save GPRs r13-r31 below the frame
        "\tstwu r1,-%0(r1)\n"     // push our %0-byte frame
        "\tmr r27,r4\n"           // BaseReg == r27
        "\tmtctr r3\n"            // f (first argument) -> CTR
        "\tmr r12,r3\n"
        "\tbctr\n"                // jump to f
        ".globl _StgReturn\n"
        "_StgReturn:\n"
        "\tmr r3,r14\n"           // copy r14 into the C result register r3
        "\tla r1,%0(r1)\n"        // pop the frame
        "\tlmw r13,-220(r1)\n"    // restore GPRs r13-r31
        "\tb restFP # f14\n"      // restore FPRs and return via the helper
        : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
}
399 #else
400
401 // This version is for PowerPC Linux.
402
403 // Differences from the Darwin/Mac OS X version:
404 // *) Different Assembler Syntax
405 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
406 // *) We may not access positive stack offsets
407 // (no "Red Zone" as in the Darwin ABI)
408 // *) The Link Register is saved to a different offset in the caller's stack frame
409 // (Linux: 4(r1), Darwin 8(r1))
410
/*
 * StgRun/StgReturn for PowerPC Linux (SVR4 ABI).  All callee-saved
 * GPRs (r13-r31) and FPRs (f14-f31) are saved by hand; see the
 * comment block above for the differences from the Darwin version.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
    __asm__ volatile (
        "\t.globl StgRun\n"
        "\t.type StgRun,@function\n"
        "StgRun:\n"
        "\tmflr 0\n"              // fetch the link register ...
        "\tstw 0,4(1)\n"          // ... and save it at 4(r1), the SVR4 LR slot
        "\tmr 5,1\n"              // r5 = incoming stack pointer; saves below
                                  //      are addressed relative to it
        "\tstwu 1,-%0(1)\n"       // push our %0-byte frame
        "\tstmw 13,-220(5)\n"     // save GPRs r13-r31
        "\tstfd 14,-144(5)\n"     // save FPRs f14-f31
        "\tstfd 15,-136(5)\n"
        "\tstfd 16,-128(5)\n"
        "\tstfd 17,-120(5)\n"
        "\tstfd 18,-112(5)\n"
        "\tstfd 19,-104(5)\n"
        "\tstfd 20,-96(5)\n"
        "\tstfd 21,-88(5)\n"
        "\tstfd 22,-80(5)\n"
        "\tstfd 23,-72(5)\n"
        "\tstfd 24,-64(5)\n"
        "\tstfd 25,-56(5)\n"
        "\tstfd 26,-48(5)\n"
        "\tstfd 27,-40(5)\n"
        "\tstfd 28,-32(5)\n"
        "\tstfd 29,-24(5)\n"
        "\tstfd 30,-16(5)\n"
        "\tstfd 31,-8(5)\n"
        "\tmr 27,4\n"             // BaseReg == r27
        "\tmtctr 3\n"             // f (first argument) -> CTR
        "\tmr 12,3\n"
        "\tbctr\n"                // jump to f
        ".globl StgReturn\n"
        "\t.type StgReturn,@function\n"
        "StgReturn:\n"
        "\tmr 3,14\n"             // copy r14 into the C result register r3
        "\tla 5,%0(1)\n"          // r5 = original stack pointer again
        "\tlmw 13,-220(5)\n"      // restore GPRs r13-r31
        "\tlfd 14,-144(5)\n"      // restore FPRs f14-f31
        "\tlfd 15,-136(5)\n"
        "\tlfd 16,-128(5)\n"
        "\tlfd 17,-120(5)\n"
        "\tlfd 18,-112(5)\n"
        "\tlfd 19,-104(5)\n"
        "\tlfd 20,-96(5)\n"
        "\tlfd 21,-88(5)\n"
        "\tlfd 22,-80(5)\n"
        "\tlfd 23,-72(5)\n"
        "\tlfd 24,-64(5)\n"
        "\tlfd 25,-56(5)\n"
        "\tlfd 26,-48(5)\n"
        "\tlfd 27,-40(5)\n"
        "\tlfd 28,-32(5)\n"
        "\tlfd 29,-24(5)\n"
        "\tlfd 30,-16(5)\n"
        "\tlfd 31,-8(5)\n"
        "\tmr 1,5\n"              // pop the frame
        "\tlwz 0,4(1)\n"          // reload the saved link register ...
        "\tmtlr 0\n"              // ... restore it ...
        "\tblr\n"                 // ... and return to StgRun's caller
        : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
}
475 #endif
476
477 #endif
478
479 /* -----------------------------------------------------------------------------
480 PowerPC 64 architecture
481
482 Everything is in assembler, so we don't have to deal with GCC...
483 -------------------------------------------------------------------------- */
484
485 #ifdef powerpc64_HOST_ARCH
486
487 #ifdef linux_HOST_OS
488 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
489
/*
 * StgRun/StgReturn for 64-bit PowerPC Linux (ELFv1 ABI): saves the TOC
 * pointer (r2) plus all callee-saved GPRs and FPRs, then enters f via
 * its function descriptor.
 */
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
{
        // r0 volatile
        // r1 stack pointer
        // r2 toc - needs to be saved
        // r3-r10 argument passing, volatile
        // r11, r12 very volatile (not saved across cross-module calls)
        // r13 thread local state (never modified, don't need to save)
        // r14-r31 callee-save
        __asm__ volatile (
                /* ELFv1 function descriptors: the global symbols StgRun
                 * and StgReturn live in .opd and each consist of
                 * (entry address, TOC base, 0); the actual code is at
                 * the dot-prefixed local entry symbols below. */
                ".section \".opd\",\"aw\"\n"
                ".align 3\n"
                ".globl StgRun\n"
                "StgRun:\n"
                "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
                "\t.size StgRun,24\n"
                ".globl StgReturn\n"
                "StgReturn:\n"
                "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
                "\t.size StgReturn,24\n"
                ".previous\n"
                ".globl .StgRun\n"
                ".type .StgRun,@function\n"
                ".StgRun:\n"
                "\tmflr 0\n"               // fetch the link register ...
                "\tmr 5, 1\n"              // r5 = incoming stack pointer
                "\tstd 0, 16(1)\n"         // ... save LR in its ABI slot
                "\tstdu 1, -%0(1)\n"       // push our %0-byte frame
                "\tstd 2, -296(5)\n"       // save the TOC pointer
                "\tstd 14, -288(5)\n"      // save GPRs r14-r31
                "\tstd 15, -280(5)\n"
                "\tstd 16, -272(5)\n"
                "\tstd 17, -264(5)\n"
                "\tstd 18, -256(5)\n"
                "\tstd 19, -248(5)\n"
                "\tstd 20, -240(5)\n"
                "\tstd 21, -232(5)\n"
                "\tstd 22, -224(5)\n"
                "\tstd 23, -216(5)\n"
                "\tstd 24, -208(5)\n"
                "\tstd 25, -200(5)\n"
                "\tstd 26, -192(5)\n"
                "\tstd 27, -184(5)\n"
                "\tstd 28, -176(5)\n"
                "\tstd 29, -168(5)\n"
                "\tstd 30, -160(5)\n"
                "\tstd 31, -152(5)\n"
                "\tstfd 14, -144(5)\n"     // save FPRs f14-f31
                "\tstfd 15, -136(5)\n"
                "\tstfd 16, -128(5)\n"
                "\tstfd 17, -120(5)\n"
                "\tstfd 18, -112(5)\n"
                "\tstfd 19, -104(5)\n"
                "\tstfd 20, -96(5)\n"
                "\tstfd 21, -88(5)\n"
                "\tstfd 22, -80(5)\n"
                "\tstfd 23, -72(5)\n"
                "\tstfd 24, -64(5)\n"
                "\tstfd 25, -56(5)\n"
                "\tstfd 26, -48(5)\n"
                "\tstfd 27, -40(5)\n"
                "\tstfd 28, -32(5)\n"
                "\tstfd 29, -24(5)\n"
                "\tstfd 30, -16(5)\n"
                "\tstfd 31, -8(5)\n"
                "\tmr 27, 4\n"             // BaseReg == r27
                /* r3 is f's function descriptor: word 1 is the callee's
                 * TOC, word 0 the code address. */
                "\tld 2, 8(3)\n"
                "\tld 3, 0(3)\n"
                "\tmtctr 3\n"
                "\tbctr\n"                 // jump to f
                ".globl .StgReturn\n"
                ".type .StgReturn,@function\n"
                ".StgReturn:\n"
                "\tmr 3,14\n"              // copy r14 into the C result register r3
                "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
                "\tld 2, -296(5)\n"        // restore the TOC pointer
                "\tld 14, -288(5)\n"       // restore GPRs r14-r31
                "\tld 15, -280(5)\n"
                "\tld 16, -272(5)\n"
                "\tld 17, -264(5)\n"
                "\tld 18, -256(5)\n"
                "\tld 19, -248(5)\n"
                "\tld 20, -240(5)\n"
                "\tld 21, -232(5)\n"
                "\tld 22, -224(5)\n"
                "\tld 23, -216(5)\n"
                "\tld 24, -208(5)\n"
                "\tld 25, -200(5)\n"
                "\tld 26, -192(5)\n"
                "\tld 27, -184(5)\n"
                "\tld 28, -176(5)\n"
                "\tld 29, -168(5)\n"
                "\tld 30, -160(5)\n"
                "\tld 31, -152(5)\n"
                "\tlfd 14, -144(5)\n"      // restore FPRs f14-f31
                "\tlfd 15, -136(5)\n"
                "\tlfd 16, -128(5)\n"
                "\tlfd 17, -120(5)\n"
                "\tlfd 18, -112(5)\n"
                "\tlfd 19, -104(5)\n"
                "\tlfd 20, -96(5)\n"
                "\tlfd 21, -88(5)\n"
                "\tlfd 22, -80(5)\n"
                "\tlfd 23, -72(5)\n"
                "\tlfd 24, -64(5)\n"
                "\tlfd 25, -56(5)\n"
                "\tlfd 26, -48(5)\n"
                "\tlfd 27, -40(5)\n"
                "\tlfd 28, -32(5)\n"
                "\tlfd 29, -24(5)\n"
                "\tlfd 30, -16(5)\n"
                "\tlfd 31, -8(5)\n"
                "\tmr 1, 5\n"              // pop the frame
                "\tld 0, 16(1)\n"          // reload saved LR ...
                "\tmtlr 0\n"               // ... restore it ...
                "\tblr\n"                  // ... and return to StgRun's caller
        : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
}
609
610 #else // linux_HOST_OS
611 #error Only linux support for power64 right now.
612 #endif
613
614 #endif
615
616 /* -----------------------------------------------------------------------------
617 ARM architecture
618 -------------------------------------------------------------------------- */
619
620 #ifdef arm_HOST_ARCH
621
622 #if defined(__thumb__)
623 #define THUMB_FUNC ".thumb\n\t.thumb_func\n\t"
624 #else
625 #define THUMB_FUNC
626 #endif
627
/*
 * StgRun for ARM: a real C function whose body is one inline-asm
 * statement.  %0 is the output (the returned StgRegTable*), %1 = f,
 * %2 = basereg, %3 = RESERVED_C_STACK_BYTES (immediate).
 */
StgRegTable *
StgRun(StgFunPtr f, StgRegTable *basereg) {
    StgRegTable * r;
    __asm__ volatile (
        /*
         * save callee-saves registers on behalf of the STG code.
         */
        "stmfd sp!, {r4-r10, fp, ip, lr}\n\t"
#if !defined(arm_HOST_ARCH_PRE_ARMv6)
        /* VFP callee-saved doubles, only when VFP is available (ARMv6+) */
        "vstmdb sp!, {d8-d11}\n\t"
#endif
        /*
         * allocate some space for Stg machine's temporary storage.
         * Note: RESERVER_C_STACK_BYTES has to be a round number here or
         * the assembler can't assemble it.
         */
        "sub sp, sp, %3\n\t"
        /*
         * Set BaseReg
         */
        "mov r4, %2\n\t"
        /*
         * Jump to function argument.  (bx also switches to Thumb state
         * if the target address has its low bit set.)
         */
        "bx %1\n\t"

        ".global " STG_RETURN "\n\t"
        THUMB_FUNC
        ".type " STG_RETURN ", %%function\n"
        STG_RETURN ":\n\t"
        /*
         * Free the space we allocated
         */
        "add sp, sp, %3\n\t"
        /*
         * Return the new register table, taking it from Stg's R1 (ARM's R7).
         */
        "mov %0, r7\n\t"
        /*
         * restore callee-saves registers.
         */
#if !defined(arm_HOST_ARCH_PRE_ARMv6)
        "vldmia sp!, {d8-d11}\n\t"
#endif
        "ldmfd sp!, {r4-r10, fp, ip, lr}\n\t"
      : "=r" (r)
      : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
      :
    );
    return r;
}
679 #endif
680
681 #endif /* !USE_MINIINTERPRETER */