UNREG: use __builtin___clear_cache where available
[ghc.git] / rts / StgCRun.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2011
4 *
5 * STG-to-C glue.
6 *
7 * To run an STG function from C land, call
8 *
9 * rv = StgRun(f,BaseReg);
10 *
11 * where "f" is the STG function to call, and BaseReg is the address of the
12 * RegTable for this run (we might have separate RegTables if we're running
13 * multiple threads on an SMP machine).
14 *
15 * In the end, "f" must JMP to StgReturn (defined below), passing the
16 * return-value "rv" in R1, to return to the caller of StgRun returning "rv" in
17 * whatever way C returns a value.
18 *
19 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any other registers
20 * (other than saving the C callee-saves registers). Instead, the called
21 * function "f" must do that in STG land.
22 *
23 * We also initially make sure that there are @RESERVED_C_STACK_BYTES@ on the
24 * C-stack. This is done to reserve some space for the allocation of
25 * temporaries in STG code.
26 *
27 * -------------------------------------------------------------------------- */
28
29 #include "PosixSource.h"
30 #include "ghcconfig.h"
31
32 #if defined(sparc_HOST_ARCH) || defined(USE_MINIINTERPRETER)
33 /* include Stg.h first because we want real machine regs in here: we
34 * have to get the value of R1 back from Stg land to C land intact.
35 */
36
37 /* We include windows.h very early, as on Win64 the CONTEXT type has
38 fields "R8", "R9" and "R10", which goes bad if we've already
39 #define'd those names for our own purposes (in stg/Regs.h) */
40 #if defined(HAVE_WINDOWS_H)
41 #include <windows.h>
42 #endif
43
44 #define IN_STGCRUN 1
45 #include "Stg.h"
46 #include "Rts.h"
47 #else
48 /* The other architectures do not require the actual register macro definitions
49 * here because they use hand written assembly to implement the StgRun
50 * function. Including Stg.h first will define the R1 values using GCC specific
51 * techniques, which we don't want for LLVM based C compilers. Since we don't
52 * actually need the real machine register definitions here, we include the
53 * headers in the opposite order to allow LLVM-based C compilers to work.
54 */
55 #include "Rts.h"
56 #include "Stg.h"
57 #endif
58
59 #include "StgRun.h"
60 #include "Capability.h"
61
62 #if defined(DEBUG)
63 #include "RtsUtils.h"
64 #include "Printer.h"
65 #endif
66
67 #if defined(USE_MINIINTERPRETER)
68
69 /* -----------------------------------------------------------------------------
70 any architecture (using miniinterpreter)
71 -------------------------------------------------------------------------- */
72
73 StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
74 {
75 while (f) {
76 IF_DEBUG(interpreter,
77 debugBelch("Jumping to ");
78 printPtr((P_)f); fflush(stdout);
79 debugBelch("\n");
80 );
81 f = (StgFunPtr) (f)();
82 }
83 return (StgRegTable *)R1.p;
84 }
85
86 StgFunPtr StgReturn(void)
87 {
88 return 0;
89 }
90
91 #else /* !USE_MINIINTERPRETER */
92
93 #if defined(LEADING_UNDERSCORE)
94 #define STG_RUN "_StgRun"
95 #define STG_RETURN "_StgReturn"
96 #else
97 #define STG_RUN "StgRun"
98 #define STG_RETURN "StgReturn"
99 #endif
100
101 #if defined(mingw32_HOST_OS)
102 // On windows the stack has to be allocated 4k at a time, otherwise
103 // we get a segfault. The C compiler knows how to do this (it calls
104 // _alloca()), so we make sure that we can allocate as much stack as
105 // we need:
/*
 * Declares a local array of RESERVED_C_STACK_BYTES + 16 + 12 bytes and
 * returns its address.  The array only exists for the duration of this
 * call, so the returned pointer is dangling as soon as the function returns.
 * NOTE(review): presumably callers use this only for its effect on the
 * stack and never dereference the result -- confirm against the callers.
 */
106 StgWord8 *win32AllocStack(void)
107 {
108 StgWord8 stack[RESERVED_C_STACK_BYTES + 16 + 12];
109 return stack;
110 }
111 #endif
112
113 /* -----------------------------------------------------------------------------
114 x86 architecture
115 -------------------------------------------------------------------------- */
116
117 #if defined(i386_HOST_ARCH)
118
119 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
120 #define STG_GLOBAL ".globl "
121 #define STG_HIDDEN ".private_extern "
122 #else
123 #define STG_GLOBAL ".global "
124 #define STG_HIDDEN ".hidden "
125 #endif
126
127 /*
128 * Note [Stack Alignment on X86]
129 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
130 *
131 * On X86 (both 32bit and 64bit) we keep the stack aligned on function calls at
132 * a 16-byte boundary. This is done because on a number of architectures the
133 * ABI requires this (x64, Mac OSX 32bit/64bit) as well as interfacing with
134 * other libraries through the FFI.
135 *
136 * As part of this arrangement we must maintain the stack at a 16-byte boundary
137 * - word_size-bytes (so 16n - 4 for i386 and 16n - 8 for x64) on entry to a
138 * procedure since both GCC and LLVM expect this. This is because the stack
139 * should have been 16-byte boundary aligned and then a call made which pushes
140 * a return address onto the stack (so word_size more space used). In STG code
141 * we only jump to other STG procedures, so we maintain the 16n - word_size
142 * alignment for these jumps.
143 *
144 * This gives us binary compatibility with LLVM and GCC as well as dealing
145 * with the FFI. Previously we just maintained a 16n byte alignment for
146 * procedure entry and calls, which led to bugs (see #4211 and #5250).
147 *
148 * To change this convention you need to change the code here, and in
149 * compiler/nativeGen/X86/CodeGen.hs::GenCCall, and maybe the adjustor
150 * code for thunks in rts/AdjustorAsm.s, rts/Adjustor.c.
151 *
152 * A quick way to see if this is wrong is to compile this code:
153 *
154 * main = System.Exit.exitWith ExitSuccess
155 *
156 * And run it with +RTS -sstderr. The stats code in the RTS, in
157 * particular statsPrintf(), relies on the stack alignment because
158 * it saves the %xmm regs on the stack, so it'll fall over if the
159 * stack isn't aligned, and calling exitWith from Haskell invokes
160 * shutdownHaskellAndExit using a C call.
161 *
162 * If you edit the sequence below be sure to update the unwinding information
163 * for stg_stop_thread in StgStartup.cmm.
164 */
165
/*
 * i386 definitions of the StgRun and StgReturn symbols, written as a single
 * inline-assembly block.  The enclosing C function is static and marked
 * "used"; nothing visible in this file calls it.
 *
 * StgRun:    lowers %esp by operand %0 (RESERVED_C_STACK_BYTES + 16), stores
 *            %ebx/%esi/%edi/%ebp in the top 16 bytes of that area, loads
 *            BaseReg into %ebx and the STG function into %eax, then jumps
 *            to the function.
 * StgReturn: copies %esi into %eax as the return value, reloads the four
 *            saved registers, raises %esp by %0 again and executes ret.
 */
166 static void GNUC3_ATTRIBUTE(used)
167 StgRunIsImplementedInAssembler(void)
168 {
169 __asm__ volatile (
170 STG_GLOBAL STG_RUN "\n"
171 #if !defined(mingw32_HOST_OS)
172 STG_HIDDEN STG_RUN "\n"
173 #endif
174 STG_RUN ":\n\t"
175
176 /*
177 * move %esp down to reserve an area for temporary storage
178 * during the execution of STG code.
179 *
180 * The stack pointer has to be aligned to a multiple of 16
181 * bytes from here - this is a requirement of the C ABI, so
182 * that C code can assign SSE2 registers directly to/from
183 * stack locations.
184 */
185 "subl %0, %%esp\n\t"
186
187 /*
188 * save callee-saves registers on behalf of the STG code.
189 */
190 "movl %%esp, %%eax\n\t"
191 "addl %0-16, %%eax\n\t"
/* %eax = new %esp + %0 - 16, i.e. 16 bytes below the %esp value on entry */
192 "movl %%ebx,0(%%eax)\n\t"
193 "movl %%esi,4(%%eax)\n\t"
194 "movl %%edi,8(%%eax)\n\t"
195 "movl %%ebp,12(%%eax)\n\t"
/*
 * 16(%eax) is the stack slot %esp pointed at on entry, so offsets 20 and
 * 24 below are the two words directly above it.
 */
196 /*
197 * Set BaseReg
198 */
199 "movl 24(%%eax),%%ebx\n\t"
200 /*
201 * grab the function argument from the stack
202 */
203 "movl 20(%%eax),%%eax\n\t"
204 /*
205 * jump to it
206 */
207 "jmp *%%eax\n\t"
208
209 STG_GLOBAL STG_RETURN "\n"
210 STG_RETURN ":\n\t"
211
212 "movl %%esi, %%eax\n\t" /* Return value in R1 */
213
214 /*
215 * restore callee-saves registers. (Don't stomp on %%eax!)
216 */
217 "movl %%esp, %%edx\n\t"
218 "addl %0-16, %%edx\n\t"
219 "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
220 "movl 4(%%edx),%%esi\n\t"
221 "movl 8(%%edx),%%edi\n\t"
222 "movl 12(%%edx),%%ebp\n\t"
223
/* release the whole area reserved by the subl at the top of StgRun */
224 "addl %0, %%esp\n\t"
225 "ret"
226
227 : : "i" (RESERVED_C_STACK_BYTES + 16)
228 // + 16 to make room for the 4 registers we have to save
229 // See Note [Stack Alignment on X86]
230 );
231 }
232
233 #endif
234
235 /* ----------------------------------------------------------------------------
236 x86-64 is almost the same as plain x86.
237
238 I've done it using entirely inline assembler, because I couldn't
239 get gcc to generate the correct subtraction from %rsp by using
240 the local array variable trick. It didn't seem to reserve
241 enough space. Oh well, it's not much harder this way.
242 ------------------------------------------------------------------------- */
243
244 #if defined(x86_64_HOST_ARCH)
245
246 #define STG_GLOBAL ".globl "
247
248 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
249 #define STG_HIDDEN ".private_extern "
250 #else
251 #define STG_HIDDEN ".hidden "
252 #endif
253
/*
 * x86-64 definitions of the StgRun and StgReturn symbols, written as a
 * single inline-assembly block.  The enclosing C function is static and
 * marked "used"; nothing visible in this file calls it.
 *
 * Operands: %0 = RESERVED_C_STACK_BYTES, %1 = STG_RUN_STACK_FRAME_SIZE.
 *
 * StgRun:    lowers %rsp by %1 to make the register save frame, keeps the
 *            frame base in %rax, lowers %rsp by a further %0, stores
 *            %rbx, %rbp and %r12-%r15 (plus %rdi, %rsi and %xmm6 on
 *            mingw32) into the frame, copies BaseReg into %r13 and the STG
 *            function into %rax, then jumps to it.
 * StgReturn: copies %rbx into %rax as the return value, raises %rsp by %0
 *            so it points at the save frame again, reloads the saved
 *            registers, raises %rsp by %1 and executes retq.
 */
254 static void GNUC3_ATTRIBUTE(used)
255 StgRunIsImplementedInAssembler(void)
256 {
257 __asm__ volatile (
258 /*
259 * save callee-saves registers on behalf of the STG code.
260 */
261 STG_GLOBAL STG_RUN "\n"
262 #if !defined(mingw32_HOST_OS)
263 STG_HIDDEN STG_RUN "\n"
264 #endif
265 STG_RUN ":\n\t"
266 "subq %1, %%rsp\n\t"
267 "movq %%rsp, %%rax\n\t"
268 "subq %0, %%rsp\n\t"
269 "movq %%rbx,0(%%rax)\n\t"
270 "movq %%rbp,8(%%rax)\n\t"
271 "movq %%r12,16(%%rax)\n\t"
272 "movq %%r13,24(%%rax)\n\t"
273 "movq %%r14,32(%%rax)\n\t"
274 "movq %%r15,40(%%rax)\n\t"
275 #if defined(mingw32_HOST_OS)
276 "movq %%rdi,48(%%rax)\n\t"
277 "movq %%rsi,56(%%rax)\n\t"
278 "movq %%xmm6,64(%%rax)\n\t"
279 #endif
280 /*
281 * Set BaseReg
282 */
283 #if defined(mingw32_HOST_OS)
284 "movq %%rdx,%%r13\n\t"
285 #else
286 "movq %%rsi,%%r13\n\t"
287 #endif
288 /*
289 * grab the function argument (passed in a register), and jump to it.
290 */
291 #if defined(mingw32_HOST_OS)
292 "movq %%rcx,%%rax\n\t"
293 #else
294 "movq %%rdi,%%rax\n\t"
295 #endif
296 "jmp *%%rax\n\t"
297
298 ".globl " STG_RETURN "\n"
299 STG_RETURN ":\n\t"
300
301 "movq %%rbx, %%rax\n\t" /* Return value in R1 */
302
303 /*
304 * restore callee-saves registers. (Don't stomp on %%rax!)
305 */
/* undo the second subq of StgRun so %rsp is the base of the save frame */
306 "addq %0, %%rsp\n\t"
307 "movq 0(%%rsp),%%rbx\n\t" /* restore the registers saved above */
308 "movq 8(%%rsp),%%rbp\n\t"
309 "movq 16(%%rsp),%%r12\n\t"
310 "movq 24(%%rsp),%%r13\n\t"
311 "movq 32(%%rsp),%%r14\n\t"
312 "movq 40(%%rsp),%%r15\n\t"
313 #if defined(mingw32_HOST_OS)
314 "movq 48(%%rsp),%%rdi\n\t"
315 "movq 56(%%rsp),%%rsi\n\t"
316 "movq 64(%%rsp),%%xmm6\n\t"
317 #endif
/* undo the first subq of StgRun, releasing the save frame itself */
318 "addq %1, %%rsp\n\t"
319 "retq"
320
321 :
322 : "i"(RESERVED_C_STACK_BYTES),
323 "i"(STG_RUN_STACK_FRAME_SIZE /* stack frame size */)
324 );
325 /*
326 * See Note [Stack Alignment on X86]
327 */
328 }
329
330 #endif /* x86-64 */
331
332 /* -----------------------------------------------------------------------------
333 Sparc architecture
334
335 --
336 OLD COMMENT from GHC-3.02:
337
338 We want tailjumps to be calls, because `call xxx' is the only Sparc
339 branch that allows an arbitrary label as a target. (Gcc's ``goto
340 *target'' construct ends up loading the label into a register and
341 then jumping, at the cost of two extra instructions for the 32-bit
342 load.)
343
344 When entering the threaded world, we stash our return address in a
345 known location so that \tr{%i7} is available as an extra
346 callee-saves register. Of course, we have to restore this when
347 coming out of the threaded world.
348
349 I hate this god-forsaken architecture. Since the top of the
350 reserved stack space is used for globals and the bottom is reserved
351 for outgoing arguments, we have to stick our return address
352 somewhere in the middle. Currently, I'm allowing 100 extra
353 outgoing arguments beyond the first 6. --JSM
354
355 Updated info (GHC 4.06): we don't appear to use %i7 any more, so
356 I'm not sure whether we still need to save it. Incidentally, what
357 does the last paragraph above mean when it says "the top of the
358 stack is used for globals"? What globals? --SDM
359
360 Updated info (GHC 4.08.2): not saving %i7 any more (see below).
361 -------------------------------------------------------------------------- */
362
363 #if defined(sparc_HOST_ARCH)
364
/*
 * Sparc version of StgRun, written in C with an embedded asm label.
 *
 * Reserves RESERVED_C_STACK_BYTES of stack in the local array "space",
 * calls f, and places the global StgReturn label directly after that call.
 * The value returned is R1.i converted to a StgRegTable pointer.
 * basereg is not referenced anywhere in the body.
 */
365 StgRegTable *
366 StgRun(StgFunPtr f, StgRegTable *basereg) {
367
368 unsigned char space[RESERVED_C_STACK_BYTES];
369 #if 0
370 register void *i7 __asm__("%i7");
371 ((void **)(space))[100] = i7;
372 #endif
373 f();
/*
 * The asm below emits no instructions: it only defines the StgReturn
 * label, takes "space" as an input operand and lists l0-l7 as clobbered.
 */
374 __asm__ volatile (
375 ".align 4\n"
376 ".global " STG_RETURN "\n"
377 STG_RETURN ":"
378 : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
379 /* we tell the C compiler that l0-l7 are clobbered on return to
380 * StgReturn, otherwise it tries to use these to save eg. the
381 * address of space[100] across the call. The correct thing
382 * to do would be to save all the callee-saves regs, but we
383 * can't be bothered to do that.
384 *
385 * We also explicitly mark space as used since gcc eliminates it
386 * otherwise.
387 *
388 * The code that gcc generates for this little fragment is now
389 * terrible. We could do much better by coding it directly in
390 * assembler.
391 */
392 #if 0
393 /* updated 4.08.2: we don't save %i7 in the middle of the reserved
394 * space any more, since gcc tries to save its address across the
395 * call to f(), this gets clobbered in STG land and we end up
396 * dereferencing a bogus pointer in StgReturn.
397 */
398 __asm__ volatile ("ld %1,%0"
399 : "=r" (i7) : "m" (((void **)(space))[100]));
400 #endif
401 return (StgRegTable *)R1.i;
402 }
403
404 #endif
405
406 /* -----------------------------------------------------------------------------
407 PowerPC architecture
408
409 Everything is in assembler, so we don't have to deal with GCC...
410 -------------------------------------------------------------------------- */
411
412 #if defined(powerpc_HOST_ARCH)
413
414 #define STG_GLOBAL ".globl "
415
416 #if defined(darwin_HOST_OS)
417 #define STG_HIDDEN ".private_extern "
418 #else
419 #define STG_HIDDEN ".hidden "
420 #endif
421
422 #if defined(aix_HOST_OS)
423
424 // implementation is in StgCRunAsm.S
425
426 #elif defined(darwin_HOST_OS)
/*
 * PowerPC Darwin definitions of the StgRun and _StgReturn symbols, written
 * as inline assembly.  Unlike the other variants in this file this
 * container function is not static.
 *
 * Operand %0 is RESERVED_C_STACK_BYTES + 224, the size of the stack frame.
 *
 * StgRun:     copies LR to r0, calls the saveFP helper, stores r13 upwards
 *             with stmw at -220(r1), pushes the frame with stwu, copies r4
 *             (second argument) into r27 as BaseReg, puts r3 (first
 *             argument) into CTR and r12, and branches via CTR.
 * _StgReturn: copies r14 into r3, pops the frame by adding %0 to r1,
 *             reloads the registers with lmw and branches to the restFP
 *             helper.
 * NOTE(review): saveFP/restFP are not defined in this file; presumably they
 * are the system register save/restore helpers and restFP performs the
 * final return -- confirm.
 */
427 void StgRunIsImplementedInAssembler(void)
428 {
429 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
430 // if the toolchain supports deadstripping, we have to
431 // prevent it here (it tends to get confused here).
432 __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler\n");
433 #endif
434 __asm__ volatile (
435 STG_GLOBAL STG_RUN "\n"
436 STG_HIDDEN STG_RUN "\n"
437 STG_RUN ":\n"
438 "\tmflr r0\n"
439 "\tbl saveFP # f14\n"
440 "\tstmw r13,-220(r1)\n"
441 "\tstwu r1,-%0(r1)\n"
442 "\tmr r27,r4\n" // BaseReg == r27
443 "\tmtctr r3\n"
444 "\tmr r12,r3\n"
445 "\tbctr\n"
446 ".globl _StgReturn\n"
447 "_StgReturn:\n"
448 "\tmr r3,r14\n"
449 "\tla r1,%0(r1)\n"
450 "\tlmw r13,-220(r1)\n"
451 "\tb restFP # f14\n"
452 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
453 }
454 #else
455
456 // This version is for PowerPC Linux.
457
458 // Differences from the Darwin/Mac OS X version:
459 // *) Different Assembler Syntax
460 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
461 // *) We may not access positive stack offsets
462 // (no "Red Zone" as in the Darwin ABI)
463 // *) The Link Register is saved to a different offset in the caller's stack frame
464 // (Linux: 4(r1), Darwin 8(r1))
465
/*
 * PowerPC Linux definitions of the StgRun and StgReturn symbols, written as
 * a single inline-assembly block.  The enclosing C function is static and
 * marked "used"; nothing visible in this file calls it.
 *
 * Operand %0 is RESERVED_C_STACK_BYTES + 224, the size of the stack frame.
 *
 * StgRun:    stores LR at 4(r1), keeps the incoming stack pointer in r5,
 *            pushes the frame with stwu, then saves general registers with
 *            stmw at -220(r5) and f14-f31 at -144(r5) .. -8(r5).  It then
 *            copies r4 (second argument) into r27 as BaseReg, puts r3
 *            (first argument) into CTR and r12, and branches via CTR.
 * StgReturn: copies r14 into r3, recomputes the pre-frame stack pointer in
 *            r5, reloads everything saved above from the same offsets,
 *            restores r1 and LR, and returns with blr.
 */
466 static void GNUC3_ATTRIBUTE(used)
467 StgRunIsImplementedInAssembler(void)
468 {
469 __asm__ volatile (
470 "\t.globl StgRun\n"
471 "\t.hidden StgRun\n"
472 "\t.type StgRun,@function\n"
473 "StgRun:\n"
474 "\tmflr 0\n"
475 "\tstw 0,4(1)\n"
476 "\tmr 5,1\n"
477 "\tstwu 1,-%0(1)\n"
/* all saves below are addressed relative to r5, the pre-frame stack pointer */
478 "\tstmw 13,-220(5)\n"
479 "\tstfd 14,-144(5)\n"
480 "\tstfd 15,-136(5)\n"
481 "\tstfd 16,-128(5)\n"
482 "\tstfd 17,-120(5)\n"
483 "\tstfd 18,-112(5)\n"
484 "\tstfd 19,-104(5)\n"
485 "\tstfd 20,-96(5)\n"
486 "\tstfd 21,-88(5)\n"
487 "\tstfd 22,-80(5)\n"
488 "\tstfd 23,-72(5)\n"
489 "\tstfd 24,-64(5)\n"
490 "\tstfd 25,-56(5)\n"
491 "\tstfd 26,-48(5)\n"
492 "\tstfd 27,-40(5)\n"
493 "\tstfd 28,-32(5)\n"
494 "\tstfd 29,-24(5)\n"
495 "\tstfd 30,-16(5)\n"
496 "\tstfd 31,-8(5)\n"
497 "\tmr 27,4\n" // BaseReg == r27
498 "\tmtctr 3\n"
499 "\tmr 12,3\n"
500 "\tbctr\n"
501 ".globl StgReturn\n"
502 "\t.type StgReturn,@function\n"
503 "StgReturn:\n"
504 "\tmr 3,14\n"
/* r5 = r1 + frame size, the same base address the saves in StgRun used */
505 "\tla 5,%0(1)\n"
506 "\tlmw 13,-220(5)\n"
507 "\tlfd 14,-144(5)\n"
508 "\tlfd 15,-136(5)\n"
509 "\tlfd 16,-128(5)\n"
510 "\tlfd 17,-120(5)\n"
511 "\tlfd 18,-112(5)\n"
512 "\tlfd 19,-104(5)\n"
513 "\tlfd 20,-96(5)\n"
514 "\tlfd 21,-88(5)\n"
515 "\tlfd 22,-80(5)\n"
516 "\tlfd 23,-72(5)\n"
517 "\tlfd 24,-64(5)\n"
518 "\tlfd 25,-56(5)\n"
519 "\tlfd 26,-48(5)\n"
520 "\tlfd 27,-40(5)\n"
521 "\tlfd 28,-32(5)\n"
522 "\tlfd 29,-24(5)\n"
523 "\tlfd 30,-16(5)\n"
524 "\tlfd 31,-8(5)\n"
525 "\tmr 1,5\n"
526 "\tlwz 0,4(1)\n"
527 "\tmtlr 0\n"
528 "\tblr\n"
529 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
530 }
531 #endif
532
533 #endif
534
535 /* -----------------------------------------------------------------------------
536 PowerPC 64 architecture
537
538 Everything is in assembler, so we don't have to deal with GCC...
539 -------------------------------------------------------------------------- */
540
541 #if defined(powerpc64_HOST_ARCH)
542
543 #if defined(linux_HOST_OS)
/*
 * PowerPC 64 Linux definitions of StgRun and StgReturn, written as a single
 * inline-assembly block.  The enclosing C function is static and marked
 * "used"; nothing visible in this file calls it.
 *
 * Operand %0 is RESERVED_C_STACK_BYTES + 304, the size of the stack frame.
 *
 * The block first emits two 24-byte entries into the ".opd" section, named
 * StgRun and StgReturn, each holding the address of the corresponding
 * dot-prefixed code label, the TOC base and a zero word.  The code itself
 * lives under .StgRun and .StgReturn.
 *
 * .StgRun:    stores LR at 16(r1), keeps the incoming stack pointer in r5,
 *             pushes the frame with stdu, saves r2 and r14-r31 at
 *             -296(r5) .. -152(r5) and f14-f31 at -144(r5) .. -8(r5),
 *             copies r4 into r27 as BaseReg, then loads a new r2 from
 *             8(r3) and the jump target from 0(r3) and branches via CTR.
 * .StgReturn: copies r14 into r3, recomputes the pre-frame stack pointer
 *             in r5, reloads everything saved above from the same offsets,
 *             restores r1 and LR, and returns with blr.
 */
544 static void GNUC3_ATTRIBUTE(used)
545 StgRunIsImplementedInAssembler(void)
546 {
547 // r0 volatile
548 // r1 stack pointer
549 // r2 toc - needs to be saved
550 // r3-r10 argument passing, volatile
551 // r11, r12 very volatile (not saved across cross-module calls)
552 // r13 thread local state (never modified, don't need to save)
553 // r14-r31 callee-save
554 __asm__ volatile (
555 ".section \".opd\",\"aw\"\n"
556 ".align 3\n"
557 ".globl StgRun\n"
558 ".hidden StgRun\n"
559 "StgRun:\n"
560 "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
561 "\t.size StgRun,24\n"
562 ".globl StgReturn\n"
563 "StgReturn:\n"
564 "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
565 "\t.size StgReturn,24\n"
566 ".previous\n"
567 ".globl .StgRun\n"
568 ".type .StgRun,@function\n"
569 ".StgRun:\n"
570 "\tmflr 0\n"
571 "\tmr 5, 1\n"
572 "\tstd 0, 16(1)\n"
573 "\tstdu 1, -%0(1)\n"
/* all saves below are addressed relative to r5, the pre-frame stack pointer */
574 "\tstd 2, -296(5)\n"
575 "\tstd 14, -288(5)\n"
576 "\tstd 15, -280(5)\n"
577 "\tstd 16, -272(5)\n"
578 "\tstd 17, -264(5)\n"
579 "\tstd 18, -256(5)\n"
580 "\tstd 19, -248(5)\n"
581 "\tstd 20, -240(5)\n"
582 "\tstd 21, -232(5)\n"
583 "\tstd 22, -224(5)\n"
584 "\tstd 23, -216(5)\n"
585 "\tstd 24, -208(5)\n"
586 "\tstd 25, -200(5)\n"
587 "\tstd 26, -192(5)\n"
588 "\tstd 27, -184(5)\n"
589 "\tstd 28, -176(5)\n"
590 "\tstd 29, -168(5)\n"
591 "\tstd 30, -160(5)\n"
592 "\tstd 31, -152(5)\n"
593 "\tstfd 14, -144(5)\n"
594 "\tstfd 15, -136(5)\n"
595 "\tstfd 16, -128(5)\n"
596 "\tstfd 17, -120(5)\n"
597 "\tstfd 18, -112(5)\n"
598 "\tstfd 19, -104(5)\n"
599 "\tstfd 20, -96(5)\n"
600 "\tstfd 21, -88(5)\n"
601 "\tstfd 22, -80(5)\n"
602 "\tstfd 23, -72(5)\n"
603 "\tstfd 24, -64(5)\n"
604 "\tstfd 25, -56(5)\n"
605 "\tstfd 26, -48(5)\n"
606 "\tstfd 27, -40(5)\n"
607 "\tstfd 28, -32(5)\n"
608 "\tstfd 29, -24(5)\n"
609 "\tstfd 30, -16(5)\n"
610 "\tstfd 31, -8(5)\n"
611 "\tmr 27, 4\n" // BaseReg == r27
/*
 * r3 is read as a pointer to a two-word record: the jump target at offset
 * 0 and the value for r2 at offset 8 (the same order as the first two
 * .quad fields of the .opd entries emitted above).
 */
612 "\tld 2, 8(3)\n"
613 "\tld 3, 0(3)\n"
614 "\tmtctr 3\n"
615 "\tbctr\n"
616 ".globl .StgReturn\n"
617 ".type .StgReturn,@function\n"
618 ".StgReturn:\n"
619 "\tmr 3,14\n"
620 "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
621 "\tld 2, -296(5)\n"
622 "\tld 14, -288(5)\n"
623 "\tld 15, -280(5)\n"
624 "\tld 16, -272(5)\n"
625 "\tld 17, -264(5)\n"
626 "\tld 18, -256(5)\n"
627 "\tld 19, -248(5)\n"
628 "\tld 20, -240(5)\n"
629 "\tld 21, -232(5)\n"
630 "\tld 22, -224(5)\n"
631 "\tld 23, -216(5)\n"
632 "\tld 24, -208(5)\n"
633 "\tld 25, -200(5)\n"
634 "\tld 26, -192(5)\n"
635 "\tld 27, -184(5)\n"
636 "\tld 28, -176(5)\n"
637 "\tld 29, -168(5)\n"
638 "\tld 30, -160(5)\n"
639 "\tld 31, -152(5)\n"
640 "\tlfd 14, -144(5)\n"
641 "\tlfd 15, -136(5)\n"
642 "\tlfd 16, -128(5)\n"
643 "\tlfd 17, -120(5)\n"
644 "\tlfd 18, -112(5)\n"
645 "\tlfd 19, -104(5)\n"
646 "\tlfd 20, -96(5)\n"
647 "\tlfd 21, -88(5)\n"
648 "\tlfd 22, -80(5)\n"
649 "\tlfd 23, -72(5)\n"
650 "\tlfd 24, -64(5)\n"
651 "\tlfd 25, -56(5)\n"
652 "\tlfd 26, -48(5)\n"
653 "\tlfd 27, -40(5)\n"
654 "\tlfd 28, -32(5)\n"
655 "\tlfd 29, -24(5)\n"
656 "\tlfd 30, -16(5)\n"
657 "\tlfd 31, -8(5)\n"
658 "\tmr 1, 5\n"
659 "\tld 0, 16(1)\n"
660 "\tmtlr 0\n"
661 "\tblr\n"
662 : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
663 }
664
665 #else // linux_HOST_OS
666 #error Only Linux support for power64 right now.
667 #endif
668
669 #endif
670
671 #if defined(powerpc64le_HOST_ARCH)
672 /* -----------------------------------------------------------------------------
673 PowerPC 64 little endian architecture
674
675 Really everything is in assembler, so we don't have to deal with GCC...
676 -------------------------------------------------------------------------- */
677 #endif
678
679 /* -----------------------------------------------------------------------------
680 ARM architecture
681 -------------------------------------------------------------------------- */
682
683 #if defined(arm_HOST_ARCH)
684
685 #if defined(__thumb__)
686 #define THUMB_FUNC ".thumb\n\t.thumb_func\n\t"
687 #else
688 #define THUMB_FUNC
689 #endif
690
/*
 * ARM version of StgRun, written as a C function whose body is one
 * extended-asm statement.
 *
 * Asm operands: %0 = r (output), %1 = f, %2 = basereg,
 *               %3 = RESERVED_C_STACK_BYTES (immediate).
 *
 * The asm pushes r4-r11, ip and lr (and d8-d11 unless
 * arm_HOST_ARCH_PRE_ARMv6 is defined), lowers sp by %3, copies basereg
 * into r4 and branches to f with bx.  The global StgReturn label is defined
 * in the middle of the same asm statement: from there sp is raised by %3,
 * r7 is copied into the output operand, and the saved registers are popped
 * again.  The function then returns that output value.
 */
691 StgRegTable *
692 StgRun(StgFunPtr f, StgRegTable *basereg) {
693 StgRegTable * r;
694 __asm__ volatile (
695 /*
696 * save callee-saves registers on behalf of the STG code.
697 */
698 "stmfd sp!, {r4-r11, ip, lr}\n\t"
699 #if !defined(arm_HOST_ARCH_PRE_ARMv6)
700 "vstmdb sp!, {d8-d11}\n\t"
701 #endif
702 /*
703 * allocate some space for Stg machine's temporary storage.
704 * Note: RESERVED_C_STACK_BYTES has to be a round number here or
705 * the assembler can't assemble it.
706 */
707 "sub sp, sp, %3\n\t"
708 /*
709 * Set BaseReg
710 */
711 "mov r4, %2\n\t"
712 /*
713 * Jump to function argument.
714 */
715 "bx %1\n\t"
716
717 ".globl " STG_RETURN "\n\t"
718 THUMB_FUNC
719 #if !defined(ios_HOST_OS)
720 ".type " STG_RETURN ", %%function\n"
721 #endif
722 STG_RETURN ":\n\t"
723 /*
724 * Free the space we allocated
725 */
726 "add sp, sp, %3\n\t"
727 /*
728 * Return the new register table, taking it from Stg's R1 (ARM's R7).
729 */
730 "mov %0, r7\n\t"
731 /*
732 * restore callee-saves registers.
733 */
734 #if !defined(arm_HOST_ARCH_PRE_ARMv6)
735 "vldmia sp!, {d8-d11}\n\t"
736 #endif
737 "ldmfd sp!, {r4-r11, ip, lr}\n\t"
738 : "=r" (r)
739 : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
740 #if !defined(__thumb__)
741 /* In ARM mode, r11/fp is frame-pointer and so we cannot mark
742 it as clobbered. If we do so, GCC complains with error. */
743 : "%r4", "%r5", "%r6", "%r7", "%r8", "%r9", "%r10", "%ip", "%lr"
744 #else
745 /* In Thumb mode r7 is frame-pointer and so we cannot mark it
746 as clobbered. On the other hand we mark as clobbered also
747 those regs not used in Thumb mode. Hard to judge if this is
748 needed, but certainly Haskell code is using them for
749 placing GHC's virtual registers there. See
750 includes/stg/MachRegs.h Please note that Haskell code is
751 compiled by GHC/LLVM into ARM code (not Thumb!), at least
752 as of February 2012 */
/* NOTE(review): "%11" below lacks the "r" the other entries have; it is
   presumably meant to name r11 -- confirm before changing the spelling. */
753 : "%r4", "%r5", "%r6", "%r8", "%r9", "%r10", "%11", "%ip", "%lr"
754 #endif
755 );
756 return r;
757 }
758 #endif
759
760 #if defined(aarch64_HOST_ARCH)
761
/*
 * AArch64 version of StgRun, written as a C function whose body is one
 * extended-asm statement.
 *
 * Asm operands: %0 = r (output), %1 = f, %2 = basereg,
 *               %3 = RESERVED_C_STACK_BYTES (immediate).
 *
 * The asm pushes x29/x30, x16/x17, x19-x28 and d8-d15 in 16-byte pairs,
 * lowers sp by %3, copies basereg into x19 and branches to f with br.  The
 * global StgReturn label is defined in the middle of the same asm
 * statement: from there sp is raised by %3, x22 is copied into the output
 * operand, and the register pairs are popped in the reverse order of the
 * pushes.  The function then returns that output value.
 */
762 StgRegTable *
763 StgRun(StgFunPtr f, StgRegTable *basereg) {
764 StgRegTable * r;
765 __asm__ volatile (
766 /*
767 * Save callee-saves registers on behalf of the STG code.
768 * Floating point registers only need the bottom 64 bits preserved.
769 * We need to use the names x16, x17, x29 and x30 instead of ip0
770 * ip1, fp and lp because one of either clang or gcc doesn't understand
771 * the latter names.
772 */
773 "stp x29, x30, [sp, #-16]!\n\t"
774 "mov x29, sp\n\t"
775 "stp x16, x17, [sp, #-16]!\n\t"
776 "stp x19, x20, [sp, #-16]!\n\t"
777 "stp x21, x22, [sp, #-16]!\n\t"
778 "stp x23, x24, [sp, #-16]!\n\t"
779 "stp x25, x26, [sp, #-16]!\n\t"
780 "stp x27, x28, [sp, #-16]!\n\t"
781 "stp d8, d9, [sp, #-16]!\n\t"
782 "stp d10, d11, [sp, #-16]!\n\t"
783 "stp d12, d13, [sp, #-16]!\n\t"
784 "stp d14, d15, [sp, #-16]!\n\t"
785
786 /*
787 * allocate some space for Stg machine's temporary storage.
788 * Note: RESERVED_C_STACK_BYTES has to be a round number here or
789 * the assembler can't assemble it.
790 */
791 "sub sp, sp, %3\n\t"
792 /*
793 * Set BaseReg
794 */
795 "mov x19, %2\n\t"
796 /*
797 * Jump to function argument.
798 */
799 "br %1\n\t"
800
801 ".globl " STG_RETURN "\n\t"
802 #if !defined(ios_HOST_OS)
803 ".type " STG_RETURN ", %%function\n"
804 #endif
805 STG_RETURN ":\n\t"
806 /*
807 * Free the space we allocated
808 */
809 "add sp, sp, %3\n\t"
810 /*
811 * Return the new register table, taking it from Stg's R1 (ARM64's R22).
812 */
813 "mov %0, x22\n\t"
814 /*
815 * restore callee-saves registers.
816 */
817
818 "ldp d14, d15, [sp], #16\n\t"
819 "ldp d12, d13, [sp], #16\n\t"
820 "ldp d10, d11, [sp], #16\n\t"
821 "ldp d8, d9, [sp], #16\n\t"
822 "ldp x27, x28, [sp], #16\n\t"
823 "ldp x25, x26, [sp], #16\n\t"
824 "ldp x23, x24, [sp], #16\n\t"
825 "ldp x21, x22, [sp], #16\n\t"
826 "ldp x19, x20, [sp], #16\n\t"
827 "ldp x16, x17, [sp], #16\n\t"
828 "ldp x29, x30, [sp], #16\n\t"
829
830 : "=r" (r)
831 : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
832 : "%x19", "%x20", "%x21", "%x22", "%x23", "%x24", "%x25", "%x26", "%x27", "%x28",
833 "%x16", "%x17", "%x30"
834 );
835 return r;
836 }
837
838 #endif
839
840 #endif /* !USE_MINIINTERPRETER */