Enable pthread_getspecific() tls for LLVM compiler
[ghc.git] / rts / StgCRun.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2011
4 *
5 * STG-to-C glue.
6 *
7 * To run an STG function from C land, call
8 *
9 * rv = StgRun(f,BaseReg);
10 *
11 * where "f" is the STG function to call, and BaseReg is the address of the
12 * RegTable for this run (we might have separate RegTables if we're running
13 * multiple threads on an SMP machine).
14 *
15 * In the end, "f" must JMP to StgReturn (defined below),
16 * passing the return-value "rv" in R1,
17 * to return to the caller of StgRun returning "rv" in
18 * whatever way C returns a value.
19 *
20 * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
21 * other registers (other than saving the C callee-saves
22 * registers). Instead, the called function "f" must do that
23 * in STG land.
24 *
25 * GCC will have assumed that pushing/popping of C-stack frames is
26 * going on when it generated its code, and used stack space
27 * accordingly. However, we actually {\em post-process away} all
28 * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
29 * be OK however, if we initially make sure there are
30 * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
31 * variables.
32 *
33 * -------------------------------------------------------------------------- */
34
35 #include "PosixSource.h"
36
37 /*
38 * We define the following (unused) global register variables, because for
39 * some reason gcc generates sub-optimal code for StgRun() on the Alpha
40 * (unnecessarily saving extra registers on the stack) if we don't.
41 *
42 * Why do it at the top of this file, rather than near StgRun() below? Because
43 * gcc doesn't let us define global register variables after any function
44 * definition has been read. Any point after #include "Stg.h" would be too
45 * late.
46 *
47 * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
48 * that we don't use but which are callee-save registers. The __divq() routine
49 * in libc.a clobbers $s6.
50 */
51 #include "ghcconfig.h"
#ifndef USE_MINIINTERPRETER
#ifdef alpha_HOST_ARCH
/*
 * Placeholder global register variables for the Alpha build.  Nothing in
 * this file reads or writes them; they are declared only to influence the
 * code gcc generates for StgRun(), and they have to appear before any
 * function definition.
 */
#define alpha_EXTRA_CAREFUL
register long   fake_ra __asm__("$26");
register long   fake_gp __asm__("$29");
#ifdef alpha_EXTRA_CAREFUL
/* Callee-save registers that STG code does not use but StgRun still saves. */
register long   fake_s6 __asm__("$15");
register double fake_f8 __asm__("$f8");
register double fake_f9 __asm__("$f9");
#endif /* alpha_EXTRA_CAREFUL */
#endif /* alpha_HOST_ARCH */
#endif /* !USE_MINIINTERPRETER */
64
65 // yeuch
66 #define IN_STGCRUN 1
67 #ifdef sparc_HOST_ARCH
68 /* include Stg.h first because we want real machine regs in here: we
69 * have to get the value of R1 back from Stg land to C land intact.
70 */
71 #include "Stg.h"
72 #include "Rts.h"
73 #else
74 /* The other architectures do not require the actual register macro
75 * definitions here because they use hand-written assembly to implement
76 * the StgRun function. The sparc code could be changed so that it does
77 * not require the register macro definitions.
78 */
79 #include "Rts.h"
80 #include "Stg.h"
81 #endif
82
83 #include "StgRun.h"
84 #include "Capability.h"
85
86 #ifdef DEBUG
87 #include "RtsUtils.h"
88 #include "Printer.h"
89 #endif
90
91 #ifdef USE_MINIINTERPRETER
92
93 /* -----------------------------------------------------------------------------
94 any architecture (using miniinterpreter)
95 -------------------------------------------------------------------------- */
96
97 StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
98 {
99 while (f) {
100 IF_DEBUG(interpreter,
101 debugBelch("Jumping to ");
102 printPtr((P_)f); fflush(stdout);
103 debugBelch("\n");
104 );
105 f = (StgFunPtr) (f)();
106 }
107 return (StgRegTable *)R1.p;
108 }
109
110 StgFunPtr StgReturn(void)
111 {
112 return 0;
113 }
114
115 #else /* !USE_MINIINTERPRETER */
116
/*
 * Assembler-level names of the two entry points.  Platforms that prefix
 * C symbols with an underscore need the prefix spelled out in the
 * hand-written assembly below.
 */
#ifdef LEADING_UNDERSCORE
#define STG_RUN    "_StgRun"
#define STG_RETURN "_StgReturn"
#else
#define STG_RUN    "StgRun"
#define STG_RETURN "StgReturn"
#endif
128
129 /* -----------------------------------------------------------------------------
130 x86 architecture
131 -------------------------------------------------------------------------- */
132
133 #ifdef i386_HOST_ARCH
134
135 #ifdef darwin_HOST_OS
136 #define STG_GLOBAL ".globl "
137 #else
138 #define STG_GLOBAL ".global "
139 #endif
140
141 static void GNUC3_ATTRIBUTE(used)
142 StgRunIsImplementedInAssembler(void)
143 {
144 __asm__ volatile (
145 STG_GLOBAL STG_RUN "\n"
146 STG_RUN ":\n\t"
147
148 /*
149 * move %esp down to reserve an area for temporary storage
150 * during the execution of STG code.
151 *
152 * The stack pointer has to be aligned to a multiple of 16
153 * bytes from here - this is a requirement of the C ABI, so
154 * that C code can assign SSE2 registers directly to/from
155 * stack locations.
156 */
157 "subl %0, %%esp\n\t"
158
159 /*
160 * save callee-saves registers on behalf of the STG code.
161 */
162 "movl %%esp, %%eax\n\t"
163 "addl %0-16, %%eax\n\t"
164 "movl %%ebx,0(%%eax)\n\t"
165 "movl %%esi,4(%%eax)\n\t"
166 "movl %%edi,8(%%eax)\n\t"
167 "movl %%ebp,12(%%eax)\n\t"
168 /*
169 * Set BaseReg
170 */
171 "movl 24(%%eax),%%ebx\n\t"
172 /*
173 * grab the function argument from the stack
174 */
175 "movl 20(%%eax),%%eax\n\t"
176 /*
177 * jump to it
178 */
179 "jmp *%%eax\n\t"
180
181 STG_GLOBAL STG_RETURN "\n"
182 STG_RETURN ":\n\t"
183
184 "movl %%esi, %%eax\n\t" /* Return value in R1 */
185
186 /*
187 * restore callee-saves registers. (Don't stomp on %%eax!)
188 */
189 "movl %%esp, %%edx\n\t"
190 "addl %0-16, %%edx\n\t"
191 "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
192 "movl 4(%%edx),%%esi\n\t"
193 "movl 8(%%edx),%%edi\n\t"
194 "movl 12(%%edx),%%ebp\n\t"
195
196 "addl %0, %%esp\n\t"
197 "ret"
198
199 : : "i" (RESERVED_C_STACK_BYTES + 16 + 12)
200 // + 16 to make room for the 4 registers we have to save
201 // + 12 because we need to align %esp to a 16-byte boundary (#5250)
202 );
203 }
204
#if defined(mingw32_HOST_OS)
/*
 * Windows stack probe.
 *
 * On Windows the stack has to be allocated 4k at a time, otherwise we get
 * a segfault.  The C compiler already knows how to do that (it calls
 * _alloca()), so declaring a local buffer as large as the frame StgRun
 * needs makes sure that much stack can be allocated.
 *
 * The returned pointer refers to this function's own local storage and
 * must not be dereferenced by the caller.
 */
StgWord8 *win32AllocStack(void)
{
    StgWord8 probe[RESERVED_C_STACK_BYTES + 16 + 12];
    return probe;
}
#endif
216
217 #endif
218
219 /* ----------------------------------------------------------------------------
220 x86-64 is almost the same as plain x86.
221
222 I've done it using entirely inline assembler, because I couldn't
223 get gcc to generate the correct subtraction from %rsp by using
224 the local array variable trick. It didn't seem to reserve
225 enough space. Oh well, it's not much harder this way.
226
227 ------------------------------------------------------------------------- */
228
229 #ifdef x86_64_HOST_ARCH
230
231 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
232
233 static void GNUC3_ATTRIBUTE(used)
234 StgRunIsImplementedInAssembler(void)
235 {
236 __asm__ volatile (
237 /*
238 * save callee-saves registers on behalf of the STG code.
239 */
240 ".globl " STG_RUN "\n"
241 STG_RUN ":\n\t"
242 "subq %0, %%rsp\n\t"
243 "movq %%rsp, %%rax\n\t"
244 "addq %0-48, %%rax\n\t"
245 "movq %%rbx,0(%%rax)\n\t"
246 "movq %%rbp,8(%%rax)\n\t"
247 "movq %%r12,16(%%rax)\n\t"
248 "movq %%r13,24(%%rax)\n\t"
249 "movq %%r14,32(%%rax)\n\t"
250 "movq %%r15,40(%%rax)\n\t"
251 /*
252 * Set BaseReg
253 */
254 "movq %%rsi,%%r13\n\t"
255 /*
256 * grab the function argument from the stack, and jump to it.
257 */
258 "movq %%rdi,%%rax\n\t"
259 "jmp *%%rax\n\t"
260
261 ".globl " STG_RETURN "\n"
262 STG_RETURN ":\n\t"
263
264 "movq %%rbx, %%rax\n\t" /* Return value in R1 */
265
266 /*
267 * restore callee-saves registers. (Don't stomp on %%rax!)
268 */
269 "movq %%rsp, %%rdx\n\t"
270 "addq %0-48, %%rdx\n\t"
271 "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
272 "movq 8(%%rdx),%%rbp\n\t"
273 "movq 16(%%rdx),%%r12\n\t"
274 "movq 24(%%rdx),%%r13\n\t"
275 "movq 32(%%rdx),%%r14\n\t"
276 "movq 40(%%rdx),%%r15\n\t"
277 "addq %0, %%rsp\n\t"
278 "retq"
279
280 : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
281 /*
282 HACK alert!
283
284 The x86_64 ABI specifies that on a procedure call, %rsp is
285 aligned on a 16-byte boundary + 8. That is, the first
286 argument on the stack after the return address will be
287 16-byte aligned.
288
289 Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
290 of 16 bytes.
291
292 BUT... when we do a C-call from STG land, gcc likes to put the
293 stack alignment adjustment in the prolog. eg. if we're calling
294 a function with arguments in regs, gcc will insert 'subq $8,%rsp'
295 in the prolog, to keep %rsp aligned (the return address is 8
296 bytes, remember). The mangler throws away the prolog, so we
297 lose the stack alignment.
298
299 The hack is to add this extra 8 bytes to our %rsp adjustment
300 here, so that throughout STG code, %rsp is 16-byte aligned,
301 ready for a C-call.
302
303 A quick way to see if this is wrong is to compile this code:
304
305 main = System.Exit.exitWith ExitSuccess
306
307 And run it with +RTS -sstderr. The stats code in the RTS, in
308 particular statsPrintf(), relies on the stack alignment because
309 it saves the %xmm regs on the stack, so it'll fall over if the
310 stack isn't aligned, and calling exitWith from Haskell invokes
311 shutdownHaskellAndExit using a C call.
312
313 Future gcc releases will almost certainly break this hack...
314 */
315 }
316
317 #endif /* x86-64 */
318
319 /* -----------------------------------------------------------------------------
320 Sparc architecture
321
322 --
323 OLD COMMENT from GHC-3.02:
324
325 We want tailjumps to be calls, because `call xxx' is the only Sparc
326 branch that allows an arbitrary label as a target. (Gcc's ``goto
327 *target'' construct ends up loading the label into a register and
328 then jumping, at the cost of two extra instructions for the 32-bit
329 load.)
330
331 When entering the threaded world, we stash our return address in a
332 known location so that \tr{%i7} is available as an extra
333 callee-saves register. Of course, we have to restore this when
334 coming out of the threaded world.
335
336 I hate this god-forsaken architecture. Since the top of the
337 reserved stack space is used for globals and the bottom is reserved
338 for outgoing arguments, we have to stick our return address
339 somewhere in the middle. Currently, I'm allowing 100 extra
340 outgoing arguments beyond the first 6. --JSM
341
342 Updated info (GHC 4.06): we don't appear to use %i7 any more, so
343 I'm not sure whether we still need to save it. Incidentally, what
344 does the last paragraph above mean when it says "the top of the
345 stack is used for globals"? What globals? --SDM
346
347 Updated info (GHC 4.08.2): not saving %i7 any more (see below).
348 -------------------------------------------------------------------------- */
349
350 #ifdef sparc_HOST_ARCH
351
352 StgRegTable *
353 StgRun(StgFunPtr f, StgRegTable *basereg) {
354
355 unsigned char space[RESERVED_C_STACK_BYTES];
356 #if 0
357 register void *i7 __asm__("%i7");
358 ((void **)(space))[100] = i7;
359 #endif
360 f();
361 __asm__ volatile (
362 ".align 4\n"
363 ".global " STG_RETURN "\n"
364 STG_RETURN ":"
365 : : "p" (space) : "l0","l1","l2","l3","l4","l5","l6","l7");
366 /* we tell the C compiler that l0-l7 are clobbered on return to
367 * StgReturn, otherwise it tries to use these to save eg. the
368 * address of space[100] across the call. The correct thing
369 * to do would be to save all the callee-saves regs, but we
370 * can't be bothered to do that.
371 *
372 * We also explicitly mark space as used since gcc eliminates it
373 * otherwise.
374 *
375 * The code that gcc generates for this little fragment is now
376 * terrible. We could do much better by coding it directly in
377 * assembler.
378 */
379 #if 0
380 /* updated 4.08.2: we don't save %i7 in the middle of the reserved
381 * space any more, since gcc tries to save its address across the
382 * call to f(), this gets clobbered in STG land and we end up
383 * dereferencing a bogus pointer in StgReturn.
384 */
385 __asm__ volatile ("ld %1,%0"
386 : "=r" (i7) : "m" (((void **)(space))[100]));
387 #endif
388 return (StgRegTable *)R1.i;
389 }
390
391 #endif
392
393 /* -----------------------------------------------------------------------------
394 alpha architecture
395
396 "The stack pointer (SP) must at all times denote an address that has octaword
397 alignment. (This restriction has the side effect that the in-memory portion
398 of the argument list, if any, will start on an octaword boundary.) Note that
399 the stack grows toward lower addresses. During a procedure invocation, SP
400 can never be set to a value that is higher than the value of SP at entry to
401 that procedure invocation.
402
403 "The contents of the stack, located above the portion of the argument list
404 (if any) that is passed in memory, belong to the calling procedure. Because
405 they are part of the calling procedure, they should not be read or written
406 by the called procedure, except as specified by indirect arguments or
407 language-controlled up-level references.
408
409 "The SP value might be used by the hardware when raising exceptions and
410 asynchronous interrupts. It must be assumed that the contents of the stack
411 below the current SP value and within the stack for the current thread are
412 continually and unpredictably modified, as specified in the _Alpha
413 Architecture Reference Manual_, and as a result of asynchronous software
414 actions."
415
416 -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
417 Alpha Systems, 5.1 edition, August 2000, section 3.2.1. http://www.
418 tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
419 -------------------------------------------------------------------------- */
420
421 #ifdef alpha_HOST_ARCH
422
423 StgRegTable *
424 StgRun(StgFunPtr f, StgRegTable *basereg)
425 {
426 register long real_ra __asm__("$26"); volatile long save_ra;
427 register long real_gp __asm__("$29"); volatile long save_gp;
428
429 register long real_s0 __asm__("$9" ); volatile long save_s0;
430 register long real_s1 __asm__("$10"); volatile long save_s1;
431 register long real_s2 __asm__("$11"); volatile long save_s2;
432 register long real_s3 __asm__("$12"); volatile long save_s3;
433 register long real_s4 __asm__("$13"); volatile long save_s4;
434 register long real_s5 __asm__("$14"); volatile long save_s5;
435 #ifdef alpha_EXTRA_CAREFUL
436 register long real_s6 __asm__("$15"); volatile long save_s6;
437 #endif
438
439 register double real_f2 __asm__("$f2"); volatile double save_f2;
440 register double real_f3 __asm__("$f3"); volatile double save_f3;
441 register double real_f4 __asm__("$f4"); volatile double save_f4;
442 register double real_f5 __asm__("$f5"); volatile double save_f5;
443 register double real_f6 __asm__("$f6"); volatile double save_f6;
444 register double real_f7 __asm__("$f7"); volatile double save_f7;
445 #ifdef alpha_EXTRA_CAREFUL
446 register double real_f8 __asm__("$f8"); volatile double save_f8;
447 register double real_f9 __asm__("$f9"); volatile double save_f9;
448 #endif
449
450 register StgFunPtr real_pv __asm__("$27");
451
452 StgRegTable * ret;
453
454 save_ra = real_ra;
455 save_gp = real_gp;
456
457 save_s0 = real_s0;
458 save_s1 = real_s1;
459 save_s2 = real_s2;
460 save_s3 = real_s3;
461 save_s4 = real_s4;
462 save_s5 = real_s5;
463 #ifdef alpha_EXTRA_CAREFUL
464 save_s6 = real_s6;
465 #endif
466
467 save_f2 = real_f2;
468 save_f3 = real_f3;
469 save_f4 = real_f4;
470 save_f5 = real_f5;
471 save_f6 = real_f6;
472 save_f7 = real_f7;
473 #ifdef alpha_EXTRA_CAREFUL
474 save_f8 = real_f8;
475 save_f9 = real_f9;
476 #endif
477
478 real_pv = f;
479
480 __asm__ volatile( "lda $30,-%0($30)" "\n"
481 "\t" "jmp ($27)" "\n"
482 "\t" ".align 3" "\n"
483 ".globl " STG_RETURN "\n"
484 STG_RETURN ":" "\n"
485 "\t" "lda $30,%0($30)" "\n"
486 : : "K" (RESERVED_C_STACK_BYTES));
487
488 ret = real_s5;
489
490 real_s0 = save_s0;
491 real_s1 = save_s1;
492 real_s2 = save_s2;
493 real_s3 = save_s3;
494 real_s4 = save_s4;
495 real_s5 = save_s5;
496 #ifdef alpha_EXTRA_CAREFUL
497 real_s6 = save_s6;
498 #endif
499
500 real_f2 = save_f2;
501 real_f3 = save_f3;
502 real_f4 = save_f4;
503 real_f5 = save_f5;
504 real_f6 = save_f6;
505 real_f7 = save_f7;
506 #ifdef alpha_EXTRA_CAREFUL
507 real_f8 = save_f8;
508 real_f9 = save_f9;
509 #endif
510
511 real_ra = save_ra;
512 real_gp = save_gp;
513
514 return ret;
515 }
516
517 #endif /* alpha_HOST_ARCH */
518
519 /* -----------------------------------------------------------------------------
520 HP-PA architecture
521 -------------------------------------------------------------------------- */
522
523 #ifdef hppa1_1_HOST_ARCH
524
525 StgRegTable *
526 StgRun(StgFunPtr f, StgRegTable *basereg)
527 {
528 StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
529 StgRegTable * ret;
530
531 __asm__ volatile ("ldo %0(%%r30),%%r19\n"
532 "\tstw %%r3, 0(0,%%r19)\n"
533 "\tstw %%r4, 4(0,%%r19)\n"
534 "\tstw %%r5, 8(0,%%r19)\n"
535 "\tstw %%r6,12(0,%%r19)\n"
536 "\tstw %%r7,16(0,%%r19)\n"
537 "\tstw %%r8,20(0,%%r19)\n"
538 "\tstw %%r9,24(0,%%r19)\n"
539 "\tstw %%r10,28(0,%%r19)\n"
540 "\tstw %%r11,32(0,%%r19)\n"
541 "\tstw %%r12,36(0,%%r19)\n"
542 "\tstw %%r13,40(0,%%r19)\n"
543 "\tstw %%r14,44(0,%%r19)\n"
544 "\tstw %%r15,48(0,%%r19)\n"
545 "\tstw %%r16,52(0,%%r19)\n"
546 "\tstw %%r17,56(0,%%r19)\n"
547 "\tstw %%r18,60(0,%%r19)\n"
548 "\tldo 80(%%r19),%%r19\n"
549 "\tfstds %%fr12,-16(0,%%r19)\n"
550 "\tfstds %%fr13, -8(0,%%r19)\n"
551 "\tfstds %%fr14, 0(0,%%r19)\n"
552 "\tfstds %%fr15, 8(0,%%r19)\n"
553 "\tldo 32(%%r19),%%r19\n"
554 "\tfstds %%fr16,-16(0,%%r19)\n"
555 "\tfstds %%fr17, -8(0,%%r19)\n"
556 "\tfstds %%fr18, 0(0,%%r19)\n"
557 "\tfstds %%fr19, 8(0,%%r19)\n"
558 "\tldo 32(%%r19),%%r19\n"
559 "\tfstds %%fr20,-16(0,%%r19)\n"
560 "\tfstds %%fr21, -8(0,%%r19)\n" : :
561 "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
562 );
563
564 f();
565
566 __asm__ volatile (".align 4\n"
567 "\t.EXPORT " STG_RETURN ",CODE\n"
568 "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
569 STG_RETURN "\n"
570 /* "\tldo %0(%%r3),%%r19\n" */
571 "\tldo %1(%%r30),%%r19\n"
572 "\tcopy %%r11, %0\n" /* save R1 */
573 "\tldw 0(0,%%r19),%%r3\n"
574 "\tldw 4(0,%%r19),%%r4\n"
575 "\tldw 8(0,%%r19),%%r5\n"
576 "\tldw 12(0,%%r19),%%r6\n"
577 "\tldw 16(0,%%r19),%%r7\n"
578 "\tldw 20(0,%%r19),%%r8\n"
579 "\tldw 24(0,%%r19),%%r9\n"
580 "\tldw 28(0,%%r19),%%r10\n"
581 "\tldw 32(0,%%r19),%%r11\n"
582 "\tldw 36(0,%%r19),%%r12\n"
583 "\tldw 40(0,%%r19),%%r13\n"
584 "\tldw 44(0,%%r19),%%r14\n"
585 "\tldw 48(0,%%r19),%%r15\n"
586 "\tldw 52(0,%%r19),%%r16\n"
587 "\tldw 56(0,%%r19),%%r17\n"
588 "\tldw 60(0,%%r19),%%r18\n"
589 "\tldo 80(%%r19),%%r19\n"
590 "\tfldds -16(0,%%r19),%%fr12\n"
591 "\tfldds -8(0,%%r19),%%fr13\n"
592 "\tfldds 0(0,%%r19),%%fr14\n"
593 "\tfldds 8(0,%%r19),%%fr15\n"
594 "\tldo 32(%%r19),%%r19\n"
595 "\tfldds -16(0,%%r19),%%fr16\n"
596 "\tfldds -8(0,%%r19),%%fr17\n"
597 "\tfldds 0(0,%%r19),%%fr18\n"
598 "\tfldds 8(0,%%r19),%%fr19\n"
599 "\tldo 32(%%r19),%%r19\n"
600 "\tfldds -16(0,%%r19),%%fr20\n"
601 "\tfldds -8(0,%%r19),%%fr21\n"
602 : "=r" (ret)
603 : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
604 : "%r19"
605 );
606
607 return ret;
608 }
609
610 #endif /* hppa1_1_HOST_ARCH */
611
612 /* -----------------------------------------------------------------------------
613 PowerPC architecture
614
615 Everything is in assembler, so we don't have to deal with GCC...
616
617 -------------------------------------------------------------------------- */
618
619 #ifdef powerpc_HOST_ARCH
620
621 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
622
623 #ifdef darwin_HOST_OS
624 void StgRunIsImplementedInAssembler(void)
625 {
626 #if HAVE_SUBSECTIONS_VIA_SYMBOLS
627 // if the toolchain supports deadstripping, we have to
628 // prevent it here (it tends to get confused here).
629 __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
630 #endif
631 __asm__ volatile (
632 "\n.globl _StgRun\n"
633 "_StgRun:\n"
634 "\tmflr r0\n"
635 "\tbl saveFP # f14\n"
636 "\tstmw r13,-220(r1)\n"
637 "\tstwu r1,-%0(r1)\n"
638 "\tmr r27,r4\n" // BaseReg == r27
639 "\tmtctr r3\n"
640 "\tmr r12,r3\n"
641 "\tbctr\n"
642 ".globl _StgReturn\n"
643 "_StgReturn:\n"
644 "\tmr r3,r14\n"
645 "\tla r1,%0(r1)\n"
646 "\tlmw r13,-220(r1)\n"
647 "\tb restFP # f14\n"
648 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
649 }
650 #else
651
652 // This version is for PowerPC Linux.
653
654 // Differences from the Darwin/Mac OS X version:
655 // *) Different Assembler Syntax
656 // *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
657 // *) We may not access positive stack offsets
658 // (no "Red Zone" as in the Darwin ABI)
659 // *) The Link Register is saved to a different offset in the caller's stack frame
660 // (Linux: 4(r1), Darwin 8(r1))
661
662 static void GNUC3_ATTRIBUTE(used)
663 StgRunIsImplementedInAssembler(void)
664 {
665 __asm__ volatile (
666 "\t.globl StgRun\n"
667 "\t.type StgRun,@function\n"
668 "StgRun:\n"
669 "\tmflr 0\n"
670 "\tstw 0,4(1)\n"
671 "\tmr 5,1\n"
672 "\tstwu 1,-%0(1)\n"
673 "\tstmw 13,-220(5)\n"
674 "\tstfd 14,-144(5)\n"
675 "\tstfd 15,-136(5)\n"
676 "\tstfd 16,-128(5)\n"
677 "\tstfd 17,-120(5)\n"
678 "\tstfd 18,-112(5)\n"
679 "\tstfd 19,-104(5)\n"
680 "\tstfd 20,-96(5)\n"
681 "\tstfd 21,-88(5)\n"
682 "\tstfd 22,-80(5)\n"
683 "\tstfd 23,-72(5)\n"
684 "\tstfd 24,-64(5)\n"
685 "\tstfd 25,-56(5)\n"
686 "\tstfd 26,-48(5)\n"
687 "\tstfd 27,-40(5)\n"
688 "\tstfd 28,-32(5)\n"
689 "\tstfd 29,-24(5)\n"
690 "\tstfd 30,-16(5)\n"
691 "\tstfd 31,-8(5)\n"
692 "\tmr 27,4\n" // BaseReg == r27
693 "\tmtctr 3\n"
694 "\tmr 12,3\n"
695 "\tbctr\n"
696 ".globl StgReturn\n"
697 "\t.type StgReturn,@function\n"
698 "StgReturn:\n"
699 "\tmr 3,14\n"
700 "\tla 5,%0(1)\n"
701 "\tlmw 13,-220(5)\n"
702 "\tlfd 14,-144(5)\n"
703 "\tlfd 15,-136(5)\n"
704 "\tlfd 16,-128(5)\n"
705 "\tlfd 17,-120(5)\n"
706 "\tlfd 18,-112(5)\n"
707 "\tlfd 19,-104(5)\n"
708 "\tlfd 20,-96(5)\n"
709 "\tlfd 21,-88(5)\n"
710 "\tlfd 22,-80(5)\n"
711 "\tlfd 23,-72(5)\n"
712 "\tlfd 24,-64(5)\n"
713 "\tlfd 25,-56(5)\n"
714 "\tlfd 26,-48(5)\n"
715 "\tlfd 27,-40(5)\n"
716 "\tlfd 28,-32(5)\n"
717 "\tlfd 29,-24(5)\n"
718 "\tlfd 30,-16(5)\n"
719 "\tlfd 31,-8(5)\n"
720 "\tmr 1,5\n"
721 "\tlwz 0,4(1)\n"
722 "\tmtlr 0\n"
723 "\tblr\n"
724 : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
725 }
726 #endif
727
728 #endif
729
730 /* -----------------------------------------------------------------------------
731 PowerPC 64 architecture
732
733 Everything is in assembler, so we don't have to deal with GCC...
734
735 -------------------------------------------------------------------------- */
736
737 #ifdef powerpc64_HOST_ARCH
738
739 #ifdef linux_HOST_OS
740 extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
741
742 static void GNUC3_ATTRIBUTE(used)
743 StgRunIsImplementedInAssembler(void)
744 {
745 // r0 volatile
746 // r1 stack pointer
747 // r2 toc - needs to be saved
748 // r3-r10 argument passing, volatile
749 // r11, r12 very volatile (not saved across cross-module calls)
750 // r13 thread local state (never modified, don't need to save)
751 // r14-r31 callee-save
752 __asm__ volatile (
753 ".section \".opd\",\"aw\"\n"
754 ".align 3\n"
755 ".globl StgRun\n"
756 "StgRun:\n"
757 "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
758 "\t.size StgRun,24\n"
759 ".globl StgReturn\n"
760 "StgReturn:\n"
761 "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
762 "\t.size StgReturn,24\n"
763 ".previous\n"
764 ".globl .StgRun\n"
765 ".type .StgRun,@function\n"
766 ".StgRun:\n"
767 "\tmflr 0\n"
768 "\tmr 5, 1\n"
769 "\tstd 0, 16(1)\n"
770 "\tstdu 1, -%0(1)\n"
771 "\tstd 2, -296(5)\n"
772 "\tstd 14, -288(5)\n"
773 "\tstd 15, -280(5)\n"
774 "\tstd 16, -272(5)\n"
775 "\tstd 17, -264(5)\n"
776 "\tstd 18, -256(5)\n"
777 "\tstd 19, -248(5)\n"
778 "\tstd 20, -240(5)\n"
779 "\tstd 21, -232(5)\n"
780 "\tstd 22, -224(5)\n"
781 "\tstd 23, -216(5)\n"
782 "\tstd 24, -208(5)\n"
783 "\tstd 25, -200(5)\n"
784 "\tstd 26, -192(5)\n"
785 "\tstd 27, -184(5)\n"
786 "\tstd 28, -176(5)\n"
787 "\tstd 29, -168(5)\n"
788 "\tstd 30, -160(5)\n"
789 "\tstd 31, -152(5)\n"
790 "\tstfd 14, -144(5)\n"
791 "\tstfd 15, -136(5)\n"
792 "\tstfd 16, -128(5)\n"
793 "\tstfd 17, -120(5)\n"
794 "\tstfd 18, -112(5)\n"
795 "\tstfd 19, -104(5)\n"
796 "\tstfd 20, -96(5)\n"
797 "\tstfd 21, -88(5)\n"
798 "\tstfd 22, -80(5)\n"
799 "\tstfd 23, -72(5)\n"
800 "\tstfd 24, -64(5)\n"
801 "\tstfd 25, -56(5)\n"
802 "\tstfd 26, -48(5)\n"
803 "\tstfd 27, -40(5)\n"
804 "\tstfd 28, -32(5)\n"
805 "\tstfd 29, -24(5)\n"
806 "\tstfd 30, -16(5)\n"
807 "\tstfd 31, -8(5)\n"
808 "\tmr 27, 4\n" // BaseReg == r27
809 "\tld 2, 8(3)\n"
810 "\tld 3, 0(3)\n"
811 "\tmtctr 3\n"
812 "\tbctr\n"
813 ".globl .StgReturn\n"
814 ".type .StgReturn,@function\n"
815 ".StgReturn:\n"
816 "\tmr 3,14\n"
817 "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
818 "\tld 2, -296(5)\n"
819 "\tld 14, -288(5)\n"
820 "\tld 15, -280(5)\n"
821 "\tld 16, -272(5)\n"
822 "\tld 17, -264(5)\n"
823 "\tld 18, -256(5)\n"
824 "\tld 19, -248(5)\n"
825 "\tld 20, -240(5)\n"
826 "\tld 21, -232(5)\n"
827 "\tld 22, -224(5)\n"
828 "\tld 23, -216(5)\n"
829 "\tld 24, -208(5)\n"
830 "\tld 25, -200(5)\n"
831 "\tld 26, -192(5)\n"
832 "\tld 27, -184(5)\n"
833 "\tld 28, -176(5)\n"
834 "\tld 29, -168(5)\n"
835 "\tld 30, -160(5)\n"
836 "\tld 31, -152(5)\n"
837 "\tlfd 14, -144(5)\n"
838 "\tlfd 15, -136(5)\n"
839 "\tlfd 16, -128(5)\n"
840 "\tlfd 17, -120(5)\n"
841 "\tlfd 18, -112(5)\n"
842 "\tlfd 19, -104(5)\n"
843 "\tlfd 20, -96(5)\n"
844 "\tlfd 21, -88(5)\n"
845 "\tlfd 22, -80(5)\n"
846 "\tlfd 23, -72(5)\n"
847 "\tlfd 24, -64(5)\n"
848 "\tlfd 25, -56(5)\n"
849 "\tlfd 26, -48(5)\n"
850 "\tlfd 27, -40(5)\n"
851 "\tlfd 28, -32(5)\n"
852 "\tlfd 29, -24(5)\n"
853 "\tlfd 30, -16(5)\n"
854 "\tlfd 31, -8(5)\n"
855 "\tmr 1, 5\n"
856 "\tld 0, 16(1)\n"
857 "\tmtlr 0\n"
858 "\tblr\n"
859 : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
860 }
861 #else // linux_HOST_OS
862 #error Only linux support for power64 right now.
863 #endif
864
865 #endif
866
867 /* -----------------------------------------------------------------------------
868 IA64 architecture
869
870 Again, in assembler - so we can fiddle with the register stack, and because
871 gcc doesn't handle asm-clobbered callee-saves correctly.
872
873 loc0 - loc15: preserved locals
874 loc16 - loc28: STG registers
875 loc29: saved ar.pfs
876 loc30: saved b0
877 loc31: saved gp (gcc 3.3 uses this slot)
878 loc32: saved ar.lc
879 loc33: saved pr
880 f2 - f5: preserved floating-point registers
881 f16 - f23: preserved floating-point registers
882 -------------------------------------------------------------------------- */
883
884 #ifdef ia64_HOST_ARCH
885
886 /* the memory stack is rarely used, so 16K is excessive */
887 #undef RESERVED_C_STACK_BYTES
888 #define RESERVED_C_STACK_BYTES 1024
889
890 /* We don't spill all the callee-save FP registers, only the ones that
891 * gcc has been observed to use */
892 #define PRESERVED_FP_REGISTERS 12
893
894 /* We always allocate 34 local and 8 output registers. As long as gcc used
895 * fewer than 32 locals, the mangler will adjust the stack frame accordingly. */
896 #define LOCALS 34
897
898 static void GNUC3_ATTRIBUTE(used)
899 StgRunIsImplementedInAssembler(void)
900 {
901 __asm__ volatile(
902 ".global StgRun\n"
903 "StgRun:\n"
904 "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
905 "\tld8 r18 = [r32],8\n" /* get procedure address */
906 "\tadds sp = -%0, sp ;;\n" /* setup stack */
907 "\tld8 gp = [r32]\n" /* get procedure GP */
908 "\tadds r16 = %0-(%2*16), sp\n"
909 "\tadds r17 = %0-((%2-1)*16), sp ;;\n"
910 "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
911 "\tstf.spill [r17] = f17,32\n"
912 "\tmov b6 = r18 ;;\n" /* set target address */
913 "\tstf.spill [r16] = f18,32\n"
914 "\tstf.spill [r17] = f19,32\n"
915 "\tmov loc30 = b0 ;;\n" /* save return address */
916 "\tstf.spill [r16] = f20,32\n"
917 "\tstf.spill [r17] = f21,32 ;;\n"
918 "\tstf.spill [r16] = f22,32\n"
919 "\tstf.spill [r17] = f23,32\n"
920 "\tmov loc32 = ar.lc ;;\n" /* save loop counter */
921 "\tstf.spill [r16] = f2,32\n"
922 "\tstf.spill [r17] = f3,32\n"
923 "\tmov loc33 = pr ;;\n" /* save predicate registers */
924 "\tstf.spill [r16] = f4,32\n"
925 "\tstf.spill [r17] = f5,32\n"
926 "\tbr.few b6 ;;\n" /* branch to function */
927 ".global StgReturn\n"
928 "StgReturn:\n"
929 "\tmov r8 = loc16\n" /* return value in r8 */
930 "\tadds r16 = %0-(%2*16), sp\n"
931 "\tadds r17 = %0-((%2-1)*16), sp ;;\n"
932 "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
933 "\tldf.fill f17 = [r17],32\n"
934 "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
935 "\tldf.fill f18 = [r16],32\n"
936 "\tldf.fill f19 = [r17],32\n"
937 "\tmov b0 = loc30 ;;\n" /* restore return address */
938 "\tldf.fill f20 = [r16],32\n"
939 "\tldf.fill f21 = [r17],32\n"
940 "\tmov ar.lc = loc32 ;;\n" /* restore loop counter */
941 "\tldf.fill f22 = [r16],32\n"
942 "\tldf.fill f23 = [r17],32\n"
943 "\tmov pr = loc33 ;;\n" /* restore predicate registers */
944 "\tldf.fill f2 = [r16],32\n"
945 "\tldf.fill f3 = [r17],32\n"
946 "\tadds sp = %0, sp ;;\n" /* restore stack */
947 "\tldf.fill f4 = [r16],32\n"
948 "\tldf.fill f5 = [r17],32\n"
949 "\tbr.ret.sptk.many b0 ;;\n" /* return */
950 : : "i"(RESERVED_C_STACK_BYTES + PRESERVED_FP_REGISTERS*16),
951 "i"(LOCALS),
952 "i"(PRESERVED_FP_REGISTERS));
953 }
954
955 #endif
956
957 /* -----------------------------------------------------------------------------
958 MIPS architecture
959 -------------------------------------------------------------------------- */
960
961 #ifdef mips_HOST_ARCH
962
963 StgThreadReturnCode
964 StgRun(StgFunPtr f, StgRegTable *basereg)
965 {
966 register StgThreadReturnCode __v0 __asm__("$2");
967
968 __asm__ __volatile__(
969 " la $25, %1 \n"
970 " move $30, %2 \n"
971 " jr %1 \n"
972 " .align 3 \n"
973 " .globl " STG_RETURN " \n"
974 " .aent " STG_RETURN " \n"
975 STG_RETURN ": \n"
976 " move %0, $16 \n"
977 " move $3, $17 \n"
978 : "=r" (__v0),
979 : "r" (f), "r" (basereg)
980 "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23",
981 "$25", "$28", "$30",
982 "$f20", "$f22", "$f24", "$f26", "$f28", "$f30",
983 "memory");
984
985 return __v0;
986 }
987
988 #endif /* mips_HOST_ARCH */
989
990 /* -----------------------------------------------------------------------------
991 ARM architecture
992 -------------------------------------------------------------------------- */
993
994 #ifdef arm_HOST_ARCH
995
996 #if defined(__thumb__)
997 #define THUMB_FUNC ".thumb\n\t.thumb_func\n\t"
998 #else
999 #define THUMB_FUNC
1000 #endif
1001
1002 StgRegTable *
1003 StgRun(StgFunPtr f, StgRegTable *basereg) {
1004 StgRegTable * r;
1005 __asm__ volatile (
1006 /*
1007 * save callee-saves registers on behalf of the STG code.
1008 */
1009 "stmfd sp!, {r4-r10, fp, ip, lr}\n\t"
1010 #if !defined(arm_HOST_ARCH_PRE_ARMv6)
1011 "vstmdb sp!, {d8-d11}\n\t"
1012 #endif
1013 /*
1014 * allocate some space for Stg machine's temporary storage.
1015 * Note: RESERVER_C_STACK_BYTES has to be a round number here or
1016 * the assembler can't assemble it.
1017 */
1018 "sub sp, sp, %3\n\t"
1019 /*
1020 * Set BaseReg
1021 */
1022 "mov r4, %2\n\t"
1023 /*
1024 * Jump to function argument.
1025 */
1026 "bx %1\n\t"
1027
1028 ".global " STG_RETURN "\n\t"
1029 THUMB_FUNC
1030 ".type " STG_RETURN ", %%function\n"
1031 STG_RETURN ":\n\t"
1032 /*
1033 * Free the space we allocated
1034 */
1035 "add sp, sp, %3\n\t"
1036 /*
1037 * Return the new register table, taking it from Stg's R1 (ARM's R7).
1038 */
1039 "mov %0, r7\n\t"
1040 /*
1041 * restore callee-saves registers.
1042 */
1043 #if !defined(arm_HOST_ARCH_PRE_ARMv6)
1044 "vldmia sp!, {d8-d11}\n\t"
1045 #endif
1046 "ldmfd sp!, {r4-r10, fp, ip, lr}\n\t"
1047 : "=r" (r)
1048 : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
1049 :
1050 );
1051 return r;
1052 }
1053 #endif
1054
1055 #endif /* !USE_MINIINTERPRETER */