/* -----------------------------------------------------------------------------
 * Foreign export adjustor thunks
 *
 * Copyright (c) 1998.
 *
 * ---------------------------------------------------------------------------*/

/* A little bit of background...

   An adjustor thunk is a dynamically allocated code snippet that allows
   Haskell closures to be viewed as C function pointers.

   Stable pointers provide a way for the outside world to get access to,
   and evaluate, Haskell heap objects, with the RTS providing a small
   range of ops for doing so. So, assuming we've got a stable pointer in
   our hand in C, we can jump into the Haskell world and evaluate a callback
   procedure, say. This works OK in some cases where callbacks are used, but
   does require the external code to know about stable pointers and how to deal
   with them. We'd like to hide the Haskell-nature of a callback and have it
   be invoked just like any other C function pointer.

   Enter adjustor thunks. An adjustor thunk is a little piece of code
   that's generated on-the-fly (one per Haskell closure being exported)
   that, when entered using some 'universal' calling convention (e.g., the
   C calling convention on platform X), pushes an implicit stable pointer
   (to the Haskell callback) before calling another (static) C function stub
   which takes care of entering the Haskell code via its stable pointer.

   An adjustor thunk is allocated on the C heap, and is called from within
   Haskell just before handing out the function pointer to the Haskell (IO)
   action. User code should never have to invoke it explicitly.

   An adjustor thunk differs from a C function pointer in one respect: when
   the code is through with it, it has to be freed in order to release Haskell
   and C resources. Failure to do so will result in memory leaks on both the
   C and Haskell side.
*/
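
/* As an illustration of the lifecycle described above: the Haskell side
   typically obtains a C function pointer for a closure via a
   'foreign import ccall "wrapper"' declaration (whose generated stub calls
   createAdjustor() below), and hands it to C. The C code below is a minimal,
   hypothetical consumer; it neither knows nor cares that a stable pointer
   hides behind the pointer it is given:

       typedef int (*int_callback)(int);

       void use_callback(int_callback cb)
       {
           int r = cb(42);   // enters Haskell via the adjustor thunk
           // ... use r ...
       }

   Once the callback is no longer needed it must be released (from Haskell
   via freeHaskellFunPtr, or from C via hs_free_fun_ptr()), so that the
   adjustor and its stable pointer can be reclaimed. */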

#include "PosixSource.h"
#include "Rts.h"

#include "RtsUtils.h"
#include "StablePtr.h"

#if defined(USE_LIBFFI_FOR_ADJUSTORS)
#include "ffi.h"
#include <string.h>
#endif

#if defined(i386_HOST_ARCH)
extern void adjustorCode(void);
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
// from AdjustorAsm.s
// not declared as a function so that AIX-style
// fundescs can never get in the way.
extern void *adjustorCode;
#endif

#if defined(USE_LIBFFI_FOR_ADJUSTORS)
/* There are subtle differences between how libffi adjustors work on
 * different platforms, and the situation is a little complex.
 *
 * HOW ADJUSTORS/CLOSURES WORK ON LIBFFI:
 * libffi's ffi_closure_alloc() function gives you two pointers to a closure:
 * 1. the writable pointer, and 2. the executable pointer. You write the
 * closure into the writable pointer (and ffi_prep_closure_loc() will do this
 * for you) and you execute it at the executable pointer.
 *
 * THE PROBLEM:
 * The RTS deals only with the executable pointer, but when it comes time to
 * free the closure, libffi wants back the writable pointer that it gave you
 * when you allocated it.
 *
 * On Linux we solve this problem by storing the address of the writable
 * mapping into itself, then returning both writable and executable pointers
 * plus 1 machine word for preparing the closure for use by the RTS (see the
 * Linux version of allocateExec() in rts/sm/Storage.c). When we want to
 * recover the writable address, we subtract 1 word from the executable
 * address and fetch. This works because Linux kernel magic gives us two
 * pointers with different addresses that refer to the same memory. Whatever
 * you write into the writable address can be read back at the executable
 * address. This method is very efficient.
 *
 * On iOS this breaks for two reasons: 1. the two pointers do not refer to
 * the same memory (so we can't retrieve anything stored into the writable
 * pointer if we only have the exec pointer), and 2. libffi's
 * ffi_closure_alloc() assumes the pointer it has returned you is a
 * ffi_closure structure and treats it as such: it uses that memory to
 * communicate with ffi_prep_closure_loc(). On Linux, by contrast,
 * ffi_closure_alloc() is viewed simply as a memory allocation, and only
 * ffi_prep_closure_loc() deals in ffi_closure structures. Each of these
 * differences is enough to make the efficient method used on Linux not work
 * on iOS. Instead, on iOS we use hash tables to recover the writable address
 * from the executable one. This method is conservative and would almost
 * certainly work on any platform, but on Linux it makes sense to use the
 * faster method.
 */
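
/* A minimal sketch of the Linux-style recovery described above (illustrative
 * only; the real logic lives in allocateExec()/freeExec() in
 * rts/sm/Storage.c). Assuming the allocator stored the writable address in
 * the machine word immediately preceding the executable code, recovery is a
 * one-word load:
 *
 *     static void *writableFromExec(void *exec_addr)
 *     {
 *         // the word just below the executable code holds the address
 *         // of the corresponding writable mapping
 *         return ((void **)exec_addr)[-1];
 *     }
 *
 * On iOS, where no such doubly-mapped memory exists, execToWritable()
 * instead consults the hash table mentioned above.
 */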
void
freeHaskellFunctionPtr(void* ptr)
{
    ffi_closure *cl;

#if defined(ios_HOST_OS)
    cl = execToWritable(ptr);
#else
    cl = (ffi_closure*)ptr;
#endif
    freeStablePtr(cl->user_data);
    stgFree(cl->cif->arg_types);
    stgFree(cl->cif);
    freeExec(ptr);
}

static ffi_type * char_to_ffi_type(char c)
{
    switch (c) {
    case 'v':  return &ffi_type_void;
    case 'f':  return &ffi_type_float;
    case 'd':  return &ffi_type_double;
    case 'L':  return &ffi_type_sint64;
    case 'l':  return &ffi_type_uint64;
    case 'W':  return &ffi_type_sint32;
    case 'w':  return &ffi_type_uint32;
    case 'S':  return &ffi_type_sint16;
    case 's':  return &ffi_type_uint16;
    case 'B':  return &ffi_type_sint8;
    case 'b':  return &ffi_type_uint8;
    case 'p':  return &ffi_type_pointer;
    default:   barf("char_to_ffi_type: unknown type '%c'", c);
    }
}

void*
createAdjustor (int cconv,
                StgStablePtr hptr,
                StgFunPtr wptr,
                char *typeString)
{
    ffi_cif *cif;
    ffi_type **arg_types;
    uint32_t n_args, i;
    ffi_type *result_type;
    ffi_closure *cl;
    int r, abi;
    void *code;

    n_args = strlen(typeString) - 1;
    cif = stgMallocBytes(sizeof(ffi_cif), "createAdjustor");
    arg_types = stgMallocBytes(n_args * sizeof(ffi_type*), "createAdjustor");

    result_type = char_to_ffi_type(typeString[0]);
    for (i=0; i < n_args; i++) {
        arg_types[i] = char_to_ffi_type(typeString[i+1]);
    }
    switch (cconv) {
#if defined(mingw32_HOST_OS) && defined(i386_HOST_ARCH)
    case 0: /* stdcall */
        abi = FFI_STDCALL;
        break;
#endif
    case 1: /* ccall */
        abi = FFI_DEFAULT_ABI;
        break;
    default:
        barf("createAdjustor: convention %d not supported on this platform", cconv);
    }

    r = ffi_prep_cif(cif, abi, n_args, result_type, arg_types);
    if (r != FFI_OK) barf("ffi_prep_cif failed: %d", r);

    cl = allocateExec(sizeof(ffi_closure), &code);
    if (cl == NULL) {
        barf("createAdjustor: failed to allocate memory");
    }

    r = ffi_prep_closure_loc(cl, cif, (void*)wptr, hptr/*userdata*/, code);
    if (r != FFI_OK) barf("ffi_prep_closure_loc failed: %d", r);

    return (void*)code;
}
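
/* For illustration: the first character of typeString encodes the result
 * type and the remaining characters encode the arguments, using the letters
 * accepted by char_to_ffi_type() above. So a callback of C type
 * 'HsInt32 (*)(HsInt32, HsDouble)' would be described by the (hypothetical)
 * call:
 *
 *     void *fp = createAdjustor(1 /+ ccall +/, hptr, wptr, "WWd");
 *
 * i.e. a signed 32-bit result ('W'), one signed 32-bit argument ('W'), and
 * one double argument ('d'); n_args above then comes out as 2.
 */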

#else // To end of file...

#if defined(_WIN32)
#include <windows.h>
#endif

#if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
#include <string.h>
#endif

#if defined(LEADING_UNDERSCORE)
#define UNDERSCORE "_"
#else
#define UNDERSCORE ""
#endif

#if defined(x86_64_HOST_ARCH)
/*
  Now here's something obscure for you:

  When generating an adjustor thunk that uses the C calling
  convention, we have to make sure that the thunk kicks off
  the process of jumping into Haskell with a tail jump. Why?
  Because as a result of jumping into Haskell we may end
  up freeing the very adjustor thunk we came from using
  freeHaskellFunctionPtr(). Hence, we'd better not return to
  the adjustor code on our way out, since it could by then
  point to junk.

  The fix is readily at hand: just include the opcodes
  for the C stack fixup code that we need to perform when
  returning in some static piece of memory and arrange
  to return to it before tail jumping from the adjustor thunk.
*/
static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
{
  __asm__ (
   ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
   UNDERSCORE "obscure_ccall_ret_code:\n\t"
   "addq $0x8, %rsp\n\t"
#if defined(mingw32_HOST_OS)
   /* On Win64, we had to put the original return address after the
      arg 1-4 spill slots, so now we have to move it back */
   "movq 0x20(%rsp), %rcx\n"
   "movq %rcx, (%rsp)\n"
#endif
   "ret"
  );
}
extern void obscure_ccall_ret_code(void);
#endif

#if defined(alpha_HOST_ARCH)
/* To get the definition of PAL_imb: */
# if defined(linux_HOST_OS)
#  include <asm/pal.h>
# else
#  include <machine/pal.h>
# endif
#endif

#if defined(ia64_HOST_ARCH)

/* Layout of a function descriptor */
typedef struct _IA64FunDesc {
    StgWord64 ip;
    StgWord64 gp;
} IA64FunDesc;

static void *
stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
{
    StgArrBytes* arr;
    uint32_t data_size_in_words, total_size_in_words;

    /* round up to a whole number of words */
    data_size_in_words  = ROUNDUP_BYTES_TO_WDS(size_in_bytes);
    total_size_in_words = sizeofW(StgArrBytes) + data_size_in_words;

    /* allocate and fill it in */
    arr = (StgArrBytes *)allocate(total_size_in_words);
    SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, size_in_bytes);

    /* obtain a stable ptr */
    *stable = getStablePtr((StgPtr)arr);

    /* and return a ptr to the goods inside the array */
    return(&(arr->payload));
}
#endif

#if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
__asm__("obscure_ccall_ret_code:\n\t"
        "lwz 1,0(1)\n\t"
        "lwz 0,4(1)\n\t"
        "mtlr 0\n\t"
        "blr");
extern void obscure_ccall_ret_code(void);
#endif

#if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
#if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))

/* !!! !!! WARNING: !!! !!!
 * This structure is accessed from AdjustorAsm.s
 * Any changes here have to be mirrored in the offsets there.
 */

typedef struct AdjustorStub {
#if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
    unsigned        lis;
    unsigned        ori;
    unsigned        lwz;
    unsigned        mtctr;
    unsigned        bctr;
    StgFunPtr       code;
#elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
    /* powerpc64-darwin: just guessing that it won't use fundescs. */
    unsigned        lis;
    unsigned        ori;
    unsigned        rldimi;
    unsigned        oris;
    unsigned        ori2;
    unsigned        lwz;
    unsigned        mtctr;
    unsigned        bctr;
    StgFunPtr       code;
#else
    /* fundesc-based ABIs */
#define             FUNDESCS
    StgFunPtr       code;
    struct AdjustorStub *toc;
    void            *env;
#endif
    StgStablePtr    hptr;
    StgFunPtr       wptr;
    StgInt          negative_framesize;
    StgInt          extrawords_plus_one;
} AdjustorStub;

#endif
#endif

#if defined(i386_HOST_ARCH)

/* !!! !!! WARNING: !!! !!!
 * This structure is accessed from AdjustorAsm.s
 * Any changes here have to be mirrored in the offsets there.
 */

typedef struct AdjustorStub {
    unsigned char   call[8];
    StgStablePtr    hptr;
    StgFunPtr       wptr;
    StgInt          frame_size;
    StgInt          argument_size;
} AdjustorStub;
#endif

#if defined(i386_HOST_ARCH) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
static int totalArgumentSize(char *typeString)
{
    int sz = 0;
    while(*typeString)
    {
        char t = *typeString++;

        switch(t)
        {
            // on 32-bit platforms, Double and Int64 occupy two words.
            case 'd':
            case 'l':
            case 'L':
                if(sizeof(void*) == 4)
                {
                    sz += 2;
                    break;
                }
                /* fall through */
            // everything else is one word.
            default:
                sz += 1;
        }
    }
    return sz;
}
#endif
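
/* For example, on a 32-bit platform the (hypothetical) typeString "dwl"
 * gives totalArgumentSize = 2 + 1 + 2 = 5 words: the double ('d') and the
 * 64-bit integer ('l') take two words each, the 32-bit word ('w') takes
 * one. On a 64-bit platform every argument counts as a single word, so the
 * same string gives 3.
 */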

void*
createAdjustor(int cconv, StgStablePtr hptr,
               StgFunPtr wptr,
               char *typeString
#if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
               STG_UNUSED
#endif
              )
{
    void *adjustor = NULL;
    void *code = NULL;

    switch (cconv)
    {
    case 0: /* _stdcall */
#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
        /* Magic constant computed by inspecting the code length of
           the following assembly language snippet
           (offset and machine code prefixed):

         <0>:  58               popl  %eax             # temp. remove ret addr..
         <1>:  68 fd fc fe fa   pushl 0xfafefcfd       # constant is large enough
                                                       # to hold a StgStablePtr
         <6>:  50               pushl %eax             # put back ret. addr
         <7>:  b8 fa ef ff 00   movl  $0x00ffeffa, %eax # load up wptr
         <c>:  ff e0            jmp   %eax             # and jump to it.
                                                       # the callee cleans up
                                                       # the stack
        */
        adjustor = allocateExec(14,&code);
        {
            unsigned char *const adj_code = (unsigned char *)adjustor;
            adj_code[0x00] = (unsigned char)0x58;  /* popl %eax  */

            adj_code[0x01] = (unsigned char)0x68;  /* pushl hptr (which is a dword immediate) */
            *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;

            adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */

            adj_code[0x07] = (unsigned char)0xb8; /* movl  $wptr, %eax */
            *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;

            adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
            adj_code[0x0d] = (unsigned char)0xe0;
        }
#endif
        break;

    case 1: /* _ccall */
#if defined(i386_HOST_ARCH)
    {
        /*
          Most of the trickiness here is due to the need to keep the
          stack pointer 16-byte aligned (see #5250). That means we
          can't just push another argument on the stack and call the
          wrapper, we may have to shuffle the whole argument block.

          We offload most of the work to AdjustorAsm.S.
        */
        AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
        adjustor = adjustorStub;

        int sz = totalArgumentSize(typeString);

        adjustorStub->call[0] = 0xe8;
        *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)code + 5);
        adjustorStub->hptr = hptr;
        adjustorStub->wptr = wptr;

        // The adjustor puts the following things on the stack:
        // 1.) %ebp link
        // 2.) padding and (a copy of) the arguments
        // 3.) a dummy argument
        // 4.) hptr
        // 5.) return address (for returning to the adjustor)
        // All these have to add up to a multiple of 16.

        // first, include everything in frame_size
        adjustorStub->frame_size = sz * 4 + 16;
        // align to 16 bytes
        adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
        // only count 2.) and 3.) as part of frame_size
        adjustorStub->frame_size -= 12;
        adjustorStub->argument_size = sz;
    }
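
    /* A worked (hypothetical) example of the frame_size computation above:
       for two word-sized arguments, sz = 2, so we start from
       2*4 + 16 = 24 bytes, round up to 32 for 16-byte alignment, and then
       subtract the 12 bytes of items 1.), 4.) and 5.) (%ebp link, hptr and
       return address, 4 bytes each), leaving frame_size = 20. */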

#elif defined(x86_64_HOST_ARCH)

# if defined(mingw32_HOST_OS)
    /*
      stack at call:
               argn
               ...
               arg5
               return address
               %rcx,%rdx,%r8,%r9 = arg1..arg4

      if there are <4 integer args, then we can just push the
      StablePtr into %rcx and shuffle the other args up.

      If there are >=4 integer args, then we have to flush one arg
      to the stack, and arrange to adjust the stack ptr on return.
      The stack will be rearranged to this:

              argn
              ...
              arg5
              return address  *** <-- dummy arg in stub fn.
              arg4
              obscure_ccall_ret_code

      This unfortunately means that the type of the stub function
      must have a dummy argument for the original return address
      pointer inserted just after the 4th integer argument.

      Code for the simple case:

       0:     4d 89 c1                mov    %r8,%r9
       3:     49 89 d0                mov    %rdx,%r8
       6:     48 89 ca                mov    %rcx,%rdx
       9:     f2 0f 10 da             movsd  %xmm2,%xmm3
       d:     f2 0f 10 d1             movsd  %xmm1,%xmm2
      11:     f2 0f 10 c8             movsd  %xmm0,%xmm1
      15:     48 8b 0d 0c 00 00 00    mov    0xc(%rip),%rcx    # 28 <.text+0x28>
      1c:     ff 25 0e 00 00 00       jmpq   *0xe(%rip)        # 30 <.text+0x30>
      22:     90                      nop
      [...]


      And the version for >=4 integer arguments:

      [we want to push the 4th argument (either %r9 or %xmm3, depending on
       whether it is a floating arg or not) and the return address onto the
       stack. However, slots 1-4 are reserved for code we call to spill its
       args 1-4 into, so we can't just push them onto the bottom of the stack.
       So first put the 4th argument onto the stack, above what will be the
       spill slots.]
       0:     48 83 ec 08             sub    $0x8,%rsp
      [if non-floating arg, then do this:]
       4:     90                      nop
       5:     4c 89 4c 24 20          mov    %r9,0x20(%rsp)
      [else if floating arg then do this:]
       4:     f2 0f 11 5c 24 20       movsd  %xmm3,0x20(%rsp)
      [end if]
      [Now push the new return address onto the stack]
       a:     ff 35 30 00 00 00       pushq  0x30(%rip)        # 40 <.text+0x40>
      [But the old return address has been moved up into a spill slot, so
       we need to move it above them]
      10:     4c 8b 4c 24 10          mov    0x10(%rsp),%r9
      15:     4c 89 4c 24 30          mov    %r9,0x30(%rsp)
      [Now we do the normal register shuffle-up etc]
      1a:     4d 89 c1                mov    %r8,%r9
      1d:     49 89 d0                mov    %rdx,%r8
      20:     48 89 ca                mov    %rcx,%rdx
      23:     f2 0f 10 da             movsd  %xmm2,%xmm3
      27:     f2 0f 10 d1             movsd  %xmm1,%xmm2
      2b:     f2 0f 10 c8             movsd  %xmm0,%xmm1
      2f:     48 8b 0d 12 00 00 00    mov    0x12(%rip),%rcx    # 48 <.text+0x48>
      36:     ff 25 14 00 00 00       jmpq   *0x14(%rip)        # 50 <.text+0x50>
      3c:     90                      nop
      3d:     90                      nop
      3e:     90                      nop
      3f:     90                      nop
      [...]

    */
    {
        StgWord8 *adj_code;

        // determine whether we have 4 or more integer arguments,
        // and therefore need to flush one to the stack.
        if ((typeString[0] == '\0') ||
            (typeString[1] == '\0') ||
            (typeString[2] == '\0') ||
            (typeString[3] == '\0')) {

            adjustor = allocateExec(0x38,&code);
            adj_code = (StgWord8*)adjustor;

            *(StgInt32 *)adj_code        = 0x49c1894d;
            *(StgInt32 *)(adj_code+0x4)  = 0x8948d089;
            *(StgInt32 *)(adj_code+0x8)  = 0x100ff2ca;
            *(StgInt32 *)(adj_code+0xc)  = 0x100ff2da;
            *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1;
            *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8;
            *(StgInt32 *)(adj_code+0x18) = 0x0000000c;

            *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff;
            *(StgInt32 *)(adj_code+0x20) = 0x00000000;
            *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr;
            *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr;
        }
        else
        {
            int fourthFloating;

            fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd');
            adjustor = allocateExec(0x58,&code);
            adj_code = (StgWord8*)adjustor;
            *(StgInt32 *)adj_code        = 0x08ec8348;
            *(StgInt32 *)(adj_code+0x4)  = fourthFloating ? 0x5c110ff2
                                                          : 0x4c894c90;
            *(StgInt32 *)(adj_code+0x8)  = 0x35ff2024;
            *(StgInt32 *)(adj_code+0xc)  = 0x00000030;
            *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c;
            *(StgInt32 *)(adj_code+0x14) = 0x4c894c10;
            *(StgInt32 *)(adj_code+0x18) = 0x894d3024;
            *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1;
            *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948;
            *(StgInt32 *)(adj_code+0x24) = 0xf2da100f;
            *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f;
            *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f;
            *(StgInt32 *)(adj_code+0x30) = 0x00120d8b;
            *(StgInt32 *)(adj_code+0x34) = 0x25ff0000;
            *(StgInt32 *)(adj_code+0x38) = 0x00000014;
            *(StgInt32 *)(adj_code+0x3c) = 0x90909090;
            *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code;
            *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr;
            *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr;
        }
    }

# else
    /*
      stack at call:
               argn
               ...
               arg7
               return address
               %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg1..arg6

      if there are <6 integer args, then we can just push the
      StablePtr into %rdi and shuffle the other args up.

      If there are >=6 integer args, then we have to flush one arg
      to the stack, and arrange to adjust the stack ptr on return.
      The stack will be rearranged to this:

              argn
              ...
              arg7
              return address  *** <-- dummy arg in stub fn.
              arg6
              obscure_ccall_ret_code

      This unfortunately means that the type of the stub function
      must have a dummy argument for the original return address
      pointer inserted just after the 6th integer argument.

      Code for the simple case:

       0:     4d 89 c1                mov    %r8,%r9
       3:     49 89 c8                mov    %rcx,%r8
       6:     48 89 d1                mov    %rdx,%rcx
       9:     48 89 f2                mov    %rsi,%rdx
       c:     48 89 fe                mov    %rdi,%rsi
       f:     48 8b 3d 0a 00 00 00    mov    10(%rip),%rdi
      16:     ff 25 0c 00 00 00       jmpq   *12(%rip)
      ...
      20: .quad 0  # aligned on 8-byte boundary
      28: .quad 0  # aligned on 8-byte boundary


      And the version for >=6 integer arguments:

       0:     41 51                   push   %r9
       2:     ff 35 20 00 00 00       pushq  32(%rip)        # 28 <ccall_adjustor+0x28>
       8:     4d 89 c1                mov    %r8,%r9
       b:     49 89 c8                mov    %rcx,%r8
       e:     48 89 d1                mov    %rdx,%rcx
      11:     48 89 f2                mov    %rsi,%rdx
      14:     48 89 fe                mov    %rdi,%rsi
      17:     48 8b 3d 12 00 00 00    mov    18(%rip),%rdi   # 30 <ccall_adjustor+0x30>
      1e:     ff 25 14 00 00 00       jmpq   *20(%rip)       # 38 <ccall_adjustor+0x38>
      ...
      28: .quad 0  # aligned on 8-byte boundary
      30: .quad 0  # aligned on 8-byte boundary
      38: .quad 0  # aligned on 8-byte boundary
    */

    {
        int i = 0;
        char *c;
        StgWord8 *adj_code;

        // determine whether we have 6 or more integer arguments,
        // and therefore need to flush one to the stack.
        for (c = typeString; *c != '\0'; c++) {
            if (*c != 'f' && *c != 'd') i++;
            if (i == 6) break;
        }

        if (i < 6) {
            adjustor = allocateExec(0x30,&code);
            adj_code = (StgWord8*)adjustor;

            *(StgInt32 *)adj_code        = 0x49c1894d;
            *(StgInt32 *)(adj_code+0x4)  = 0x8948c889;
            *(StgInt32 *)(adj_code+0x8)  = 0xf28948d1;
            *(StgInt32 *)(adj_code+0xc)  = 0x48fe8948;
            *(StgInt32 *)(adj_code+0x10) = 0x000a3d8b;
            *(StgInt32 *)(adj_code+0x14) = 0x25ff0000;
            *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
            *(StgInt64 *)(adj_code+0x20) = (StgInt64)hptr;
            *(StgInt64 *)(adj_code+0x28) = (StgInt64)wptr;
        }
        else
        {
            adjustor = allocateExec(0x40,&code);
            adj_code = (StgWord8*)adjustor;

            *(StgInt32 *)adj_code        = 0x35ff5141;
            *(StgInt32 *)(adj_code+0x4)  = 0x00000020;
            *(StgInt32 *)(adj_code+0x8)  = 0x49c1894d;
            *(StgInt32 *)(adj_code+0xc)  = 0x8948c889;
            *(StgInt32 *)(adj_code+0x10) = 0xf28948d1;
            *(StgInt32 *)(adj_code+0x14) = 0x48fe8948;
            *(StgInt32 *)(adj_code+0x18) = 0x00123d8b;
            *(StgInt32 *)(adj_code+0x1c) = 0x25ff0000;
            *(StgInt32 *)(adj_code+0x20) = 0x00000014;

            *(StgInt64 *)(adj_code+0x28) = (StgInt64)obscure_ccall_ret_code;
            *(StgInt64 *)(adj_code+0x30) = (StgInt64)hptr;
            *(StgInt64 *)(adj_code+0x38) = (StgInt64)wptr;
        }
    }
# endif


#elif defined(sparc_HOST_ARCH)
    /* Magic constant computed by inspecting the code length of the following
       assembly language snippet (offset and machine code prefixed):

       <00>: 9C23A008   sub   %sp, 8, %sp         ! make room for %o4/%o5 in caller's frame
       <04>: DA23A060   st    %o5, [%sp + 96]     ! shift registers by 2 positions
       <08>: D823A05C   st    %o4, [%sp + 92]
       <0C>: 9A10000B   mov   %o3, %o5
       <10>: 9810000A   mov   %o2, %o4
       <14>: 96100009   mov   %o1, %o3
       <18>: 94100008   mov   %o0, %o2
       <1C>: 13000000   sethi %hi(wptr), %o1      ! load up wptr (1 of 2)
       <20>: 11000000   sethi %hi(hptr), %o0      ! load up hptr (1 of 2)
       <24>: 81C26000   jmp   %o1 + %lo(wptr)     ! jump to wptr (load 2 of 2)
       <28>: 90122000   or    %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
       <2C>: 00000000                             ! place for getting hptr back easily

       ccall'ing on SPARC is easy, because we are quite lucky to push a
       multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
       existing arguments (note that %sp must stay double-word aligned at
       all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
       To do this, we extend the *caller's* stack frame by 2 words and shift
       the output registers used for argument passing (%o0 - %o5, we are a *leaf*
       procedure because of the tail-jump) by 2 positions. This makes room in
       %o0 and %o1 for the additional arguments, namely hptr and a dummy (used
       for destination addr of jump on SPARC, return address on x86, ...). This
       shouldn't cause any problems for a C-like caller: alloca is implemented
       similarly, and local variables should be accessed via %fp, not %sp. In a
       nutshell: This should work! (Famous last words! :-)
    */
    adjustor = allocateExec(4*(11+1),&code);
    {
        unsigned long *const adj_code = (unsigned long *)adjustor;

        adj_code[ 0]  = 0x9C23A008UL;   /* sub   %sp, 8, %sp         */
        adj_code[ 1]  = 0xDA23A060UL;   /* st    %o5, [%sp + 96]     */
        adj_code[ 2]  = 0xD823A05CUL;   /* st    %o4, [%sp + 92]     */
        adj_code[ 3]  = 0x9A10000BUL;   /* mov   %o3, %o5            */
        adj_code[ 4]  = 0x9810000AUL;   /* mov   %o2, %o4            */
        adj_code[ 5]  = 0x96100009UL;   /* mov   %o1, %o3            */
        adj_code[ 6]  = 0x94100008UL;   /* mov   %o0, %o2            */
        adj_code[ 7]  = 0x13000000UL;   /* sethi %hi(wptr), %o1      */
        adj_code[ 7] |= ((unsigned long)wptr) >> 10;
        adj_code[ 8]  = 0x11000000UL;   /* sethi %hi(hptr), %o0      */
        adj_code[ 8] |= ((unsigned long)hptr) >> 10;
        adj_code[ 9]  = 0x81C26000UL;   /* jmp   %o1 + %lo(wptr)     */
        adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
        adj_code[10]  = 0x90122000UL;   /* or    %o0, %lo(hptr), %o0 */
        adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;

        adj_code[11]  = (unsigned long)hptr;

        /* flush cache */
        asm("flush %0" : : "r" (adj_code     ));
        asm("flush %0" : : "r" (adj_code +  2));
        asm("flush %0" : : "r" (adj_code +  4));
        asm("flush %0" : : "r" (adj_code +  6));
        asm("flush %0" : : "r" (adj_code + 10));

        /* max. 5 instructions latency, and we need at least 1 for returning */
        asm("nop");
        asm("nop");
        asm("nop");
        asm("nop");
    }
#elif defined(alpha_HOST_ARCH)
    /* Magic constant computed by inspecting the code length of
       the following assembly language snippet
       (offset and machine code prefixed; note that the machine code
       shown is longwords stored in little-endian order):

       <00>: 46520414   mov   a2, a4
       <04>: 46100412   mov   a0, a2
       <08>: a61b0020   ldq   a0, 0x20(pv)  # load up hptr
       <0c>: 46730415   mov   a3, a5
       <10>: a77b0028   ldq   pv, 0x28(pv)  # load up wptr
       <14>: 46310413   mov   a1, a3
       <18>: 6bfb----   jmp   (pv), <hint>  # jump to wptr (with hint)
       <1c>: 00000000                       # padding for alignment
       <20>: [8 bytes for hptr quadword]
       <28>: [8 bytes for wptr quadword]

       The "computed" jump at <08> above is really a jump to a fixed
       location. Accordingly, we place an always-correct hint in the
       jump instruction, namely the address offset from <0c> to wptr,
       divided by 4, taking the lowest 14 bits.

       We only support passing 4 or fewer argument words, for the same
       reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
       On the Alpha the first 6 integer arguments are in a0 through a5,
       and the rest on the stack. Hence we want to shuffle the original
       caller's arguments by two.

       On the Alpha the calling convention is so complex and dependent
       on the callee's signature -- for example, the stack pointer has
       to be a multiple of 16 -- that it seems impossible to me [ccshan]
       to handle the general case correctly without changing how the
       adjustor is called from C. For now, our solution of shuffling
       registers only and ignoring the stack only works if the original
       caller passed 4 or fewer argument words.

       TODO: Depending on how much allocation overhead stgMallocBytes uses for
       header information (more precisely, if the overhead is no more than
       4 bytes), we should move the first three instructions above down by
       4 bytes (getting rid of the nop), hence saving memory. [ccshan]
    */
    ASSERT(((StgWord64)wptr & 3) == 0);
    adjustor = allocateExec(48,&code);
    {
        StgWord64 *const code = (StgWord64 *)adjustor;

        code[0] = 0x4610041246520414L;
        code[1] = 0x46730415a61b0020L;
        code[2] = 0x46310413a77b0028L;
        code[3] = 0x000000006bfb0000L
                | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);

        code[4] = (StgWord64)hptr;
        code[5] = (StgWord64)wptr;

        /* Ensure that instruction cache is consistent with our new code */
        __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
    }
#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)

#define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
#define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
    {
        /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
           We need to calculate all the details of the stack frame layout,
           taking into account the types of all the arguments, and then
           generate code on the fly. */

        int src_gpr = 3, dst_gpr = 5;
        int fpr = 3;
        int src_offset = 0, dst_offset = 0;
        int n = strlen(typeString),i;
        int src_locs[n], dst_locs[n];
        int frameSize;
        unsigned *code;

        /* Step 1:
           Calculate where the arguments should go.
           src_locs[] will contain the locations of the arguments in the
           original stack frame passed to the adjustor.
           dst_locs[] will contain the locations of the arguments after the
           adjustor runs, on entry to the wrapper proc pointed to by wptr.

           This algorithm is based on the one described on page 3-19 of the
           System V ABI PowerPC Processor Supplement.
        */
        for(i=0;typeString[i];i++)
        {
            char t = typeString[i];
            if((t == 'f' || t == 'd') && fpr <= 8)
                src_locs[i] = dst_locs[i] = -32-(fpr++);
            else
            {
                if((t == 'l' || t == 'L') && src_gpr <= 9)
                {
                    if((src_gpr & 1) == 0)
                        src_gpr++;
                    src_locs[i] = -src_gpr;
                    src_gpr += 2;
                }
                else if((t == 'w' || t == 'W') && src_gpr <= 10)
                {
                    src_locs[i] = -(src_gpr++);
                }
                else
                {
                    if(t == 'l' || t == 'L' || t == 'd')
                    {
                        if(src_offset % 8)
                            src_offset += 4;
                    }
                    src_locs[i] = src_offset;
                    src_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
                }

                if((t == 'l' || t == 'L') && dst_gpr <= 9)
                {
                    if((dst_gpr & 1) == 0)
                        dst_gpr++;
                    dst_locs[i] = -dst_gpr;
                    dst_gpr += 2;
                }
                else if((t == 'w' || t == 'W') && dst_gpr <= 10)
                {
                    dst_locs[i] = -(dst_gpr++);
                }
                else
                {
                    if(t == 'l' || t == 'L' || t == 'd')
                    {
                        if(dst_offset % 8)
                            dst_offset += 4;
                    }
                    dst_locs[i] = dst_offset;
                    dst_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
                }
            }
        }

        frameSize = dst_offset + 8;
        frameSize = (frameSize+15) & ~0xF;

        /* Step 2:
           Build the adjustor.
        */
        // allocate space for at most 4 insns per parameter
        // plus 14 more instructions.
        adjustor = allocateExec(4 * (4*n + 14),&code);
        code = (unsigned*)adjustor;

        *code++ = 0x48000008; // b *+8
            // * Put the hptr in a place where freeHaskellFunctionPtr
            //   can get at it.
        *code++ = (unsigned) hptr;

            // * save the link register
        *code++ = 0x7c0802a6; // mflr r0;
        *code++ = 0x90010004; // stw r0, 4(r1);
            // * and build a new stack frame
        *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)

            // * now generate instructions to copy arguments
            //   from the old stack frame into the new stack frame.
        for(i=n-1;i>=0;i--)
        {
            if(src_locs[i] < -32)
                ASSERT(dst_locs[i] == src_locs[i]);
            else if(src_locs[i] < 0)
            {
                // source in GPR.
                ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
                if(dst_locs[i] < 0)
                {
                    ASSERT(dst_locs[i] > -32);
                    // dst is in GPR, too.

                    if(typeString[i] == 'l' || typeString[i] == 'L')
                    {
                        // mr dst+1, src+1
                        *code++ = 0x7c000378
                                | ((-dst_locs[i]+1) << 16)
                                | ((-src_locs[i]+1) << 11)
                                | ((-src_locs[i]+1) << 21);
                    }
                    // mr dst, src
                    *code++ = 0x7c000378
                            | ((-dst_locs[i]) << 16)
                            | ((-src_locs[i]) << 11)
                            | ((-src_locs[i]) << 21);
                }
                else
                {
                    if(typeString[i] == 'l' || typeString[i] == 'L')
                    {
                        // stw src+1, dst_offset+4(r1)
                        *code++ = 0x90010000
                                | ((-src_locs[i]+1) << 21)
                                | (dst_locs[i] + 4);
                    }

                    // stw src, dst_offset(r1)
                    *code++ = 0x90010000
                            | ((-src_locs[i]) << 21)
                            | (dst_locs[i] + 8);
                }
            }
            else
            {
                ASSERT(dst_locs[i] >= 0);
                ASSERT(typeString[i] != 'f' && typeString[i] != 'd');

                if(typeString[i] == 'l' || typeString[i] == 'L')
                {
                    // lwz r0, src_offset(r1)
                    *code++ = 0x80010000
                            | (src_locs[i] + frameSize + 8 + 4);
                    // stw r0, dst_offset(r1)
                    *code++ = 0x90010000
                            | (dst_locs[i] + 8 + 4);
                }
                // lwz r0, src_offset(r1)
                *code++ = 0x80010000
                        | (src_locs[i] + frameSize + 8);
                // stw r0, dst_offset(r1)
                *code++ = 0x90010000
                        | (dst_locs[i] + 8);
            }
        }

            // * hptr will be the new first argument.
            // lis r3, hi(hptr)
        *code++ = OP_HI(0x3c60, hptr);
            // ori r3,r3,lo(hptr)
        *code++ = OP_LO(0x6063, hptr);

            // * we need to return to a piece of code
            //   which will tear down the stack frame.
            // lis r11,hi(obscure_ccall_ret_code)
        *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
            // ori r11,r11,lo(obscure_ccall_ret_code)
        *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
            // mtlr r11
        *code++ = 0x7d6803a6;

            // * jump to wptr
            // lis r11,hi(wptr)
        *code++ = OP_HI(0x3d60, wptr);
            // ori r11,r11,lo(wptr)
        *code++ = OP_LO(0x616b, wptr);
            // mtctr r11
        *code++ = 0x7d6903a6;
            // bctr
        *code++ = 0x4e800420;

        // Flush the Instruction cache:
        {
            unsigned *p = adjustor;
            while(p < code)
            {
                __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
                                  : : "r" (p));
                p++;
            }
            __asm__ volatile ("sync\n\tisync");
        }
    }

#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)

#define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
#define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
    {
        /* The following code applies to all PowerPC and PowerPC64 platforms
           whose stack layout is based on the AIX ABI.

           Besides (obviously) AIX, this includes
             Mac OS 9 and BeOS/PPC (may they rest in peace),
               which use the 32-bit AIX ABI
             powerpc64-linux,
               which uses the 64-bit AIX ABI
             and Darwin (Mac OS X),
               which uses the same stack layout as AIX,
               but no function descriptors.

           The actual stack-frame shuffling is implemented out-of-line
           in the function adjustorCode, in AdjustorAsm.S.
           Here, we set up an AdjustorStub structure, which
           is a function descriptor (on platforms that have function
           descriptors) or a short piece of stub code (on Darwin) to call
           adjustorCode with a pointer to the AdjustorStub struct loaded
           into register r2.

           One nice thing about this is that there is _no_ code generated at
           runtime on the platforms that have function descriptors.
        */
        AdjustorStub *adjustorStub;
        int sz = 0, extra_sz, total_sz;

#if defined(FUNDESCS)
        adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
#else
        adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
#endif
        adjustor = adjustorStub;

        adjustorStub->code = (void*) &adjustorCode;

#if defined(FUNDESCS)
        // function descriptors are a cool idea.
        // We don't need to generate any code at runtime.
        adjustorStub->toc = adjustorStub;
#else

        // no function descriptors :-(
        // We need to do things "by hand".
#if defined(powerpc_HOST_ARCH)
        // lis  r2, hi(adjustorStub)
        adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
        // ori  r2, r2, lo(adjustorStub)
        adjustorStub->ori = OP_LO(0x6042, adjustorStub);
        // lwz  r0, code(r2)
        adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
                                        - (char*)adjustorStub);
        // mtctr r0
        adjustorStub->mtctr = 0x7c0903a6;
        // bctr
        adjustorStub->bctr = 0x4e800420;
#else
        barf("adjustor creation not supported on this platform");
#endif

        // Flush the Instruction cache:
        {
            int n = sizeof(AdjustorStub)/sizeof(unsigned);
            unsigned *p = (unsigned*)adjustor;
            while(n--)
            {
                __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
                                  : : "r" (p));
                p++;
            }
            __asm__ volatile ("sync\n\tisync");
        }
#endif

        // Calculate the size of the stack frame, in words.
        sz = totalArgumentSize(typeString);

        // The first eight words of the parameter area
        // are just "backing store" for the parameters passed in
        // the GPRs. extra_sz is the number of words beyond those first
        // 8 words.
        extra_sz = sz - 8;
        if(extra_sz < 0)
            extra_sz = 0;

        // Calculate the total size of the stack frame.
        total_sz = (6 /* linkage area */
                  + 8 /* minimum parameter area */
                  + 2 /* two extra arguments */
                  + extra_sz)*sizeof(StgWord);

        // align to 16 bytes.
        // AIX only requires 8 bytes, but who cares?
        total_sz = (total_sz+15) & ~0xF;

        // Fill in the information that adjustorCode in AdjustorAsm.S
        // will use to create a new stack frame with the additional args.
        adjustorStub->hptr = hptr;
        adjustorStub->wptr = wptr;
        adjustorStub->negative_framesize = -total_sz;
        adjustorStub->extrawords_plus_one = extra_sz + 1;
    }
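
    /* A worked (hypothetical) example of the frame-size computation above,
       on a 32-bit platform (sizeof(StgWord) == 4): for 10 argument words,
       extra_sz = 10 - 8 = 2, so total_sz = (6 + 8 + 2 + 2) * 4 = 72 bytes,
       rounded up to 80 for 16-byte alignment; negative_framesize is then
       -80 and extrawords_plus_one is 3. */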

#elif defined(ia64_HOST_ARCH)
/*
    Up to 8 inputs are passed in registers. We flush the last two inputs to
    the stack, initially into the 16-byte scratch region left by the caller.
    We then shuffle the others along by 4 (taking 2 registers for ourselves
    to save return address and previous function state - we need to come back
    here on the way out to restore the stack, so this is a real function
    rather than just a trampoline).

    The function descriptor we create contains the gp of the target function
    so gp is already loaded correctly.

        [MLX]       alloc r16=ar.pfs,10,2,0
                    movl r17=wptr
        [MII]       st8.spill [r12]=r38,8       // spill in6 (out4)
                    mov r41=r37                 // out7 = in5 (out3)
                    mov r40=r36;;               // out6 = in4 (out2)
        [MII]       st8.spill [r12]=r39         // spill in7 (out5)
                    mov.sptk b6=r17,50
                    mov r38=r34;;               // out4 = in2 (out0)
        [MII]       mov r39=r35                 // out5 = in3 (out1)
                    mov r37=r33                 // out3 = in1 (loc1)
                    mov r36=r32                 // out2 = in0 (loc0)
        [MLX]       adds r12=-24,r12            // update sp
                    movl r34=hptr;;             // out0 = hptr
        [MIB]       mov r33=r16                 // loc1 = ar.pfs
                    mov r32=b0                  // loc0 = retaddr
                    br.call.sptk.many b0=b6;;

        [MII]       adds r12=-16,r12
                    mov b0=r32
                    mov.i ar.pfs=r33
        [MFB]       nop.m 0x0
                    nop.f 0x0
                    br.ret.sptk.many b0;;
*/

/* These macros distribute a long constant into the two words of an MLX bundle */
#define BITS(val,start,count)   (((val) >> (start)) & ((1 << (count))-1))
#define MOVL_LOWORD(val)        (BITS(val,22,18) << 46)
#define MOVL_HIWORD(val)        ( (BITS(val,0,7)  << 36)      \
                                | (BITS(val,7,9)  << 50)      \
                                | (BITS(val,16,5) << 45)      \
                                | (BITS(val,21,1) << 44)      \
                                | (BITS(val,40,23))           \
                                | (BITS(val,63,1) << 59))

    {
        StgStablePtr stable;
        IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
        StgWord64 wcode = wdesc->ip;
        IA64FunDesc *fdesc;
        StgWord64 *code;

        /* we allocate on the Haskell heap since malloc'd memory isn't
         * executable - argh */
        /* Allocated memory is word-aligned (8 bytes) but functions on ia64
         * must be aligned to 16 bytes. We allocate an extra 8 bytes of
         * wiggle room so that we can put the code on a 16 byte boundary. */
        adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8+8, &stable);

        fdesc = (IA64FunDesc *)adjustor;
        code = (StgWord64 *)(fdesc + 1);
        /* add 8 bytes to code if needed to align to a 16-byte boundary */
        if ((StgWord64)code & 15) code++;
        fdesc->ip = (StgWord64)code;
        fdesc->gp = wdesc->gp;

        code[0]  = 0x0000058004288004 | MOVL_LOWORD(wcode);
        code[1]  = 0x6000000220000000 | MOVL_HIWORD(wcode);
        code[2]  = 0x029015d818984001;
        code[3]  = 0x8401200500420094;
        code[4]  = 0x886011d8189c0001;
        code[5]  = 0x84011004c00380c0;
        code[6]  = 0x0250210046013800;
        code[7]  = 0x8401000480420084;
        code[8]  = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
        code[9]  = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
        code[10] = 0x0200210020010811;
        code[11] = 0x1080006800006200;
        code[12] = 0x0000210018406000;
        code[13] = 0x00aa021000038005;
        code[14] = 0x000000010000001d;
        code[15] = 0x0084000880000200;

        /* save stable pointers in convenient form */
        code[16] = (StgWord64)hptr;
        code[17] = (StgWord64)stable;
    }
#else
    barf("adjustor creation not supported on this platform");
#endif
    break;

    default:
        ASSERT(0);
        break;
    }

    /* Have fun! */
    return code;
}


void
freeHaskellFunctionPtr(void* ptr)
{
#if defined(i386_HOST_ARCH)
    if ( *(unsigned char*)ptr != 0xe8 &&
         *(unsigned char*)ptr != 0x58 ) {
        errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
        return;
    }
    if (*(unsigned char*)ptr == 0xe8) { /* Aha, a ccall adjustor! */
        freeStablePtr(((AdjustorStub*)ptr)->hptr);
    } else {
        freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
    }
#elif defined(x86_64_HOST_ARCH)
    if ( *(StgWord16 *)ptr == 0x894d ) {
        freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+
#if defined(mingw32_HOST_OS)
                                       0x28
#else
                                       0x20
#endif
                                       ));
#if !defined(mingw32_HOST_OS)
    } else if ( *(StgWord16 *)ptr == 0x5141 ) {
        freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30));
#endif
#if defined(mingw32_HOST_OS)
    } else if ( *(StgWord16 *)ptr == 0x8348 ) {
        freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48));
#endif
    } else {
        errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
        return;
    }
#elif defined(sparc_HOST_ARCH)
    if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
        errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
        return;
    }

    /* Free the stable pointer first..*/
    freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
#elif defined(alpha_HOST_ARCH)
    if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
        errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
        return;
    }

    /* Free the stable pointer first..*/
    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
    if ( *(StgWord*)ptr != 0x48000008 ) {
        errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
        return;
    }
    freeStablePtr(((StgStablePtr*)ptr)[1]);
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
    if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
        errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
        return;
    }
    freeStablePtr(((AdjustorStub*)ptr)->hptr);
#elif defined(ia64_HOST_ARCH)
    IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
    StgWord64 *code = (StgWord64 *)(fdesc+1);

    if (fdesc->ip != (StgWord64)code) {
        errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
        return;
    }
    freeStablePtr((StgStablePtr)code[16]);
    freeStablePtr((StgStablePtr)code[17]);
    return;
#else
    ASSERT(0);
#endif
    // Can't write to this memory, it is only executable:
    // *((unsigned char*)ptr) = '\0';

    freeExec(ptr);
}

#endif // !USE_LIBFFI_FOR_ADJUSTORS