Use snwprintf instead of swprintf in rts/Linker.c.
[ghc.git] / rts / Adjustor.c
1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
3 *
4 * Copyright (c) 1998.
5 *
6 * ---------------------------------------------------------------------------*/
7
8 /* A little bit of background...
9
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
12
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
21
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
28
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
32
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so will result in memory leaks on both the C and
36 Haskell side.
37 */
38
39 #include "PosixSource.h"
40 #include "Rts.h"
41
42 #include "RtsUtils.h"
43 #include "Stable.h"
44
45 #if defined(USE_LIBFFI_FOR_ADJUSTORS)
46 #include "ffi.h"
47 #include <string.h>
48 #endif
49
50 #if defined(i386_HOST_ARCH)
51 extern void adjustorCode(void);
52 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
53 // from AdjustorAsm.s
54 // not declared as a function so that AIX-style
55 // fundescs can never get in the way.
56 extern void *adjustorCode;
57 #endif
58
59 #if defined(USE_LIBFFI_FOR_ADJUSTORS)
60 /* There are subtle differences between how libffi adjustors work on
61 * different platforms, and the situation is a little complex.
62 *
63 * HOW ADJUSTORS/CLOSURES WORK ON LIBFFI:
64 * libffi's ffi_closure_alloc() function gives you two pointers to a closure,
65 * 1. the writable pointer, and 2. the executable pointer. You write the
66 * closure into the writable pointer (and ffi_prep_closure_loc() will do this
67 * for you) and you execute it at the executable pointer.
68 *
69 * THE PROBLEM:
70 * The RTS deals only with the executable pointer, but when it comes time to
71 * free the closure, libffi wants the writable pointer back that it gave you
72 * when you allocated it.
73 *
74 * On Linux we solve this problem by storing the address of the writable
75 * mapping into itself, then returning both writable and executable pointers
76 * plus 1 machine word for preparing the closure for use by the RTS (see the
77 * Linux version of allocateExec() in rts/sm/Storage.c). When we want to
78 * recover the writable address, we subtract 1 word from the executable
79 * address and fetch. This works because Linux kernel magic gives us two
80 * pointers with different addresses that refer to the same memory. Whatever
81 * you write into the writeable address can be read back at the executable
82 * address. This method is very efficient.
83 *
84 * On iOS this breaks for two reasons: 1. the two pointers do not refer to
85 * the same memory (so we can't retrieve anything stored into the writable
86 * pointer if we only have the exec pointer), and 2. libffi's
87 * ffi_closure_alloc() assumes the pointer it has returned you is a
88 * ffi_closure structure and treats it as such: It uses that memory to
89 * communicate with ffi_prep_closure_loc(). On Linux by contrast
90 * ffi_closure_alloc() is viewed simply as a memory allocation, and only
91 * ffi_prep_closure_loc() deals in ffi_closure structures. Each of these
92 * differences is enough to make the efficient way used on Linux not work on iOS.
93 * Instead on iOS we use hash tables to recover the writable address from the
94 * executable one. This method is conservative and would almost certainly work
95 * on any platform, but on Linux it makes sense to use the faster method.
96 */
97 void
98 freeHaskellFunctionPtr(void* ptr)
99 {
100 ffi_closure *cl;
101
102 #if defined(ios_HOST_OS)
103 cl = execToWritable(ptr);
104 #else
105 cl = (ffi_closure*)ptr;
106 #endif
107 freeStablePtr(cl->user_data);
108 stgFree(cl->cif->arg_types);
109 stgFree(cl->cif);
110 freeExec(ptr);
111 }
112
113 static ffi_type * char_to_ffi_type(char c)
114 {
115 switch (c) {
116 case 'v': return &ffi_type_void;
117 case 'f': return &ffi_type_float;
118 case 'd': return &ffi_type_double;
119 case 'L': return &ffi_type_sint64;
120 case 'l': return &ffi_type_uint64;
121 case 'W': return &ffi_type_sint32;
122 case 'w': return &ffi_type_uint32;
123 case 'S': return &ffi_type_sint16;
124 case 's': return &ffi_type_uint16;
125 case 'B': return &ffi_type_sint8;
126 case 'b': return &ffi_type_uint8;
127 case 'p': return &ffi_type_pointer;
128 default: barf("char_to_ffi_type: unknown type '%c'", c);
129 }
130 }
131
132 void*
133 createAdjustor (int cconv,
134 StgStablePtr hptr,
135 StgFunPtr wptr,
136 char *typeString)
137 {
138 ffi_cif *cif;
139 ffi_type **arg_types;
140 nat n_args, i;
141 ffi_type *result_type;
142 ffi_closure *cl;
143 int r, abi;
144 void *code;
145
146 n_args = strlen(typeString) - 1;
147 cif = stgMallocBytes(sizeof(ffi_cif), "createAdjustor");
148 arg_types = stgMallocBytes(n_args * sizeof(ffi_type*), "createAdjustor");
149
150 result_type = char_to_ffi_type(typeString[0]);
151 for (i=0; i < n_args; i++) {
152 arg_types[i] = char_to_ffi_type(typeString[i+1]);
153 }
154 switch (cconv) {
155 #if defined(mingw32_HOST_OS) && defined(i386_HOST_ARCH)
156 case 0: /* stdcall */
157 abi = FFI_STDCALL;
158 break;
159 #endif
160 case 1: /* ccall */
161 abi = FFI_DEFAULT_ABI;
162 break;
163 default:
164 barf("createAdjustor: convention %d not supported on this platform", cconv);
165 }
166
167 r = ffi_prep_cif(cif, abi, n_args, result_type, arg_types);
168 if (r != FFI_OK) barf("ffi_prep_cif failed: %d", r);
169
170 cl = allocateExec(sizeof(ffi_closure), &code);
171 if (cl == NULL) {
172 barf("createAdjustor: failed to allocate memory");
173 }
174
175 r = ffi_prep_closure_loc(cl, cif, (void*)wptr, hptr/*userdata*/, code);
176 if (r != FFI_OK) barf("ffi_prep_closure_loc failed: %d", r);
177
178 return (void*)code;
179 }
180
181 #else // To end of file...
182
183 #if defined(_WIN32)
184 #include <windows.h>
185 #endif
186
187 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
188 #include <string.h>
189 #endif
190
191 #ifdef LEADING_UNDERSCORE
192 #define UNDERSCORE "_"
193 #else
194 #define UNDERSCORE ""
195 #endif
196
197 #if defined(x86_64_HOST_ARCH)
198 /*
199 Now here's something obscure for you:
200
201 When generating an adjustor thunk that uses the C calling
202 convention, we have to make sure that the thunk kicks off
203 the process of jumping into Haskell with a tail jump. Why?
204 Because as a result of jumping in into Haskell we may end
205 up freeing the very adjustor thunk we came from using
206 freeHaskellFunctionPtr(). Hence, we better not return to
207 the adjustor code on our way out, since it could by then
208 point to junk.
209
210 The fix is readily at hand, just include the opcodes
211 for the C stack fixup code that we need to perform when
212 returning in some static piece of memory and arrange
213 to return to it before tail jumping from the adjustor thunk.
214 */
/* Shared return trampoline for x86_64 ccall adjustors.
 *
 * Adjustor thunks tail-jump into Haskell and may be freed before the
 * call returns (see the comment above), so the stack-fixup code that
 * undoes the adjustor's extra push must live in static memory rather
 * than in the thunk itself. The dummy C function below exists only to
 * carry the inline asm that defines the obscure_ccall_ret_code label;
 * it is never called as a C function. */
215 static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
216 {
217 __asm__ (
218 ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
219 UNDERSCORE "obscure_ccall_ret_code:\n\t"
220 "addq $0x8, %rsp\n\t"
221 #if defined(mingw32_HOST_OS)
222 /* On Win64, we had to put the original return address after the
223 arg 1-4 spill slots, so now we have to move it back */
224 "movq 0x20(%rsp), %rcx\n"
225 "movq %rcx, (%rsp)\n"
226 #endif
227 "ret"
228 );
229 }
/* Callable address of the label defined by the asm above; adjustor code
 * embeds this address as its fake return address. */
230 extern void obscure_ccall_ret_code(void);
231 #endif
232
233 #if defined(alpha_HOST_ARCH)
234 /* To get the definition of PAL_imb: */
235 # if defined(linux_HOST_OS)
236 # include <asm/pal.h>
237 # else
238 # include <machine/pal.h>
239 # endif
240 #endif
241
242 #if defined(ia64_HOST_ARCH)
243
244 /* Layout of a function descriptor */
245 typedef struct _IA64FunDesc {
246 StgWord64 ip;
247 StgWord64 gp;
248 } IA64FunDesc;
249
/* Allocate size_in_bytes of storage as a garbage-collected byte array
 * (StgArrWords), register a stable pointer to it so the GC keeps it
 * alive, and return the address of the array's payload.
 *
 * The stable pointer is written through *stable; releasing it (and
 * thereby the array) is the caller's responsibility. Used on ia64 to
 * hold adjustor code where executable heap memory is obtained from the
 * Haskell heap. */
250 static void *
251 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
252 {
253 StgArrWords* arr;
254 nat data_size_in_words, total_size_in_words;
255
256 /* round up to a whole number of words */
257 data_size_in_words = ROUNDUP_BYTES_TO_WDS(size_in_bytes);
258 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
259
260 /* allocate and fill it in */
261 arr = (StgArrWords *)allocate(total_size_in_words);
262 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, size_in_bytes);
263
264 /* obtain a stable ptr */
265 *stable = getStablePtr((StgPtr)arr);
266
267 /* and return a ptr to the goods inside the array */
268 return(&(arr->payload));
269 }
270 #endif
271
272 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* Static return trampoline for 32-bit PowerPC Linux adjustors: tears
 * down the stack frame the adjustor built before tail-calling Haskell.
 *   lwz 1,0(1)   -- pop the frame via the back-chain word at 0(r1)
 *   lwz 0,4(1)   -- reload the link register saved at 4(r1) by the
 *                   adjustor code generated in createAdjustor below
 *   mtlr 0 ; blr -- return to the original caller
 * It lives in static memory because the adjustor thunk itself may
 * already have been freed by the time the call returns. */
273 __asm__("obscure_ccall_ret_code:\n\t"
274 "lwz 1,0(1)\n\t"
275 "lwz 0,4(1)\n\t"
276 "mtlr 0\n\t"
277 "blr");
278 extern void obscure_ccall_ret_code(void);
279 #endif
280
281 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
282 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
283
284 /* !!! !!! WARNING: !!! !!!
285 * This structure is accessed from AdjustorAsm.s
286 * Any changes here have to be mirrored in the offsets there.
287 */
288
/* PowerPC adjustor stub record. The leading fields are either raw
 * instruction slots (Darwin variants — field names match the PPC
 * mnemonics they presumably hold once filled in by createAdjustor;
 * TODO confirm against the code generator below) or an AIX-style
 * function descriptor (fundesc ABIs). AdjustorAsm.s reads the trailing
 * hptr/wptr/frame-layout fields at fixed byte offsets, which is why
 * field order here must never change without updating that file. */
289 typedef struct AdjustorStub {
290 #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
/* 32-bit Darwin: five instruction slots followed by a code pointer. */
291 unsigned lis;
292 unsigned ori;
293 unsigned lwz;
294 unsigned mtctr;
295 unsigned bctr;
296 StgFunPtr code;
297 #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
298 /* powerpc64-darwin: just guessing that it won't use fundescs. */
299 unsigned lis;
300 unsigned ori;
301 unsigned rldimi;
302 unsigned oris;
303 unsigned ori2;
304 unsigned lwz;
305 unsigned mtctr;
306 unsigned bctr;
307 StgFunPtr code;
308 #else
309 /* fundesc-based ABIs */
310 #define FUNDESCS
311 StgFunPtr code;
312 struct AdjustorStub
313 *toc;
314 void *env;
315 #endif
/* Stable pointer to the Haskell closure and the C wrapper to call. */
316 StgStablePtr hptr;
317 StgFunPtr wptr;
/* Frame-layout parameters consumed by AdjustorAsm.s; exact semantics
 * are defined by the offsets in that file — verify there before use. */
318 StgInt negative_framesize;
319 StgInt extrawords_plus_one;
320 } AdjustorStub;
321
322 #endif
323 #endif
324
325 #if defined(i386_HOST_ARCH)
326
327 /* !!! !!! WARNING: !!! !!!
328 * This structure is accessed from AdjustorAsm.s
329 * Any changes here have to be mirrored in the offsets there.
330 */
331
/* i386 adjustor stub. createAdjustor (ccall case below) writes a
 * relative `call adjustorCode` into call[] (opcode 0xe8 + rel32) and
 * fills in: hptr (stable pointer to the Haskell closure), wptr (the C
 * wrapper to enter), frame_size (bytes of new frame, computed so the
 * stack stays 16-byte aligned, see #5250) and argument_size (argument
 * block size in words). AdjustorAsm.s reads these at fixed offsets. */
332 typedef struct AdjustorStub {
333 unsigned char call[8];
334 StgStablePtr hptr;
335 StgFunPtr wptr;
336 StgInt frame_size;
337 StgInt argument_size;
338 } AdjustorStub;
339 #endif
340
341 #if defined(i386_HOST_ARCH) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
/* Return the number of machine words the arguments described by
 * typeString occupy when passed on the stack. On 32-bit platforms a
 * Double ('d') or Int64 ('l'/'L') takes two words; every other type
 * character — and everything on 64-bit platforms — takes one. */
static int totalArgumentSize(char *typeString)
{
    int words = 0;
    const char *p;

    for (p = typeString; *p != '\0'; p++) {
        char t = *p;
        if ((t == 'd' || t == 'l' || t == 'L') && sizeof(void*) == 4) {
            /* Double/Int64 need a second word on 32-bit targets. */
            words += 2;
        } else {
            words += 1;
        }
    }
    return words;
}
367 #endif
368
369 void*
370 createAdjustor(int cconv, StgStablePtr hptr,
371 StgFunPtr wptr,
372 char *typeString
373 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
374 STG_UNUSED
375 #endif
376 )
377 {
378 void *adjustor = NULL;
379 void *code = NULL;
380
381 switch (cconv)
382 {
383 case 0: /* _stdcall */
384 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
385 /* Magic constant computed by inspecting the code length of
386 the following assembly language snippet
387 (offset and machine code prefixed):
388
389 <0>: 58 popl %eax # temp. remove ret addr..
390 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
391 # hold a StgStablePtr
392 <6>: 50 pushl %eax # put back ret. addr
393 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
394 <c>: ff e0 jmp %eax # and jump to it.
395 # the callee cleans up the stack
396 */
397 adjustor = allocateExec(14,&code);
398 {
399 unsigned char *const adj_code = (unsigned char *)adjustor;
400 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
401
402 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
403 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
404
405 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
406
407 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
408 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
409
410 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
411 adj_code[0x0d] = (unsigned char)0xe0;
412 }
413 #endif
414 break;
415
416 case 1: /* _ccall */
417 #if defined(i386_HOST_ARCH)
418 {
419 /*
420 Most of the trickiness here is due to the need to keep the
421 stack pointer 16-byte aligned (see #5250). That means we
422 can't just push another argument on the stack and call the
423 wrapper, we may have to shuffle the whole argument block.
424
425 We offload most of the work to AdjustorAsm.S.
426 */
427 AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
428 adjustor = adjustorStub;
429
430 int sz = totalArgumentSize(typeString);
431
432 adjustorStub->call[0] = 0xe8;
433 *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)code + 5);
434 adjustorStub->hptr = hptr;
435 adjustorStub->wptr = wptr;
436
437 // The adjustor puts the following things on the stack:
438 // 1.) %ebp link
439 // 2.) padding and (a copy of) the arguments
440 // 3.) a dummy argument
441 // 4.) hptr
442 // 5.) return address (for returning to the adjustor)
443 // All these have to add up to a multiple of 16.
444
445 // first, include everything in frame_size
446 adjustorStub->frame_size = sz * 4 + 16;
447 // align to 16 bytes
448 adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
449 // only count 2.) and 3.) as part of frame_size
450 adjustorStub->frame_size -= 12;
451 adjustorStub->argument_size = sz;
452 }
453
454 #elif defined(x86_64_HOST_ARCH)
455
456 # if defined(mingw32_HOST_OS)
457 /*
458 stack at call:
459 argn
460 ...
461 arg5
462 return address
463 %rcx,%rdx,%r8,%r9 = arg1..arg4
464
465 if there are <4 integer args, then we can just push the
466 StablePtr into %rcx and shuffle the other args up.
467
468 If there are >=4 integer args, then we have to flush one arg
469 to the stack, and arrange to adjust the stack ptr on return.
470 The stack will be rearranged to this:
471
472 argn
473 ...
474 arg5
475 return address *** <-- dummy arg in stub fn.
476 arg4
477 obscure_ccall_ret_code
478
479 This unfortunately means that the type of the stub function
480 must have a dummy argument for the original return address
481 pointer inserted just after the 4th integer argument.
482
483 Code for the simple case:
484
485 0: 4d 89 c1 mov %r8,%r9
486 3: 49 89 d0 mov %rdx,%r8
487 6: 48 89 ca mov %rcx,%rdx
488 9: f2 0f 10 da movsd %xmm2,%xmm3
489 d: f2 0f 10 d1 movsd %xmm1,%xmm2
490 11: f2 0f 10 c8 movsd %xmm0,%xmm1
491 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28>
492 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30>
493 22: 90 nop
494 [...]
495
496
497 And the version for >=4 integer arguments:
498
499 [we want to push the 4th argument (either %r9 or %xmm3, depending on
500 whether it is a floating arg or not) and the return address onto the
501 stack. However, slots 1-4 are reserved for code we call to spill its
502 args 1-4 into, so we can't just push them onto the bottom of the stack.
503 So first put the 4th argument onto the stack, above what will be the
504 spill slots.]
505 0: 48 83 ec 08 sub $0x8,%rsp
506 [if non-floating arg, then do this:]
507 4: 90 nop
508 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp)
509 [else if floating arg then do this:]
510 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp)
511 [end if]
512 [Now push the new return address onto the stack]
513 a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40>
514 [But the old return address has been moved up into a spill slot, so
515 we need to move it above them]
516 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9
517 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp)
518 [Now we do the normal register shuffle-up etc]
519 1a: 4d 89 c1 mov %r8,%r9
520 1d: 49 89 d0 mov %rdx,%r8
521 20: 48 89 ca mov %rcx,%rdx
522 23: f2 0f 10 da movsd %xmm2,%xmm3
523 27: f2 0f 10 d1 movsd %xmm1,%xmm2
524 2b: f2 0f 10 c8 movsd %xmm0,%xmm1
525 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48>
526 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50>
527 3c: 90 nop
528 3d: 90 nop
529 3e: 90 nop
530 3f: 90 nop
531 [...]
532
533 */
534 {
535 StgWord8 *adj_code;
536
537 // determine whether we have 4 or more integer arguments,
538 // and therefore need to flush one to the stack.
539 if ((typeString[0] == '\0') ||
540 (typeString[1] == '\0') ||
541 (typeString[2] == '\0') ||
542 (typeString[3] == '\0')) {
543
544 adjustor = allocateExec(0x38,&code);
545 adj_code = (StgWord8*)adjustor;
546
547 *(StgInt32 *)adj_code = 0x49c1894d;
548 *(StgInt32 *)(adj_code+0x4) = 0x8948d089;
549 *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca;
550 *(StgInt32 *)(adj_code+0xc) = 0x100ff2da;
551 *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1;
552 *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8;
553 *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
554
555 *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff;
556 *(StgInt32 *)(adj_code+0x20) = 0x00000000;
557 *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr;
558 *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr;
559 }
560 else
561 {
562 int fourthFloating;
563
564 fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd');
565 adjustor = allocateExec(0x58,&code);
566 adj_code = (StgWord8*)adjustor;
567 *(StgInt32 *)adj_code = 0x08ec8348;
568 *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2
569 : 0x4c894c90;
570 *(StgInt32 *)(adj_code+0x8) = 0x35ff2024;
571 *(StgInt32 *)(adj_code+0xc) = 0x00000030;
572 *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c;
573 *(StgInt32 *)(adj_code+0x14) = 0x4c894c10;
574 *(StgInt32 *)(adj_code+0x18) = 0x894d3024;
575 *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1;
576 *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948;
577 *(StgInt32 *)(adj_code+0x24) = 0xf2da100f;
578 *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f;
579 *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f;
580 *(StgInt32 *)(adj_code+0x30) = 0x00120d8b;
581 *(StgInt32 *)(adj_code+0x34) = 0x25ff0000;
582 *(StgInt32 *)(adj_code+0x38) = 0x00000014;
583 *(StgInt32 *)(adj_code+0x3c) = 0x90909090;
584 *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code;
585 *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr;
586 *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr;
587 }
588 }
589
590 # else
591 /*
592 stack at call:
593 argn
594 ...
595 arg7
596 return address
597 %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg1..arg6
598
599 if there are <6 integer args, then we can just push the
600 StablePtr into %edi and shuffle the other args up.
601
602 If there are >=6 integer args, then we have to flush one arg
603 to the stack, and arrange to adjust the stack ptr on return.
604 The stack will be rearranged to this:
605
606 argn
607 ...
608 arg7
609 return address *** <-- dummy arg in stub fn.
610 arg6
611 obscure_ccall_ret_code
612
613 This unfortunately means that the type of the stub function
614 must have a dummy argument for the original return address
615 pointer inserted just after the 6th integer argument.
616
617 Code for the simple case:
618
619 0: 4d 89 c1 mov %r8,%r9
620 3: 49 89 c8 mov %rcx,%r8
621 6: 48 89 d1 mov %rdx,%rcx
622 9: 48 89 f2 mov %rsi,%rdx
623 c: 48 89 fe mov %rdi,%rsi
624 f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
625 16: ff 25 0c 00 00 00 jmpq *12(%rip)
626 ...
627 20: .quad 0 # aligned on 8-byte boundary
628 28: .quad 0 # aligned on 8-byte boundary
629
630
631 And the version for >=6 integer arguments:
632
633 0: 41 51 push %r9
634 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28>
635 8: 4d 89 c1 mov %r8,%r9
636 b: 49 89 c8 mov %rcx,%r8
637 e: 48 89 d1 mov %rdx,%rcx
638 11: 48 89 f2 mov %rsi,%rdx
639 14: 48 89 fe mov %rdi,%rsi
640 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30>
641 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38>
642 ...
643 28: .quad 0 # aligned on 8-byte boundary
644 30: .quad 0 # aligned on 8-byte boundary
645 38: .quad 0 # aligned on 8-byte boundary
646 */
647
648 {
649 int i = 0;
650 char *c;
651 StgWord8 *adj_code;
652
653 // determine whether we have 6 or more integer arguments,
654 // and therefore need to flush one to the stack.
655 for (c = typeString; *c != '\0'; c++) {
656 if (*c != 'f' && *c != 'd') i++;
657 if (i == 6) break;
658 }
659
660 if (i < 6) {
661 adjustor = allocateExec(0x30,&code);
662 adj_code = (StgWord8*)adjustor;
663
664 *(StgInt32 *)adj_code = 0x49c1894d;
665 *(StgInt32 *)(adj_code+0x4) = 0x8948c889;
666 *(StgInt32 *)(adj_code+0x8) = 0xf28948d1;
667 *(StgInt32 *)(adj_code+0xc) = 0x48fe8948;
668 *(StgInt32 *)(adj_code+0x10) = 0x000a3d8b;
669 *(StgInt32 *)(adj_code+0x14) = 0x25ff0000;
670 *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
671 *(StgInt64 *)(adj_code+0x20) = (StgInt64)hptr;
672 *(StgInt64 *)(adj_code+0x28) = (StgInt64)wptr;
673 }
674 else
675 {
676 adjustor = allocateExec(0x40,&code);
677 adj_code = (StgWord8*)adjustor;
678
679 *(StgInt32 *)adj_code = 0x35ff5141;
680 *(StgInt32 *)(adj_code+0x4) = 0x00000020;
681 *(StgInt32 *)(adj_code+0x8) = 0x49c1894d;
682 *(StgInt32 *)(adj_code+0xc) = 0x8948c889;
683 *(StgInt32 *)(adj_code+0x10) = 0xf28948d1;
684 *(StgInt32 *)(adj_code+0x14) = 0x48fe8948;
685 *(StgInt32 *)(adj_code+0x18) = 0x00123d8b;
686 *(StgInt32 *)(adj_code+0x1c) = 0x25ff0000;
687 *(StgInt32 *)(adj_code+0x20) = 0x00000014;
688
689 *(StgInt64 *)(adj_code+0x28) = (StgInt64)obscure_ccall_ret_code;
690 *(StgInt64 *)(adj_code+0x30) = (StgInt64)hptr;
691 *(StgInt64 *)(adj_code+0x38) = (StgInt64)wptr;
692 }
693 }
694 # endif
695
696
697 #elif defined(sparc_HOST_ARCH)
698 /* Magic constant computed by inspecting the code length of the following
699 assembly language snippet (offset and machine code prefixed):
700
701 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
702 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
703 <08>: D823A05C st %o4, [%sp + 92]
704 <0C>: 9A10000B mov %o3, %o5
705 <10>: 9810000A mov %o2, %o4
706 <14>: 96100009 mov %o1, %o3
707 <18>: 94100008 mov %o0, %o2
708 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
709 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
710 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
711 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
712 <2C> 00000000 ! place for getting hptr back easily
713
714 ccall'ing on SPARC is easy, because we are quite lucky to push a
715 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
716 existing arguments (note that %sp must stay double-word aligned at
717 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
718 To do this, we extend the *caller's* stack frame by 2 words and shift
719 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
720 procedure because of the tail-jump) by 2 positions. This makes room in
721 %o0 and %o1 for the additional arguments, namely hptr and a dummy (used
722 for destination addr of jump on SPARC, return address on x86, ...). This
723 shouldn't cause any problems for a C-like caller: alloca is implemented
724 similarly, and local variables should be accessed via %fp, not %sp. In a
725 nutshell: This should work! (Famous last words! :-)
726 */
727 adjustor = allocateExec(4*(11+1),&code);
728 {
729 unsigned long *const adj_code = (unsigned long *)adjustor;
730
731 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
732 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
733 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
734 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
735 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
736 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
737 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
738 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
739 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
740 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
741 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
742 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
743 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
744 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
745 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
746
747 adj_code[11] = (unsigned long)hptr;
748
749 /* flush cache */
750 asm("flush %0" : : "r" (adj_code ));
751 asm("flush %0" : : "r" (adj_code + 2));
752 asm("flush %0" : : "r" (adj_code + 4));
753 asm("flush %0" : : "r" (adj_code + 6));
754 asm("flush %0" : : "r" (adj_code + 10));
755
756 /* max. 5 instructions latency, and we need at >= 1 for returning */
757 asm("nop");
758 asm("nop");
759 asm("nop");
760 asm("nop");
761 }
762 #elif defined(alpha_HOST_ARCH)
763 /* Magic constant computed by inspecting the code length of
764 the following assembly language snippet
765 (offset and machine code prefixed; note that the machine code
766 shown is longwords stored in little-endian order):
767
768 <00>: 46520414 mov a2, a4
769 <04>: 46100412 mov a0, a2
770 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
771 <0c>: 46730415 mov a3, a5
772 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
773 <14>: 46310413 mov a1, a3
774 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
775 <1c>: 00000000 # padding for alignment
776 <20>: [8 bytes for hptr quadword]
777 <28>: [8 bytes for wptr quadword]
778
779 The "computed" jump at <08> above is really a jump to a fixed
780 location. Accordingly, we place an always-correct hint in the
781 jump instruction, namely the address offset from <0c> to wptr,
782 divided by 4, taking the lowest 14 bits.
783
784 We only support passing 4 or fewer argument words, for the same
785 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
786 On the Alpha the first 6 integer arguments are in a0 through a5,
787 and the rest on the stack. Hence we want to shuffle the original
788 caller's arguments by two.
789
790 On the Alpha the calling convention is so complex and dependent
791 on the callee's signature -- for example, the stack pointer has
792 to be a multiple of 16 -- that it seems impossible to me [ccshan]
793 to handle the general case correctly without changing how the
794 adjustor is called from C. For now, our solution of shuffling
795 registers only and ignoring the stack only works if the original
796 caller passed 4 or fewer argument words.
797
798 TODO: Depending on how much allocation overhead stgMallocBytes uses for
799 header information (more precisely, if the overhead is no more than
800 4 bytes), we should move the first three instructions above down by
801 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
802 */
803 ASSERT(((StgWord64)wptr & 3) == 0);
804 adjustor = allocateExec(48,&code);
805 {
806 StgWord64 *const code = (StgWord64 *)adjustor;
807
808 code[0] = 0x4610041246520414L;
809 code[1] = 0x46730415a61b0020L;
810 code[2] = 0x46310413a77b0028L;
811 code[3] = 0x000000006bfb0000L
812 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
813
814 code[4] = (StgWord64)hptr;
815 code[5] = (StgWord64)wptr;
816
817 /* Ensure that instruction cache is consistent with our new code */
818 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
819 }
820 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
821
822 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
823 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
824 {
825 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
826 We need to calculate all the details of the stack frame layout,
827 taking into account the types of all the arguments, and then
828 generate code on the fly. */
829
830 int src_gpr = 3, dst_gpr = 5;
831 int fpr = 3;
832 int src_offset = 0, dst_offset = 0;
833 int n = strlen(typeString),i;
834 int src_locs[n], dst_locs[n];
835 int frameSize;
836 unsigned *code;
837
838 /* Step 1:
839 Calculate where the arguments should go.
840 src_locs[] will contain the locations of the arguments in the
841 original stack frame passed to the adjustor.
842 dst_locs[] will contain the locations of the arguments after the
843 adjustor runs, on entry to the wrapper proc pointed to by wptr.
844
845 This algorithm is based on the one described on page 3-19 of the
846 System V ABI PowerPC Processor Supplement.
847 */
848 for(i=0;typeString[i];i++)
849 {
850 char t = typeString[i];
851 if((t == 'f' || t == 'd') && fpr <= 8)
852 src_locs[i] = dst_locs[i] = -32-(fpr++);
853 else
854 {
855 if((t == 'l' || t == 'L') && src_gpr <= 9)
856 {
857 if((src_gpr & 1) == 0)
858 src_gpr++;
859 src_locs[i] = -src_gpr;
860 src_gpr += 2;
861 }
862 else if((t == 'w' || t == 'W') && src_gpr <= 10)
863 {
864 src_locs[i] = -(src_gpr++);
865 }
866 else
867 {
868 if(t == 'l' || t == 'L' || t == 'd')
869 {
870 if(src_offset % 8)
871 src_offset += 4;
872 }
873 src_locs[i] = src_offset;
874 src_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
875 }
876
877 if((t == 'l' || t == 'L') && dst_gpr <= 9)
878 {
879 if((dst_gpr & 1) == 0)
880 dst_gpr++;
881 dst_locs[i] = -dst_gpr;
882 dst_gpr += 2;
883 }
884 else if((t == 'w' || t == 'W') && dst_gpr <= 10)
885 {
886 dst_locs[i] = -(dst_gpr++);
887 }
888 else
889 {
890 if(t == 'l' || t == 'L' || t == 'd')
891 {
892 if(dst_offset % 8)
893 dst_offset += 4;
894 }
895 dst_locs[i] = dst_offset;
896 dst_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
897 }
898 }
899 }
900
901 frameSize = dst_offset + 8;
902 frameSize = (frameSize+15) & ~0xF;
903
904 /* Step 2:
905 Build the adjustor.
906 */
907 // allocate space for at most 4 insns per parameter
908 // plus 14 more instructions.
909 adjustor = allocateExec(4 * (4*n + 14),&code);
910 code = (unsigned*)adjustor;
911
912 *code++ = 0x48000008; // b *+8
913 // * Put the hptr in a place where freeHaskellFunctionPtr
914 // can get at it.
915 *code++ = (unsigned) hptr;
916
917 // * save the link register
918 *code++ = 0x7c0802a6; // mflr r0;
919 *code++ = 0x90010004; // stw r0, 4(r1);
920 // * and build a new stack frame
921 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
922
923 // * now generate instructions to copy arguments
924 // from the old stack frame into the new stack frame.
925 for(i=n-1;i>=0;i--)
926 {
927 if(src_locs[i] < -32)
928 ASSERT(dst_locs[i] == src_locs[i]);
929 else if(src_locs[i] < 0)
930 {
931 // source in GPR.
932 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
933 if(dst_locs[i] < 0)
934 {
935 ASSERT(dst_locs[i] > -32);
936 // dst is in GPR, too.
937
938 if(typeString[i] == 'l' || typeString[i] == 'L')
939 {
940 // mr dst+1, src+1
941 *code++ = 0x7c000378
942 | ((-dst_locs[i]+1) << 16)
943 | ((-src_locs[i]+1) << 11)
944 | ((-src_locs[i]+1) << 21);
945 }
946 // mr dst, src
947 *code++ = 0x7c000378
948 | ((-dst_locs[i]) << 16)
949 | ((-src_locs[i]) << 11)
950 | ((-src_locs[i]) << 21);
951 }
952 else
953 {
954 if(typeString[i] == 'l' || typeString[i] == 'L')
955 {
956 // stw src+1, dst_offset+4(r1)
957 *code++ = 0x90010000
958 | ((-src_locs[i]+1) << 21)
959 | (dst_locs[i] + 4);
960 }
961
962 // stw src, dst_offset(r1)
963 *code++ = 0x90010000
964 | ((-src_locs[i]) << 21)
965 | (dst_locs[i] + 8);
966 }
967 }
968 else
969 {
970 ASSERT(dst_locs[i] >= 0);
971 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
972
973 if(typeString[i] == 'l' || typeString[i] == 'L')
974 {
975 // lwz r0, src_offset(r1)
976 *code++ = 0x80010000
977 | (src_locs[i] + frameSize + 8 + 4);
978 // stw r0, dst_offset(r1)
979 *code++ = 0x90010000
980 | (dst_locs[i] + 8 + 4);
981 }
982 // lwz r0, src_offset(r1)
983 *code++ = 0x80010000
984 | (src_locs[i] + frameSize + 8);
985 // stw r0, dst_offset(r1)
986 *code++ = 0x90010000
987 | (dst_locs[i] + 8);
988 }
989 }
990
991 // * hptr will be the new first argument.
992 // lis r3, hi(hptr)
993 *code++ = OP_HI(0x3c60, hptr);
994 // ori r3,r3,lo(hptr)
995 *code++ = OP_LO(0x6063, hptr);
996
997 // * we need to return to a piece of code
998 // which will tear down the stack frame.
999 // lis r11,hi(obscure_ccall_ret_code)
1000 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
1001 // ori r11,r11,lo(obscure_ccall_ret_code)
1002 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
1003 // mtlr r11
1004 *code++ = 0x7d6803a6;
1005
1006 // * jump to wptr
1007 // lis r11,hi(wptr)
1008 *code++ = OP_HI(0x3d60, wptr);
1009 // ori r11,r11,lo(wptr)
1010 *code++ = OP_LO(0x616b, wptr);
1011 // mtctr r11
1012 *code++ = 0x7d6903a6;
1013 // bctr
1014 *code++ = 0x4e800420;
1015
1016 // Flush the Instruction cache:
1017 {
1018 unsigned *p = adjustor;
1019 while(p < code)
1020 {
1021 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
1022 : : "r" (p));
1023 p++;
1024 }
1025 __asm__ volatile ("sync\n\tisync");
1026 }
1027 }
1028
1029 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
1030
1031 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
1032 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
1033 {
1034 /* The following code applies to all PowerPC and PowerPC64 platforms
1035 whose stack layout is based on the AIX ABI.
1036
1037 Besides (obviously) AIX, this includes
1038 Mac OS 9 and BeOS/PPC (may they rest in peace),
1039 which use the 32-bit AIX ABI
1040 powerpc64-linux,
1041 which uses the 64-bit AIX ABI
1042 and Darwin (Mac OS X),
1043 which uses the same stack layout as AIX,
1044 but no function descriptors.
1045
1046 The actual stack-frame shuffling is implemented out-of-line
1047 in the function adjustorCode, in AdjustorAsm.S.
1048 Here, we set up an AdjustorStub structure, which
1049 is a function descriptor (on platforms that have function
1050 descriptors) or a short piece of stub code (on Darwin) to call
1051 adjustorCode with a pointer to the AdjustorStub struct loaded
1052 into register r2.
1053
1054 One nice thing about this is that there is _no_ code generated at
1055 runtime on the platforms that have function descriptors.
1056 */
1057 AdjustorStub *adjustorStub;
1058 int sz = 0, extra_sz, total_sz;
1059
1060 #ifdef FUNDESCS
1061 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
1062 #else
1063 adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
1064 #endif
1065 adjustor = adjustorStub;
1066
1067 adjustorStub->code = (void*) &adjustorCode;
1068
1069 #ifdef FUNDESCS
1070 // function descriptors are a cool idea.
1071 // We don't need to generate any code at runtime.
1072 adjustorStub->toc = adjustorStub;
1073 #else
1074
1075 // no function descriptors :-(
1076 // We need to do things "by hand".
1077 #if defined(powerpc_HOST_ARCH)
1078 // lis r2, hi(adjustorStub)
1079 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
1080 // ori r2, r2, lo(adjustorStub)
1081 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
1082 // lwz r0, code(r2)
1083 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
1084 - (char*)adjustorStub);
1085 // mtctr r0
1086 adjustorStub->mtctr = 0x7c0903a6;
1087 // bctr
1088 adjustorStub->bctr = 0x4e800420;
1089 #else
1090 barf("adjustor creation not supported on this platform");
1091 #endif
1092
1093 // Flush the Instruction cache:
1094 {
1095 int n = sizeof(AdjustorStub)/sizeof(unsigned);
1096 unsigned *p = (unsigned*)adjustor;
1097 while(n--)
1098 {
1099 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
1100 : : "r" (p));
1101 p++;
1102 }
1103 __asm__ volatile ("sync\n\tisync");
1104 }
1105 #endif
1106
1107 // Calculate the size of the stack frame, in words.
1108 sz = totalArgumentSize(typeString);
1109
1110 // The first eight words of the parameter area
1111 // are just "backing store" for the parameters passed in
1112 // the GPRs. extra_sz is the number of words beyond those first
1113 // 8 words.
1114 extra_sz = sz - 8;
1115 if(extra_sz < 0)
1116 extra_sz = 0;
1117
1118 // Calculate the total size of the stack frame.
1119 total_sz = (6 /* linkage area */
1120 + 8 /* minimum parameter area */
1121 + 2 /* two extra arguments */
1122 + extra_sz)*sizeof(StgWord);
1123
1124 // align to 16 bytes.
1125 // AIX only requires 8 bytes, but who cares?
1126 total_sz = (total_sz+15) & ~0xF;
1127
1128 // Fill in the information that adjustorCode in AdjustorAsm.S
1129 // will use to create a new stack frame with the additional args.
1130 adjustorStub->hptr = hptr;
1131 adjustorStub->wptr = wptr;
1132 adjustorStub->negative_framesize = -total_sz;
1133 adjustorStub->extrawords_plus_one = extra_sz + 1;
1134 }
1135
1136 #elif defined(ia64_HOST_ARCH)
1137 /*
1138 Up to 8 inputs are passed in registers. We flush the last two inputs to
1139 the stack, initially into the 16-byte scratch region left by the caller.
1140 We then shuffle the others along by 4 (taking 2 registers for ourselves
1141 to save return address and previous function state - we need to come back
1142 here on the way out to restore the stack, so this is a real function
1143 rather than just a trampoline).
1144
1145 The function descriptor we create contains the gp of the target function
1146 so gp is already loaded correctly.
1147
1148 [MLX] alloc r16=ar.pfs,10,2,0
1149 movl r17=wptr
1150 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
1151 mov r41=r37 // out7 = in5 (out3)
1152 mov r40=r36;; // out6 = in4 (out2)
1153 [MII] st8.spill [r12]=r39 // spill in7 (out5)
1154 mov.sptk b6=r17,50
1155 mov r38=r34;; // out4 = in2 (out0)
1156 [MII] mov r39=r35 // out5 = in3 (out1)
1157 mov r37=r33 // out3 = in1 (loc1)
1158 mov r36=r32 // out2 = in0 (loc0)
1159 [MLX] adds r12=-24,r12 // update sp
1160 movl r34=hptr;; // out0 = hptr
1161 [MIB] mov r33=r16 // loc1 = ar.pfs
1162 mov r32=b0 // loc0 = retaddr
1163 br.call.sptk.many b0=b6;;
1164
1165 [MII] adds r12=-16,r12
1166 mov b0=r32
1167 mov.i ar.pfs=r33
1168 [MFB] nop.m 0x0
1169 nop.f 0x0
1170 br.ret.sptk.many b0;;
1171 */
1172
1173 /* These macros distribute a long constant into the two words of an MLX bundle */
1174 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
1175 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
1176 #define MOVL_HIWORD(val) ( (BITS(val,0,7) << 36) \
1177 | (BITS(val,7,9) << 50) \
1178 | (BITS(val,16,5) << 45) \
1179 | (BITS(val,21,1) << 44) \
1180 | (BITS(val,40,23)) \
1181 | (BITS(val,63,1) << 59))
1182
1183 {
1184 StgStablePtr stable;
1185 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
1186 StgWord64 wcode = wdesc->ip;
1187 IA64FunDesc *fdesc;
1188 StgWord64 *code;
1189
1190 /* we allocate on the Haskell heap since malloc'd memory isn't
1191 * executable - argh */
1192 /* Allocated memory is word-aligned (8 bytes) but functions on ia64
1193 * must be aligned to 16 bytes. We allocate an extra 8 bytes of
1194 * wiggle room so that we can put the code on a 16 byte boundary. */
1195 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8+8, &stable);
1196
1197 fdesc = (IA64FunDesc *)adjustor;
1198 code = (StgWord64 *)(fdesc + 1);
1199 /* add 8 bytes to code if needed to align to a 16-byte boundary */
1200 if ((StgWord64)code & 15) code++;
1201 fdesc->ip = (StgWord64)code;
1202 fdesc->gp = wdesc->gp;
1203
1204 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
1205 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
1206 code[2] = 0x029015d818984001;
1207 code[3] = 0x8401200500420094;
1208 code[4] = 0x886011d8189c0001;
1209 code[5] = 0x84011004c00380c0;
1210 code[6] = 0x0250210046013800;
1211 code[7] = 0x8401000480420084;
1212 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
1213 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
1214 code[10] = 0x0200210020010811;
1215 code[11] = 0x1080006800006200;
1216 code[12] = 0x0000210018406000;
1217 code[13] = 0x00aa021000038005;
1218 code[14] = 0x000000010000001d;
1219 code[15] = 0x0084000880000200;
1220
1221 /* save stable pointers in convenient form */
1222 code[16] = (StgWord64)hptr;
1223 code[17] = (StgWord64)stable;
1224 }
1225 #else
1226 barf("adjustor creation not supported on this platform");
1227 #endif
1228 break;
1229
1230 default:
1231 ASSERT(0);
1232 break;
1233 }
1234
1235 /* Have fun! */
1236 return code;
1237 }
1238
1239
/* -----------------------------------------------------------------------------
 * freeHaskellFunctionPtr
 *
 * Release an adjustor thunk previously handed out by createAdjustor:
 * free the StgStablePtr to the Haskell closure that is embedded in the
 * thunk's code, then release the executable memory itself via freeExec.
 *
 * Each platform branch first sanity-checks that 'ptr' really looks like
 * one of our adjustors (by comparing its leading bytes/words against the
 * code that createAdjustor emits for that platform); if not, it complains
 * via errorBelch and returns without freeing anything.
 * -------------------------------------------------------------------------- */
void
freeHaskellFunctionPtr(void* ptr)
{
#if defined(i386_HOST_ARCH)
 /* An i386 adjustor starts with either 0xe8 (the ccall variant, laid out
    as an AdjustorStub) or 0x58 (the other variant stores its stable
    pointer inline at byte offset 0x02).
    NOTE(review): these magic bytes and the 0x02 offset must mirror what
    the i386 branch of createAdjustor emits -- confirm against that code,
    which is not visible in this chunk. */
 if ( *(unsigned char*)ptr != 0xe8 &&
      *(unsigned char*)ptr != 0x58 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 if (*(unsigned char*)ptr == 0xe8) { /* Aha, a ccall adjustor! */
     freeStablePtr(((AdjustorStub*)ptr)->hptr);
 } else {
    /* stable pointer stored inline in the code, 2 bytes in */
    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
 }
#elif defined(x86_64_HOST_ARCH)
 /* The first 16-bit word of the thunk identifies which x86_64 adjustor
    variant this is; the stable pointer lives at a variant-specific byte
    offset within the emitted code.
    NOTE(review): the magic words (0x894d / 0x5141 / 0x8348) and offsets
    (0x20 / 0x28 / 0x30 / 0x48) must match the x86_64 branch of
    createAdjustor -- verify against that code. */
 if ( *(StgWord16 *)ptr == 0x894d ) {
     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+
#if defined(mingw32_HOST_OS)
                                                   0x28
#else
                                                   0x20
#endif
                                                       ));
#if !defined(mingw32_HOST_OS)
 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30));
#endif
#if defined(mingw32_HOST_OS)
 } else if ( *(StgWord16 *)ptr == 0x8348 ) {
     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48));
#endif
 } else {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
#elif defined(sparc_HOST_ARCH)
 /* Recognise the sparc adjustor by its leading instruction word; the
    stable pointer is stored 11 words in.
    NOTE(review): must match the sparc branch of createAdjustor. */
 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first..*/
 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
#elif defined(alpha_HOST_ARCH)
 /* Recognise the alpha adjustor by its leading 8 code bytes; the stable
    pointer is stored at byte offset 0x10.
    NOTE(review): must match the alpha branch of createAdjustor. */
 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first..*/
 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
 /* A powerpc-linux adjustor begins with "b *+8" (0x48000008), with hptr
    stored in the very next word -- see the matching createAdjustor code
    above, which emits exactly that prologue. */
 if ( *(StgWord*)ptr != 0x48000008 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((StgStablePtr*)ptr)[1]);
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
 /* AIX-ABI platforms use an AdjustorStub whose 'code' field points at
    the shared out-of-line adjustorCode routine (set up in createAdjustor
    above); use that field to recognise one of ours. */
 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((AdjustorStub*)ptr)->hptr);
#elif defined(ia64_HOST_ARCH)
 /* An ia64 adjustor is a function descriptor followed by code; a genuine
    one has its ip field pointing just past the descriptor (createAdjustor
    above sets fdesc->ip that way). */
 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
 StgWord64 *code = (StgWord64 *)(fdesc+1);

 if (fdesc->ip != (StgWord64)code) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 /* code[16]/code[17] hold hptr and the stable pointer to the thunk's own
    storage, saved there by the ia64 branch of createAdjustor. */
 freeStablePtr((StgStablePtr)code[16]);
 freeStablePtr((StgStablePtr)code[17]);
 /* ia64 adjustors live on the Haskell heap (stgAllocStable), not in
    freeExec-managed memory, so return here instead of falling through. */
 return;
#else
 ASSERT(0);
#endif
 // Can't write to this memory, it is only executable:
 // *((unsigned char*)ptr) = '\0';

 freeExec(ptr);
}
1322
1323 #endif // !USE_LIBFFI_FOR_ADJUSTORS