Eliminate atomic_inc_by and instead modify atomic_inc.
[ghc.git] / rts / Adjustor.c
1 /* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
3 *
4 * Copyright (c) 1998.
5 *
6 * ---------------------------------------------------------------------------*/
7
8 /* A little bit of background...
9
10 An adjustor thunk is a dynamically allocated code snippet that allows
11 Haskell closures to be viewed as C function pointers.
12
13 Stable pointers provide a way for the outside world to get access to,
14 and evaluate, Haskell heap objects, with the RTS providing a small
15 range of ops for doing so. So, assuming we've got a stable pointer in
16 our hand in C, we can jump into the Haskell world and evaluate a callback
17 procedure, say. This works OK in some cases where callbacks are used, but
18 does require the external code to know about stable pointers and how to deal
19 with them. We'd like to hide the Haskell-nature of a callback and have it
20 be invoked just like any other C function pointer.
21
22 Enter adjustor thunks. An adjustor thunk is a little piece of code
23 that's generated on-the-fly (one per Haskell closure being exported)
24 that, when entered using some 'universal' calling convention (e.g., the
25 C calling convention on platform X), pushes an implicit stable pointer
26 (to the Haskell callback) before calling another (static) C function stub
27 which takes care of entering the Haskell code via its stable pointer.
28
29 An adjustor thunk is allocated on the C heap, and is called from within
30 Haskell just before handing out the function pointer to the Haskell (IO)
31 action. User code should never have to invoke it explicitly.
32
33 An adjustor thunk differs from a C function pointer in one respect: when
34 the code is through with it, it has to be freed in order to release Haskell
35 and C resources. Failure to do so will result in memory leaks on both the C and
36 Haskell side.
37 */
38
39 #include "PosixSource.h"
40 #include "Rts.h"
41
42 #include "RtsUtils.h"
43 #include "Stable.h"
44
45 #if defined(USE_LIBFFI_FOR_ADJUSTORS)
46 #include "ffi.h"
47 #include <string.h>
48 #endif
49
50 #if defined(i386_HOST_ARCH)
51 extern void adjustorCode(void);
52 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
53 // from AdjustorAsm.s
54 // not declared as a function so that AIX-style
55 // fundescs can never get in the way.
56 extern void *adjustorCode;
57 #endif
58
59 #if defined(USE_LIBFFI_FOR_ADJUSTORS)
60 void
61 freeHaskellFunctionPtr(void* ptr)
62 {
63 ffi_closure *cl;
64
65 cl = (ffi_closure*)ptr;
66 freeStablePtr(cl->user_data);
67 stgFree(cl->cif->arg_types);
68 stgFree(cl->cif);
69 freeExec(cl);
70 }
71
72 static ffi_type * char_to_ffi_type(char c)
73 {
74 switch (c) {
75 case 'v': return &ffi_type_void;
76 case 'f': return &ffi_type_float;
77 case 'd': return &ffi_type_double;
78 case 'L': return &ffi_type_sint64;
79 case 'l': return &ffi_type_uint64;
80 case 'W': return &ffi_type_sint32;
81 case 'w': return &ffi_type_uint32;
82 case 'S': return &ffi_type_sint16;
83 case 's': return &ffi_type_uint16;
84 case 'B': return &ffi_type_sint8;
85 case 'b': return &ffi_type_uint8;
86 case 'p': return &ffi_type_pointer;
87 default: barf("char_to_ffi_type: unknown type '%c'", c);
88 }
89 }
90
/* Build an adjustor via libffi: allocate a ffi_cif describing the
 * call signature encoded in typeString (first character = result type,
 * one character per argument thereafter; see char_to_ffi_type), then
 * prepare an executable closure that calls wptr with hptr as user
 * data.  Returns the executable code address to hand out as a C
 * function pointer; it must eventually be released with
 * freeHaskellFunctionPtr().
 */
void*
createAdjustor (int cconv,
                StgStablePtr hptr,
                StgFunPtr wptr,
                char *typeString)
{
    ffi_cif *cif;
    ffi_type **arg_types;
    nat n_args, i;
    ffi_type *result_type;
    ffi_closure *cl;
    int r, abi;
    void *code;

    // first character of typeString is the result, the rest are arguments
    n_args = strlen(typeString) - 1;
    cif = stgMallocBytes(sizeof(ffi_cif), "createAdjustor");
    arg_types = stgMallocBytes(n_args * sizeof(ffi_type*), "createAdjustor");

    result_type = char_to_ffi_type(typeString[0]);
    for (i=0; i < n_args; i++) {
        arg_types[i] = char_to_ffi_type(typeString[i+1]);
    }
    // cconv 0 = stdcall (Windows/x86 only), 1 = ccall; barf (noreturn)
    // on anything else, so 'abi' is always assigned before use.
    switch (cconv) {
#if defined(mingw32_HOST_OS) && defined(i386_HOST_ARCH)
    case 0: /* stdcall */
        abi = FFI_STDCALL;
        break;
#endif
    case 1: /* ccall */
        abi = FFI_DEFAULT_ABI;
        break;
    default:
        barf("createAdjustor: convention %d not supported on this platform", cconv);
    }

    r = ffi_prep_cif(cif, abi, n_args, result_type, arg_types);
    if (r != FFI_OK) barf("ffi_prep_cif failed: %d", r);

    // allocateExec yields a writable pointer (cl) plus the executable
    // alias (code) that outside code will actually jump to.
    cl = allocateExec(sizeof(ffi_closure), &code);
    if (cl == NULL) {
        barf("createAdjustor: failed to allocate memory");
    }

    r = ffi_prep_closure_loc(cl, cif, (void*)wptr, hptr/*userdata*/, code);
    if (r != FFI_OK) barf("ffi_prep_closure_loc failed: %d", r);

    return (void*)code;
}
139
140 #else // To end of file...
141
142 #if defined(_WIN32)
143 #include <windows.h>
144 #endif
145
146 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
147 #include <string.h>
148 #endif
149
150 #ifdef LEADING_UNDERSCORE
151 #define UNDERSCORE "_"
152 #else
153 #define UNDERSCORE ""
154 #endif
155
156 #if defined(x86_64_HOST_ARCH)
157 /*
158 Now here's something obscure for you:
159
160 When generating an adjustor thunk that uses the C calling
161 convention, we have to make sure that the thunk kicks off
162 the process of jumping into Haskell with a tail jump. Why?
163 Because as a result of jumping in into Haskell we may end
164 up freeing the very adjustor thunk we came from using
165 freeHaskellFunctionPtr(). Hence, we better not return to
166 the adjustor code on our way out, since it could by then
167 point to junk.
168
169 The fix is readily at hand, just include the opcodes
170 for the C stack fixup code that we need to perform when
171 returning in some static piece of memory and arrange
172 to return to it before tail jumping from the adjustor thunk.
173 */
/* Static return-trampoline for x86_64 adjustors.  An adjustor that had
 * to spill an argument pushes the address of obscure_ccall_ret_code as
 * the return address before tail-jumping into Haskell, so control
 * returns here — not to the adjustor, which may have been freed by
 * then (see the comment above).  This code undoes the stack adjustment
 * and returns to the original caller. */
static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
{
  __asm__ (
   ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
   UNDERSCORE "obscure_ccall_ret_code:\n\t"
   "addq $0x8, %rsp\n\t"  /* drop the spilled argument word */
#if defined(mingw32_HOST_OS)
   /* On Win64, we had to put the original return address after the
      arg 1-4 spill slots, so now we have to move it back */
   "movq 0x20(%rsp), %rcx\n"
   "movq %rcx, (%rsp)\n"
#endif
   "ret"
  );
}
extern void obscure_ccall_ret_code(void);
190 #endif
191
192 #if defined(alpha_HOST_ARCH)
193 /* To get the definition of PAL_imb: */
194 # if defined(linux_HOST_OS)
195 # include <asm/pal.h>
196 # else
197 # include <machine/pal.h>
198 # endif
199 #endif
200
201 #if defined(ia64_HOST_ARCH)
202
203 /* Layout of a function descriptor */
204 typedef struct _IA64FunDesc {
205 StgWord64 ip;
206 StgWord64 gp;
207 } IA64FunDesc;
208
209 static void *
210 stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
211 {
212 StgArrWords* arr;
213 nat data_size_in_words, total_size_in_words;
214
215 /* round up to a whole number of words */
216 data_size_in_words = ROUNDUP_BYTES_TO_WDS(size_in_bytes);
217 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
218
219 /* allocate and fill it in */
220 arr = (StgArrWords *)allocate(total_size_in_words);
221 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, size_in_bytes);
222
223 /* obtain a stable ptr */
224 *stable = getStablePtr((StgPtr)arr);
225
226 /* and return a ptr to the goods inside the array */
227 return(&(arr->payload));
228 }
229 #endif
230
231 #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
/* Return trampoline for powerpc-linux adjustors: pop the stack frame
 * the adjustor built (by following the back-chain word at 0(r1)),
 * restore the link register the adjustor saved at 4(r1), and return
 * to the original caller. */
__asm__("obscure_ccall_ret_code:\n\t"
        "lwz 1,0(1)\n\t"   /* r1 := back chain — tear down our frame */
        "lwz 0,4(1)\n\t"   /* reload the saved return address */
        "mtlr 0\n\t"
        "blr");
extern void obscure_ccall_ret_code(void);
238 #endif
239
240 #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
241 #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
242
243 /* !!! !!! WARNING: !!! !!!
244 * This structure is accessed from AdjustorAsm.s
245 * Any changes here have to be mirrored in the offsets there.
246 */
247
/* Per-adjustor stub record for the AIX-style PowerPC ABIs.  On Darwin
 * the leading fields are machine instructions (filled in at runtime)
 * that load this struct's own address into r2 and jump to adjustorCode;
 * on fundesc ABIs the struct doubles as the function descriptor itself.
 * WARNING: field offsets are mirrored in AdjustorAsm.s (see above). */
typedef struct AdjustorStub {
#if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
    /* five instructions: lis/ori build the stub address in r2,
       lwz fetches 'code', mtctr/bctr jump to it */
    unsigned lis;
    unsigned ori;
    unsigned lwz;
    unsigned mtctr;
    unsigned bctr;
    StgFunPtr code;
#elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
    /* powerpc64-darwin: just guessing that it won't use fundescs. */
    unsigned lis;
    unsigned ori;
    unsigned rldimi;
    unsigned oris;
    unsigned ori2;
    unsigned lwz;
    unsigned mtctr;
    unsigned bctr;
    StgFunPtr code;
#else
    /* fundesc-based ABIs: the stub itself is the function descriptor */
#define FUNDESCS
    StgFunPtr code;
    struct AdjustorStub *toc;
    void *env;
#endif
    StgStablePtr hptr;          /* stable pointer to the Haskell closure */
    StgFunPtr wptr;             /* C wrapper function to tail-call */
    StgInt negative_framesize;
    StgInt extrawords_plus_one;
} AdjustorStub;
280
281 #endif
282 #endif
283
284 #if defined(i386_HOST_ARCH)
285
286 /* !!! !!! WARNING: !!! !!!
287 * This structure is accessed from AdjustorAsm.s
288 * Any changes here have to be mirrored in the offsets there.
289 */
290
/* Per-adjustor stub record for i386.  'call' holds machine code
 * written by createAdjustor: 0xe8 plus a 32-bit relative displacement,
 * i.e. a `call adjustorCode` (the real work happens in AdjustorAsm.S).
 * WARNING: field offsets are mirrored in AdjustorAsm.s (see above). */
typedef struct AdjustorStub {
    unsigned char call[8];      /* relative call into adjustorCode */
    StgStablePtr hptr;          /* stable pointer to the Haskell closure */
    StgFunPtr wptr;             /* C wrapper function to enter Haskell */
    StgInt frame_size;          /* stack frame size the adjustor builds
                                   (16-byte-alignment math in createAdjustor) */
    StgInt argument_size;       /* argument block size, in words */
} AdjustorStub;
298 #endif
299
300 #if defined(i386_HOST_ARCH) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
/* Compute the size, in machine words, of the argument block described
 * by typeString (one character per argument; see char_to_ffi_type for
 * the encoding).  On 32-bit platforms a double ('d') or 64-bit integer
 * ('l'/'L') occupies two words; every other type occupies one word,
 * as does everything on 64-bit platforms.
 */
static int totalArgumentSize(char *typeString)
{
    int words = 0;
    char t;

    while ((t = *typeString++) != '\0')
    {
        int two_word_type = (t == 'd' || t == 'l' || t == 'L');
        words += (two_word_type && sizeof(void *) == 4) ? 2 : 1;
    }
    return words;
}
326 #endif
327
328 void*
329 createAdjustor(int cconv, StgStablePtr hptr,
330 StgFunPtr wptr,
331 char *typeString
332 #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
333 STG_UNUSED
334 #endif
335 )
336 {
337 void *adjustor = NULL;
338 void *code = NULL;
339
340 switch (cconv)
341 {
342 case 0: /* _stdcall */
343 #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
344 /* Magic constant computed by inspecting the code length of
345 the following assembly language snippet
346 (offset and machine code prefixed):
347
348 <0>: 58 popl %eax # temp. remove ret addr..
349 <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
350 # hold a StgStablePtr
351 <6>: 50 pushl %eax # put back ret. addr
352 <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
353 <c>: ff e0 jmp %eax # and jump to it.
354 # the callee cleans up the stack
355 */
356 adjustor = allocateExec(14,&code);
357 {
358 unsigned char *const adj_code = (unsigned char *)adjustor;
359 adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
360
361 adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
362 *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
363
364 adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
365
366 adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
367 *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
368
369 adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
370 adj_code[0x0d] = (unsigned char)0xe0;
371 }
372 #endif
373 break;
374
375 case 1: /* _ccall */
376 #if defined(i386_HOST_ARCH)
377 {
378 /*
379 Most of the trickiness here is due to the need to keep the
380 stack pointer 16-byte aligned (see #5250). That means we
381 can't just push another argument on the stack and call the
382 wrapper, we may have to shuffle the whole argument block.
383
384 We offload most of the work to AdjustorAsm.S.
385 */
386 AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
387 adjustor = adjustorStub;
388
389 int sz = totalArgumentSize(typeString);
390
391 adjustorStub->call[0] = 0xe8;
392 *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)code + 5);
393 adjustorStub->hptr = hptr;
394 adjustorStub->wptr = wptr;
395
396 // The adjustor puts the following things on the stack:
397 // 1.) %ebp link
398 // 2.) padding and (a copy of) the arguments
399 // 3.) a dummy argument
400 // 4.) hptr
401 // 5.) return address (for returning to the adjustor)
402 // All these have to add up to a multiple of 16.
403
404 // first, include everything in frame_size
405 adjustorStub->frame_size = sz * 4 + 16;
406 // align to 16 bytes
407 adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
408 // only count 2.) and 3.) as part of frame_size
409 adjustorStub->frame_size -= 12;
410 adjustorStub->argument_size = sz;
411 }
412
413 #elif defined(x86_64_HOST_ARCH)
414
415 # if defined(mingw32_HOST_OS)
416 /*
417 stack at call:
418 argn
419 ...
420 arg5
421 return address
422 %rcx,%rdx,%r8,%r9 = arg1..arg4
423
424 if there are <4 integer args, then we can just push the
425 StablePtr into %rcx and shuffle the other args up.
426
427 If there are >=4 integer args, then we have to flush one arg
428 to the stack, and arrange to adjust the stack ptr on return.
429 The stack will be rearranged to this:
430
431 argn
432 ...
433 arg5
434 return address *** <-- dummy arg in stub fn.
435 arg4
436 obscure_ccall_ret_code
437
438 This unfortunately means that the type of the stub function
439 must have a dummy argument for the original return address
440 pointer inserted just after the 4th integer argument.
441
442 Code for the simple case:
443
444 0: 4d 89 c1 mov %r8,%r9
445 3: 49 89 d0 mov %rdx,%r8
446 6: 48 89 ca mov %rcx,%rdx
447 9: f2 0f 10 da movsd %xmm2,%xmm3
448 d: f2 0f 10 d1 movsd %xmm1,%xmm2
449 11: f2 0f 10 c8 movsd %xmm0,%xmm1
450 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28>
451 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30>
452 22: 90 nop
453 [...]
454
455
456 And the version for >=4 integer arguments:
457
458 [we want to push the 4th argument (either %r9 or %xmm3, depending on
459 whether it is a floating arg or not) and the return address onto the
460 stack. However, slots 1-4 are reserved for code we call to spill its
461 args 1-4 into, so we can't just push them onto the bottom of the stack.
462 So first put the 4th argument onto the stack, above what will be the
463 spill slots.]
464 0: 48 83 ec 08 sub $0x8,%rsp
465 [if non-floating arg, then do this:]
466 4: 90 nop
467 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp)
468 [else if floating arg then do this:]
469 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp)
470 [end if]
471 [Now push the new return address onto the stack]
472 a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40>
473 [But the old return address has been moved up into a spill slot, so
474 we need to move it above them]
475 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9
476 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp)
477 [Now we do the normal register shuffle-up etc]
478 1a: 4d 89 c1 mov %r8,%r9
479 1d: 49 89 d0 mov %rdx,%r8
480 20: 48 89 ca mov %rcx,%rdx
481 23: f2 0f 10 da movsd %xmm2,%xmm3
482 27: f2 0f 10 d1 movsd %xmm1,%xmm2
483 2b: f2 0f 10 c8 movsd %xmm0,%xmm1
484 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48>
485 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50>
486 3c: 90 nop
487 3d: 90 nop
488 3e: 90 nop
489 3f: 90 nop
490 [...]
491
492 */
493 {
494 StgWord8 *adj_code;
495
496 // determine whether we have 4 or more integer arguments,
497 // and therefore need to flush one to the stack.
498 if ((typeString[0] == '\0') ||
499 (typeString[1] == '\0') ||
500 (typeString[2] == '\0') ||
501 (typeString[3] == '\0')) {
502
503 adjustor = allocateExec(0x38,&code);
504 adj_code = (StgWord8*)adjustor;
505
506 *(StgInt32 *)adj_code = 0x49c1894d;
507 *(StgInt32 *)(adj_code+0x4) = 0x8948d089;
508 *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca;
509 *(StgInt32 *)(adj_code+0xc) = 0x100ff2da;
510 *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1;
511 *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8;
512 *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
513
514 *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff;
515 *(StgInt32 *)(adj_code+0x20) = 0x00000000;
516 *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr;
517 *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr;
518 }
519 else
520 {
521 int fourthFloating;
522
523 fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd');
524 adjustor = allocateExec(0x58,&code);
525 adj_code = (StgWord8*)adjustor;
526 *(StgInt32 *)adj_code = 0x08ec8348;
527 *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2
528 : 0x4c894c90;
529 *(StgInt32 *)(adj_code+0x8) = 0x35ff2024;
530 *(StgInt32 *)(adj_code+0xc) = 0x00000030;
531 *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c;
532 *(StgInt32 *)(adj_code+0x14) = 0x4c894c10;
533 *(StgInt32 *)(adj_code+0x18) = 0x894d3024;
534 *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1;
535 *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948;
536 *(StgInt32 *)(adj_code+0x24) = 0xf2da100f;
537 *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f;
538 *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f;
539 *(StgInt32 *)(adj_code+0x30) = 0x00120d8b;
540 *(StgInt32 *)(adj_code+0x34) = 0x25ff0000;
541 *(StgInt32 *)(adj_code+0x38) = 0x00000014;
542 *(StgInt32 *)(adj_code+0x3c) = 0x90909090;
543 *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code;
544 *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr;
545 *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr;
546 }
547 }
548
549 # else
550 /*
551 stack at call:
552 argn
553 ...
554 arg7
555 return address
556 %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg1..arg6
557
558 if there are <6 integer args, then we can just push the
559 StablePtr into %edi and shuffle the other args up.
560
561 If there are >=6 integer args, then we have to flush one arg
562 to the stack, and arrange to adjust the stack ptr on return.
563 The stack will be rearranged to this:
564
565 argn
566 ...
567 arg7
568 return address *** <-- dummy arg in stub fn.
569 arg6
570 obscure_ccall_ret_code
571
572 This unfortunately means that the type of the stub function
573 must have a dummy argument for the original return address
574 pointer inserted just after the 6th integer argument.
575
576 Code for the simple case:
577
578 0: 4d 89 c1 mov %r8,%r9
579 3: 49 89 c8 mov %rcx,%r8
580 6: 48 89 d1 mov %rdx,%rcx
581 9: 48 89 f2 mov %rsi,%rdx
582 c: 48 89 fe mov %rdi,%rsi
583 f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
584 16: ff 25 0c 00 00 00 jmpq *12(%rip)
585 ...
586 20: .quad 0 # aligned on 8-byte boundary
587 28: .quad 0 # aligned on 8-byte boundary
588
589
590 And the version for >=6 integer arguments:
591
592 0: 41 51 push %r9
593 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28>
594 8: 4d 89 c1 mov %r8,%r9
595 b: 49 89 c8 mov %rcx,%r8
596 e: 48 89 d1 mov %rdx,%rcx
597 11: 48 89 f2 mov %rsi,%rdx
598 14: 48 89 fe mov %rdi,%rsi
599 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30>
600 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38>
601 ...
602 28: .quad 0 # aligned on 8-byte boundary
603 30: .quad 0 # aligned on 8-byte boundary
604 38: .quad 0 # aligned on 8-byte boundary
605 */
606
607 {
608 int i = 0;
609 char *c;
610 StgWord8 *adj_code;
611
612 // determine whether we have 6 or more integer arguments,
613 // and therefore need to flush one to the stack.
614 for (c = typeString; *c != '\0'; c++) {
615 if (*c != 'f' && *c != 'd') i++;
616 if (i == 6) break;
617 }
618
619 if (i < 6) {
620 adjustor = allocateExec(0x30,&code);
621 adj_code = (StgWord8*)adjustor;
622
623 *(StgInt32 *)adj_code = 0x49c1894d;
624 *(StgInt32 *)(adj_code+0x4) = 0x8948c889;
625 *(StgInt32 *)(adj_code+0x8) = 0xf28948d1;
626 *(StgInt32 *)(adj_code+0xc) = 0x48fe8948;
627 *(StgInt32 *)(adj_code+0x10) = 0x000a3d8b;
628 *(StgInt32 *)(adj_code+0x14) = 0x25ff0000;
629 *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
630 *(StgInt64 *)(adj_code+0x20) = (StgInt64)hptr;
631 *(StgInt64 *)(adj_code+0x28) = (StgInt64)wptr;
632 }
633 else
634 {
635 adjustor = allocateExec(0x40,&code);
636 adj_code = (StgWord8*)adjustor;
637
638 *(StgInt32 *)adj_code = 0x35ff5141;
639 *(StgInt32 *)(adj_code+0x4) = 0x00000020;
640 *(StgInt32 *)(adj_code+0x8) = 0x49c1894d;
641 *(StgInt32 *)(adj_code+0xc) = 0x8948c889;
642 *(StgInt32 *)(adj_code+0x10) = 0xf28948d1;
643 *(StgInt32 *)(adj_code+0x14) = 0x48fe8948;
644 *(StgInt32 *)(adj_code+0x18) = 0x00123d8b;
645 *(StgInt32 *)(adj_code+0x1c) = 0x25ff0000;
646 *(StgInt32 *)(adj_code+0x20) = 0x00000014;
647
648 *(StgInt64 *)(adj_code+0x28) = (StgInt64)obscure_ccall_ret_code;
649 *(StgInt64 *)(adj_code+0x30) = (StgInt64)hptr;
650 *(StgInt64 *)(adj_code+0x38) = (StgInt64)wptr;
651 }
652 }
653 # endif
654
655
656 #elif defined(sparc_HOST_ARCH)
657 /* Magic constant computed by inspecting the code length of the following
658 assembly language snippet (offset and machine code prefixed):
659
660 <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
661 <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
662 <08>: D823A05C st %o4, [%sp + 92]
663 <0C>: 9A10000B mov %o3, %o5
664 <10>: 9810000A mov %o2, %o4
665 <14>: 96100009 mov %o1, %o3
666 <18>: 94100008 mov %o0, %o2
667 <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
668 <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
669 <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
670 <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
671 <2C> 00000000 ! place for getting hptr back easily
672
673 ccall'ing on SPARC is easy, because we are quite lucky to push a
674 multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
675 existing arguments (note that %sp must stay double-word aligned at
676 all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
677 To do this, we extend the *caller's* stack frame by 2 words and shift
678 the output registers used for argument passing (%o0 - %o5, we are a *leaf*
679 procedure because of the tail-jump) by 2 positions. This makes room in
680 %o0 and %o1 for the additinal arguments, namely hptr and a dummy (used
681 for destination addr of jump on SPARC, return address on x86, ...). This
682 shouldn't cause any problems for a C-like caller: alloca is implemented
683 similarly, and local variables should be accessed via %fp, not %sp. In a
684 nutshell: This should work! (Famous last words! :-)
685 */
686 adjustor = allocateExec(4*(11+1),&code);
687 {
688 unsigned long *const adj_code = (unsigned long *)adjustor;
689
690 adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
691 adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
692 adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
693 adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
694 adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
695 adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
696 adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
697 adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
698 adj_code[ 7] |= ((unsigned long)wptr) >> 10;
699 adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
700 adj_code[ 8] |= ((unsigned long)hptr) >> 10;
701 adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
702 adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
703 adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
704 adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
705
706 adj_code[11] = (unsigned long)hptr;
707
708 /* flush cache */
709 asm("flush %0" : : "r" (adj_code ));
710 asm("flush %0" : : "r" (adj_code + 2));
711 asm("flush %0" : : "r" (adj_code + 4));
712 asm("flush %0" : : "r" (adj_code + 6));
713 asm("flush %0" : : "r" (adj_code + 10));
714
715 /* max. 5 instructions latency, and we need at >= 1 for returning */
716 asm("nop");
717 asm("nop");
718 asm("nop");
719 asm("nop");
720 }
721 #elif defined(alpha_HOST_ARCH)
722 /* Magic constant computed by inspecting the code length of
723 the following assembly language snippet
724 (offset and machine code prefixed; note that the machine code
725 shown is longwords stored in little-endian order):
726
727 <00>: 46520414 mov a2, a4
728 <04>: 46100412 mov a0, a2
729 <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
730 <0c>: 46730415 mov a3, a5
731 <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
732 <14>: 46310413 mov a1, a3
733 <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
734 <1c>: 00000000 # padding for alignment
735 <20>: [8 bytes for hptr quadword]
736 <28>: [8 bytes for wptr quadword]
737
738 The "computed" jump at <08> above is really a jump to a fixed
739 location. Accordingly, we place an always-correct hint in the
740 jump instruction, namely the address offset from <0c> to wptr,
741 divided by 4, taking the lowest 14 bits.
742
743 We only support passing 4 or fewer argument words, for the same
744 reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
745 On the Alpha the first 6 integer arguments are in a0 through a5,
746 and the rest on the stack. Hence we want to shuffle the original
747 caller's arguments by two.
748
749 On the Alpha the calling convention is so complex and dependent
750 on the callee's signature -- for example, the stack pointer has
751 to be a multiple of 16 -- that it seems impossible to me [ccshan]
752 to handle the general case correctly without changing how the
753 adjustor is called from C. For now, our solution of shuffling
754 registers only and ignoring the stack only works if the original
755 caller passed 4 or fewer argument words.
756
757 TODO: Depending on how much allocation overhead stgMallocBytes uses for
758 header information (more precisely, if the overhead is no more than
759 4 bytes), we should move the first three instructions above down by
760 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
761 */
762 ASSERT(((StgWord64)wptr & 3) == 0);
763 adjustor = allocateExec(48,&code);
764 {
765 StgWord64 *const code = (StgWord64 *)adjustor;
766
767 code[0] = 0x4610041246520414L;
768 code[1] = 0x46730415a61b0020L;
769 code[2] = 0x46310413a77b0028L;
770 code[3] = 0x000000006bfb0000L
771 | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
772
773 code[4] = (StgWord64)hptr;
774 code[5] = (StgWord64)wptr;
775
776 /* Ensure that instruction cache is consistent with our new code */
777 __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
778 }
779 #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
780
781 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
782 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
783 {
784 /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
785 We need to calculate all the details of the stack frame layout,
786 taking into account the types of all the arguments, and then
787 generate code on the fly. */
788
789 int src_gpr = 3, dst_gpr = 5;
790 int fpr = 3;
791 int src_offset = 0, dst_offset = 0;
792 int n = strlen(typeString),i;
793 int src_locs[n], dst_locs[n];
794 int frameSize;
795 unsigned *code;
796
797 /* Step 1:
798 Calculate where the arguments should go.
799 src_locs[] will contain the locations of the arguments in the
800 original stack frame passed to the adjustor.
801 dst_locs[] will contain the locations of the arguments after the
802 adjustor runs, on entry to the wrapper proc pointed to by wptr.
803
804 This algorithm is based on the one described on page 3-19 of the
805 System V ABI PowerPC Processor Supplement.
806 */
807 for(i=0;typeString[i];i++)
808 {
809 char t = typeString[i];
810 if((t == 'f' || t == 'd') && fpr <= 8)
811 src_locs[i] = dst_locs[i] = -32-(fpr++);
812 else
813 {
814 if((t == 'l' || t == 'L') && src_gpr <= 9)
815 {
816 if((src_gpr & 1) == 0)
817 src_gpr++;
818 src_locs[i] = -src_gpr;
819 src_gpr += 2;
820 }
821 else if((t == 'w' || t == 'W') && src_gpr <= 10)
822 {
823 src_locs[i] = -(src_gpr++);
824 }
825 else
826 {
827 if(t == 'l' || t == 'L' || t == 'd')
828 {
829 if(src_offset % 8)
830 src_offset += 4;
831 }
832 src_locs[i] = src_offset;
833 src_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
834 }
835
836 if((t == 'l' || t == 'L') && dst_gpr <= 9)
837 {
838 if((dst_gpr & 1) == 0)
839 dst_gpr++;
840 dst_locs[i] = -dst_gpr;
841 dst_gpr += 2;
842 }
843 else if((t == 'w' || t == 'W') && dst_gpr <= 10)
844 {
845 dst_locs[i] = -(dst_gpr++);
846 }
847 else
848 {
849 if(t == 'l' || t == 'L' || t == 'd')
850 {
851 if(dst_offset % 8)
852 dst_offset += 4;
853 }
854 dst_locs[i] = dst_offset;
855 dst_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
856 }
857 }
858 }
859
860 frameSize = dst_offset + 8;
861 frameSize = (frameSize+15) & ~0xF;
862
863 /* Step 2:
864 Build the adjustor.
865 */
866 // allocate space for at most 4 insns per parameter
867 // plus 14 more instructions.
868 adjustor = allocateExec(4 * (4*n + 14),&code);
869 code = (unsigned*)adjustor;
870
871 *code++ = 0x48000008; // b *+8
872 // * Put the hptr in a place where freeHaskellFunctionPtr
873 // can get at it.
874 *code++ = (unsigned) hptr;
875
876 // * save the link register
877 *code++ = 0x7c0802a6; // mflr r0;
878 *code++ = 0x90010004; // stw r0, 4(r1);
879 // * and build a new stack frame
880 *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
881
882 // * now generate instructions to copy arguments
883 // from the old stack frame into the new stack frame.
884 for(i=n-1;i>=0;i--)
885 {
886 if(src_locs[i] < -32)
887 ASSERT(dst_locs[i] == src_locs[i]);
888 else if(src_locs[i] < 0)
889 {
890 // source in GPR.
891 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
892 if(dst_locs[i] < 0)
893 {
894 ASSERT(dst_locs[i] > -32);
895 // dst is in GPR, too.
896
897 if(typeString[i] == 'l' || typeString[i] == 'L')
898 {
899 // mr dst+1, src+1
900 *code++ = 0x7c000378
901 | ((-dst_locs[i]+1) << 16)
902 | ((-src_locs[i]+1) << 11)
903 | ((-src_locs[i]+1) << 21);
904 }
905 // mr dst, src
906 *code++ = 0x7c000378
907 | ((-dst_locs[i]) << 16)
908 | ((-src_locs[i]) << 11)
909 | ((-src_locs[i]) << 21);
910 }
911 else
912 {
913 if(typeString[i] == 'l' || typeString[i] == 'L')
914 {
915 // stw src+1, dst_offset+4(r1)
916 *code++ = 0x90010000
917 | ((-src_locs[i]+1) << 21)
918 | (dst_locs[i] + 4);
919 }
920
921 // stw src, dst_offset(r1)
922 *code++ = 0x90010000
923 | ((-src_locs[i]) << 21)
924 | (dst_locs[i] + 8);
925 }
926 }
927 else
928 {
929 ASSERT(dst_locs[i] >= 0);
930 ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
931
932 if(typeString[i] == 'l' || typeString[i] == 'L')
933 {
934 // lwz r0, src_offset(r1)
935 *code++ = 0x80010000
936 | (src_locs[i] + frameSize + 8 + 4);
937 // stw r0, dst_offset(r1)
938 *code++ = 0x90010000
939 | (dst_locs[i] + 8 + 4);
940 }
941 // lwz r0, src_offset(r1)
942 *code++ = 0x80010000
943 | (src_locs[i] + frameSize + 8);
944 // stw r0, dst_offset(r1)
945 *code++ = 0x90010000
946 | (dst_locs[i] + 8);
947 }
948 }
949
950 // * hptr will be the new first argument.
951 // lis r3, hi(hptr)
952 *code++ = OP_HI(0x3c60, hptr);
953 // ori r3,r3,lo(hptr)
954 *code++ = OP_LO(0x6063, hptr);
955
956 // * we need to return to a piece of code
957 // which will tear down the stack frame.
958 // lis r11,hi(obscure_ccall_ret_code)
959 *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
960 // ori r11,r11,lo(obscure_ccall_ret_code)
961 *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
962 // mtlr r11
963 *code++ = 0x7d6803a6;
964
965 // * jump to wptr
966 // lis r11,hi(wptr)
967 *code++ = OP_HI(0x3d60, wptr);
968 // ori r11,r11,lo(wptr)
969 *code++ = OP_LO(0x616b, wptr);
970 // mtctr r11
971 *code++ = 0x7d6903a6;
972 // bctr
973 *code++ = 0x4e800420;
974
975 // Flush the Instruction cache:
976 {
977 unsigned *p = adjustor;
978 while(p < code)
979 {
980 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
981 : : "r" (p));
982 p++;
983 }
984 __asm__ volatile ("sync\n\tisync");
985 }
986 }
987
988 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
989
990 #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
991 #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
992 {
993 /* The following code applies to all PowerPC and PowerPC64 platforms
994 whose stack layout is based on the AIX ABI.
995
996 Besides (obviously) AIX, this includes
997 Mac OS 9 and BeOS/PPC (may they rest in peace),
998 which use the 32-bit AIX ABI
999 powerpc64-linux,
1000 which uses the 64-bit AIX ABI
1001 and Darwin (Mac OS X),
1002 which uses the same stack layout as AIX,
1003 but no function descriptors.
1004
1005 The actual stack-frame shuffling is implemented out-of-line
1006 in the function adjustorCode, in AdjustorAsm.S.
1007 Here, we set up an AdjustorStub structure, which
1008 is a function descriptor (on platforms that have function
1009 descriptors) or a short piece of stub code (on Darwin) to call
1010 adjustorCode with a pointer to the AdjustorStub struct loaded
1011 into register r2.
1012
1013 One nice thing about this is that there is _no_ code generated at
1014 runtime on the platforms that have function descriptors.
1015 */
1016 AdjustorStub *adjustorStub;
1017 int sz = 0, extra_sz, total_sz;
1018
1019 #ifdef FUNDESCS
1020 adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
1021 #else
1022 adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
1023 #endif
1024 adjustor = adjustorStub;
1025
1026 adjustorStub->code = (void*) &adjustorCode;
1027
1028 #ifdef FUNDESCS
1029 // function descriptors are a cool idea.
1030 // We don't need to generate any code at runtime.
1031 adjustorStub->toc = adjustorStub;
1032 #else
1033
1034 // no function descriptors :-(
1035 // We need to do things "by hand".
1036 #if defined(powerpc_HOST_ARCH)
1037 // lis r2, hi(adjustorStub)
1038 adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
1039 // ori r2, r2, lo(adjustorStub)
1040 adjustorStub->ori = OP_LO(0x6042, adjustorStub);
1041 // lwz r0, code(r2)
1042 adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
1043 - (char*)adjustorStub);
1044 // mtctr r0
1045 adjustorStub->mtctr = 0x7c0903a6;
1046 // bctr
1047 adjustorStub->bctr = 0x4e800420;
1048 #else
1049 barf("adjustor creation not supported on this platform");
1050 #endif
1051
1052 // Flush the Instruction cache:
1053 {
1054 int n = sizeof(AdjustorStub)/sizeof(unsigned);
1055 unsigned *p = (unsigned*)adjustor;
1056 while(n--)
1057 {
1058 __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
1059 : : "r" (p));
1060 p++;
1061 }
1062 __asm__ volatile ("sync\n\tisync");
1063 }
1064 #endif
1065
1066 // Calculate the size of the stack frame, in words.
1067 sz = totalArgumentSize(typeString);
1068
1069 // The first eight words of the parameter area
1070 // are just "backing store" for the parameters passed in
1071 // the GPRs. extra_sz is the number of words beyond those first
1072 // 8 words.
1073 extra_sz = sz - 8;
1074 if(extra_sz < 0)
1075 extra_sz = 0;
1076
1077 // Calculate the total size of the stack frame.
1078 total_sz = (6 /* linkage area */
1079 + 8 /* minimum parameter area */
1080 + 2 /* two extra arguments */
1081 + extra_sz)*sizeof(StgWord);
1082
1083 // align to 16 bytes.
1084 // AIX only requires 8 bytes, but who cares?
1085 total_sz = (total_sz+15) & ~0xF;
1086
1087 // Fill in the information that adjustorCode in AdjustorAsm.S
1088 // will use to create a new stack frame with the additional args.
1089 adjustorStub->hptr = hptr;
1090 adjustorStub->wptr = wptr;
1091 adjustorStub->negative_framesize = -total_sz;
1092 adjustorStub->extrawords_plus_one = extra_sz + 1;
1093 }
1094
1095 #elif defined(ia64_HOST_ARCH)
1096 /*
1097 Up to 8 inputs are passed in registers. We flush the last two inputs to
1098 the stack, initially into the 16-byte scratch region left by the caller.
1099 We then shuffle the others along by 4 (taking 2 registers for ourselves
1100 to save return address and previous function state - we need to come back
1101 here on the way out to restore the stack, so this is a real function
1102 rather than just a trampoline).
1103
1104 The function descriptor we create contains the gp of the target function
1105 so gp is already loaded correctly.
1106
1107 [MLX] alloc r16=ar.pfs,10,2,0
1108 movl r17=wptr
1109 [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
1110 mov r41=r37 // out7 = in5 (out3)
1111 mov r40=r36;; // out6 = in4 (out2)
1112 [MII] st8.spill [r12]=r39 // spill in7 (out5)
1113 mov.sptk b6=r17,50
1114 mov r38=r34;; // out4 = in2 (out0)
1115 [MII] mov r39=r35 // out5 = in3 (out1)
1116 mov r37=r33 // out3 = in1 (loc1)
1117 mov r36=r32 // out2 = in0 (loc0)
1118 [MLX] adds r12=-24,r12 // update sp
1119 movl r34=hptr;; // out0 = hptr
1120 [MIB] mov r33=r16 // loc1 = ar.pfs
1121 mov r32=b0 // loc0 = retaddr
1122 br.call.sptk.many b0=b6;;
1123
1124 [MII] adds r12=-16,r12
1125 mov b0=r32
1126 mov.i ar.pfs=r33
1127 [MFB] nop.m 0x0
1128 nop.f 0x0
1129 br.ret.sptk.many b0;;
1130 */
1131
1132 /* These macros distribute a long constant into the two words of an MLX bundle */
1133 #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
1134 #define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
1135 #define MOVL_HIWORD(val) ( (BITS(val,0,7) << 36) \
1136 | (BITS(val,7,9) << 50) \
1137 | (BITS(val,16,5) << 45) \
1138 | (BITS(val,21,1) << 44) \
1139 | (BITS(val,40,23)) \
1140 | (BITS(val,63,1) << 59))
1141
1142 {
1143 StgStablePtr stable;
1144 IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
1145 StgWord64 wcode = wdesc->ip;
1146 IA64FunDesc *fdesc;
1147 StgWord64 *code;
1148
1149 /* we allocate on the Haskell heap since malloc'd memory isn't
1150 * executable - argh */
1151 /* Allocated memory is word-aligned (8 bytes) but functions on ia64
1152 * must be aligned to 16 bytes. We allocate an extra 8 bytes of
1153 * wiggle room so that we can put the code on a 16 byte boundary. */
1154 adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8+8, &stable);
1155
1156 fdesc = (IA64FunDesc *)adjustor;
1157 code = (StgWord64 *)(fdesc + 1);
1158 /* add 8 bytes to code if needed to align to a 16-byte boundary */
1159 if ((StgWord64)code & 15) code++;
1160 fdesc->ip = (StgWord64)code;
1161 fdesc->gp = wdesc->gp;
1162
1163 code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
1164 code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
1165 code[2] = 0x029015d818984001;
1166 code[3] = 0x8401200500420094;
1167 code[4] = 0x886011d8189c0001;
1168 code[5] = 0x84011004c00380c0;
1169 code[6] = 0x0250210046013800;
1170 code[7] = 0x8401000480420084;
1171 code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
1172 code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
1173 code[10] = 0x0200210020010811;
1174 code[11] = 0x1080006800006200;
1175 code[12] = 0x0000210018406000;
1176 code[13] = 0x00aa021000038005;
1177 code[14] = 0x000000010000001d;
1178 code[15] = 0x0084000880000200;
1179
1180 /* save stable pointers in convenient form */
1181 code[16] = (StgWord64)hptr;
1182 code[17] = (StgWord64)stable;
1183 }
1184 #else
1185 barf("adjustor creation not supported on this platform");
1186 #endif
1187 break;
1188
1189 default:
1190 ASSERT(0);
1191 break;
1192 }
1193
1194 /* Have fun! */
1195 return code;
1196 }
1197
1198
/* Release the resources behind an adjustor thunk previously handed out
 * by createAdjustor().
 *
 * ptr: the C function pointer given to foreign code, i.e. the start of
 *      the dynamically generated thunk.
 *
 * Each architecture branch does three things:
 *   1. Sanity-check that ptr really is one of our thunks, by matching
 *      the leading instruction bytes (or, on AIX-ABI PowerPC, the
 *      function-descriptor code field) against what createAdjustor
 *      emits.  On a mismatch it belches an error and returns WITHOUT
 *      freeing anything.
 *   2. Free the StablePtr to the Haskell closure that the thunk
 *      captured, so the closure can be garbage collected.
 *   3. Fall through to freeExec() to release the executable memory
 *      itself (except on ia64, where the thunk lives on the Haskell
 *      heap via a stable pointer, so there is nothing to freeExec —
 *      hence the early return there).
 *
 * NOTE(review): the byte signatures and stable-pointer offsets below
 * (0xe8, 0x58, 0x894d, 0x28/0x20, 0x30, 0x48, 0x9C23A008, ...) must stay
 * in lockstep with the corresponding createAdjustor branches; most of
 * that emission code is elsewhere in this file.
 */
void
freeHaskellFunctionPtr(void* ptr)
{
#if defined(i386_HOST_ARCH)
 /* 0xe8 is the x86 `call rel32` opcode; 0x58 is `pop %eax`.  Which one
    the thunk starts with depends on the calling convention it was
    built for. */
 if ( *(unsigned char*)ptr != 0xe8 &&
      *(unsigned char*)ptr != 0x58 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 if (*(unsigned char*)ptr == 0xe8) { /* Aha, a ccall adjustor! */
     /* ccall thunks are laid out as an AdjustorStub, so the stable
        pointer is a named field rather than at a raw byte offset. */
     freeStablePtr(((AdjustorStub*)ptr)->hptr);
 } else {
    /* stdcall-style thunk: the stable pointer is the immediate of the
       instruction at offset 0x02 — presumably matching the emission
       code; TODO confirm against the i386 createAdjustor branch. */
    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
 }
#elif defined(x86_64_HOST_ARCH)
 /* Note: the braces in this branch are balanced ACROSS the preprocessor
    conditionals — each `#if` arm supplies part of one if/else chain. */
 if ( *(StgWord16 *)ptr == 0x894d ) {
     /* Stable pointer stored as a literal within the thunk; the offset
        differs between the Windows and System V variants. */
     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+
#if defined(mingw32_HOST_OS)
                                                    0x28
#else
                                                    0x20
#endif
                                                        ));
#if !defined(mingw32_HOST_OS)
 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
     /* Alternate System V thunk shape (starts with `push %r9`). */
     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30));
#endif
#if defined(mingw32_HOST_OS)
 } else if ( *(StgWord16 *)ptr == 0x8348 ) {
     /* Alternate Windows thunk shape. */
     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48));
#endif
 } else {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
#elif defined(sparc_HOST_ARCH)
 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first..
    It is stored as the 12th word (index 11) of the thunk. */
 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
#elif defined(alpha_HOST_ARCH)
 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }

 /* Free the stable pointer first..
    It is stored 0x10 bytes into the thunk. */
 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
 /* 0x48000008 is the unconditional branch that the PPC-Linux thunk
    starts with; the stable pointer sits in the following word. */
 if ( *(StgWord*)ptr != 0x48000008 ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((StgStablePtr*)ptr)[1]);
#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
 /* AIX-ABI platforms: the thunk is an AdjustorStub whose code field
    always points at the shared adjustorCode routine (AdjustorAsm.S),
    so that is the recognition test. */
 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 freeStablePtr(((AdjustorStub*)ptr)->hptr);
#elif defined(ia64_HOST_ARCH)
 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
 StgWord64 *code = (StgWord64 *)(fdesc+1);

 /* A genuine adjustor's descriptor ip points just past the descriptor
    itself (see the ia64 createAdjustor branch). */
 if (fdesc->ip != (StgWord64)code) {
   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
   return;
 }
 /* code[16] holds the hptr StablePtr, code[17] the StablePtr keeping
    the thunk's heap allocation alive — free both, then return early:
    the memory came from stgAllocStable, not the exec allocator, so
    freeExec must NOT run. */
 freeStablePtr((StgStablePtr)code[16]);
 freeStablePtr((StgStablePtr)code[17]);
 return;
#else
 ASSERT(0);
#endif
 // Can't write to this memory, it is only executable:
 // *((unsigned char*)ptr) = '\0';

 /* Finally release the executable memory of the thunk itself. */
 freeExec(ptr);
}
1281
1282 #endif // !USE_LIBFFI_FOR_ADJUSTORS