Update README with a new port
[libffi.git] / src / aarch64 / ffi.c
1 /* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
2
3 Permission is hereby granted, free of charge, to any person obtaining
4 a copy of this software and associated documentation files (the
5 ``Software''), to deal in the Software without restriction, including
6 without limitation the rights to use, copy, modify, merge, publish,
7 distribute, sublicense, and/or sell copies of the Software, and to
8 permit persons to whom the Software is furnished to do so, subject to
9 the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
21
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdint.h>
25 #include <fficonfig.h>
26 #include <ffi.h>
27 #include <ffi_common.h>
28 #include "internal.h"
29
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
   all further uses in this file will refer to the 128-bit type.  */
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
/* long double is already a distinct type; verify it has the code the
   rest of this file assumes.  */
# if FFI_TYPE_LONGDOUBLE != 4
# error FFI_TYPE_LONGDOUBLE out of date
# endif
#else
/* long double == double in this configuration; repurpose code 4 so
   the 128-bit case remains distinguishable below.  */
# undef FFI_TYPE_LONGDOUBLE
# define FFI_TYPE_LONGDOUBLE 4
#endif
40
/* One 64-bit half of a vector register, viewable either as a single
   doubleword or as two single words.  */
union _d
{
  UINT64 d;
  UINT32 s[2];
};

/* One 128-bit SIMD/FP register image (two 64-bit halves), kept
   16-byte aligned so it can be moved with q-register loads/stores.  */
struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};

/* Image of the argument-passing register file exchanged with the
   assembly stubs: the vector argument registers followed by the
   general-purpose (x) argument registers.  */
struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};
57
#if defined (__clang__) && defined (__APPLE__)
/* Apple's libSystem routine for invalidating the instruction cache;
   declared here because no public header provides it under clang.  */
extern void sys_icache_invalidate (void *start, size_t len);
#endif

/* Flush the instruction cache over [START, END).  Required after
   writing freshly generated code (the closure trampoline below)
   before it may be executed.  */
static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}
73
74 #if FFI_EXEC_TRAMPOLINE_TABLE
75
76 #ifdef __MACH__
77 #include <mach/vm_param.h>
78 #endif
79
80 #endif
81
82 /* A subroutine of is_vfp_type. Given a structure type, return the type code
83 of the first non-structure element. Recurse for structure elements.
84 Return -1 if the structure is in fact empty, i.e. no nested elements. */
85
86 static int
87 is_hfa0 (const ffi_type *ty)
88 {
89 ffi_type **elements = ty->elements;
90 int i, ret = -1;
91
92 if (elements != NULL)
93 for (i = 0; elements[i]; ++i)
94 {
95 ret = elements[i]->type;
96 if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
97 {
98 ret = is_hfa0 (elements[i]);
99 if (ret < 0)
100 continue;
101 }
102 break;
103 }
104
105 return ret;
106 }
107
108 /* A subroutine of is_vfp_type. Given a structure type, return true if all
109 of the non-structure elements are the same as CANDIDATE. */
110
111 static int
112 is_hfa1 (const ffi_type *ty, int candidate)
113 {
114 ffi_type **elements = ty->elements;
115 int i;
116
117 if (elements != NULL)
118 for (i = 0; elements[i]; ++i)
119 {
120 int t = elements[i]->type;
121 if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
122 {
123 if (!is_hfa1 (elements[i], candidate))
124 return 0;
125 }
126 else if (t != candidate)
127 return 0;
128 }
129
130 return 1;
131 }
132
133 /* Determine if TY may be allocated to the FP registers. This is both an
134 fp scalar type as well as an homogenous floating point aggregate (HFA).
135 That is, a structure consisting of 1 to 4 members of all the same type,
136 where that type is an fp scalar.
137
138 Returns non-zero iff TY is an HFA. The result is the AARCH64_RET_*
139 constant for the type. */
140
141 static int
142 is_vfp_type (const ffi_type *ty)
143 {
144 ffi_type **elements;
145 int candidate, i;
146 size_t size, ele_count;
147
148 /* Quickest tests first. */
149 candidate = ty->type;
150 switch (candidate)
151 {
152 default:
153 return 0;
154 case FFI_TYPE_FLOAT:
155 case FFI_TYPE_DOUBLE:
156 case FFI_TYPE_LONGDOUBLE:
157 ele_count = 1;
158 goto done;
159 case FFI_TYPE_COMPLEX:
160 candidate = ty->elements[0]->type;
161 switch (candidate)
162 {
163 case FFI_TYPE_FLOAT:
164 case FFI_TYPE_DOUBLE:
165 case FFI_TYPE_LONGDOUBLE:
166 ele_count = 2;
167 goto done;
168 }
169 return 0;
170 case FFI_TYPE_STRUCT:
171 break;
172 }
173
174 /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
175 size = ty->size;
176 if (size < 4 || size > 64)
177 return 0;
178
179 /* Find the type of the first non-structure member. */
180 elements = ty->elements;
181 candidate = elements[0]->type;
182 if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
183 {
184 for (i = 0; ; ++i)
185 {
186 candidate = is_hfa0 (elements[i]);
187 if (candidate >= 0)
188 break;
189 }
190 }
191
192 /* If the first member is not a floating point type, it's not an HFA.
193 Also quickly re-check the size of the structure. */
194 switch (candidate)
195 {
196 case FFI_TYPE_FLOAT:
197 ele_count = size / sizeof(float);
198 if (size != ele_count * sizeof(float))
199 return 0;
200 break;
201 case FFI_TYPE_DOUBLE:
202 ele_count = size / sizeof(double);
203 if (size != ele_count * sizeof(double))
204 return 0;
205 break;
206 case FFI_TYPE_LONGDOUBLE:
207 ele_count = size / sizeof(long double);
208 if (size != ele_count * sizeof(long double))
209 return 0;
210 break;
211 default:
212 return 0;
213 }
214 if (ele_count > 4)
215 return 0;
216
217 /* Finally, make sure that all scalar elements are the same type. */
218 for (i = 0; elements[i]; ++i)
219 {
220 int t = elements[i]->type;
221 if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
222 {
223 if (!is_hfa1 (elements[i], candidate))
224 return 0;
225 }
226 else if (t != candidate)
227 return 0;
228 }
229
230 /* All tests succeeded. Encode the result. */
231 done:
232 return candidate * 4 + (4 - ele_count);
233 }
234
/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS. */

struct arg_state
{
  unsigned ngrn;		/* Next general-purpose register number. */
  unsigned nsrn;		/* Next vector register number. */
  size_t nsaa;			/* Next stack offset. */

#if defined (__APPLE__)
  unsigned allocating_variadic;	/* Non-zero once the variadic portion of
				   the argument list has begun; affects
				   stack-slot alignment (see
				   allocate_to_stack).  */
#endif
};
251
252 /* Initialize a procedure call argument marshalling state. */
253 static void
254 arg_init (struct arg_state *state)
255 {
256 state->ngrn = 0;
257 state->nsrn = 0;
258 state->nsaa = 0;
259 #if defined (__APPLE__)
260 state->allocating_variadic = 0;
261 #endif
262 }
263
264 /* Allocate an aligned slot on the stack and return a pointer to it. */
265 static void *
266 allocate_to_stack (struct arg_state *state, void *stack,
267 size_t alignment, size_t size)
268 {
269 size_t nsaa = state->nsaa;
270
271 /* Round up the NSAA to the larger of 8 or the natural
272 alignment of the argument's type. */
273 #if defined (__APPLE__)
274 if (state->allocating_variadic && alignment < 8)
275 alignment = 8;
276 #else
277 if (alignment < 8)
278 alignment = 8;
279 #endif
280
281 nsaa = ALIGN (nsaa, alignment);
282 state->nsaa = nsaa + size;
283
284 return (char *)stack + nsaa;
285 }
286
287 static ffi_arg
288 extend_integer_type (void *source, int type)
289 {
290 switch (type)
291 {
292 case FFI_TYPE_UINT8:
293 return *(UINT8 *) source;
294 case FFI_TYPE_SINT8:
295 return *(SINT8 *) source;
296 case FFI_TYPE_UINT16:
297 return *(UINT16 *) source;
298 case FFI_TYPE_SINT16:
299 return *(SINT16 *) source;
300 case FFI_TYPE_UINT32:
301 return *(UINT32 *) source;
302 case FFI_TYPE_INT:
303 case FFI_TYPE_SINT32:
304 return *(SINT32 *) source;
305 case FFI_TYPE_UINT64:
306 case FFI_TYPE_SINT64:
307 return *(UINT64 *) source;
308 break;
309 case FFI_TYPE_POINTER:
310 return *(uintptr_t *) source;
311 default:
312 abort();
313 }
314 }
315
/* Expand an HFA from its packed memory form at SRC into one 128-bit
   register image per element at DEST.  H is the AARCH64_RET_* code for
   the HFA.  The asm computes a branch into a table of 12-byte
   (three-instruction) entries, one per HFA kind, laid out in the same
   order as the AARCH64_RET_* codes starting at AARCH64_RET_S4; each
   entry loads the elements into v16-v19 and falls into the shared
   store tail, which writes one q register per 16-byte slot.  */
static void
extend_hfa_type (void *dest, void *src, int h)
{
  /* Byte offset of this kind's entry in the branch table: 12 bytes
     per entry.  */
  int f = h - AARCH64_RET_S4;
  void *x0;

  asm volatile (
	"adr	%0, 0f\n"
"	add	%0, %0, %1\n"
"	br	%0\n"
"0:	ldp	s16, s17, [%3]\n"	/* S4 */
"	ldp	s18, s19, [%3, #8]\n"
"	b	4f\n"
"	ldp	s16, s17, [%3]\n"	/* S3 */
"	ldr	s18, [%3, #8]\n"
"	b	3f\n"
"	ldp	s16, s17, [%3]\n"	/* S2 */
"	b	2f\n"
"	nop\n"
"	ldr	s16, [%3]\n"		/* S1 */
"	b	1f\n"
"	nop\n"
"	ldp	d16, d17, [%3]\n"	/* D4 */
"	ldp	d18, d19, [%3, #16]\n"
"	b	4f\n"
"	ldp	d16, d17, [%3]\n"	/* D3 */
"	ldr	d18, [%3, #16]\n"
"	b	3f\n"
"	ldp	d16, d17, [%3]\n"	/* D2 */
"	b	2f\n"
"	nop\n"
"	ldr	d16, [%3]\n"		/* D1 */
"	b	1f\n"
"	nop\n"
"	ldp	q16, q17, [%3]\n"	/* Q4 */
"	ldp	q18, q19, [%3, #16]\n"
"	b	4f\n"
"	ldp	q16, q17, [%3]\n"	/* Q3 */
"	ldr	q18, [%3, #16]\n"
"	b	3f\n"
"	ldp	q16, q17, [%3]\n"	/* Q2 */
"	b	2f\n"
"	nop\n"
"	ldr	q16, [%3]\n"		/* Q1 */
"	b	1f\n"
	/* Shared tail: store as many elements as were loaded, each
	   into its own 16-byte slot of DEST.  */
"4:	str	q19, [%2, #48]\n"
"3:	str	q18, [%2, #32]\n"
"2:	str	q17, [%2, #16]\n"
"1:	str	q16, [%2]"
    : "=&r"(x0)
    : "r"(f * 12), "r"(dest), "r"(src)
    : "memory", "v16", "v17", "v18", "v19");
}
369
/* Inverse of extend_hfa_type: collapse HFA elements stored one per
   16-byte register slot at REG into the packed memory form at DEST.
   Returns a pointer to the packed value (which may be inside REG
   itself when DEST == REG).  H is the AARCH64_RET_* code.  The stN
   lane stores interleave element 0 of each source register into
   consecutive memory, which is exactly the packed layout.  */
static void *
compress_hfa_type (void *dest, void *reg, int h)
{
  switch (h)
    {
    case AARCH64_RET_S1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  /* Big-endian: the float occupies the high 4 bytes of the
	     16-byte slot; point past the low 12 bytes.  (Relies on
	     GNU void* arithmetic.)  */
	  dest += 12;
#endif
	}
      else
	*(float *)dest = *(float *)reg;
      break;
    case AARCH64_RET_S2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.s, v17.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_S3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.s, v17.s, v18.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_S4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    case AARCH64_RET_D1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  /* Big-endian: the double occupies the high 8 bytes of the
	     16-byte slot.  */
	  dest += 8;
#endif
	}
      else
	*(double *)dest = *(double *)reg;
      break;
    case AARCH64_RET_D2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.d, v17.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_D3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.d, v17.d, v18.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_D4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    default:
      /* Q elements are already 16 bytes each, so the register image
	 is the packed form: (4 - (h & 3)) elements of 16 bytes.  */
      if (dest != reg)
	return memcpy (dest, reg, 16 * (4 - (h & 3)));
      break;
    }
  return dest;
}
438
439 /* Either allocate an appropriate register for the argument type, or if
440 none are available, allocate a stack slot and return a pointer
441 to the allocated space. */
442
443 static void *
444 allocate_int_to_reg_or_stack (struct call_context *context,
445 struct arg_state *state,
446 void *stack, size_t size)
447 {
448 if (state->ngrn < N_X_ARG_REG)
449 return &context->x[state->ngrn++];
450
451 state->ngrn = N_X_ARG_REG;
452 return allocate_to_stack (state, stack, size, size);
453 }
454
/* Perform machine-dependent cif processing: encode the return type as
   an AARCH64_RET_* value in CIF->flags, note whether any argument is
   passed in vector registers, and round the argument area up to the
   16-byte stack alignment.  */
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  ffi_type *rtype = cif->rtype;
  size_t bytes = cif->bytes;
  int flags, i, n;

  switch (rtype->type)
    {
    case FFI_TYPE_VOID:
      flags = AARCH64_RET_VOID;
      break;
    case FFI_TYPE_UINT8:
      flags = AARCH64_RET_UINT8;
      break;
    case FFI_TYPE_UINT16:
      flags = AARCH64_RET_UINT16;
      break;
    case FFI_TYPE_UINT32:
      flags = AARCH64_RET_UINT32;
      break;
    case FFI_TYPE_SINT8:
      flags = AARCH64_RET_SINT8;
      break;
    case FFI_TYPE_SINT16:
      flags = AARCH64_RET_SINT16;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      flags = AARCH64_RET_SINT32;
      break;
    case FFI_TYPE_SINT64:
    case FFI_TYPE_UINT64:
      flags = AARCH64_RET_INT64;
      break;
    case FFI_TYPE_POINTER:
      /* 32-bit pointer ABIs return pointers as 32-bit values.  */
      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
      break;

    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
    case FFI_TYPE_STRUCT:
    case FFI_TYPE_COMPLEX:
      flags = is_vfp_type (rtype);
      if (flags == 0)
	{
	  /* Not an HFA; classify by size.  */
	  size_t s = rtype->size;
	  if (s > 16)
	    {
	      /* Returned in memory through a hidden pointer; reserve
		 8 extra bytes of argument area in which the assembly
		 stub can preserve that pointer.  */
	      flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
	      bytes += 8;
	    }
	  else if (s == 16)
	    flags = AARCH64_RET_INT128;
	  else if (s == 8)
	    flags = AARCH64_RET_INT64;
	  else
	    /* Odd-sized small aggregate: comes back in registers but
	       must be copied out to the caller's buffer afterwards
	       (see AARCH64_RET_NEED_COPY handling in ffi_call_int).  */
	    flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
	}
      break;

    default:
      abort();
    }

  /* The assembly stubs need the vector save/restore path if any
     argument lives in the V registers.  */
  for (i = 0, n = cif->nargs; i < n; i++)
    if (is_vfp_type (cif->arg_types[i]))
      {
	flags |= AARCH64_FLAG_ARG_V;
	break;
      }

  /* Round the stack up to a multiple of the stack alignment requirement. */
  cif->bytes = ALIGN(bytes, 16);
  cif->flags = flags;
#if defined (__APPLE__)
  /* Assume no variadic arguments until ffi_prep_cif_machdep_var says
     otherwise.  */
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}
537
#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls.
   NFIXEDARGS is the count of fixed (named) parameters; arguments at or
   beyond that index are marshalled with the Darwin variadic rules (see
   ffi_call_int).  NTOTALARGS is accepted for interface symmetry with
   other ports but is not needed here.  */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
				    unsigned int nfixedargs,
				    unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->aarch64_nfixedargs = nfixedargs;
  return status;
}
#endif /* __APPLE__ */
549
550 extern void ffi_call_SYSV (struct call_context *context, void *frame,
551 void (*fn)(void), void *rvalue, int flags,
552 void *closure) FFI_HIDDEN;
553
/* Call a function with the provided arguments and capture the return
   value.

   CIF describes the call interface, FN is the target, ORIG_RVALUE
   (may be NULL) receives the return value, AVALUE holds pointers to
   the argument values, and CLOSURE is an optional Go static chain
   passed through to the assembly stub.  */
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
	      void **avalue, void *closure)
{
  struct call_context *context;
  void *stack, *frame, *rvalue;
  struct arg_state state;
  size_t stack_bytes, rtype_size, rsize;
  int i, nargs, flags;
  ffi_type *rtype;

  flags = cif->flags;
  rtype = cif->rtype;
  rtype_size = rtype->size;
  stack_bytes = cif->bytes;

  /* If the target function returns a structure via hidden pointer,
     then we cannot allow a null rvalue.  Otherwise, mash a null
     rvalue to void return type.  */
  rsize = 0;
  if (flags & AARCH64_RET_IN_MEM)
    {
      /* Supply a buffer for the discarded in-memory return.  */
      if (orig_rvalue == NULL)
	rsize = rtype_size;
    }
  else if (orig_rvalue == NULL)
    /* Keep only the ARG_V bit: the return value is discarded.  */
    flags &= AARCH64_FLAG_ARG_V;
  else if (flags & AARCH64_RET_NEED_COPY)
    /* Small aggregate comes back in registers; stage it in a 16-byte
       scratch buffer and copy the real size out afterwards.  */
    rsize = 16;

  /* Allocate consecutive stack for everything we'll need: the
     register context, the outgoing argument area, a 32-byte frame for
     the assembly stub, and any return scratch buffer.  (The pointer
     arithmetic on void* below relies on the GNU extension.)  */
  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
  stack = context + 1;
  frame = stack + stack_bytes;
  rvalue = (rsize ? frame + 32 : orig_rvalue);

  /* Marshal each argument into the register context or the stack
     argument area, following the AArch64 PCS allocation rules.  */
  arg_init (&state);
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      size_t s = ty->size;
      void *a = avalue[i];
      int h, t;

      t = ty->type;
      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack. */
	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	do_pointer:
	  {
	    ffi_arg ext = extend_integer_type (a, t);
	    if (state.ngrn < N_X_ARG_REG)
	      context->x[state.ngrn++] = ext;
	    else
	      {
		void *d = allocate_to_stack (&state, stack, ty->alignment, s);
		state.ngrn = N_X_ARG_REG;
		/* Note that the default abi extends each argument
		   to a full 64-bit slot, while the iOS abi allocates
		   only enough space. */
#ifdef __APPLE__
		memcpy(d, a, s);
#else
		*(ffi_arg *)d = ext;
#endif
	      }
	  }
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  {
	    void *dest;

	    h = is_vfp_type (ty);
	    if (h)
	      {
		/* HFA: one vector register per element, expanded into
		   16-byte register slots.  */
		int elems = 4 - (h & 3);
		if (state.nsrn + elems <= N_V_ARG_REG)
		  {
		    dest = &context->v[state.nsrn];
		    state.nsrn += elems;
		    extend_hfa_type (dest, a, h);
		    break;
		  }
		/* Not enough V registers: spill to stack, packed.  */
		state.nsrn = N_V_ARG_REG;
		dest = allocate_to_stack (&state, stack, ty->alignment, s);
	      }
	    else if (s > 16)
	      {
		/* If the argument is a composite type that is larger than 16
		   bytes, then the argument has been copied to memory, and
		   the argument is replaced by a pointer to the copy. */
		a = &avalue[i];
		t = FFI_TYPE_POINTER;
		goto do_pointer;
	      }
	    else
	      {
		size_t n = (s + 7) / 8;
		if (state.ngrn + n <= N_X_ARG_REG)
		  {
		    /* If the argument is a composite type and the size in
		       double-words is not more than the number of available
		       X registers, then the argument is copied into
		       consecutive X registers. */
		    dest = &context->x[state.ngrn];
		    state.ngrn += n;
		  }
		else
		  {
		    /* Otherwise, there are insufficient X registers. Further
		       X register allocations are prevented, the NSAA is
		       adjusted and the argument is copied to memory at the
		       adjusted NSAA. */
		    state.ngrn = N_X_ARG_REG;
		    dest = allocate_to_stack (&state, stack, ty->alignment, s);
		  }
	      }
	    memcpy (dest, a, s);
	  }
	  break;

	default:
	  abort();
	}

#if defined (__APPLE__)
      /* Once the fixed arguments are done, all remaining (variadic)
	 arguments go on the stack under the Darwin rules.  */
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);

  /* Copy a register-staged small aggregate out to the caller.  */
  if (flags & AARCH64_RET_NEED_COPY)
    memcpy (orig_rvalue, rvalue, rtype_size);
}
716
/* Public entry point: call FN according to CIF with the argument
   values in AVALUE, storing any return value at RVALUE.  */
void
ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
{
  ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
722
#ifdef FFI_GO_CLOSURES
/* Go-closure entry point: as ffi_call, but additionally threads
   CLOSURE through to the assembly stub (presumably loaded into the
   Go static-chain register by ffi_call_SYSV — see sysv.S).  */
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
	     void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
#endif /* FFI_GO_CLOSURES */
731
/* Build a trampoline. */

extern void ffi_closure_SYSV (void) FFI_HIDDEN;
extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;

/* Initialize CLOSURE so that calling the code at CODELOC invokes FUN
   with USER_DATA under the calling interface CIF.  Returns
   FFI_BAD_ABI unless CIF uses the AArch64 SYSV ABI.  */
ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
                      ffi_cif* cif,
                      void (*fun)(ffi_cif*,void*,void**,void*),
                      void *user_data,
                      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  /* Select the assembler entry point; the _V variant additionally
     saves the vector argument registers.  */
  void (*start)(void);

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_closure_SYSV_V;
  else
    start = ffi_closure_SYSV;

#if FFI_EXEC_TRAMPOLINE_TABLE
#ifdef __MACH__
  /* Pre-built executable trampolines: store the closure pointer and
     entry point in the config slot located one page below CODELOC.  */
  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
  config[0] = closure;
  config[1] = start;
#endif
#else
  /* Emit a 16-byte trampoline followed by the 8-byte entry-point
     address: load that address into x16, materialize the trampoline's
     own address into x17 (so the assembly stub can recover the
     enclosing closure), and branch.  */
  static const unsigned char trampoline[16] = {
    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
  };
  char *tramp = closure->tramp;

  memcpy (tramp, trampoline, sizeof(trampoline));

  *(UINT64 *)(tramp + 16) = (uintptr_t)start;

  /* The freshly written code is not executable until the instruction
     cache is invalidated over it.  */
  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
#endif

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
781
782 #ifdef FFI_GO_CLOSURES
783 extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
784 extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
785
786 ffi_status
787 ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
788 void (*fun)(ffi_cif*,void*,void**,void*))
789 {
790 void (*start)(void);
791
792 if (cif->abi != FFI_SYSV)
793 return FFI_BAD_ABI;
794
795 if (cif->flags & AARCH64_FLAG_ARG_V)
796 start = ffi_go_closure_SYSV_V;
797 else
798 start = ffi_go_closure_SYSV;
799
800 closure->tramp = start;
801 closure->cif = cif;
802 closure->fun = fun;
803
804 return FFI_OK;
805 }
806 #endif /* FFI_GO_CLOSURES */
807
/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV(). The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner() marshalls the call context into ffi value
   descriptors, invokes the wrapped function, then marshalls the return
   value back into the call context. */

int FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_cif *cif,
			void (*fun)(ffi_cif*,void*,void**,void*),
			void *user_data,
			struct call_context *context,
			void *stack, void *rvalue, void *struct_rvalue)
{
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  int i, h, nargs, flags;
  struct arg_state state;

  arg_init (&state);

  /* Walk the arguments exactly as ffi_call_int allocated them, and
     record a pointer into the saved register image (or the caller's
     stack) for each one.  */
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      int t = ty->type;
      size_t n, s = ty->size;

      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	  avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  h = is_vfp_type (ty);
	  if (h)
	    {
	      n = 4 - (h & 3);
	      if (state.nsrn + n <= N_V_ARG_REG)
		{
		  void *reg = &context->v[state.nsrn];
		  state.nsrn += n;

		  /* Eeek! We need a pointer to the structure, however the
		     homogeneous float elements are being passed in individual
		     registers, therefore for float and double the structure
		     is not represented as a contiguous sequence of bytes in
		     our saved register context. We don't need the original
		     contents of the register storage, so we reformat the
		     structure into the same memory. */
		  avalue[i] = compress_hfa_type (reg, reg, h);
		}
	      else
		{
		  /* HFA spilled to the stack: already packed.  */
		  state.nsrn = N_V_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  else if (s > 16)
	    {
	      /* Replace Composite type of size greater than 16 with a
		 pointer. */
	      avalue[i] = *(void **)
		allocate_int_to_reg_or_stack (context, &state, stack,
					      sizeof (void *));
	    }
	  else
	    {
	      /* Small composite: passed in consecutive X registers or,
		 failing that, on the stack.  */
	      n = (s + 7) / 8;
	      if (state.ngrn + n <= N_X_ARG_REG)
		{
		  avalue[i] = &context->x[state.ngrn];
		  state.ngrn += n;
		}
	      else
		{
		  state.ngrn = N_X_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  break;

	default:
	  abort();
	}
    }

  flags = cif->flags;
  /* In-memory return: the wrapper captured the hidden result pointer
     (STRUCT_RVALUE) from the caller; write the result there instead
     of into the register image.  */
  if (flags & AARCH64_RET_IN_MEM)
    rvalue = struct_rvalue;

  fun (cif, rvalue, avalue, user_data);

  /* The returned flags tell the assembler wrapper how to marshal the
     return value back into registers.  */
  return flags;
}