Merge pull request #238 from KubaKaszycki/master
[libffi.git] / src / aarch64 / ffi.c
1 /* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
2
3 Permission is hereby granted, free of charge, to any person obtaining
4 a copy of this software and associated documentation files (the
5 ``Software''), to deal in the Software without restriction, including
6 without limitation the rights to use, copy, modify, merge, publish,
7 distribute, sublicense, and/or sell copies of the Software, and to
8 permit persons to whom the Software is furnished to do so, subject to
9 the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
21
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdint.h>
25 #include <fficonfig.h>
26 #include <ffi.h>
27 #include <ffi_common.h>
28 #include "internal.h"
29
30 /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
31 all further uses in this file will refer to the 128-bit type. */
32 #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
33 # if FFI_TYPE_LONGDOUBLE != 4
34 # error FFI_TYPE_LONGDOUBLE out of date
35 # endif
36 #else
37 # undef FFI_TYPE_LONGDOUBLE
38 # define FFI_TYPE_LONGDOUBLE 4
39 #endif
40
/* View of one 64-bit vector-register datum, accessible either as a
   single doubleword or as two 32-bit words.  */
union _d
{
  UINT64 d;
  UINT32 s[2];
};
46
/* Image of one 128-bit (quad) vector register, stored as two 64-bit
   halves and aligned to its natural 16-byte boundary.  */
struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};
51
/* Register-file image exchanged with the assembly stubs: the
   N_V_ARG_REG vector argument registers followed by the N_X_ARG_REG
   general (x) argument registers.  The layout must match the
   save/restore sequences in the sysv assembly.  */
struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};
57
58 #if FFI_EXEC_TRAMPOLINE_TABLE
59
60 #ifdef __MACH__
61 #include <mach/vm_param.h>
62 #endif
63
64 #else
65
66 #if defined (__clang__) && defined (__APPLE__)
67 extern void sys_icache_invalidate (void *start, size_t len);
68 #endif
69
/* Flush the instruction cache for the half-open range [start, end) so
   that freshly written trampoline code becomes visible to the
   instruction stream.  */
static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}
81
82 #endif
83
84 /* A subroutine of is_vfp_type. Given a structure type, return the type code
85 of the first non-structure element. Recurse for structure elements.
86 Return -1 if the structure is in fact empty, i.e. no nested elements. */
87
88 static int
89 is_hfa0 (const ffi_type *ty)
90 {
91 ffi_type **elements = ty->elements;
92 int i, ret = -1;
93
94 if (elements != NULL)
95 for (i = 0; elements[i]; ++i)
96 {
97 ret = elements[i]->type;
98 if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
99 {
100 ret = is_hfa0 (elements[i]);
101 if (ret < 0)
102 continue;
103 }
104 break;
105 }
106
107 return ret;
108 }
109
110 /* A subroutine of is_vfp_type. Given a structure type, return true if all
111 of the non-structure elements are the same as CANDIDATE. */
112
113 static int
114 is_hfa1 (const ffi_type *ty, int candidate)
115 {
116 ffi_type **elements = ty->elements;
117 int i;
118
119 if (elements != NULL)
120 for (i = 0; elements[i]; ++i)
121 {
122 int t = elements[i]->type;
123 if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
124 {
125 if (!is_hfa1 (elements[i], candidate))
126 return 0;
127 }
128 else if (t != candidate)
129 return 0;
130 }
131
132 return 1;
133 }
134
135 /* Determine if TY may be allocated to the FP registers. This is both an
fp scalar type as well as a homogeneous floating-point aggregate (HFA).
137 That is, a structure consisting of 1 to 4 members of all the same type,
138 where that type is an fp scalar.
139
140 Returns non-zero iff TY is an HFA. The result is the AARCH64_RET_*
141 constant for the type. */
142
static int
is_vfp_type (const ffi_type *ty)
{
  ffi_type **elements;
  int candidate, i;
  size_t size, ele_count;

  /* Quickest tests first. */
  candidate = ty->type;
  switch (candidate)
    {
    default:
      return 0;
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
      /* A lone fp scalar is trivially an "HFA" of one element.  */
      ele_count = 1;
      goto done;
    case FFI_TYPE_COMPLEX:
      /* A complex is treated as two elements of its base fp type.  */
      candidate = ty->elements[0]->type;
      switch (candidate)
	{
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	  ele_count = 2;
	  goto done;
	}
      return 0;
    case FFI_TYPE_STRUCT:
      break;
    }

  /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
  size = ty->size;
  if (size < 4 || size > 64)
    return 0;

  /* Find the type of the first non-structure member.  is_hfa0 returns
     -1 for an empty nested aggregate, in which case we move on to the
     next member.  */
  elements = ty->elements;
  candidate = elements[0]->type;
  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
    {
      for (i = 0; ; ++i)
	{
	  candidate = is_hfa0 (elements[i]);
	  if (candidate >= 0)
	    break;
	}
    }

  /* If the first member is not a floating point type, it's not an HFA.
     Also quickly re-check the size of the structure: it must be an
     exact multiple of the element size, i.e. contain no padding.  */
  switch (candidate)
    {
    case FFI_TYPE_FLOAT:
      ele_count = size / sizeof(float);
      if (size != ele_count * sizeof(float))
	return 0;
      break;
    case FFI_TYPE_DOUBLE:
      ele_count = size / sizeof(double);
      if (size != ele_count * sizeof(double))
	return 0;
      break;
    case FFI_TYPE_LONGDOUBLE:
      ele_count = size / sizeof(long double);
      if (size != ele_count * sizeof(long double))
	return 0;
      break;
    default:
      return 0;
    }
  /* HFAs have at most 4 members.  */
  if (ele_count > 4)
    return 0;

  /* Finally, make sure that all scalar elements are the same type. */
  for (i = 0; elements[i]; ++i)
    {
      int t = elements[i]->type;
      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
	{
	  if (!is_hfa1 (elements[i], candidate))
	    return 0;
	}
      else if (t != candidate)
	return 0;
    }

  /* All tests succeeded.  Encode the result: this arithmetic matches
     the AARCH64_RET_S4..Q1 encoding laid out in internal.h.  */
 done:
  return candidate * 4 + (4 - ele_count);
}
236
237 /* Representation of the procedure call argument marshalling
238 state.
239
240 The terse state variable names match the names used in the AARCH64
241 PCS. */
242
struct arg_state
{
  unsigned ngrn;		/* Next general-purpose register number. */
  unsigned nsrn;		/* Next vector register number. */
  size_t nsaa;			/* Next stack offset. */

#if defined (__APPLE__)
  /* Nonzero once we have started allocating variadic arguments, which
     follow different stack-packing rules on the Apple ABI (see
     allocate_to_stack).  */
  unsigned allocating_variadic;
#endif
};
253
254 /* Initialize a procedure call argument marshalling state. */
255 static void
256 arg_init (struct arg_state *state)
257 {
258 state->ngrn = 0;
259 state->nsrn = 0;
260 state->nsaa = 0;
261 #if defined (__APPLE__)
262 state->allocating_variadic = 0;
263 #endif
264 }
265
266 /* Allocate an aligned slot on the stack and return a pointer to it. */
static void *
allocate_to_stack (struct arg_state *state, void *stack,
		   size_t alignment, size_t size)
{
  size_t nsaa = state->nsaa;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type. */
#if defined (__APPLE__)
  /* The Apple ABI packs non-variadic stack arguments at natural
     alignment; only variadic arguments get the 8-byte minimum.  */
  if (state->allocating_variadic && alignment < 8)
    alignment = 8;
#else
  if (alignment < 8)
    alignment = 8;
#endif

  nsaa = ALIGN (nsaa, alignment);
  /* Reserve SIZE bytes at the aligned offset for the caller.  */
  state->nsaa = nsaa + size;

  return (char *)stack + nsaa;
}
288
289 static ffi_arg
290 extend_integer_type (void *source, int type)
291 {
292 switch (type)
293 {
294 case FFI_TYPE_UINT8:
295 return *(UINT8 *) source;
296 case FFI_TYPE_SINT8:
297 return *(SINT8 *) source;
298 case FFI_TYPE_UINT16:
299 return *(UINT16 *) source;
300 case FFI_TYPE_SINT16:
301 return *(SINT16 *) source;
302 case FFI_TYPE_UINT32:
303 return *(UINT32 *) source;
304 case FFI_TYPE_INT:
305 case FFI_TYPE_SINT32:
306 return *(SINT32 *) source;
307 case FFI_TYPE_UINT64:
308 case FFI_TYPE_SINT64:
309 return *(UINT64 *) source;
310 break;
311 case FFI_TYPE_POINTER:
312 return *(uintptr_t *) source;
313 default:
314 abort();
315 }
316 }
317
/* Scatter the packed HFA argument at SRC into full 16-byte vector
   register slots at DEST.  H is an AARCH64_RET_* HFA code.  The asm
   computes a branch into a table of load sequences: each table entry
   is three instructions (12 bytes, hence the `f * 12' offset),
   ordered S4,S3,S2,S1, D4..D1, Q4..Q1, loading 1-4 elements into
   v16-v19 and falling into the shared store tail that writes them
   out one quad register per 16 bytes.  */
static void
extend_hfa_type (void *dest, void *src, int h)
{
  /* Index of H within the branch table; AARCH64_RET_S4 is entry 0.  */
  ssize_t f = h - AARCH64_RET_S4;
  void *x0;

  asm volatile (
	"adr %0, 0f\n"
"	add %0, %0, %1\n"
"	br %0\n"
"0:	ldp s16, s17, [%3]\n"		/* S4 */
"	ldp s18, s19, [%3, #8]\n"
"	b 4f\n"
"	ldp s16, s17, [%3]\n"		/* S3 */
"	ldr s18, [%3, #8]\n"
"	b 3f\n"
"	ldp s16, s17, [%3]\n"		/* S2 */
"	b 2f\n"
"	nop\n"
"	ldr s16, [%3]\n"		/* S1 */
"	b 1f\n"
"	nop\n"
"	ldp d16, d17, [%3]\n"		/* D4 */
"	ldp d18, d19, [%3, #16]\n"
"	b 4f\n"
"	ldp d16, d17, [%3]\n"		/* D3 */
"	ldr d18, [%3, #16]\n"
"	b 3f\n"
"	ldp d16, d17, [%3]\n"		/* D2 */
"	b 2f\n"
"	nop\n"
"	ldr d16, [%3]\n"		/* D1 */
"	b 1f\n"
"	nop\n"
"	ldp q16, q17, [%3]\n"		/* Q4 */
"	ldp q18, q19, [%3, #16]\n"
"	b 4f\n"
"	ldp q16, q17, [%3]\n"		/* Q3 */
"	ldr q18, [%3, #16]\n"
"	b 3f\n"
"	ldp q16, q17, [%3]\n"		/* Q2 */
"	b 2f\n"
"	nop\n"
"	ldr q16, [%3]\n"		/* Q1 */
"	b 1f\n"
"4:	str q19, [%2, #48]\n"		/* shared store tail */
"3:	str q18, [%2, #32]\n"
"2:	str q17, [%2, #16]\n"
"1:	str q16, [%2]"
    : "=&r"(x0)
    : "r"(f * 12), "r"(dest), "r"(src)
    : "memory", "v16", "v17", "v18", "v19");
}
371
/* Inverse of extend_hfa_type: gather an HFA of kind H from the
   register image at REG (one element per 16-byte vector slot) into
   packed form at DEST, returning the address of the packed value.
   DEST may equal REG, in which case the data is compacted in place
   (the st2/st3/st4 interleaved stores below handle overlap).  */
static void *
compress_hfa_type (void *dest, void *reg, int h)
{
  switch (h)
    {
    case AARCH64_RET_S1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  /* Big-endian: the single float sits in the high word of
	     the 16-byte slot.  */
	  dest += 12;
#endif
	}
      else
	*(float *)dest = *(float *)reg;
      break;
    case AARCH64_RET_S2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.s, v17.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_S3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.s, v17.s, v18.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_S4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    case AARCH64_RET_D1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  /* Big-endian: the double sits in the high half of the
	     16-byte slot.  */
	  dest += 8;
#endif
	}
      else
	*(double *)dest = *(double *)reg;
      break;
    case AARCH64_RET_D2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.d, v17.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_D3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.d, v17.d, v18.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_D4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    default:
      /* Quad elements are already 16 bytes each, so the register
	 image is already packed; just copy 16 bytes per element.  */
      if (dest != reg)
	return memcpy (dest, reg, 16 * (4 - (h & 3)));
      break;
    }
  return dest;
}
440
441 /* Either allocate an appropriate register for the argument type, or if
442 none are available, allocate a stack slot and return a pointer
443 to the allocated space. */
444
445 static void *
446 allocate_int_to_reg_or_stack (struct call_context *context,
447 struct arg_state *state,
448 void *stack, size_t size)
449 {
450 if (state->ngrn < N_X_ARG_REG)
451 return &context->x[state->ngrn++];
452
453 state->ngrn = N_X_ARG_REG;
454 return allocate_to_stack (state, stack, size, size);
455 }
456
/* Machine-dependent cif preparation: encode the return type into the
   AARCH64_RET_* flags word, record whether any argument is passed in
   vector registers, and round the outgoing stack area up to the
   16-byte stack alignment.  */
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  ffi_type *rtype = cif->rtype;
  size_t bytes = cif->bytes;
  int flags, i, n;

  switch (rtype->type)
    {
    case FFI_TYPE_VOID:
      flags = AARCH64_RET_VOID;
      break;
    case FFI_TYPE_UINT8:
      flags = AARCH64_RET_UINT8;
      break;
    case FFI_TYPE_UINT16:
      flags = AARCH64_RET_UINT16;
      break;
    case FFI_TYPE_UINT32:
      flags = AARCH64_RET_UINT32;
      break;
    case FFI_TYPE_SINT8:
      flags = AARCH64_RET_SINT8;
      break;
    case FFI_TYPE_SINT16:
      flags = AARCH64_RET_SINT16;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      flags = AARCH64_RET_SINT32;
      break;
    case FFI_TYPE_SINT64:
    case FFI_TYPE_UINT64:
      flags = AARCH64_RET_INT64;
      break;
    case FFI_TYPE_POINTER:
      /* Pointer width decides the return kind (ILP32 vs LP64).  */
      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
      break;

    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
    case FFI_TYPE_STRUCT:
    case FFI_TYPE_COMPLEX:
      flags = is_vfp_type (rtype);
      if (flags == 0)
	{
	  size_t s = rtype->size;
	  if (s > 16)
	    {
	      /* Large aggregate: returned in memory via a hidden
		 pointer.  NOTE(review): the extra 8 bytes appear to
		 give the assembly somewhere to preserve that pointer
		 across the call — confirm against the sysv assembly.  */
	      flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
	      bytes += 8;
	    }
	  else if (s == 16)
	    flags = AARCH64_RET_INT128;
	  else if (s == 8)
	    flags = AARCH64_RET_INT64;
	  else
	    /* Small odd-sized aggregate: returned in registers, but
	       only the low S bytes are valid, so a copy is needed.  */
	    flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
	}
      break;

    default:
      abort();
    }

  /* Note whether any argument needs the vector registers saved.  */
  for (i = 0, n = cif->nargs; i < n; i++)
    if (is_vfp_type (cif->arg_types[i]))
      {
	flags |= AARCH64_FLAG_ARG_V;
	break;
      }

  /* Round the stack up to a multiple of the stack alignment requirement. */
  cif->bytes = ALIGN(bytes, 16);
  cif->flags = flags;
#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}
539
540 #if defined (__APPLE__)
541 /* Perform Apple-specific cif processing for variadic calls */
/* Variadic-call variant of ffi_prep_cif_machdep.  NTOTALARGS is
   unused here; the shared layout work is done by the non-variadic
   path.  We additionally record how many leading arguments are fixed
   so that argument marshalling can switch to the Apple variadic
   rules after them.  */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
				    unsigned int nfixedargs,
				    unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->aarch64_nfixedargs = nfixedargs;
  return status;
}
550 #endif /* __APPLE__ */
551
552 extern void ffi_call_SYSV (struct call_context *context, void *frame,
553 void (*fn)(void), void *rvalue, int flags,
554 void *closure) FFI_HIDDEN;
555
556 /* Call a function with the provided arguments and capture the return
557 value. */
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
	      void **avalue, void *closure)
{
  struct call_context *context;
  void *stack, *frame, *rvalue;
  struct arg_state state;
  size_t stack_bytes, rtype_size, rsize;
  int i, nargs, flags;
  ffi_type *rtype;

  flags = cif->flags;
  rtype = cif->rtype;
  rtype_size = rtype->size;
  stack_bytes = cif->bytes;

  /* If the target function returns a structure via hidden pointer,
     then we cannot allow a null rvalue.  Otherwise, mash a null
     rvalue to void return type.  */
  rsize = 0;
  if (flags & AARCH64_RET_IN_MEM)
    {
      if (orig_rvalue == NULL)
	rsize = rtype_size;
    }
  else if (orig_rvalue == NULL)
    /* Keep only the ARG_V bit; the return-kind bits become VOID (0).  */
    flags &= AARCH64_FLAG_ARG_V;
  else if (flags & AARCH64_RET_NEED_COPY)
    rsize = 16;

  /* Allocate consecutive stack for everything we'll need.
     Layout: [call_context][outgoing stack args][32-byte frame]
	     [optional rvalue buffer of rsize bytes].  */
  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
  stack = context + 1;
  frame = stack + stack_bytes;
  rvalue = (rsize ? frame + 32 : orig_rvalue);

  arg_init (&state);
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      size_t s = ty->size;
      void *a = avalue[i];
      int h, t;

      t = ty->type;
      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack.  */
	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	do_pointer:
	  {
	    ffi_arg ext = extend_integer_type (a, t);
	    if (state.ngrn < N_X_ARG_REG)
	      context->x[state.ngrn++] = ext;
	    else
	      {
		void *d = allocate_to_stack (&state, stack, ty->alignment, s);
		state.ngrn = N_X_ARG_REG;
		/* Note that the default abi extends each argument
		   to a full 64-bit slot, while the iOS abi allocates
		   only enough space. */
#ifdef __APPLE__
		memcpy(d, a, s);
#else
		*(ffi_arg *)d = ext;
#endif
	      }
	  }
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  {
	    void *dest;

	    h = is_vfp_type (ty);
	    if (h)
	      {
		/* HFA: needs ELEMS consecutive vector registers.  */
		int elems = 4 - (h & 3);
		if (state.nsrn + elems <= N_V_ARG_REG)
		  {
		    dest = &context->v[state.nsrn];
		    state.nsrn += elems;
		    extend_hfa_type (dest, a, h);
		    break;
		  }
		state.nsrn = N_V_ARG_REG;
		dest = allocate_to_stack (&state, stack, ty->alignment, s);
	      }
	    else if (s > 16)
	      {
		/* If the argument is a composite type that is larger than 16
		   bytes, then the argument has been copied to memory, and
		   the argument is replaced by a pointer to the copy.  */
		a = &avalue[i];
		t = FFI_TYPE_POINTER;
		s = sizeof (void *);
		goto do_pointer;
	      }
	    else
	      {
		size_t n = (s + 7) / 8;
		if (state.ngrn + n <= N_X_ARG_REG)
		  {
		    /* If the argument is a composite type and the size in
		       double-words is not more than the number of available
		       X registers, then the argument is copied into
		       consecutive X registers.  */
		    dest = &context->x[state.ngrn];
		    state.ngrn += n;
		  }
		else
		  {
		    /* Otherwise, there are insufficient X registers.  Further
		       X register allocations are prevented, the NSAA is
		       adjusted and the argument is copied to memory at the
		       adjusted NSAA.  */
		    state.ngrn = N_X_ARG_REG;
		    dest = allocate_to_stack (&state, stack, ty->alignment, s);
		  }
	      }
	    memcpy (dest, a, s);
	  }
	  break;

	default:
	  abort();
	}

#if defined (__APPLE__)
      /* After the last fixed argument, switch to variadic rules:
	 everything further goes on the stack.  */
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);

  /* For small odd-sized aggregates, copy the valid bytes out of the
     16-byte bounce buffer into the caller's rvalue.  */
  if (flags & AARCH64_RET_NEED_COPY)
    memcpy (orig_rvalue, rvalue, rtype_size);
}
719
/* Public entry point: call FN according to CIF, fetching arguments
   from AVALUE and storing any result at RVALUE.  */
void
ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
{
  ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
725
726 #ifdef FFI_GO_CLOSURES
/* As ffi_call, but additionally forwards CLOSURE, which is passed
   through as the closure argument of ffi_call_SYSV (Go closure
   support).  */
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
	     void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
733 #endif /* FFI_GO_CLOSURES */
734
735 /* Build a trampoline. */
736
737 extern void ffi_closure_SYSV (void) FFI_HIDDEN;
738 extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
739
/* Initialize CLOSURE so that calling the code at CODELOC invokes FUN
   with CIF's arguments and USER_DATA.  Selects the vector-saving
   assembly entry when any argument uses the V registers, then either
   fills in the trampoline-table config page (Darwin trampoline
   tables) or writes and flushes an inline trampoline.  */
ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
		      ffi_cif* cif,
		      void (*fun)(ffi_cif*,void*,void**,void*),
		      void *user_data,
		      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  void (*start)(void);

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_closure_SYSV_V;
  else
    start = ffi_closure_SYSV;

#if FFI_EXEC_TRAMPOLINE_TABLE
#ifdef __MACH__
  /* The config page sits one page below the executable trampoline;
     the assembly reads the closure pointer and entry from it.  */
  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
  config[0] = closure;
  config[1] = start;
#endif
#else
  /* Inline trampoline: load the target from the literal stored at
     tramp+16, leave the trampoline address in x17, and jump.  */
  static const unsigned char trampoline[16] = {
    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
  };
  char *tramp = closure->tramp;

  memcpy (tramp, trampoline, sizeof(trampoline));

  *(UINT64 *)(tramp + 16) = (uintptr_t)start;

  /* Make the freshly written code visible to instruction fetch.  */
  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
#endif

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
784
785 #ifdef FFI_GO_CLOSURES
786 extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
787 extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
788
789 ffi_status
790 ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
791 void (*fun)(ffi_cif*,void*,void**,void*))
792 {
793 void (*start)(void);
794
795 if (cif->abi != FFI_SYSV)
796 return FFI_BAD_ABI;
797
798 if (cif->flags & AARCH64_FLAG_ARG_V)
799 start = ffi_go_closure_SYSV_V;
800 else
801 start = ffi_go_closure_SYSV;
802
803 closure->tramp = start;
804 closure->cif = cif;
805 closure->fun = fun;
806
807 return FFI_OK;
808 }
809 #endif /* FFI_GO_CLOSURES */
810
811 /* Primary handler to setup and invoke a function within a closure.
812
813 A closure when invoked enters via the assembler wrapper
814 ffi_closure_SYSV(). The wrapper allocates a call context on the
815 stack, saves the interesting registers (from the perspective of
816 the calling convention) into the context then passes control to
817 ffi_closure_SYSV_inner() passing the saved context and a pointer to
818 the stack at the point ffi_closure_SYSV() was invoked.
819
820 On the return path the assembler wrapper will reload call context
821 registers.
822
823 ffi_closure_SYSV_inner() marshalls the call context into ffi value
824 descriptors, invokes the wrapped function, then marshalls the return
825 value back into the call context. */
826
/* Unmarshal the saved register/stack CONTEXT into an AVALUE array,
   invoke FUN, and return CIF->flags so the assembly wrapper knows how
   to load the return value.  RVALUE points at the wrapper's return
   buffer; STRUCT_RVALUE is the hidden in-memory return pointer, used
   instead when the return kind is AARCH64_RET_IN_MEM.  */
int FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_cif *cif,
			void (*fun)(ffi_cif*,void*,void**,void*),
			void *user_data,
			struct call_context *context,
			void *stack, void *rvalue, void *struct_rvalue)
{
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  int i, h, nargs, flags;
  struct arg_state state;

  arg_init (&state);

  /* Walk the arguments exactly as the caller allocated them, so each
     avalue[i] points at the saved register or stack slot.  */
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      int t = ty->type;
      size_t n, s = ty->size;

      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	  avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  h = is_vfp_type (ty);
	  if (h)
	    {
	      n = 4 - (h & 3);
	      if (state.nsrn + n <= N_V_ARG_REG)
		{
		  void *reg = &context->v[state.nsrn];
		  state.nsrn += n;

		  /* Eeek! We need a pointer to the structure, however the
		     homogeneous float elements are being passed in individual
		     registers, therefore for float and double the structure
		     is not represented as a contiguous sequence of bytes in
		     our saved register context.  We don't need the original
		     contents of the register storage, so we reformat the
		     structure into the same memory.  */
		  avalue[i] = compress_hfa_type (reg, reg, h);
		}
	      else
		{
		  state.nsrn = N_V_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  else if (s > 16)
	    {
	      /* Replace Composite type of size greater than 16 with a
		 pointer.  The caller passed the address of its copy;
		 dereference that slot to reach the data.  */
	      avalue[i] = *(void **)
		allocate_int_to_reg_or_stack (context, &state, stack,
					      sizeof (void *));
	    }
	  else
	    {
	      n = (s + 7) / 8;
	      if (state.ngrn + n <= N_X_ARG_REG)
		{
		  avalue[i] = &context->x[state.ngrn];
		  state.ngrn += n;
		}
	      else
		{
		  state.ngrn = N_X_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  break;

	default:
	  abort();
	}

#if defined (__APPLE__)
      /* After the last fixed argument, switch to variadic rules:
	 everything further is on the stack.  */
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  flags = cif->flags;
  if (flags & AARCH64_RET_IN_MEM)
    rvalue = struct_rvalue;

  fun (cif, rvalue, avalue, user_data);

  return flags;
}