Refresh from GCC
[libffi.git] / src / x86 / ffi64.c
1 /* -----------------------------------------------------------------------
2 ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
3 Copyright (c) 2008, 2010 Red Hat, Inc.
4
5 x86-64 Foreign Function Interface
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 ``Software''), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice shall be included
16 in all copies or substantial portions of the Software.
17
18 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 ----------------------------------------------------------------------- */
27
28 #include <ffi.h>
29 #include <ffi_common.h>
30
31 #include <stdlib.h>
32 #include <stdarg.h>
33
34 #ifdef __x86_64__
35
36 #define MAX_GPR_REGS 6
37 #define MAX_SSE_REGS 8
38
39 struct register_args
40 {
41 /* Registers for argument passing. */
42 UINT64 gpr[MAX_GPR_REGS];
43 __int128_t sse[MAX_SSE_REGS];
44 };
45
46 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
47 void *raddr, void (*fnaddr)(void), unsigned ssecount);
48
49 /* All reference to register classes here is identical to the code in
50 gcc/config/i386/i386.c. Do *not* change one without the other. */
51
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the
   exception of the SSESF and SSEDF classes, which are basically the SSE
   class; gcc just uses SFmode or DFmode moves instead of DImode to
   avoid reformatting penalties.

   Similarly, INTEGERSI_CLASS exists so that cheaper SImode moves can be
   used whenever possible (the upper half then contains only padding).

   The enumerator order matters: SSE_CLASS_P relies on the four SSE
   classes being contiguous, from X86_64_SSE_CLASS up to
   X86_64_SSEUP_CLASS.  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of eightbyte classes recorded for one argument.  */
#define MAX_CLASSES 4

/* Nonzero iff X is one of the SSE classes (SSE, SSESF, SSEDF, SSEUP).
   Both uses of X are parenthesized so that an argument containing a
   low-precedence operator (e.g. a conditional expression) is tested as
   a whole.  X is evaluated twice, so it must be free of side effects.  */
#define SSE_CLASS_P(X) \
  ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
78
79 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
80 of this code is to classify each 8bytes of incoming argument by the register
81 class and assign registers accordingly. */
82
83 /* Return the union class of CLASS1 and CLASS2.
84 See the x86-64 PS ABI for details. */
85
86 static enum x86_64_reg_class
87 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
88 {
89 /* Rule #1: If both classes are equal, this is the resulting class. */
90 if (class1 == class2)
91 return class1;
92
93 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
94 the other class. */
95 if (class1 == X86_64_NO_CLASS)
96 return class2;
97 if (class2 == X86_64_NO_CLASS)
98 return class1;
99
100 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
101 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
102 return X86_64_MEMORY_CLASS;
103
104 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
105 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
106 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
107 return X86_64_INTEGERSI_CLASS;
108 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
109 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
110 return X86_64_INTEGER_CLASS;
111
112 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
113 MEMORY is used. */
114 if (class1 == X86_64_X87_CLASS
115 || class1 == X86_64_X87UP_CLASS
116 || class1 == X86_64_COMPLEX_X87_CLASS
117 || class2 == X86_64_X87_CLASS
118 || class2 == X86_64_X87UP_CLASS
119 || class2 == X86_64_COMPLEX_X87_CLASS)
120 return X86_64_MEMORY_CLASS;
121
122 /* Rule #6: Otherwise class SSE is used. */
123 return X86_64_SSE_CLASS;
124 }
125
126 /* Classify the argument of type TYPE and mode MODE.
127 CLASSES will be filled by the register class used to pass each word
128 of the operand. The number of words is returned. In case the parameter
129 should be passed in memory, 0 is returned. As a special case for zero
130 sized containers, classes[0] will be NO_CLASS and 1 is returned.
131
132 See the x86-64 PS ABI for details.
133 */
134 static int
135 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
136 size_t byte_offset)
137 {
138 switch (type->type)
139 {
140 case FFI_TYPE_UINT8:
141 case FFI_TYPE_SINT8:
142 case FFI_TYPE_UINT16:
143 case FFI_TYPE_SINT16:
144 case FFI_TYPE_UINT32:
145 case FFI_TYPE_SINT32:
146 case FFI_TYPE_UINT64:
147 case FFI_TYPE_SINT64:
148 case FFI_TYPE_POINTER:
149 {
150 int size = byte_offset + type->size;
151
152 if (size <= 4)
153 {
154 classes[0] = X86_64_INTEGERSI_CLASS;
155 return 1;
156 }
157 else if (size <= 8)
158 {
159 classes[0] = X86_64_INTEGER_CLASS;
160 return 1;
161 }
162 else if (size <= 12)
163 {
164 classes[0] = X86_64_INTEGER_CLASS;
165 classes[1] = X86_64_INTEGERSI_CLASS;
166 return 2;
167 }
168 else if (size <= 16)
169 {
170 classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
171 return 2;
172 }
173 else
174 FFI_ASSERT (0);
175 }
176 case FFI_TYPE_FLOAT:
177 if (!(byte_offset % 8))
178 classes[0] = X86_64_SSESF_CLASS;
179 else
180 classes[0] = X86_64_SSE_CLASS;
181 return 1;
182 case FFI_TYPE_DOUBLE:
183 classes[0] = X86_64_SSEDF_CLASS;
184 return 1;
185 case FFI_TYPE_LONGDOUBLE:
186 classes[0] = X86_64_X87_CLASS;
187 classes[1] = X86_64_X87UP_CLASS;
188 return 2;
189 case FFI_TYPE_STRUCT:
190 {
191 const int UNITS_PER_WORD = 8;
192 int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
193 ffi_type **ptr;
194 int i;
195 enum x86_64_reg_class subclasses[MAX_CLASSES];
196
197 /* If the struct is larger than 32 bytes, pass it on the stack. */
198 if (type->size > 32)
199 return 0;
200
201 for (i = 0; i < words; i++)
202 classes[i] = X86_64_NO_CLASS;
203
204 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
205 signalize memory class, so handle it as special case. */
206 if (!words)
207 {
208 classes[0] = X86_64_NO_CLASS;
209 return 1;
210 }
211
212 /* Merge the fields of structure. */
213 for (ptr = type->elements; *ptr != NULL; ptr++)
214 {
215 int num;
216
217 byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
218
219 num = classify_argument (*ptr, subclasses, byte_offset % 8);
220 if (num == 0)
221 return 0;
222 for (i = 0; i < num; i++)
223 {
224 int pos = byte_offset / 8;
225 classes[i + pos] =
226 merge_classes (subclasses[i], classes[i + pos]);
227 }
228
229 byte_offset += (*ptr)->size;
230 }
231
232 if (words > 2)
233 {
234 /* When size > 16 bytes, if the first one isn't
235 X86_64_SSE_CLASS or any other ones aren't
236 X86_64_SSEUP_CLASS, everything should be passed in
237 memory. */
238 if (classes[0] != X86_64_SSE_CLASS)
239 return 0;
240
241 for (i = 1; i < words; i++)
242 if (classes[i] != X86_64_SSEUP_CLASS)
243 return 0;
244 }
245
246 /* Final merger cleanup. */
247 for (i = 0; i < words; i++)
248 {
249 /* If one class is MEMORY, everything should be passed in
250 memory. */
251 if (classes[i] == X86_64_MEMORY_CLASS)
252 return 0;
253
254 /* The X86_64_SSEUP_CLASS should be always preceded by
255 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
256 if (classes[i] == X86_64_SSEUP_CLASS
257 && classes[i - 1] != X86_64_SSE_CLASS
258 && classes[i - 1] != X86_64_SSEUP_CLASS)
259 {
260 /* The first one should never be X86_64_SSEUP_CLASS. */
261 FFI_ASSERT (i != 0);
262 classes[i] = X86_64_SSE_CLASS;
263 }
264
265 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
266 everything should be passed in memory. */
267 if (classes[i] == X86_64_X87UP_CLASS
268 && (classes[i - 1] != X86_64_X87_CLASS))
269 {
270 /* The first one should never be X86_64_X87UP_CLASS. */
271 FFI_ASSERT (i != 0);
272 return 0;
273 }
274 }
275 return words;
276 }
277
278 default:
279 FFI_ASSERT(0);
280 }
281 return 0; /* Never reached. */
282 }
283
284 /* Examine the argument and return set number of register required in each
285 class. Return zero iff parameter should be passed in memory, otherwise
286 the number of registers. */
287
288 static int
289 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
290 _Bool in_return, int *pngpr, int *pnsse)
291 {
292 int i, n, ngpr, nsse;
293
294 n = classify_argument (type, classes, 0);
295 if (n == 0)
296 return 0;
297
298 ngpr = nsse = 0;
299 for (i = 0; i < n; ++i)
300 switch (classes[i])
301 {
302 case X86_64_INTEGER_CLASS:
303 case X86_64_INTEGERSI_CLASS:
304 ngpr++;
305 break;
306 case X86_64_SSE_CLASS:
307 case X86_64_SSESF_CLASS:
308 case X86_64_SSEDF_CLASS:
309 nsse++;
310 break;
311 case X86_64_NO_CLASS:
312 case X86_64_SSEUP_CLASS:
313 break;
314 case X86_64_X87_CLASS:
315 case X86_64_X87UP_CLASS:
316 case X86_64_COMPLEX_X87_CLASS:
317 return in_return != 0;
318 default:
319 abort ();
320 }
321
322 *pngpr = ngpr;
323 *pnsse = nsse;
324
325 return n;
326 }
327
328 /* Perform machine dependent cif processing. */
329
330 ffi_status
331 ffi_prep_cif_machdep (ffi_cif *cif)
332 {
333 int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
334 enum x86_64_reg_class classes[MAX_CLASSES];
335 size_t bytes;
336
337 gprcount = ssecount = 0;
338
339 flags = cif->rtype->type;
340 if (flags != FFI_TYPE_VOID)
341 {
342 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
343 if (n == 0)
344 {
345 /* The return value is passed in memory. A pointer to that
346 memory is the first argument. Allocate a register for it. */
347 gprcount++;
348 /* We don't have to do anything in asm for the return. */
349 flags = FFI_TYPE_VOID;
350 }
351 else if (flags == FFI_TYPE_STRUCT)
352 {
353 /* Mark which registers the result appears in. */
354 _Bool sse0 = SSE_CLASS_P (classes[0]);
355 _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
356 if (sse0 && !sse1)
357 flags |= 1 << 8;
358 else if (!sse0 && sse1)
359 flags |= 1 << 9;
360 else if (sse0 && sse1)
361 flags |= 1 << 10;
362 /* Mark the true size of the structure. */
363 flags |= cif->rtype->size << 12;
364 }
365 }
366
367 /* Go over all arguments and determine the way they should be passed.
368 If it's in a register and there is space for it, let that be so. If
369 not, add it's size to the stack byte count. */
370 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
371 {
372 if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
373 || gprcount + ngpr > MAX_GPR_REGS
374 || ssecount + nsse > MAX_SSE_REGS)
375 {
376 long align = cif->arg_types[i]->alignment;
377
378 if (align < 8)
379 align = 8;
380
381 bytes = ALIGN (bytes, align);
382 bytes += cif->arg_types[i]->size;
383 }
384 else
385 {
386 gprcount += ngpr;
387 ssecount += nsse;
388 }
389 }
390 if (ssecount)
391 flags |= 1 << 11;
392 cif->flags = flags;
393 cif->bytes = ALIGN (bytes, 8);
394
395 return FFI_OK;
396 }
397
398 void
399 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
400 {
401 enum x86_64_reg_class classes[MAX_CLASSES];
402 char *stack, *argp;
403 ffi_type **arg_types;
404 int gprcount, ssecount, ngpr, nsse, i, avn;
405 _Bool ret_in_memory;
406 struct register_args *reg_args;
407
408 /* Can't call 32-bit mode from 64-bit mode. */
409 FFI_ASSERT (cif->abi == FFI_UNIX64);
410
411 /* If the return value is a struct and we don't have a return value
412 address then we need to make one. Note the setting of flags to
413 VOID above in ffi_prep_cif_machdep. */
414 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
415 && (cif->flags & 0xff) == FFI_TYPE_VOID);
416 if (rvalue == NULL && ret_in_memory)
417 rvalue = alloca (cif->rtype->size);
418
419 /* Allocate the space for the arguments, plus 4 words of temp space. */
420 stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
421 reg_args = (struct register_args *) stack;
422 argp = stack + sizeof (struct register_args);
423
424 gprcount = ssecount = 0;
425
426 /* If the return value is passed in memory, add the pointer as the
427 first integer argument. */
428 if (ret_in_memory)
429 reg_args->gpr[gprcount++] = (long) rvalue;
430
431 avn = cif->nargs;
432 arg_types = cif->arg_types;
433
434 for (i = 0; i < avn; ++i)
435 {
436 size_t size = arg_types[i]->size;
437 int n;
438
439 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
440 if (n == 0
441 || gprcount + ngpr > MAX_GPR_REGS
442 || ssecount + nsse > MAX_SSE_REGS)
443 {
444 long align = arg_types[i]->alignment;
445
446 /* Stack arguments are *always* at least 8 byte aligned. */
447 if (align < 8)
448 align = 8;
449
450 /* Pass this argument in memory. */
451 argp = (void *) ALIGN (argp, align);
452 memcpy (argp, avalue[i], size);
453 argp += size;
454 }
455 else
456 {
457 /* The argument is passed entirely in registers. */
458 char *a = (char *) avalue[i];
459 int j;
460
461 for (j = 0; j < n; j++, a += 8, size -= 8)
462 {
463 switch (classes[j])
464 {
465 case X86_64_INTEGER_CLASS:
466 case X86_64_INTEGERSI_CLASS:
467 reg_args->gpr[gprcount] = 0;
468 memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
469 gprcount++;
470 break;
471 case X86_64_SSE_CLASS:
472 case X86_64_SSEDF_CLASS:
473 reg_args->sse[ssecount++] = *(UINT64 *) a;
474 break;
475 case X86_64_SSESF_CLASS:
476 reg_args->sse[ssecount++] = *(UINT32 *) a;
477 break;
478 default:
479 abort();
480 }
481 }
482 }
483 }
484
485 ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
486 cif->flags, rvalue, fn, ssecount);
487 }
488
489
490 extern void ffi_closure_unix64(void);
491
492 ffi_status
493 ffi_prep_closure_loc (ffi_closure* closure,
494 ffi_cif* cif,
495 void (*fun)(ffi_cif*, void*, void**, void*),
496 void *user_data,
497 void *codeloc)
498 {
499 volatile unsigned short *tramp;
500
501 tramp = (volatile unsigned short *) &closure->tramp[0];
502
503 tramp[0] = 0xbb49; /* mov <code>, %r11 */
504 *(void * volatile *) &tramp[1] = ffi_closure_unix64;
505 tramp[5] = 0xba49; /* mov <data>, %r10 */
506 *(void * volatile *) &tramp[6] = codeloc;
507
508 /* Set the carry bit iff the function uses any sse registers.
509 This is clc or stc, together with the first byte of the jmp. */
510 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
511
512 tramp[11] = 0xe3ff; /* jmp *%r11 */
513
514 closure->cif = cif;
515 closure->fun = fun;
516 closure->user_data = user_data;
517
518 return FFI_OK;
519 }
520
521 int
522 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
523 struct register_args *reg_args, char *argp)
524 {
525 ffi_cif *cif;
526 void **avalue;
527 ffi_type **arg_types;
528 long i, avn;
529 int gprcount, ssecount, ngpr, nsse;
530 int ret;
531
532 cif = closure->cif;
533 avalue = alloca(cif->nargs * sizeof(void *));
534 gprcount = ssecount = 0;
535
536 ret = cif->rtype->type;
537 if (ret != FFI_TYPE_VOID)
538 {
539 enum x86_64_reg_class classes[MAX_CLASSES];
540 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
541 if (n == 0)
542 {
543 /* The return value goes in memory. Arrange for the closure
544 return value to go directly back to the original caller. */
545 rvalue = (void *) reg_args->gpr[gprcount++];
546 /* We don't have to do anything in asm for the return. */
547 ret = FFI_TYPE_VOID;
548 }
549 else if (ret == FFI_TYPE_STRUCT && n == 2)
550 {
551 /* Mark which register the second word of the structure goes in. */
552 _Bool sse0 = SSE_CLASS_P (classes[0]);
553 _Bool sse1 = SSE_CLASS_P (classes[1]);
554 if (!sse0 && sse1)
555 ret |= 1 << 8;
556 else if (sse0 && !sse1)
557 ret |= 1 << 9;
558 }
559 }
560
561 avn = cif->nargs;
562 arg_types = cif->arg_types;
563
564 for (i = 0; i < avn; ++i)
565 {
566 enum x86_64_reg_class classes[MAX_CLASSES];
567 int n;
568
569 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
570 if (n == 0
571 || gprcount + ngpr > MAX_GPR_REGS
572 || ssecount + nsse > MAX_SSE_REGS)
573 {
574 long align = arg_types[i]->alignment;
575
576 /* Stack arguments are *always* at least 8 byte aligned. */
577 if (align < 8)
578 align = 8;
579
580 /* Pass this argument in memory. */
581 argp = (void *) ALIGN (argp, align);
582 avalue[i] = argp;
583 argp += arg_types[i]->size;
584 }
585 /* If the argument is in a single register, or two consecutive
586 integer registers, then we can use that address directly. */
587 else if (n == 1
588 || (n == 2 && !(SSE_CLASS_P (classes[0])
589 || SSE_CLASS_P (classes[1]))))
590 {
591 /* The argument is in a single register. */
592 if (SSE_CLASS_P (classes[0]))
593 {
594 avalue[i] = &reg_args->sse[ssecount];
595 ssecount += n;
596 }
597 else
598 {
599 avalue[i] = &reg_args->gpr[gprcount];
600 gprcount += n;
601 }
602 }
603 /* Otherwise, allocate space to make them consecutive. */
604 else
605 {
606 char *a = alloca (16);
607 int j;
608
609 avalue[i] = a;
610 for (j = 0; j < n; j++, a += 8)
611 {
612 if (SSE_CLASS_P (classes[j]))
613 memcpy (a, &reg_args->sse[ssecount++], 8);
614 else
615 memcpy (a, &reg_args->gpr[gprcount++], 8);
616 }
617 }
618 }
619
620 /* Invoke the closure. */
621 closure->fun (cif, rvalue, avalue, closure->user_data);
622
623 /* Tell assembly how to perform return type promotions. */
624 return ret;
625 }
626
627 #endif /* __x86_64__ */