Fix for sunpro compiler on Solaris
[libffi.git] / src / x86 / ffi64.c
1 /* -----------------------------------------------------------------------
2 ffi64.c - Copyright (c) 2013 The Written Word, Inc.
3 Copyright (c) 2011 Anthony Green
4 Copyright (c) 2008, 2010 Red Hat, Inc.
5 Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
6
7 x86-64 Foreign Function Interface
8
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 ``Software''), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
16
17 The above copyright notice and this permission notice shall be included
18 in all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 DEALINGS IN THE SOFTWARE.
28 ----------------------------------------------------------------------- */
29
30 #include <ffi.h>
31 #include <ffi_common.h>
32
33 #include <stdlib.h>
34 #include <stdarg.h>
35
36 #ifdef __x86_64__
37
38 #define MAX_GPR_REGS 6
39 #define MAX_SSE_REGS 8
40
41 #if defined(__INTEL_COMPILER)
42 #define UINT128 __m128
43 #else
44 #if defined(__SUNPRO_C)
45 #include <sunmedia_types.h>
46 #define UINT128 __m128i
47 #else
48 #define UINT128 __int128_t
49 #endif
50 #endif
51
52 union big_int_union
53 {
54 UINT32 i32;
55 UINT64 i64;
56 UINT128 i128;
57 };
58
59 struct register_args
60 {
61 /* Registers for argument passing. */
62 UINT64 gpr[MAX_GPR_REGS];
63 union big_int_union sse[MAX_SSE_REGS];
64 };
65
66 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
67 void *raddr, void (*fnaddr)(void), unsigned ssecount);
68
69 /* All reference to register classes here is identical to the code in
70 gcc/config/i386/i386.c. Do *not* change one without the other. */
71
72 /* Register class used for passing given 64bit part of the argument.
73 These represent classes as documented by the PS ABI, with the
74 exception of SSESF, SSEDF classes, that are basically SSE class,
75 just gcc will use SF or DFmode move instead of DImode to avoid
76 reformatting penalties.
77
78 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
79 whenever possible (upper half does contain padding). */
/* Register classes assigned to each eightbyte of an argument.  The
   enumerator order is significant: SSE_CLASS_P relies on the four SSE
   classes being contiguous, from X86_64_SSE_CLASS to X86_64_SSEUP_CLASS.  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,            /* Padding / nothing classified yet.  */
    X86_64_INTEGER_CLASS,       /* General purpose register, 64-bit move.  */
    X86_64_INTEGERSI_CLASS,     /* General purpose register, 32-bit move.  */
    X86_64_SSE_CLASS,           /* SSE register.  */
    X86_64_SSESF_CLASS,         /* SSE register holding a single float.  */
    X86_64_SSEDF_CLASS,         /* SSE register holding a double.  */
    X86_64_SSEUP_CLASS,         /* Upper part of the preceding SSE class.  */
    X86_64_X87_CLASS,           /* x87 long double, low part.  */
    X86_64_X87UP_CLASS,         /* x87 long double, high part.  */
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS         /* Passed in memory.  */
  };

/* Maximum number of eightbytes (and thus classes) recorded for one
   argument.  */
#define MAX_CLASSES 4

/* Non-zero iff X is one of the SSE register classes.  Both uses of the
   argument are parenthesized so that any expression may be passed.  */
#define SSE_CLASS_P(X) \
  ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
98
99 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
100 of this code is to classify each 8bytes of incoming argument by the register
101 class and assign registers accordingly. */
102
103 /* Return the union class of CLASS1 and CLASS2.
104 See the x86-64 PS ABI for details. */
105
106 static enum x86_64_reg_class
107 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
108 {
109 /* Rule #1: If both classes are equal, this is the resulting class. */
110 if (class1 == class2)
111 return class1;
112
113 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
114 the other class. */
115 if (class1 == X86_64_NO_CLASS)
116 return class2;
117 if (class2 == X86_64_NO_CLASS)
118 return class1;
119
120 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
121 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
122 return X86_64_MEMORY_CLASS;
123
124 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
125 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
126 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
127 return X86_64_INTEGERSI_CLASS;
128 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
129 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
130 return X86_64_INTEGER_CLASS;
131
132 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
133 MEMORY is used. */
134 if (class1 == X86_64_X87_CLASS
135 || class1 == X86_64_X87UP_CLASS
136 || class1 == X86_64_COMPLEX_X87_CLASS
137 || class2 == X86_64_X87_CLASS
138 || class2 == X86_64_X87UP_CLASS
139 || class2 == X86_64_COMPLEX_X87_CLASS)
140 return X86_64_MEMORY_CLASS;
141
142 /* Rule #6: Otherwise class SSE is used. */
143 return X86_64_SSE_CLASS;
144 }
145
146 /* Classify the argument of type TYPE and mode MODE.
147 CLASSES will be filled by the register class used to pass each word
148 of the operand. The number of words is returned. In case the parameter
149 should be passed in memory, 0 is returned. As a special case for zero
150 sized containers, classes[0] will be NO_CLASS and 1 is returned.
151
152 See the x86-64 PS ABI for details.
153 */
154 static int
155 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
156 size_t byte_offset)
157 {
158 switch (type->type)
159 {
160 case FFI_TYPE_UINT8:
161 case FFI_TYPE_SINT8:
162 case FFI_TYPE_UINT16:
163 case FFI_TYPE_SINT16:
164 case FFI_TYPE_UINT32:
165 case FFI_TYPE_SINT32:
166 case FFI_TYPE_UINT64:
167 case FFI_TYPE_SINT64:
168 case FFI_TYPE_POINTER:
169 {
170 int size = byte_offset + type->size;
171
172 if (size <= 4)
173 {
174 classes[0] = X86_64_INTEGERSI_CLASS;
175 return 1;
176 }
177 else if (size <= 8)
178 {
179 classes[0] = X86_64_INTEGER_CLASS;
180 return 1;
181 }
182 else if (size <= 12)
183 {
184 classes[0] = X86_64_INTEGER_CLASS;
185 classes[1] = X86_64_INTEGERSI_CLASS;
186 return 2;
187 }
188 else if (size <= 16)
189 {
190 classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
191 return 2;
192 }
193 else
194 FFI_ASSERT (0);
195 }
196 case FFI_TYPE_FLOAT:
197 if (!(byte_offset % 8))
198 classes[0] = X86_64_SSESF_CLASS;
199 else
200 classes[0] = X86_64_SSE_CLASS;
201 return 1;
202 case FFI_TYPE_DOUBLE:
203 classes[0] = X86_64_SSEDF_CLASS;
204 return 1;
205 case FFI_TYPE_LONGDOUBLE:
206 classes[0] = X86_64_X87_CLASS;
207 classes[1] = X86_64_X87UP_CLASS;
208 return 2;
209 case FFI_TYPE_STRUCT:
210 {
211 const int UNITS_PER_WORD = 8;
212 int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
213 ffi_type **ptr;
214 int i;
215 enum x86_64_reg_class subclasses[MAX_CLASSES];
216
217 /* If the struct is larger than 32 bytes, pass it on the stack. */
218 if (type->size > 32)
219 return 0;
220
221 for (i = 0; i < words; i++)
222 classes[i] = X86_64_NO_CLASS;
223
224 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
225 signalize memory class, so handle it as special case. */
226 if (!words)
227 {
228 classes[0] = X86_64_NO_CLASS;
229 return 1;
230 }
231
232 /* Merge the fields of structure. */
233 for (ptr = type->elements; *ptr != NULL; ptr++)
234 {
235 int num;
236
237 byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
238
239 num = classify_argument (*ptr, subclasses, byte_offset % 8);
240 if (num == 0)
241 return 0;
242 for (i = 0; i < num; i++)
243 {
244 int pos = byte_offset / 8;
245 classes[i + pos] =
246 merge_classes (subclasses[i], classes[i + pos]);
247 }
248
249 byte_offset += (*ptr)->size;
250 }
251
252 if (words > 2)
253 {
254 /* When size > 16 bytes, if the first one isn't
255 X86_64_SSE_CLASS or any other ones aren't
256 X86_64_SSEUP_CLASS, everything should be passed in
257 memory. */
258 if (classes[0] != X86_64_SSE_CLASS)
259 return 0;
260
261 for (i = 1; i < words; i++)
262 if (classes[i] != X86_64_SSEUP_CLASS)
263 return 0;
264 }
265
266 /* Final merger cleanup. */
267 for (i = 0; i < words; i++)
268 {
269 /* If one class is MEMORY, everything should be passed in
270 memory. */
271 if (classes[i] == X86_64_MEMORY_CLASS)
272 return 0;
273
274 /* The X86_64_SSEUP_CLASS should be always preceded by
275 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
276 if (classes[i] == X86_64_SSEUP_CLASS
277 && classes[i - 1] != X86_64_SSE_CLASS
278 && classes[i - 1] != X86_64_SSEUP_CLASS)
279 {
280 /* The first one should never be X86_64_SSEUP_CLASS. */
281 FFI_ASSERT (i != 0);
282 classes[i] = X86_64_SSE_CLASS;
283 }
284
285 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
286 everything should be passed in memory. */
287 if (classes[i] == X86_64_X87UP_CLASS
288 && (classes[i - 1] != X86_64_X87_CLASS))
289 {
290 /* The first one should never be X86_64_X87UP_CLASS. */
291 FFI_ASSERT (i != 0);
292 return 0;
293 }
294 }
295 return words;
296 }
297
298 default:
299 FFI_ASSERT(0);
300 }
301 return 0; /* Never reached. */
302 }
303
304 /* Examine the argument and return set number of register required in each
305 class. Return zero iff parameter should be passed in memory, otherwise
306 the number of registers. */
307
308 static int
309 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
310 _Bool in_return, int *pngpr, int *pnsse)
311 {
312 int i, n, ngpr, nsse;
313
314 n = classify_argument (type, classes, 0);
315 if (n == 0)
316 return 0;
317
318 ngpr = nsse = 0;
319 for (i = 0; i < n; ++i)
320 switch (classes[i])
321 {
322 case X86_64_INTEGER_CLASS:
323 case X86_64_INTEGERSI_CLASS:
324 ngpr++;
325 break;
326 case X86_64_SSE_CLASS:
327 case X86_64_SSESF_CLASS:
328 case X86_64_SSEDF_CLASS:
329 nsse++;
330 break;
331 case X86_64_NO_CLASS:
332 case X86_64_SSEUP_CLASS:
333 break;
334 case X86_64_X87_CLASS:
335 case X86_64_X87UP_CLASS:
336 case X86_64_COMPLEX_X87_CLASS:
337 return in_return != 0;
338 default:
339 abort ();
340 }
341
342 *pngpr = ngpr;
343 *pnsse = nsse;
344
345 return n;
346 }
347
348 /* Perform machine dependent cif processing. */
349
350 ffi_status
351 ffi_prep_cif_machdep (ffi_cif *cif)
352 {
353 int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
354 enum x86_64_reg_class classes[MAX_CLASSES];
355 size_t bytes;
356
357 gprcount = ssecount = 0;
358
359 flags = cif->rtype->type;
360 if (flags != FFI_TYPE_VOID)
361 {
362 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
363 if (n == 0)
364 {
365 /* The return value is passed in memory. A pointer to that
366 memory is the first argument. Allocate a register for it. */
367 gprcount++;
368 /* We don't have to do anything in asm for the return. */
369 flags = FFI_TYPE_VOID;
370 }
371 else if (flags == FFI_TYPE_STRUCT)
372 {
373 /* Mark which registers the result appears in. */
374 _Bool sse0 = SSE_CLASS_P (classes[0]);
375 _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
376 if (sse0 && !sse1)
377 flags |= 1 << 8;
378 else if (!sse0 && sse1)
379 flags |= 1 << 9;
380 else if (sse0 && sse1)
381 flags |= 1 << 10;
382 /* Mark the true size of the structure. */
383 flags |= cif->rtype->size << 12;
384 }
385 }
386
387 /* Go over all arguments and determine the way they should be passed.
388 If it's in a register and there is space for it, let that be so. If
389 not, add it's size to the stack byte count. */
390 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
391 {
392 if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
393 || gprcount + ngpr > MAX_GPR_REGS
394 || ssecount + nsse > MAX_SSE_REGS)
395 {
396 long align = cif->arg_types[i]->alignment;
397
398 if (align < 8)
399 align = 8;
400
401 bytes = ALIGN (bytes, align);
402 bytes += cif->arg_types[i]->size;
403 }
404 else
405 {
406 gprcount += ngpr;
407 ssecount += nsse;
408 }
409 }
410 if (ssecount)
411 flags |= 1 << 11;
412 cif->flags = flags;
413 cif->bytes = ALIGN (bytes, 8);
414
415 return FFI_OK;
416 }
417
418 void
419 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
420 {
421 enum x86_64_reg_class classes[MAX_CLASSES];
422 char *stack, *argp;
423 ffi_type **arg_types;
424 int gprcount, ssecount, ngpr, nsse, i, avn;
425 _Bool ret_in_memory;
426 struct register_args *reg_args;
427
428 /* Can't call 32-bit mode from 64-bit mode. */
429 FFI_ASSERT (cif->abi == FFI_UNIX64);
430
431 /* If the return value is a struct and we don't have a return value
432 address then we need to make one. Note the setting of flags to
433 VOID above in ffi_prep_cif_machdep. */
434 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
435 && (cif->flags & 0xff) == FFI_TYPE_VOID);
436 if (rvalue == NULL && ret_in_memory)
437 rvalue = alloca (cif->rtype->size);
438
439 /* Allocate the space for the arguments, plus 4 words of temp space. */
440 stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
441 reg_args = (struct register_args *) stack;
442 argp = stack + sizeof (struct register_args);
443
444 gprcount = ssecount = 0;
445
446 /* If the return value is passed in memory, add the pointer as the
447 first integer argument. */
448 if (ret_in_memory)
449 reg_args->gpr[gprcount++] = (unsigned long) rvalue;
450
451 avn = cif->nargs;
452 arg_types = cif->arg_types;
453
454 for (i = 0; i < avn; ++i)
455 {
456 size_t size = arg_types[i]->size;
457 int n;
458
459 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
460 if (n == 0
461 || gprcount + ngpr > MAX_GPR_REGS
462 || ssecount + nsse > MAX_SSE_REGS)
463 {
464 long align = arg_types[i]->alignment;
465
466 /* Stack arguments are *always* at least 8 byte aligned. */
467 if (align < 8)
468 align = 8;
469
470 /* Pass this argument in memory. */
471 argp = (void *) ALIGN (argp, align);
472 memcpy (argp, avalue[i], size);
473 argp += size;
474 }
475 else
476 {
477 /* The argument is passed entirely in registers. */
478 char *a = (char *) avalue[i];
479 int j;
480
481 for (j = 0; j < n; j++, a += 8, size -= 8)
482 {
483 switch (classes[j])
484 {
485 case X86_64_INTEGER_CLASS:
486 case X86_64_INTEGERSI_CLASS:
487 reg_args->gpr[gprcount] = 0;
488 memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
489 gprcount++;
490 break;
491 case X86_64_SSE_CLASS:
492 case X86_64_SSEDF_CLASS:
493 reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
494 break;
495 case X86_64_SSESF_CLASS:
496 reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
497 break;
498 default:
499 abort();
500 }
501 }
502 }
503 }
504
505 ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
506 cif->flags, rvalue, fn, ssecount);
507 }
508
509
510 extern void ffi_closure_unix64(void);
511
512 ffi_status
513 ffi_prep_closure_loc (ffi_closure* closure,
514 ffi_cif* cif,
515 void (*fun)(ffi_cif*, void*, void**, void*),
516 void *user_data,
517 void *codeloc)
518 {
519 volatile unsigned short *tramp;
520
521 /* Sanity check on the cif ABI. */
522 {
523 int abi = cif->abi;
524 if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
525 return FFI_BAD_ABI;
526 }
527
528 tramp = (volatile unsigned short *) &closure->tramp[0];
529
530 tramp[0] = 0xbb49; /* mov <code>, %r11 */
531 *((unsigned long long * volatile) &tramp[1])
532 = (unsigned long) ffi_closure_unix64;
533 tramp[5] = 0xba49; /* mov <data>, %r10 */
534 *((unsigned long long * volatile) &tramp[6])
535 = (unsigned long) codeloc;
536
537 /* Set the carry bit iff the function uses any sse registers.
538 This is clc or stc, together with the first byte of the jmp. */
539 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
540
541 tramp[11] = 0xe3ff; /* jmp *%r11 */
542
543 closure->cif = cif;
544 closure->fun = fun;
545 closure->user_data = user_data;
546
547 return FFI_OK;
548 }
549
550 int
551 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
552 struct register_args *reg_args, char *argp)
553 {
554 ffi_cif *cif;
555 void **avalue;
556 ffi_type **arg_types;
557 long i, avn;
558 int gprcount, ssecount, ngpr, nsse;
559 int ret;
560
561 cif = closure->cif;
562 avalue = alloca(cif->nargs * sizeof(void *));
563 gprcount = ssecount = 0;
564
565 ret = cif->rtype->type;
566 if (ret != FFI_TYPE_VOID)
567 {
568 enum x86_64_reg_class classes[MAX_CLASSES];
569 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
570 if (n == 0)
571 {
572 /* The return value goes in memory. Arrange for the closure
573 return value to go directly back to the original caller. */
574 rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
575 /* We don't have to do anything in asm for the return. */
576 ret = FFI_TYPE_VOID;
577 }
578 else if (ret == FFI_TYPE_STRUCT && n == 2)
579 {
580 /* Mark which register the second word of the structure goes in. */
581 _Bool sse0 = SSE_CLASS_P (classes[0]);
582 _Bool sse1 = SSE_CLASS_P (classes[1]);
583 if (!sse0 && sse1)
584 ret |= 1 << 8;
585 else if (sse0 && !sse1)
586 ret |= 1 << 9;
587 }
588 }
589
590 avn = cif->nargs;
591 arg_types = cif->arg_types;
592
593 for (i = 0; i < avn; ++i)
594 {
595 enum x86_64_reg_class classes[MAX_CLASSES];
596 int n;
597
598 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
599 if (n == 0
600 || gprcount + ngpr > MAX_GPR_REGS
601 || ssecount + nsse > MAX_SSE_REGS)
602 {
603 long align = arg_types[i]->alignment;
604
605 /* Stack arguments are *always* at least 8 byte aligned. */
606 if (align < 8)
607 align = 8;
608
609 /* Pass this argument in memory. */
610 argp = (void *) ALIGN (argp, align);
611 avalue[i] = argp;
612 argp += arg_types[i]->size;
613 }
614 /* If the argument is in a single register, or two consecutive
615 integer registers, then we can use that address directly. */
616 else if (n == 1
617 || (n == 2 && !(SSE_CLASS_P (classes[0])
618 || SSE_CLASS_P (classes[1]))))
619 {
620 /* The argument is in a single register. */
621 if (SSE_CLASS_P (classes[0]))
622 {
623 avalue[i] = &reg_args->sse[ssecount];
624 ssecount += n;
625 }
626 else
627 {
628 avalue[i] = &reg_args->gpr[gprcount];
629 gprcount += n;
630 }
631 }
632 /* Otherwise, allocate space to make them consecutive. */
633 else
634 {
635 char *a = alloca (16);
636 int j;
637
638 avalue[i] = a;
639 for (j = 0; j < n; j++, a += 8)
640 {
641 if (SSE_CLASS_P (classes[j]))
642 memcpy (a, &reg_args->sse[ssecount++], 8);
643 else
644 memcpy (a, &reg_args->gpr[gprcount++], 8);
645 }
646 }
647 }
648
649 /* Invoke the closure. */
650 closure->fun (cif, rvalue, avalue, closure->user_data);
651
652 /* Tell assembly how to perform return type promotions. */
653 return ret;
654 }
655
656 #endif /* __x86_64__ */