add (c) Lennart Augustsson (part of #740)
[ghc.git] / rts / StgPrimFloat.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) Lennart Augustsson
4 * (c) The GHC Team, 1998-2000
5 *
6 * Miscellaneous support for floating-point primitives
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #include "PosixSource.h"
11 #include "Rts.h"
12
13 #include <math.h>
14
15 /*
16 * Encoding and decoding Doubles. Code based on the HBC code
17 * (lib/fltcode.c).
18 */
19
20 #ifdef _SHORT_LIMB
21 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
22 #else
23 #ifdef _LONG_LONG_LIMB
24 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
25 #else
26 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
27 #endif
28 #endif
29
30 #if SIZEOF_LIMB_T == 4
31 #define GMP_BASE 4294967296.0
32 #elif SIZEOF_LIMB_T == 8
33 #define GMP_BASE 18446744073709551616.0
34 #else
35 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
36 #endif
37
38 #define DNBIGIT ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
39 #define FNBIGIT ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
40
41 #if IEEE_FLOATING_POINT
42 #define MY_DMINEXP ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
43 /* DMINEXP is defined in values.h on Linux (for example) */
44 #define DHIGHBIT 0x00100000
45 #define DMSBIT 0x80000000
46
47 #define MY_FMINEXP ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
48 #define FHIGHBIT 0x00800000
49 #define FMSBIT 0x80000000
50 #endif
51
52 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
53 #define L 1
54 #define H 0
55 #else
56 #define L 0
57 #define H 1
58 #endif
59
60 #define __abs(a) (( (a) >= 0 ) ? (a) : (-(a)))
61
62 StgDouble
63 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
64 {
65 StgDouble r;
66 const mp_limb_t *const arr = (const mp_limb_t *)ba;
67 I_ i;
68
69 /* Convert MP_INT to a double; knows a lot about internal rep! */
70 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
71 r = (r * GMP_BASE) + arr[i];
72
73 /* Now raise to the exponent */
74 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
75 r = ldexp(r, e);
76
77 /* sign is encoded in the size */
78 if (size < 0)
79 r = -r;
80
81 return r;
82 }
83
84 StgDouble
85 __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e)
86 {
87 StgDouble r;
88
89 /* assuming 32 bit ints */
90 ASSERT(sizeof(int ) == 4 );
91
92 r = (StgDouble)((unsigned int)j_high);
93 r *= 4294967296.0; /* exp2f(32); */
94 r += (StgDouble)((unsigned int)j_low);
95
96 /* Now raise to the exponent */
97 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
98 r = ldexp(r, e);
99
100 /* sign is encoded in the size */
101 if (j_high < 0)
102 r = -r;
103
104 return r;
105 }
106
107 /* Special version for words */
108 StgDouble
109 __word_encodeDouble (W_ j, I_ e)
110 {
111 StgDouble r;
112
113 r = (StgDouble)j;
114
115 /* Now raise to the exponent */
116 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
117 r = ldexp(r, e);
118
119 return r;
120 }
121
122 /* Special version for small Integers */
123 StgDouble
124 __int_encodeDouble (I_ j, I_ e)
125 {
126 StgDouble r;
127
128 r = (StgDouble)__abs(j);
129
130 /* Now raise to the exponent */
131 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
132 r = ldexp(r, e);
133
134 /* sign is encoded in the size */
135 if (j < 0)
136 r = -r;
137
138 return r;
139 }
140
141 StgFloat
142 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
143 {
144 StgFloat r;
145 const mp_limb_t *arr = (const mp_limb_t *)ba;
146 I_ i;
147
148 /* Convert MP_INT to a float; knows a lot about internal rep! */
149 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
150 r = (r * GMP_BASE) + arr[i];
151
152 /* Now raise to the exponent */
153 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
154 r = ldexp(r, e);
155
156 /* sign is encoded in the size */
157 if (size < 0)
158 r = -r;
159
160 return r;
161 }
162
163 /* Special version for small Integers */
164 StgFloat
165 __int_encodeFloat (I_ j, I_ e)
166 {
167 StgFloat r;
168
169 r = (StgFloat)__abs(j);
170
171 /* Now raise to the exponent */
172 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
173 r = ldexp(r, e);
174
175 /* sign is encoded in the size */
176 if (j < 0)
177 r = -r;
178
179 return r;
180 }
181
182 /* Special version for small positive Integers */
183 StgFloat
184 __word_encodeFloat (W_ j, I_ e)
185 {
186 StgFloat r;
187
188 r = (StgFloat)j;
189
190 /* Now raise to the exponent */
191 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
192 r = ldexp(r, e);
193
194 return r;
195 }
196
197 /* This only supports IEEE floating point */
198
199 void
200 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
201 {
202 /* Do some bit fiddling on IEEE */
203 unsigned int low, high; /* assuming 32 bit ints */
204 int sign, iexp;
205 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
206
207 ASSERT(sizeof(unsigned int ) == 4 );
208 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
209 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
210 ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
211
212 u.d = dbl; /* grab chunks of the double */
213 low = u.i[L];
214 high = u.i[H];
215
216 /* we know the MP_INT* passed in has size zero, so we realloc
217 no matter what.
218 */
219 man->_mp_alloc = DNBIGIT;
220
221 if (low == 0 && (high & ~DMSBIT) == 0) {
222 man->_mp_size = 0;
223 *exp = 0L;
224 } else {
225 man->_mp_size = DNBIGIT;
226 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
227 sign = high;
228
229 high &= DHIGHBIT-1;
230 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
231 high |= DHIGHBIT;
232 else {
233 iexp++;
234 /* A denorm, normalize the mantissa */
235 while (! (high & DHIGHBIT)) {
236 high <<= 1;
237 if (low & DMSBIT)
238 high++;
239 low <<= 1;
240 iexp--;
241 }
242 }
243 *exp = (I_) iexp;
244 #if DNBIGIT == 2
245 man->_mp_d[0] = (mp_limb_t)low;
246 man->_mp_d[1] = (mp_limb_t)high;
247 #else
248 #if DNBIGIT == 1
249 man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
250 #else
251 #error Cannot cope with DNBIGIT
252 #endif
253 #endif
254 if (sign < 0)
255 man->_mp_size = -man->_mp_size;
256 }
257 }
258
259 void
260 __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, StgDouble dbl)
261 {
262 /* Do some bit fiddling on IEEE */
263 unsigned int low, high; /* assuming 32 bit ints */
264 int sign, iexp;
265 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
266
267 ASSERT(sizeof(unsigned int ) == 4 );
268 ASSERT(sizeof(dbl ) == 8 );
269 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
270
271 u.d = dbl; /* grab chunks of the double */
272 low = u.i[L];
273 high = u.i[H];
274
275 if (low == 0 && (high & ~DMSBIT) == 0) {
276 *man_low = 0;
277 *man_high = 0;
278 *exp = 0L;
279 } else {
280 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
281 sign = high;
282
283 high &= DHIGHBIT-1;
284 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
285 high |= DHIGHBIT;
286 else {
287 iexp++;
288 /* A denorm, normalize the mantissa */
289 while (! (high & DHIGHBIT)) {
290 high <<= 1;
291 if (low & DMSBIT)
292 high++;
293 low <<= 1;
294 iexp--;
295 }
296 }
297 *exp = (I_) iexp;
298 *man_low = low;
299 *man_high = high;
300 *man_sign = (sign < 0) ? -1 : 1;
301 }
302 }
303
304 void
305 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
306 {
307 /* Do some bit fiddling on IEEE */
308 int high, sign; /* assuming 32 bit ints */
309 union { float f; int i; } u; /* assuming 32 bit float and int */
310
311 ASSERT(sizeof(int ) == 4 );
312 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
313 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
314 ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
315
316 u.f = flt; /* grab the float */
317 high = u.i;
318
319 /* we know the MP_INT* passed in has size zero, so we realloc
320 no matter what.
321 */
322 man->_mp_alloc = FNBIGIT;
323
324 if ((high & ~FMSBIT) == 0) {
325 man->_mp_size = 0;
326 *exp = 0;
327 } else {
328 man->_mp_size = FNBIGIT;
329 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
330 sign = high;
331
332 high &= FHIGHBIT-1;
333 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
334 high |= FHIGHBIT;
335 else {
336 (*exp)++;
337 /* A denorm, normalize the mantissa */
338 while (! (high & FHIGHBIT)) {
339 high <<= 1;
340 (*exp)--;
341 }
342 }
343 #if FNBIGIT == 1
344 man->_mp_d[0] = (mp_limb_t)high;
345 #else
346 #error Cannot cope with FNBIGIT
347 #endif
348 if (sign < 0)
349 man->_mp_size = -man->_mp_size;
350 }
351 }
352
/* Note: the convenient union types for checking the layout of IEEE 754
   types (stg_ieee754_flt and stg_ieee754_dbl, based on defs in GNU libc
   <ieee754.h>) are defined further down, after __decodeFloat_Int.
*/
356
357 void
358 __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
359 {
360 /* Do some bit fiddling on IEEE */
361 int high, sign; /* assuming 32 bit ints */
362 union { float f; int i; } u; /* assuming 32 bit float and int */
363
364 ASSERT(sizeof(int ) == 4 );
365 ASSERT(sizeof(flt ) == 4 );
366 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
367
368 u.f = flt; /* grab the float */
369 high = u.i;
370
371 if ((high & ~FMSBIT) == 0) {
372 *man = 0;
373 *exp = 0;
374 } else {
375 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
376 sign = high;
377
378 high &= FHIGHBIT-1;
379 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
380 high |= FHIGHBIT;
381 else {
382 (*exp)++;
383 /* A denorm, normalize the mantissa */
384 while (! (high & FHIGHBIT)) {
385 high <<= 1;
386 (*exp)--;
387 }
388 }
389 *man = high;
390 if (sign < 0)
391 *man = - *man;
392 }
393 }
394
/*
 * Bit-field view of a single precision float.  The field order is
 * reversed when WORDS_BIGENDIAN is set so that the names line up with
 * the same bits of the value either way.
 */
union stg_ieee754_flt
{
    float f;

    /* Plain view: sign, 8-bit exponent, 23-bit mantissa. */
    struct {
#if WORDS_BIGENDIAN
        unsigned negative : 1;
        unsigned exponent : 8;
        unsigned mantissa : 23;
#else
        unsigned mantissa : 23;
        unsigned exponent : 8;
        unsigned negative : 1;
#endif
    } ieee;

    /* NaN view: the top mantissa bit is split off as quiet_nan. */
    struct {
#if WORDS_BIGENDIAN
        unsigned negative  : 1;
        unsigned exponent  : 8;
        unsigned quiet_nan : 1;
        unsigned mantissa  : 22;
#else
        unsigned mantissa  : 22;
        unsigned quiet_nan : 1;
        unsigned exponent  : 8;
        unsigned negative  : 1;
#endif
    } ieee_nan;
};
425
426 /*
427
428 To recap, here's the representation of a double precision
429 IEEE floating point number:
430
431 sign 63 sign bit (0==positive, 1==negative)
432 exponent 62-52 exponent (biased by 1023)
433 fraction 51-0 fraction (bits to right of binary point)
434 */
435
/*
 * Bit-field view of a double precision float.  mantissa0 holds the upper
 * mantissa bits and mantissa1 the lower 32.  Three layouts are provided,
 * chosen by WORDS_BIGENDIAN and FLOAT_WORDS_BIGENDIAN.
 */
union stg_ieee754_dbl
{
    double d;

    /* Plain view: sign, 11-bit exponent, 20 + 32 mantissa bits. */
    struct {
#if WORDS_BIGENDIAN
        unsigned negative  : 1;
        unsigned exponent  : 11;
        unsigned mantissa0 : 20;
        unsigned mantissa1 : 32;
#elif FLOAT_WORDS_BIGENDIAN
        unsigned mantissa0 : 20;
        unsigned exponent  : 11;
        unsigned negative  : 1;
        unsigned mantissa1 : 32;
#else
        unsigned mantissa1 : 32;
        unsigned mantissa0 : 20;
        unsigned exponent  : 11;
        unsigned negative  : 1;
#endif
    } ieee;

    /* This format makes it easier to see if a NaN is a signalling NaN:
       the top mantissa bit is split off as quiet_nan. */
    struct {
#if WORDS_BIGENDIAN
        unsigned negative  : 1;
        unsigned exponent  : 11;
        unsigned quiet_nan : 1;
        unsigned mantissa0 : 19;
        unsigned mantissa1 : 32;
#elif FLOAT_WORDS_BIGENDIAN
        unsigned mantissa0 : 19;
        unsigned quiet_nan : 1;
        unsigned exponent  : 11;
        unsigned negative  : 1;
        unsigned mantissa1 : 32;
#else
        unsigned mantissa1 : 32;
        unsigned mantissa0 : 19;
        unsigned quiet_nan : 1;
        unsigned exponent  : 11;
        unsigned negative  : 1;
#endif
    } ieee_nan;
};
486
487 /*
488 * Predicates for testing for extreme IEEE fp values. Used
489 * by the bytecode evaluator and the Prelude.
490 *
491 */
492
/* In case you don't support IEEE, you'll just get dummy defs. */
494 #ifdef IEEE_FLOATING_POINT
495
496 StgInt
497 isDoubleNaN(StgDouble d)
498 {
499 union stg_ieee754_dbl u;
500
501 u.d = d;
502
503 return (
504 u.ieee.exponent == 2047 /* 2^11 - 1 */ && /* Is the exponent all ones? */
505 (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
506 /* and the mantissa non-zero? */
507 );
508 }
509
510 StgInt
511 isDoubleInfinite(StgDouble d)
512 {
513 union stg_ieee754_dbl u;
514
515 u.d = d;
516
517 /* Inf iff exponent is all ones, mantissa all zeros */
518 return (
519 u.ieee.exponent == 2047 /* 2^11 - 1 */ &&
520 u.ieee.mantissa0 == 0 &&
521 u.ieee.mantissa1 == 0
522 );
523 }
524
525 StgInt
526 isDoubleDenormalized(StgDouble d)
527 {
528 union stg_ieee754_dbl u;
529
530 u.d = d;
531
532 /* A (single/double/quad) precision floating point number
533 is denormalised iff:
534 - exponent is zero
535 - mantissa is non-zero.
536 - (don't care about setting of sign bit.)
537
538 */
539 return (
540 u.ieee.exponent == 0 &&
541 (u.ieee.mantissa0 != 0 ||
542 u.ieee.mantissa1 != 0)
543 );
544
545 }
546
547 StgInt
548 isDoubleNegativeZero(StgDouble d)
549 {
550 union stg_ieee754_dbl u;
551
552 u.d = d;
553 /* sign (bit 63) set (only) => negative zero */
554
555 return (
556 u.ieee.negative == 1 &&
557 u.ieee.exponent == 0 &&
558 u.ieee.mantissa0 == 0 &&
559 u.ieee.mantissa1 == 0);
560 }
561
562 /* Same tests, this time for StgFloats. */
563
564 /*
565 To recap, here's the representation of a single precision
566 IEEE floating point number:
567
568 sign 31 sign bit (0 == positive, 1 == negative)
569 exponent 30-23 exponent (biased by 127)
570 fraction 22-0 fraction (bits to right of binary point)
571 */
572
573
574 StgInt
575 isFloatNaN(StgFloat f)
576 {
577 union stg_ieee754_flt u;
578 u.f = f;
579
580 /* Floating point NaN iff exponent is all ones, mantissa is
581 non-zero (but see below.) */
582 return (
583 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
584 u.ieee.mantissa != 0);
585 }
586
587 StgInt
588 isFloatInfinite(StgFloat f)
589 {
590 union stg_ieee754_flt u;
591 u.f = f;
592
593 /* A float is Inf iff exponent is max (all ones),
594 and mantissa is min(all zeros.) */
595 return (
596 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
597 u.ieee.mantissa == 0);
598 }
599
600 StgInt
601 isFloatDenormalized(StgFloat f)
602 {
603 union stg_ieee754_flt u;
604 u.f = f;
605
606 /* A (single/double/quad) precision floating point number
607 is denormalised iff:
608 - exponent is zero
609 - mantissa is non-zero.
610 - (don't care about setting of sign bit.)
611
612 */
613 return (
614 u.ieee.exponent == 0 &&
615 u.ieee.mantissa != 0);
616 }
617
618 StgInt
619 isFloatNegativeZero(StgFloat f)
620 {
621 union stg_ieee754_flt u;
622 u.f = f;
623
624 /* sign (bit 31) set (only) => negative zero */
625 return (
626 u.ieee.negative &&
627 u.ieee.exponent == 0 &&
628 u.ieee.mantissa == 0);
629 }
630
631 #else /* ! IEEE_FLOATING_POINT */
632
633 /* Dummy definitions of predicates - they all return false */
634 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
635 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
636 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
637 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
638 StgInt isFloatNaN(f) StgFloat f; { return 0; }
639 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
640 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
641 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
642
643 #endif /* ! IEEE_FLOATING_POINT */