Add some more generic (en|de)code(Double|Float) code
[ghc.git] / rts / StgPrimFloat.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Miscellaneous support for floating-point primitives
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include <math.h>
13
14 /*
15 * Encoding and decoding Doubles. Code based on the HBC code
16 * (lib/fltcode.c).
17 */
18
19 #ifdef _SHORT_LIMB
20 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
21 #else
22 #ifdef _LONG_LONG_LIMB
23 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
24 #else
25 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
26 #endif
27 #endif
28
29 #if SIZEOF_LIMB_T == 4
30 #define GMP_BASE 4294967296.0
31 #elif SIZEOF_LIMB_T == 8
32 #define GMP_BASE 18446744073709551616.0
33 #else
34 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
35 #endif
36
37 #define DNBIGIT ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
38 #define FNBIGIT ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
39
40 #if IEEE_FLOATING_POINT
41 #define MY_DMINEXP ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
42 /* DMINEXP is defined in values.h on Linux (for example) */
43 #define DHIGHBIT 0x00100000
44 #define DMSBIT 0x80000000
45
46 #define MY_FMINEXP ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
47 #define FHIGHBIT 0x00800000
48 #define FMSBIT 0x80000000
49 #endif
50
51 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
52 #define L 1
53 #define H 0
54 #else
55 #define L 0
56 #define H 1
57 #endif
58
59 #define __abs(a) (( (a) >= 0 ) ? (a) : (-(a)))
60
61 StgDouble
62 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
63 {
64 StgDouble r;
65 const mp_limb_t *const arr = (const mp_limb_t *)ba;
66 I_ i;
67
68 /* Convert MP_INT to a double; knows a lot about internal rep! */
69 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
70 r = (r * GMP_BASE) + arr[i];
71
72 /* Now raise to the exponent */
73 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
74 r = ldexp(r, e);
75
76 /* sign is encoded in the size */
77 if (size < 0)
78 r = -r;
79
80 return r;
81 }
82
83 StgDouble
84 __2Int_encodeDouble (I_ j_high, I_ j_low, I_ e)
85 {
86 StgDouble r;
87
88 /* assuming 32 bit ints */
89 ASSERT(sizeof(int ) == 4 );
90
91 r = (StgDouble)((unsigned int)j_high);
92 r *= exp2f(32);
93 r += (StgDouble)((unsigned int)j_low);
94
95 /* Now raise to the exponent */
96 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
97 r = ldexp(r, e);
98
99 /* sign is encoded in the size */
100 if (j_high < 0)
101 r = -r;
102
103 return r;
104 }
105
106 /* Special version for small Integers */
107 StgDouble
108 __int_encodeDouble (I_ j, I_ e)
109 {
110 StgDouble r;
111
112 r = (StgDouble)__abs(j);
113
114 /* Now raise to the exponent */
115 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
116 r = ldexp(r, e);
117
118 /* sign is encoded in the size */
119 if (j < 0)
120 r = -r;
121
122 return r;
123 }
124
125 StgFloat
126 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
127 {
128 StgFloat r;
129 const mp_limb_t *arr = (const mp_limb_t *)ba;
130 I_ i;
131
132 /* Convert MP_INT to a float; knows a lot about internal rep! */
133 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
134 r = (r * GMP_BASE) + arr[i];
135
136 /* Now raise to the exponent */
137 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
138 r = ldexp(r, e);
139
140 /* sign is encoded in the size */
141 if (size < 0)
142 r = -r;
143
144 return r;
145 }
146
147 /* Special version for small Integers */
148 StgFloat
149 __int_encodeFloat (I_ j, I_ e)
150 {
151 StgFloat r;
152
153 r = (StgFloat)__abs(j);
154
155 /* Now raise to the exponent */
156 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
157 r = ldexp(r, e);
158
159 /* sign is encoded in the size */
160 if (j < 0)
161 r = -r;
162
163 return r;
164 }
165
166 /* This only supports IEEE floating point */
167
168 void
169 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
170 {
171 /* Do some bit fiddling on IEEE */
172 unsigned int low, high; /* assuming 32 bit ints */
173 int sign, iexp;
174 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
175
176 ASSERT(sizeof(unsigned int ) == 4 );
177 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
178 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
179 ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
180
181 u.d = dbl; /* grab chunks of the double */
182 low = u.i[L];
183 high = u.i[H];
184
185 /* we know the MP_INT* passed in has size zero, so we realloc
186 no matter what.
187 */
188 man->_mp_alloc = DNBIGIT;
189
190 if (low == 0 && (high & ~DMSBIT) == 0) {
191 man->_mp_size = 0;
192 *exp = 0L;
193 } else {
194 man->_mp_size = DNBIGIT;
195 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
196 sign = high;
197
198 high &= DHIGHBIT-1;
199 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
200 high |= DHIGHBIT;
201 else {
202 iexp++;
203 /* A denorm, normalize the mantissa */
204 while (! (high & DHIGHBIT)) {
205 high <<= 1;
206 if (low & DMSBIT)
207 high++;
208 low <<= 1;
209 iexp--;
210 }
211 }
212 *exp = (I_) iexp;
213 #if DNBIGIT == 2
214 man->_mp_d[0] = (mp_limb_t)low;
215 man->_mp_d[1] = (mp_limb_t)high;
216 #else
217 #if DNBIGIT == 1
218 man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
219 #else
220 #error Cannot cope with DNBIGIT
221 #endif
222 #endif
223 if (sign < 0)
224 man->_mp_size = -man->_mp_size;
225 }
226 }
227
228 void
229 __decodeDouble_2Int (I_ *man_high, I_ *man_low, I_ *exp, StgDouble dbl)
230 {
231 /* Do some bit fiddling on IEEE */
232 unsigned int low, high; /* assuming 32 bit ints */
233 int sign, iexp;
234 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
235
236 ASSERT(sizeof(unsigned int ) == 4 );
237 ASSERT(sizeof(dbl ) == 8 );
238 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
239
240 u.d = dbl; /* grab chunks of the double */
241 low = u.i[L];
242 high = u.i[H];
243
244 if (low == 0 && (high & ~DMSBIT) == 0) {
245 *man_low = 0;
246 *man_high = 0;
247 *exp = 0L;
248 } else {
249 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
250 sign = high;
251
252 high &= DHIGHBIT-1;
253 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
254 high |= DHIGHBIT;
255 else {
256 iexp++;
257 /* A denorm, normalize the mantissa */
258 while (! (high & DHIGHBIT)) {
259 high <<= 1;
260 if (low & DMSBIT)
261 high++;
262 low <<= 1;
263 iexp--;
264 }
265 }
266 *exp = (I_) iexp;
267 *man_low = low;
268 *man_high = high;
269 if (sign < 0) {
270 *man_high = - *man_high;
271 }
272 }
273 }
274
275 void
276 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
277 {
278 /* Do some bit fiddling on IEEE */
279 int high, sign; /* assuming 32 bit ints */
280 union { float f; int i; } u; /* assuming 32 bit float and int */
281
282 ASSERT(sizeof(int ) == 4 );
283 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
284 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
285 ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
286
287 u.f = flt; /* grab the float */
288 high = u.i;
289
290 /* we know the MP_INT* passed in has size zero, so we realloc
291 no matter what.
292 */
293 man->_mp_alloc = FNBIGIT;
294
295 if ((high & ~FMSBIT) == 0) {
296 man->_mp_size = 0;
297 *exp = 0;
298 } else {
299 man->_mp_size = FNBIGIT;
300 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
301 sign = high;
302
303 high &= FHIGHBIT-1;
304 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
305 high |= FHIGHBIT;
306 else {
307 (*exp)++;
308 /* A denorm, normalize the mantissa */
309 while (! (high & FHIGHBIT)) {
310 high <<= 1;
311 (*exp)--;
312 }
313 }
314 #if FNBIGIT == 1
315 man->_mp_d[0] = (mp_limb_t)high;
316 #else
317 #error Cannot cope with FNBIGIT
318 #endif
319 if (sign < 0)
320 man->_mp_size = -man->_mp_size;
321 }
322 }
323
324 /* Convenient union types for checking the layout of IEEE 754 types -
325 based on defs in GNU libc <ieee754.h>
326 */
327
328 void
329 __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt)
330 {
331 /* Do some bit fiddling on IEEE */
332 int high, sign; /* assuming 32 bit ints */
333 union { float f; int i; } u; /* assuming 32 bit float and int */
334
335 ASSERT(sizeof(int ) == 4 );
336 ASSERT(sizeof(flt ) == 4 );
337 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
338
339 u.f = flt; /* grab the float */
340 high = u.i;
341
342 if ((high & ~FMSBIT) == 0) {
343 *man = 0;
344 *exp = 0;
345 } else {
346 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
347 sign = high;
348
349 high &= FHIGHBIT-1;
350 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
351 high |= FHIGHBIT;
352 else {
353 (*exp)++;
354 /* A denorm, normalize the mantissa */
355 while (! (high & FHIGHBIT)) {
356 high <<= 1;
357 (*exp)--;
358 }
359 }
360 *man = high;
361 if (sign < 0)
362 *man = - *man;
363 }
364 }
365
366 union stg_ieee754_flt
367 {
368 float f;
369 struct {
370
371 #if WORDS_BIGENDIAN
372 unsigned int negative:1;
373 unsigned int exponent:8;
374 unsigned int mantissa:23;
375 #else
376 unsigned int mantissa:23;
377 unsigned int exponent:8;
378 unsigned int negative:1;
379 #endif
380 } ieee;
381 struct {
382
383 #if WORDS_BIGENDIAN
384 unsigned int negative:1;
385 unsigned int exponent:8;
386 unsigned int quiet_nan:1;
387 unsigned int mantissa:22;
388 #else
389 unsigned int mantissa:22;
390 unsigned int quiet_nan:1;
391 unsigned int exponent:8;
392 unsigned int negative:1;
393 #endif
394 } ieee_nan;
395 };
396
397 /*
398
399 To recap, here's the representation of a double precision
400 IEEE floating point number:
401
402 sign 63 sign bit (0==positive, 1==negative)
403 exponent 62-52 exponent (biased by 1023)
404 fraction 51-0 fraction (bits to right of binary point)
405 */
406
407 union stg_ieee754_dbl
408 {
409 double d;
410 struct {
411
412 #if WORDS_BIGENDIAN
413 unsigned int negative:1;
414 unsigned int exponent:11;
415 unsigned int mantissa0:20;
416 unsigned int mantissa1:32;
417 #else
418 #if FLOAT_WORDS_BIGENDIAN
419 unsigned int mantissa0:20;
420 unsigned int exponent:11;
421 unsigned int negative:1;
422 unsigned int mantissa1:32;
423 #else
424 unsigned int mantissa1:32;
425 unsigned int mantissa0:20;
426 unsigned int exponent:11;
427 unsigned int negative:1;
428 #endif
429 #endif
430 } ieee;
431 /* This format makes it easier to see if a NaN is a signalling NaN. */
432 struct {
433
434 #if WORDS_BIGENDIAN
435 unsigned int negative:1;
436 unsigned int exponent:11;
437 unsigned int quiet_nan:1;
438 unsigned int mantissa0:19;
439 unsigned int mantissa1:32;
440 #else
441 #if FLOAT_WORDS_BIGENDIAN
442 unsigned int mantissa0:19;
443 unsigned int quiet_nan:1;
444 unsigned int exponent:11;
445 unsigned int negative:1;
446 unsigned int mantissa1:32;
447 #else
448 unsigned int mantissa1:32;
449 unsigned int mantissa0:19;
450 unsigned int quiet_nan:1;
451 unsigned int exponent:11;
452 unsigned int negative:1;
453 #endif
454 #endif
455 } ieee_nan;
456 };
457
458 /*
459 * Predicates for testing for extreme IEEE fp values. Used
460 * by the bytecode evaluator and the Prelude.
461 *
462 */
463
464 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
465 #ifdef IEEE_FLOATING_POINT
466
467 StgInt
468 isDoubleNaN(StgDouble d)
469 {
470 union stg_ieee754_dbl u;
471
472 u.d = d;
473
474 return (
475 u.ieee.exponent == 2047 /* 2^11 - 1 */ && /* Is the exponent all ones? */
476 (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
477 /* and the mantissa non-zero? */
478 );
479 }
480
481 StgInt
482 isDoubleInfinite(StgDouble d)
483 {
484 union stg_ieee754_dbl u;
485
486 u.d = d;
487
488 /* Inf iff exponent is all ones, mantissa all zeros */
489 return (
490 u.ieee.exponent == 2047 /* 2^11 - 1 */ &&
491 u.ieee.mantissa0 == 0 &&
492 u.ieee.mantissa1 == 0
493 );
494 }
495
496 StgInt
497 isDoubleDenormalized(StgDouble d)
498 {
499 union stg_ieee754_dbl u;
500
501 u.d = d;
502
503 /* A (single/double/quad) precision floating point number
504 is denormalised iff:
505 - exponent is zero
506 - mantissa is non-zero.
507 - (don't care about setting of sign bit.)
508
509 */
510 return (
511 u.ieee.exponent == 0 &&
512 (u.ieee.mantissa0 != 0 ||
513 u.ieee.mantissa1 != 0)
514 );
515
516 }
517
518 StgInt
519 isDoubleNegativeZero(StgDouble d)
520 {
521 union stg_ieee754_dbl u;
522
523 u.d = d;
524 /* sign (bit 63) set (only) => negative zero */
525
526 return (
527 u.ieee.negative == 1 &&
528 u.ieee.exponent == 0 &&
529 u.ieee.mantissa0 == 0 &&
530 u.ieee.mantissa1 == 0);
531 }
532
533 /* Same tests, this time for StgFloats. */
534
535 /*
536 To recap, here's the representation of a single precision
537 IEEE floating point number:
538
539 sign 31 sign bit (0 == positive, 1 == negative)
540 exponent 30-23 exponent (biased by 127)
541 fraction 22-0 fraction (bits to right of binary point)
542 */
543
544
545 StgInt
546 isFloatNaN(StgFloat f)
547 {
548 union stg_ieee754_flt u;
549 u.f = f;
550
551 /* Floating point NaN iff exponent is all ones, mantissa is
552 non-zero (but see below.) */
553 return (
554 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
555 u.ieee.mantissa != 0);
556 }
557
558 StgInt
559 isFloatInfinite(StgFloat f)
560 {
561 union stg_ieee754_flt u;
562 u.f = f;
563
564 /* A float is Inf iff exponent is max (all ones),
565 and mantissa is min(all zeros.) */
566 return (
567 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
568 u.ieee.mantissa == 0);
569 }
570
571 StgInt
572 isFloatDenormalized(StgFloat f)
573 {
574 union stg_ieee754_flt u;
575 u.f = f;
576
577 /* A (single/double/quad) precision floating point number
578 is denormalised iff:
579 - exponent is zero
580 - mantissa is non-zero.
581 - (don't care about setting of sign bit.)
582
583 */
584 return (
585 u.ieee.exponent == 0 &&
586 u.ieee.mantissa != 0);
587 }
588
589 StgInt
590 isFloatNegativeZero(StgFloat f)
591 {
592 union stg_ieee754_flt u;
593 u.f = f;
594
595 /* sign (bit 31) set (only) => negative zero */
596 return (
597 u.ieee.negative &&
598 u.ieee.exponent == 0 &&
599 u.ieee.mantissa == 0);
600 }
601
602 #else /* ! IEEE_FLOATING_POINT */
603
604 /* Dummy definitions of predicates - they all return false */
605 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
606 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
607 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
608 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
609 StgInt isFloatNaN(f) StgFloat f; { return 0; }
610 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
611 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
612 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
613
614 #endif /* ! IEEE_FLOATING_POINT */