5bd6aebb1cd1923608653d27599a247baca98af4
[ghc.git] / rts / StgPrimFloat.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Miscellaneous support for floating-point primitives
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include <math.h>
13
14 /*
15 * Encoding and decoding Doubles. Code based on the HBC code
16 * (lib/fltcode.c).
17 */
18
19 #ifdef _SHORT_LIMB
20 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
21 #else
22 #ifdef _LONG_LONG_LIMB
23 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
24 #else
25 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
26 #endif
27 #endif
28
29 #if SIZEOF_LIMB_T == 4
30 #define GMP_BASE 4294967296.0
31 #elif SIZEOF_LIMB_T == 8
32 #define GMP_BASE 18446744073709551616.0
33 #else
34 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
35 #endif
36
37 #define DNBIGIT ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
38 #define FNBIGIT ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
39
40 #if IEEE_FLOATING_POINT
41 #define MY_DMINEXP ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
42 /* DMINEXP is defined in values.h on Linux (for example) */
43 #define DHIGHBIT 0x00100000
44 #define DMSBIT 0x80000000
45
46 #define MY_FMINEXP ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
47 #define FHIGHBIT 0x00800000
48 #define FMSBIT 0x80000000
49 #endif
50
51 #ifdef WORDS_BIGENDIAN
52 #define L 1
53 #define H 0
54 #else
55 #define L 0
56 #define H 1
57 #endif
58
59 #define __abs(a) (( (a) >= 0 ) ? (a) : (-(a)))
60
61 StgDouble
62 __encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
63 {
64 StgDouble r;
65 const mp_limb_t *const arr = (const mp_limb_t *)ba;
66 I_ i;
67
68 /* Convert MP_INT to a double; knows a lot about internal rep! */
69 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
70 r = (r * GMP_BASE) + arr[i];
71
72 /* Now raise to the exponent */
73 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
74 r = ldexp(r, e);
75
76 /* sign is encoded in the size */
77 if (size < 0)
78 r = -r;
79
80 return r;
81 }
82
83 /* Special version for small Integers */
84 StgDouble
85 __int_encodeDouble (I_ j, I_ e)
86 {
87 StgDouble r;
88
89 r = (StgDouble)__abs(j);
90
91 /* Now raise to the exponent */
92 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
93 r = ldexp(r, e);
94
95 /* sign is encoded in the size */
96 if (j < 0)
97 r = -r;
98
99 return r;
100 }
101
102 StgFloat
103 __encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
104 {
105 StgFloat r;
106 const mp_limb_t *arr = (const mp_limb_t *)ba;
107 I_ i;
108
109 /* Convert MP_INT to a float; knows a lot about internal rep! */
110 for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
111 r = (r * GMP_BASE) + arr[i];
112
113 /* Now raise to the exponent */
114 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
115 r = ldexp(r, e);
116
117 /* sign is encoded in the size */
118 if (size < 0)
119 r = -r;
120
121 return r;
122 }
123
124 /* Special version for small Integers */
125 StgFloat
126 __int_encodeFloat (I_ j, I_ e)
127 {
128 StgFloat r;
129
130 r = (StgFloat)__abs(j);
131
132 /* Now raise to the exponent */
133 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
134 r = ldexp(r, e);
135
136 /* sign is encoded in the size */
137 if (j < 0)
138 r = -r;
139
140 return r;
141 }
142
143 /* This only supports IEEE floating point */
144
145 void
146 __decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
147 {
148 /* Do some bit fiddling on IEEE */
149 unsigned int low, high; /* assuming 32 bit ints */
150 int sign, iexp;
151 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
152
153 ASSERT(sizeof(unsigned int ) == 4 );
154 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
155 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
156 ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
157
158 u.d = dbl; /* grab chunks of the double */
159 low = u.i[L];
160 high = u.i[H];
161
162 /* we know the MP_INT* passed in has size zero, so we realloc
163 no matter what.
164 */
165 man->_mp_alloc = DNBIGIT;
166
167 if (low == 0 && (high & ~DMSBIT) == 0) {
168 man->_mp_size = 0;
169 *exp = 0L;
170 } else {
171 man->_mp_size = DNBIGIT;
172 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
173 sign = high;
174
175 high &= DHIGHBIT-1;
176 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
177 high |= DHIGHBIT;
178 else {
179 iexp++;
180 /* A denorm, normalize the mantissa */
181 while (! (high & DHIGHBIT)) {
182 high <<= 1;
183 if (low & DMSBIT)
184 high++;
185 low <<= 1;
186 iexp--;
187 }
188 }
189 *exp = (I_) iexp;
190 #if DNBIGIT == 2
191 man->_mp_d[0] = (mp_limb_t)low;
192 man->_mp_d[1] = (mp_limb_t)high;
193 #else
194 #if DNBIGIT == 1
195 man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
196 #else
197 #error Cannot cope with DNBIGIT
198 #endif
199 #endif
200 if (sign < 0)
201 man->_mp_size = -man->_mp_size;
202 }
203 }
204
205 void
206 __decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
207 {
208 /* Do some bit fiddling on IEEE */
209 int high, sign; /* assuming 32 bit ints */
210 union { float f; int i; } u; /* assuming 32 bit float and int */
211
212 ASSERT(sizeof(int ) == 4 );
213 ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
214 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
215 ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
216
217 u.f = flt; /* grab the float */
218 high = u.i;
219
220 /* we know the MP_INT* passed in has size zero, so we realloc
221 no matter what.
222 */
223 man->_mp_alloc = FNBIGIT;
224
225 if ((high & ~FMSBIT) == 0) {
226 man->_mp_size = 0;
227 *exp = 0;
228 } else {
229 man->_mp_size = FNBIGIT;
230 *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
231 sign = high;
232
233 high &= FHIGHBIT-1;
234 if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
235 high |= FHIGHBIT;
236 else {
237 (*exp)++;
238 /* A denorm, normalize the mantissa */
239 while (! (high & FHIGHBIT)) {
240 high <<= 1;
241 (*exp)--;
242 }
243 }
244 #if FNBIGIT == 1
245 man->_mp_d[0] = (mp_limb_t)high;
246 #else
247 #error Cannot cope with FNBIGIT
248 #endif
249 if (sign < 0)
250 man->_mp_size = -man->_mp_size;
251 }
252 }
253
254 /* Convenient union types for checking the layout of IEEE 754 types -
255 based on defs in GNU libc <ieee754.h>
256 */
257
258 union stg_ieee754_flt
259 {
260 float f;
261 struct {
262
263 #if WORDS_BIGENDIAN
264 unsigned int negative:1;
265 unsigned int exponent:8;
266 unsigned int mantissa:23;
267 #else
268 unsigned int mantissa:23;
269 unsigned int exponent:8;
270 unsigned int negative:1;
271 #endif
272 } ieee;
273 struct {
274
275 #if WORDS_BIGENDIAN
276 unsigned int negative:1;
277 unsigned int exponent:8;
278 unsigned int quiet_nan:1;
279 unsigned int mantissa:22;
280 #else
281 unsigned int mantissa:22;
282 unsigned int quiet_nan:1;
283 unsigned int exponent:8;
284 unsigned int negative:1;
285 #endif
286 } ieee_nan;
287 };
288
289 /*
290
291 To recap, here's the representation of a double precision
292 IEEE floating point number:
293
294 sign 63 sign bit (0==positive, 1==negative)
295 exponent 62-52 exponent (biased by 1023)
296 fraction 51-0 fraction (bits to right of binary point)
297 */
298
299 union stg_ieee754_dbl
300 {
301 double d;
302 struct {
303
304 #if WORDS_BIGENDIAN
305 unsigned int negative:1;
306 unsigned int exponent:11;
307 unsigned int mantissa0:20;
308 unsigned int mantissa1:32;
309 #else
310 unsigned int mantissa1:32;
311 unsigned int mantissa0:20;
312 unsigned int exponent:11;
313 unsigned int negative:1;
314 #endif
315 } ieee;
316 /* This format makes it easier to see if a NaN is a signalling NaN. */
317 struct {
318
319 #if WORDS_BIGENDIAN
320 unsigned int negative:1;
321 unsigned int exponent:11;
322 unsigned int quiet_nan:1;
323 unsigned int mantissa0:19;
324 unsigned int mantissa1:32;
325 #else
326 unsigned int mantissa1:32;
327 unsigned int mantissa0:19;
328 unsigned int quiet_nan:1;
329 unsigned int exponent:11;
330 unsigned int negative:1;
331 #endif
332 } ieee_nan;
333 };
334
335 /*
336 * Predicates for testing for extreme IEEE fp values. Used
337 * by the bytecode evaluator and the Prelude.
338 *
339 */
340
341 /* In case you don't suppport IEEE, you'll just get dummy defs.. */
342 #ifdef IEEE_FLOATING_POINT
343
344 StgInt
345 isDoubleNaN(StgDouble d)
346 {
347 union stg_ieee754_dbl u;
348
349 u.d = d;
350
351 return (
352 u.ieee.exponent == 2047 /* 2^11 - 1 */ && /* Is the exponent all ones? */
353 (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
354 /* and the mantissa non-zero? */
355 );
356 }
357
358 StgInt
359 isDoubleInfinite(StgDouble d)
360 {
361 union stg_ieee754_dbl u;
362
363 u.d = d;
364
365 /* Inf iff exponent is all ones, mantissa all zeros */
366 return (
367 u.ieee.exponent == 2047 /* 2^11 - 1 */ &&
368 u.ieee.mantissa0 == 0 &&
369 u.ieee.mantissa1 == 0
370 );
371 }
372
373 StgInt
374 isDoubleDenormalized(StgDouble d)
375 {
376 union stg_ieee754_dbl u;
377
378 u.d = d;
379
380 /* A (single/double/quad) precision floating point number
381 is denormalised iff:
382 - exponent is zero
383 - mantissa is non-zero.
384 - (don't care about setting of sign bit.)
385
386 */
387 return (
388 u.ieee.exponent == 0 &&
389 (u.ieee.mantissa0 != 0 ||
390 u.ieee.mantissa1 != 0)
391 );
392
393 }
394
395 StgInt
396 isDoubleNegativeZero(StgDouble d)
397 {
398 union stg_ieee754_dbl u;
399
400 u.d = d;
401 /* sign (bit 63) set (only) => negative zero */
402
403 return (
404 u.ieee.negative == 1 &&
405 u.ieee.exponent == 0 &&
406 u.ieee.mantissa0 == 0 &&
407 u.ieee.mantissa1 == 0);
408 }
409
410 /* Same tests, this time for StgFloats. */
411
412 /*
413 To recap, here's the representation of a single precision
414 IEEE floating point number:
415
416 sign 31 sign bit (0 == positive, 1 == negative)
417 exponent 30-23 exponent (biased by 127)
418 fraction 22-0 fraction (bits to right of binary point)
419 */
420
421
422 StgInt
423 isFloatNaN(StgFloat f)
424 {
425 union stg_ieee754_flt u;
426 u.f = f;
427
428 /* Floating point NaN iff exponent is all ones, mantissa is
429 non-zero (but see below.) */
430 return (
431 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
432 u.ieee.mantissa != 0);
433 }
434
435 StgInt
436 isFloatInfinite(StgFloat f)
437 {
438 union stg_ieee754_flt u;
439 u.f = f;
440
441 /* A float is Inf iff exponent is max (all ones),
442 and mantissa is min(all zeros.) */
443 return (
444 u.ieee.exponent == 255 /* 2^8 - 1 */ &&
445 u.ieee.mantissa == 0);
446 }
447
448 StgInt
449 isFloatDenormalized(StgFloat f)
450 {
451 union stg_ieee754_flt u;
452 u.f = f;
453
454 /* A (single/double/quad) precision floating point number
455 is denormalised iff:
456 - exponent is zero
457 - mantissa is non-zero.
458 - (don't care about setting of sign bit.)
459
460 */
461 return (
462 u.ieee.exponent == 0 &&
463 u.ieee.mantissa != 0);
464 }
465
466 StgInt
467 isFloatNegativeZero(StgFloat f)
468 {
469 union stg_ieee754_flt u;
470 u.f = f;
471
472 /* sign (bit 31) set (only) => negative zero */
473 return (
474 u.ieee.negative &&
475 u.ieee.exponent == 0 &&
476 u.ieee.mantissa == 0);
477 }
478
479 #else /* ! IEEE_FLOATING_POINT */
480
481 /* Dummy definitions of predicates - they all return false */
482 StgInt isDoubleNaN(d) StgDouble d; { return 0; }
483 StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
484 StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
485 StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
486 StgInt isFloatNaN(f) StgFloat f; { return 0; }
487 StgInt isFloatInfinite(f) StgFloat f; { return 0; }
488 StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
489 StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
490
491 #endif /* ! IEEE_FLOATING_POINT */