Eliminate intermediate overflow for encodeFloat, fixes #5524
[packages/integer-gmp.git] / cbits / float.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) Lennart Augustsson
4 * (c) The GHC Team, 1998-2000
5 *
6 * Support for floating-point <-> gmp integer primitives
7 *
8 * ---------------------------------------------------------------------------*/
9
10 /* TODO: do we need PosixSource.h ? it lives in rts/ not public includes/ */
11 /* #include "PosixSource.h" */
#include "Rts.h"
#include "gmp.h"

#include <limits.h>
#include <math.h>
16
17 #define IEEE_FLOATING_POINT 1
18
19 /*
20 * Encoding and decoding Doubles. Code based on the HBC code
21 * (lib/fltcode.c).
22 */
23
24 #ifdef _SHORT_LIMB
25 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
26 #else
27 #ifdef _LONG_LONG_LIMB
28 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
29 #else
30 #define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
31 #endif
32 #endif
33
34 #if SIZEOF_LIMB_T == 4
35 #define GMP_BASE 4294967296.0
36 #define LIMBBITS_LOG_2 5
37 #elif SIZEOF_LIMB_T == 8
38 #define GMP_BASE 18446744073709551616.0
39 #define LIMBBITS_LOG_2 6
40 #else
41 #error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
42 #endif
43
44 #define DNBIGIT ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
45 #define FNBIGIT ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
46
47 #if IEEE_FLOATING_POINT
48 #define MY_DMINEXP ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
49 /* DMINEXP is defined in values.h on Linux (for example) */
50 #define DHIGHBIT 0x00100000
51 #define DMSBIT 0x80000000
52
53 #define MY_FMINEXP ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
54 #define FHIGHBIT 0x00800000
55 #define FMSBIT 0x80000000
56 #endif
57
58 #if defined(WORDS_BIGENDIAN) || defined(FLOAT_WORDS_BIGENDIAN)
59 #define L 1
60 #define H 0
61 #else
62 #define L 0
63 #define H 1
64 #endif
65
66 #define __abs(a) (( (a) >= 0 ) ? (a) : (-(a)))
67
68 StgDouble
69 integer_cbits_encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
70 {
71 StgDouble r;
72 const mp_limb_t *const arr = (const mp_limb_t *)ba;
73 I_ i;
74
75 /* Convert MP_INT to a double; knows a lot about internal rep! */
76 i = __abs(size)-1;
77 if ((i < 15) || (e >= 0)) /* overflows only if the final result does */
78 {
79 /* This would cause overflow if a large MP_INT is passed, even if the
80 * exponent would scale it back into range, so we do it only when it's safe. */
81 for(r = 0.0; i >= 0; i--)
82 r = (r * GMP_BASE) + arr[i];
83
84 } else { /* possibly more than 1024 bits in the MP_INT, but gets scaled down */
85
86 /* Find the first nonzero limb; normally it would be the first */
87 r = 0.0;
88 while((i >= 0) && (r == 0.0))
89 {
90 r = arr[i--];
91 }
92 if (i >= 0)
93 r = (r * GMP_BASE) + arr[i];
94 #if SIZEOF_LIMB_T < 8
95 if (i > 0)
96 r = (r * GMP_BASE) + arr[--i];
97 #endif
98 /* Now we have at least the 65 leading bits of the MP_INT or all of it.
99 * Any further bits would be rounded down, so from now on everything is
100 * multiplication by powers of 2.
101 * If i is positive, arr contains i limbs we haven't looked at yet, so
102 * adjust the exponent by i*8*SIZEOF_LIMB_T. Unfortunately, we must
103 * beware of overflow, so we can't simply add this to e. */
104 if (i > 0)
105 {
106 /* first add the number of whole limbs that would be cancelled */
107 i = i + e / (8 * SIZEOF_LIMB_T);
108 /* check for overflow */
109 if ((i > 0) && ((i >> (8*sizeof(I_) - 1 - LIMBBITS_LOG_2)) > 0))
110 {
111 /* overflow, give e a large dummy value */
112 e = 2147483647;
113 } else {
114 /* no overflow, get the exact value */
115 e = i * (8 * SIZEOF_LIMB_T) + (e % (8 * SIZEOF_LIMB_T));
116 }
117 }
118 }
119
120 /* Now raise to the exponent */
121 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
122 r = ldexp(r, e);
123
124 /* sign is encoded in the size */
125 if (size < 0)
126 r = -r;
127
128 return r;
129 }
130
131 StgFloat
132 integer_cbits_encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
133 {
134 StgFloat r;
135 const mp_limb_t *arr = (const mp_limb_t *)ba;
136 I_ i;
137
138 /* Convert MP_INT to a float; knows a lot about internal rep! */
139 i = __abs(size)-1;
140 /* just in case StgFloat is a double, check sizes */
141 #if SIZEOF_FLOAT == 4
142 if ((i < 2) || (e >= 0))
143 #else
144 if ((i < 15) || (e >= 0))
145 #endif
146 {
147 for(r = 0.0; i >= 0; i--)
148 r = (r * GMP_BASE) + arr[i];
149 } else {
150
151 /* Find the first nonzero limb; normally it would be the first */
152 r = 0.0;
153 while((i >= 0) && (r == 0.0))
154 {
155 r = arr[i--];
156 }
157 if (i >= 0)
158 r = (r * GMP_BASE) + arr[i];
159 #if (SIZEOF_LIMB_T < 8) && (SIZEOF_FLOAT > 4)
160 if (i > 0)
161 r = (r * GMP_BASE) + arr[--i];
162 #endif
163 /* Now we have enough leading bits of the MP_INT.
164 * Any further bits would be rounded down, so from now on everything is
165 * multiplication by powers of 2.
166 * If i is positive, arr contains i limbs we haven't looked at yet, so
167 * adjust the exponent by i*8*SIZEOF_LIMB_T. Unfortunately, we must
168 * beware of overflow, so we can't simply add this to e. */
169 if (i > 0)
170 {
171 /* first add the number of whole limbs that would be cancelled */
172 i = i + e / (8 * SIZEOF_LIMB_T);
173 /* check for overflow */
174 if ((i > 0) && ((i >> (8*sizeof(I_) - 1 - LIMBBITS_LOG_2)) > 0))
175 {
176 /* overflow, give e a large dummy value */
177 e = 2147483647;
178 } else {
179 /* no overflow, get the exact value */
180 e = i * (8 * SIZEOF_LIMB_T) + (e % (8 * SIZEOF_LIMB_T));
181 }
182 }
183 }
184
185 /* Now raise to the exponent */
186 if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
187 r = ldexp(r, e);
188
189 /* sign is encoded in the size */
190 if (size < 0)
191 r = -r;
192
193 return r;
194 }
195
196 /* This only supports IEEE floating point */
197
198 void
199 integer_cbits_decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
200 {
201 /* Do some bit fiddling on IEEE */
202 unsigned int low, high; /* assuming 32 bit ints */
203 int sign, iexp;
204 union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
205
206 ASSERT(sizeof(unsigned int ) == 4 );
207 ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
208 ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
209 ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
210
211 u.d = dbl; /* grab chunks of the double */
212 low = u.i[L];
213 high = u.i[H];
214
215 /* we know the MP_INT* passed in has size zero, so we realloc
216 no matter what.
217 */
218 man->_mp_alloc = DNBIGIT;
219
220 if (low == 0 && (high & ~DMSBIT) == 0) {
221 man->_mp_size = 0;
222 *exp = 0L;
223 } else {
224 man->_mp_size = DNBIGIT;
225 iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
226 sign = high;
227
228 high &= DHIGHBIT-1;
229 if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
230 high |= DHIGHBIT;
231 else {
232 iexp++;
233 /* A denorm, normalize the mantissa */
234 while (! (high & DHIGHBIT)) {
235 high <<= 1;
236 if (low & DMSBIT)
237 high++;
238 low <<= 1;
239 iexp--;
240 }
241 }
242 *exp = (I_) iexp;
243 #if DNBIGIT == 2
244 man->_mp_d[0] = (mp_limb_t)low;
245 man->_mp_d[1] = (mp_limb_t)high;
246 #else
247 #if DNBIGIT == 1
248 man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
249 #else
250 #error Cannot cope with DNBIGIT
251 #endif
252 #endif
253 if (sign < 0)
254 man->_mp_size = -man->_mp_size;
255 }
256 }