| @@ -15,33 +15,34 @@ | | | @@ -15,33 +15,34 @@ |
15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | | 15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | | 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | | 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | | 18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | | 19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | | 20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | | 21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | | 22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | | 23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
24 | * SUCH DAMAGE. | | 24 | * SUCH DAMAGE. |
25 | */ | | 25 | */ |
26 | | | 26 | |
27 | #include <sys/cdefs.h> | | 27 | #include <sys/cdefs.h> |
28 | __RCSID("$NetBSD: n_exp2.c,v 1.2 2014/03/12 19:42:18 martin Exp $"); | | 28 | __RCSID("$NetBSD: n_exp2.c,v 1.3 2014/03/23 15:26:47 martin Exp $"); |
29 | #ifdef __FBSDID | | 29 | #ifdef __FBSDID |
30 | __FBSDID("$FreeBSD: src/lib/msun/src/s_exp2.c,v 1.7 2008/02/22 02:27:34 das Exp $"); | | 30 | __FBSDID("$FreeBSD: src/lib/msun/src/s_exp2.c,v 1.7 2008/02/22 02:27:34 das Exp $"); |
31 | #endif | | 31 | #endif |
32 | | | 32 | |
33 | #include <stdint.h> | | 33 | #include <stdint.h> |
34 | #include <float.h> | | 34 | #include <float.h> |
| | | 35 | #include <string.h> |
35 | | | 36 | |
36 | #include "math.h" | | 37 | #include "math.h" |
37 | | | 38 | |
38 | #define TBLBITS 8 | | 39 | #define TBLBITS 8 |
39 | #define TBLSIZE (1 << TBLBITS) | | 40 | #define TBLSIZE (1 << TBLBITS) |
40 | | | 41 | |
41 | static const double | | 42 | static const double |
42 | huge = 0x1p126, | | 43 | huge = 0x1p126, |
43 | redux = 0x1.8p52 / TBLSIZE, | | 44 | redux = 0x1.8p52 / TBLSIZE, |
44 | P1 = 0x1.62e42fefa39efp-1, | | 45 | P1 = 0x1.62e42fefa39efp-1, |
45 | P2 = 0x1.ebfbdff82c575p-3, | | 46 | P2 = 0x1.ebfbdff82c575p-3, |
46 | P3 = 0x1.c6b08d704a0a6p-5, | | 47 | P3 = 0x1.c6b08d704a0a6p-5, |
47 | P4 = 0x1.3b2ab88f70400p-7, | | 48 | P4 = 0x1.3b2ab88f70400p-7, |
| @@ -334,59 +335,61 @@ static const double tbl[TBLSIZE * 2] = { | | | @@ -334,59 +335,61 @@ static const double tbl[TBLSIZE * 2] = { |
334 | * Note that the range of i is +-TBLSIZE/2, so we actually index the tables | | 335 | * Note that the range of i is +-TBLSIZE/2, so we actually index the tables |
335 | * by i0 = i + TBLSIZE/2. For cache efficiency, exp2t[] and eps[] are | | 336 | * by i0 = i + TBLSIZE/2. For cache efficiency, exp2t[] and eps[] are |
336 | * virtual tables, interleaved in the real table tbl[]. | | 337 | * virtual tables, interleaved in the real table tbl[]. |
337 | * | | 338 | * |
338 | * This method is due to Gal, with many details due to Gal and Bachelis: | | 339 | * This method is due to Gal, with many details due to Gal and Bachelis: |
339 | * | | 340 | * |
340 | * Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library | | 341 | * Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library |
341 | * for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). | | 342 | * for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). |
342 | */ | | 343 | */ |
343 | double | | 344 | double |
344 | exp2(double x) | | 345 | exp2(double x) |
345 | { | | 346 | { |
346 | double r, t, twopk, z; | | 347 | double r, t, twopk, z; |
347 | uint32_t hx, ix, i0; | | 348 | uint32_t hx, ix, i0, temp; |
348 | int k, big; | | 349 | int k, big; |
349 | | | 350 | |
350 | /* Filter out exceptional cases. */ | | 351 | /* Filter out exceptional cases. */ |
351 | hx = ((uint32_t*)&x)[0]; | | 352 | memcpy(&hx, &x, sizeof(hx)); |
352 | ix = hx & 0x7fffffff; /* high word of |x| */ | | 353 | ix = hx & 0x7fffffff; /* high word of |x| */ |
353 | if(ix >= 0x40900000) { /* |x| >= 1024 */ | | 354 | if(ix >= 0x40900000) { /* |x| >= 1024 */ |
354 | if(x >= 0x1.0p10) | | 355 | if(x >= 0x1.0p10) |
355 | return (huge * huge); /* overflow */ | | 356 | return (huge * huge); /* overflow */ |
356 | if(x <= -0x1.0ccp10) | | 357 | if(x <= -0x1.0ccp10) |
357 | return (twom1000 * twom1000); /* underflow */ | | 358 | return (twom1000 * twom1000); /* underflow */ |
358 | } else if (ix < 0x3c900000) { /* |x| < 0x1p-54 */ | | 359 | } else if (ix < 0x3c900000) { /* |x| < 0x1p-54 */ |
359 | return (1.0 + x); | | 360 | return (1.0 + x); |
360 | } | | 361 | } |
361 | | | 362 | |
362 | /* Reduce x, computing z, i0, and k. */ | | 363 | /* Reduce x, computing z, i0, and k. */ |
363 | (*(volatile double*)&t) = x + redux; | | 364 | (*(volatile double*)&t) = x + redux; |
364 | i0 = ((uint32_t*)&t)[1]; | | 365 | i0 = ((uint32_t*)&t)[1]; |
365 | i0 += TBLSIZE / 2; | | 366 | i0 += TBLSIZE / 2; |
366 | k = (i0 >> TBLBITS) << 20; | | 367 | k = (i0 >> TBLBITS) << 20; |
367 | i0 = (i0 & (TBLSIZE - 1)) << 1; | | 368 | i0 = (i0 & (TBLSIZE - 1)) << 1; |
368 | t -= redux; | | 369 | t -= redux; |
369 | z = x - t; | | 370 | z = x - t; |
370 | | | 371 | |
371 | /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ | | 372 | /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ |
372 | t = tbl[i0]; /* exp2t[i0] */ | | 373 | t = tbl[i0]; /* exp2t[i0] */ |
373 | z -= tbl[i0 + 1]; /* eps[i0] */ | | 374 | z -= tbl[i0 + 1]; /* eps[i0] */ |
374 | big = k >= -1021 << 20; | | 375 | big = k >= -1021 << 20; |
375 | if (big) { | | 376 | if (big) { |
376 | ((uint32_t*)&twopk)[0] = 0x3ff00000+k; | | 377 | temp = 0x3ff00000+k; |
377 | ((uint32_t*)&twopk)[1] = 0; | | 378 | twopk = 0.0; |
| | | 379 | memcpy(&twopk, &temp, sizeof(temp)); |
378 | } else { | | 380 | } else { |
379 | ((uint32_t*)&twopk)[0] = 0x3ff00000+k + (1000 << 20); | | 381 | temp = 0x3ff00000+k + (1000 << 20); |
380 | ((uint32_t*)&twopk)[1] = 0; | | 382 | twopk = 0.0; |
| | | 383 | memcpy(&twopk, &temp, sizeof(temp)); |
381 | } | | 384 | } |
382 | r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5)))); | | 385 | r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5)))); |
383 | | | 386 | |
384 | /* Scale by 2**(k>>20). */ | | 387 | /* Scale by 2**(k>>20). */ |
385 | if (big) { | | 388 | if (big) { |
386 | /*if (k == 1024 << 20) | | 389 | /*if (k == 1024 << 20) |
387 | return (r * 2.0 * 0x1p1023); */ | | 390 | return (r * 2.0 * 0x1p1023); */ |
388 | return (r * twopk); | | 391 | return (r * twopk); |
389 | } else { | | 392 | } else { |
390 | return (r * twopk * twom1000); | | 393 | return (r * twopk * twom1000); |
391 | } | | 394 | } |
392 | } | | 395 | } |