Browse Source

math: clean up inverse trigonometric functions

modifications:
* avoid unsigned->signed conversions
* removed various volatile hacks
* use FORCE_EVAL when evaluating only for side-effects
* factor out R() rational approximation instead of manual inline
* __invtrigl.h now only provides __invtrigl_R, __pio2_hi and __pio2_lo
* use 2*pio2_hi, 2*pio2_lo instead of pi_hi, pi_lo

otherwise the logic is not changed, long double versions will
need a revisit when a genaral long double cleanup happens
Szabolcs Nagy 12 năm trước cách đây
mục cha
commit
b12a73d5bf
12 tập tin đã thay đổi với 258 bổ sung377 xóa
  1. 10 31
      src/math/__invtrigl.c
  2. 4 62
      src/math/__invtrigl.h
  3. 37 37
      src/math/acos.c
  4. 37 40
      src/math/acosf.c
  5. 26 35
      src/math/acosl.c
  6. 36 35
      src/math/asin.c
  7. 25 26
      src/math/asinf.c
  8. 25 36
      src/math/asinl.c
  9. 13 19
      src/math/atan.c
  10. 19 21
      src/math/atan2l.c
  11. 13 15
      src/math/atanf.c
  12. 13 20
      src/math/atanl.c

+ 10 - 31
src/math/__invtrigl.c

@@ -1,36 +1,8 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/ld80/invtrig.c */
-/*-
- * Copyright (c) 2008 David Schultz <[email protected]>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
 #include "__invtrigl.h"
 
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
 
-/* coefficients used in asinl() and acosl() */
-const long double
+static const long double
 pS0 =  1.66666666666666666631e-01L,
 pS1 = -4.16313987993683104320e-01L,
 pS2 =  3.69068046323246813704e-01L,
@@ -44,9 +16,16 @@ qS3 = -1.68285799854822427013e+00L,
 qS4 =  3.90699412641738801874e-01L,
 qS5 = -3.14365703596053263322e-02L;
 
-const long double pi_hi = 3.1415926535897932384626433832795L;
-const long double pi_lo = -5.01655761266833202345e-20L;
 const long double pio2_hi = 1.57079632679489661926L;
 const long double pio2_lo = -2.50827880633416601173e-20L;
 
+/* used in asinl() and acosl() */
+/* R(x^2) is a rational approximation of (asin(x)-x)/x^3 with Remez algorithm */
+long double __invtrigl_R(long double z)
+{
+	long double p, q;
+	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*(pS5+z*pS6))))));
+	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*(qS4+z*qS5))));
+	return p/q;
+}
 #endif

+ 4 - 62
src/math/__invtrigl.h

@@ -1,68 +1,10 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/ld80/invtrig.h */
-/*-
- * Copyright (c) 2008 David Schultz <[email protected]>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "libm.h"
+#include <float.h>
 
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-
-#define BIAS            (LDBL_MAX_EXP - 1)
-#define MANH_SIZE       LDBL_MANH_SIZE
-
-/* Constants shared by the long double inverse trig functions. */
-#define pS0     __pS0
-#define pS1     __pS1
-#define pS2     __pS2
-#define pS3     __pS3
-#define pS4     __pS4
-#define pS5     __pS5
-#define pS6     __pS6
-#define qS1     __qS1
-#define qS2     __qS2
-#define qS3     __qS3
-#define qS4     __qS4
-#define qS5     __qS5
-#define pi_hi   __pi_hi
-#define pi_lo   __pi_lo
+/* shared by acosl, asinl and atan2l */
 #define pio2_hi __pio2_hi
 #define pio2_lo __pio2_lo
+extern const long double pio2_hi, pio2_lo;
 
-extern const long double pS0, pS1, pS2, pS3, pS4, pS5, pS6;
-extern const long double qS1, qS2, qS3, qS4, qS5;
-extern const long double pi_hi, pi_lo, pio2_hi, pio2_lo;
-
-static long double P(long double x)
-{
-	return x * (pS0 + x * (pS1 + x * (pS2 + x * (pS3 +
-		x * (pS4 + x * (pS5 + x * pS6))))));
-}
-
-static long double Q(long double x)
-{
-	return 1.0 + x * (qS1 + x * (qS2 + x * (qS3 + x * (qS4 + x * qS5))));
-}
-
+long double __invtrigl_R(long double z);
 #endif

+ 37 - 37
src/math/acos.c

@@ -36,12 +36,8 @@
 #include "libm.h"
 
 static const double
-pi      = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
-pio2_hi = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */
-// FIXME
-static const volatile double
-pio2_lo = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */
-static const double
+pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
+pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
 pS0 =  1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
 pS2 =  2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
@@ -53,49 +49,53 @@ qS2 =  2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
 qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
 
+static double R(double z)
+{
+	double p, q;
+	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+	return p/q;
+}
+
 double acos(double x)
 {
-	double z,p,q,r,w,s,c,df;
-	int32_t hx,ix;
+	double z,w,s,c,df;
+	uint32_t hx,ix;
 
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
-	if (ix >= 0x3ff00000) {  /* |x| >= 1 */
+	/* |x| >= 1 or nan */
+	if (ix >= 0x3ff00000) {
 		uint32_t lx;
 
 		GET_LOW_WORD(lx,x);
-		if ((ix-0x3ff00000 | lx) == 0) {  /* |x|==1 */
-			if (hx > 0) return 0.0;  /* acos(1) = 0  */
-			return pi + 2.0*pio2_lo; /* acos(-1)= pi */
+		if ((ix-0x3ff00000 | lx) == 0) {
+			/* acos(1)=0, acos(-1)=pi */
+			if (hx >> 31)
+				return 2*pio2_hi + 0x1p-1000;
+			return 0;
 		}
-		return (x-x)/(x-x);  /* acos(|x|>1) is NaN */
+		return 0/(x-x);
 	}
-	if (ix < 0x3fe00000) {   /* |x| < 0.5 */
+	/* |x| < 0.5 */
+	if (ix < 0x3fe00000) {
 		if (ix <= 0x3c600000)  /* |x| < 2**-57 */
-			return pio2_hi + pio2_lo;
-		z = x*x;
-		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-		r = p/q;
-		return pio2_hi - (x - (pio2_lo-x*r));
-	} else if (hx < 0) {     /* x < -0.5 */
+			return pio2_hi + 0x1p-1000;
+		return pio2_hi - (x - (pio2_lo-x*R(x*x)));
+	}
+	/* x < -0.5 */
+	if (hx >> 31) {
 		z = (1.0+x)*0.5;
-		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-		s = sqrt(z);
-		r = p/q;
-		w = r*s-pio2_lo;
-		return pi - 2.0*(s+w);
-	} else {                 /* x > 0.5 */
-		z = (1.0-x)*0.5;
 		s = sqrt(z);
-		df = s;
-		SET_LOW_WORD(df,0);
-		c  = (z-df*df)/(s+df);
-		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-		r = p/q;
-		w = r*s+c;
-		return 2.0*(df+w);
+		w = R(z)*s-pio2_lo;
+		return 2*(pio2_hi - (s+w));
 	}
+	/* x > 0.5 */
+	z = (1.0-x)*0.5;
+	s = sqrt(z);
+	df = s;
+	SET_LOW_WORD(df,0);
+	c = (z-df*df)/(s+df);
+	w = R(z)*s+c;
+	return 2*(df+w);
 }

+ 37 - 40
src/math/acosf.c

@@ -16,59 +16,56 @@
 #include "libm.h"
 
 static const float
-pi      = 3.1415925026e+00, /* 0x40490fda */
-pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */
-static const volatile float
-pio2_lo = 7.5497894159e-08; /* 0x33a22168 */
-static const float
+pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */
+pio2_lo = 7.5497894159e-08, /* 0x33a22168 */
 pS0 =  1.6666586697e-01,
 pS1 = -4.2743422091e-02,
 pS2 = -8.6563630030e-03,
 qS1 = -7.0662963390e-01;
 
+static float R(float z)
+{
+	float p, q;
+	p = z*(pS0+z*(pS1+z*pS2));
+	q = 1.0f+z*qS1;
+	return p/q;
+}
+
 float acosf(float x)
 {
-	float z,p,q,r,w,s,c,df;
-	int32_t hx,ix;
+	float z,w,s,c,df;
+	uint32_t hx,ix;
 
 	GET_FLOAT_WORD(hx, x);
 	ix = hx & 0x7fffffff;
-	if (ix >= 0x3f800000) {  /* |x| >= 1 */
-		if (ix == 0x3f800000) {  /* |x| == 1 */
-			if (hx > 0) return 0.0f;  /* acos(1) = 0 */
-			return pi + 2.0f*pio2_lo;  /* acos(-1)= pi */
+	/* |x| >= 1 or nan */
+	if (ix >= 0x3f800000) {
+		if (ix == 0x3f800000) {
+			if (hx >> 31)
+				return 2*pio2_hi + 0x1p-120f;
+			return 0;
 		}
-		return (x-x)/(x-x);  /* acos(|x|>1) is NaN */
+		return 0/(x-x);
 	}
-	if (ix < 0x3f000000) {   /* |x| < 0.5 */
+	/* |x| < 0.5 */
+	if (ix < 0x3f000000) {
 		if (ix <= 0x32800000) /* |x| < 2**-26 */
-			return pio2_hi + pio2_lo;
-		z = x*x;
-		p = z*(pS0+z*(pS1+z*pS2));
-		q = 1.0f+z*qS1;
-		r = p/q;
-		return pio2_hi - (x - (pio2_lo-x*r));
-	} else if (hx < 0) {     /* x < -0.5 */
-		z = (1.0f+x)*0.5f;
-		p = z*(pS0+z*(pS1+z*pS2));
-		q = 1.0f+z*qS1;
-		s = sqrtf(z);
-		r = p/q;
-		w = r*s-pio2_lo;
-		return pi - 2.0f*(s+w);
-	} else {                 /* x > 0.5 */
-		int32_t idf;
-
-		z = (1.0f-x)*0.5f;
+			return pio2_hi + 0x1p-120f;
+		return pio2_hi - (x - (pio2_lo-x*R(x*x)));
+	}
+	/* x < -0.5 */
+	if (hx >> 31) {
+		z = (1+x)*0.5f;
 		s = sqrtf(z);
-		df = s;
-		GET_FLOAT_WORD(idf,df);
-		SET_FLOAT_WORD(df,idf&0xfffff000);
-		c  = (z-df*df)/(s+df);
-		p = z*(pS0+z*(pS1+z*pS2));
-		q = 1.0f+z*qS1;
-		r = p/q;
-		w = r*s+c;
-		return 2.0f*(df+w);
+		w = R(z)*s-pio2_lo;
+		return 2*(pio2_hi - (s+w));
 	}
+	/* x > 0.5 */
+	z = (1-x)*0.5f;
+	s = sqrtf(z);
+	GET_FLOAT_WORD(hx,s);
+	SET_FLOAT_WORD(df,hx&0xfffff000);
+	c = (z-df*df)/(s+df);
+	w = R(z)*s+c;
+	return 2*(df+w);
 }

+ 26 - 35
src/math/acosl.c

@@ -23,55 +23,46 @@ long double acosl(long double x)
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 #include "__invtrigl.h"
-#define ACOS_CONST      (BIAS - 65)     /* 2**-65 */
 
 long double acosl(long double x)
 {
 	union IEEEl2bits u;
-	long double z, p, q, r, w, s, c, df;
+	long double z, w, s, c, df;
 	int16_t expsign, expt;
 	u.e = x;
 	expsign = u.xbits.expsign;
 	expt = expsign & 0x7fff;
-	if (expt >= BIAS) {        /* |x| >= 1 */
-		if (expt == BIAS &&
+	/* |x| >= 1 or nan */
+	if (expt >= 0x3fff) {
+		if (expt == 0x3fff &&
 			((u.bits.manh & ~LDBL_NBIT) | u.bits.manl) == 0) {
 			if (expsign > 0)
-				return 0.0;  /* acos(1) = 0 */
-			else
-				// FIXME
-				return pi_hi + 2.0 * pio2_lo;  /* acos(-1)= pi */
+				return 0;  /* acos(1) = 0 */
+			return 2*pio2_hi + 0x1p-1000;  /* acos(-1)= pi */
 		}
-		return (x - x) / (x - x);  /* acos(|x|>1) is NaN */
+		return 0/(x-x);  /* acos(|x|>1) is NaN */
 	}
-	if (expt < BIAS - 1) {     /* |x| < 0.5 */
-		if (expt < ACOS_CONST)
-			return pio2_hi + pio2_lo;  /* x tiny: acosl=pi/2 */
-		z = x * x;
-		p = P(z);
-		q = Q(z);
-		r = p / q;
-		return pio2_hi - (x - (pio2_lo - x * r));
-	} else if (expsign < 0) {  /* x < -0.5 */
+	/* |x| < 0.5 */
+	if (expt < 0x3fff - 1) {
+		if (expt < 0x3fff - 65)
+			return pio2_hi + 0x1p-1000;  /* x < 0x1p-65: acosl(x)=pi/2 */
+		return pio2_hi - (x - (pio2_lo - x * __invtrigl_R(x*x)));
+	}
+	/* x < -0.5 */
+	if (expsign < 0) {
 		z = (1.0 + x) * 0.5;
-		p = P(z);
-		q = Q(z);
-		s = sqrtl(z);
-		r = p / q;
-		w = r * s - pio2_lo;
-		return pi_hi - 2.0 * (s + w);
-	} else {                   /* x > 0.5 */
-		z = (1.0 - x) * 0.5;
 		s = sqrtl(z);
-		u.e = s;
-		u.bits.manl = 0;
-		df = u.e;
-		c = (z - df * df) / (s + df);
-		p = P(z);
-		q = Q(z);
-		r = p / q;
-		w = r * s + c;
-		return 2.0 * (df + w);
+		w = __invtrigl_R(z) * s - pio2_lo;
+		return 2*(pio2_hi - (s + w));
 	}
+	/* x > 0.5 */
+	z = (1.0 - x) * 0.5;
+	s = sqrtl(z);
+	u.e = s;
+	u.bits.manl = 0;
+	df = u.e;
+	c = (z - df * df) / (s + df);
+	w = __invtrigl_R(z) * s + c;
+	return 2*(df + w);
 }
 #endif

+ 36 - 35
src/math/asin.c

@@ -42,10 +42,8 @@
 #include "libm.h"
 
 static const double
-huge = 1.000e+300,
 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
 pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
-pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
 /* coefficients for R(x^2) */
 pS0 =  1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
@@ -58,51 +56,54 @@ qS2 =  2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
 qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
 
+static double R(double z)
+{
+	double p, q;
+	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+	return p/q;
+}
+
 double asin(double x)
 {
-	double t=0.0,w,p,q,c,r,s;
-	int32_t hx,ix;
+	double z,r,s;
+	uint32_t hx,ix;
 
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
-	if (ix >= 0x3ff00000) {           /* |x|>= 1 */
+	/* |x| >= 1 or nan */
+	if (ix >= 0x3ff00000) {
 		uint32_t lx;
-
 		GET_LOW_WORD(lx, x);
 		if ((ix-0x3ff00000 | lx) == 0)
 			/* asin(1) = +-pi/2 with inexact */
-			return x*pio2_hi + x*pio2_lo;
-		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */
-	} else if (ix < 0x3fe00000) {  /* |x|<0.5 */
-		if (ix < 0x3e500000) {  /* if |x| < 2**-26 */
-			if (huge+x > 1.0)
-				return x; /* return x with inexact if x!=0*/
+			return x*pio2_hi + 0x1p-1000;
+		return 0/(x-x);
+	}
+	/* |x| < 0.5 */
+	if (ix < 0x3fe00000) {
+		if (ix < 0x3e500000) {
+			/* |x|<0x1p-26, return x with inexact if x!=0*/
+			FORCE_EVAL(x + 0x1p1000);
+			return x;
 		}
-		t = x*x;
-		p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
-		q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
-		w = p/q;
-		return x + x*w;
+		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
-	w = 1.0 - fabs(x);
-	t = w*0.5;
-	p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
-	q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
-	s = sqrt(t);
-	if (ix >= 0x3FEF3333) {  /* if |x| > 0.975 */
-		w = p/q;
-		t = pio2_hi-(2.0*(s+s*w)-pio2_lo);
+	z = (1 - fabs(x))*0.5;
+	s = sqrt(z);
+	r = R(z);
+	if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
+		x = pio2_hi-(2*(s+s*r)-pio2_lo);
 	} else {
-		w = s;
-		SET_LOW_WORD(w,0);
-		c = (t-w*w)/(s+w);
-		r = p/q;
-		p = 2.0*s*r-(pio2_lo-2.0*c);
-		q = pio4_hi - 2.0*w;
-		t = pio4_hi - (p-q);
+		double f,c;
+		/* f+c = sqrt(z) */
+		f = s;
+		SET_LOW_WORD(f,0);
+		c = (z-f*f)/(s+f);
+		x = 0.5*pio2_hi - (2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));
 	}
-	if (hx > 0)
-		return t;
-	return -t;
+	if (hx >> 31)
+		return -x;
+	return x;
 }

+ 25 - 26
src/math/asinf.c

@@ -12,52 +12,51 @@
  * is preserved.
  * ====================================================
  */
-
 #include "libm.h"
 
+static const double
+pio2 = 1.570796326794896558e+00;
+
 static const float
-huge = 1.000e+30,
 /* coefficients for R(x^2) */
 pS0 =  1.6666586697e-01,
 pS1 = -4.2743422091e-02,
 pS2 = -8.6563630030e-03,
 qS1 = -7.0662963390e-01;
 
-static const double
-pio2 = 1.570796326794896558e+00;
+static float R(float z)
+{
+	float p, q;
+	p = z*(pS0+z*(pS1+z*pS2));
+	q = 1.0f+z*qS1;
+	return p/q;
+}
 
 float asinf(float x)
 {
 	double s;
-	float t,w,p,q;
-	int32_t hx,ix;
+	float z;
+	uint32_t hx,ix;
 
 	GET_FLOAT_WORD(hx, x);
 	ix = hx & 0x7fffffff;
 	if (ix >= 0x3f800000) {  /* |x| >= 1 */
 		if (ix == 0x3f800000)  /* |x| == 1 */
-			return x*pio2;  /* asin(+-1) = +-pi/2 with inexact */
-		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */
-	} else if (ix < 0x3f000000) {  /* |x|<0.5 */
+			return x*pio2 + 0x1p-120f;  /* asin(+-1) = +-pi/2 with inexact */
+		return 0/(x-x);  /* asin(|x|>1) is NaN */
+	}
+	if (ix < 0x3f000000) {  /* |x| < 0.5 */
 		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			if (huge+x > 1.0f)
-				return x; /* return x with inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p120f);
+			return x; /* return x with inexact if x!=0 */
 		}
-		t = x*x;
-		p = t*(pS0+t*(pS1+t*pS2));
-		q = 1.0f+t*qS1;
-		w = p/q;
-		return x + x*w;
+		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
-	w = 1.0f - fabsf(x);
-	t = w*0.5f;
-	p = t*(pS0+t*(pS1+t*pS2));
-	q = 1.0f+t*qS1;
-	s = sqrt(t);
-	w = p/q;
-	t = pio2-2.0*(s+s*w);
-	if (hx > 0)
-		return t;
-	return -t;
+	z = (1 - fabsf(x))*0.5f;
+	s = sqrt(z);
+	x = pio2 - 2*(s+s*R(z));
+	if (hx >> 31)
+		return -x;
+	return x;
 }

+ 25 - 36
src/math/asinl.c

@@ -23,60 +23,49 @@ long double asinl(long double x)
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 #include "__invtrigl.h"
-static const long double huge = 1.000e+300;
-static const long double pio4_hi = 7.85398163397448309628e-01L;
-#define ASIN_LINEAR     (BIAS - 32)     /* 2**-32 */
 /* 0.95 */
-#define THRESH  ((0xe666666666666666ULL>>(64-(MANH_SIZE-1)))|LDBL_NBIT)
+#define THRESH  ((0xe666666666666666ULL>>(64-(LDBL_MANH_SIZE-1)))|LDBL_NBIT)
 
 long double asinl(long double x)
 {
 	union IEEEl2bits u;
-	long double t=0.0,w,p,q,c,r,s;
-	int16_t expsign, expt;
+	long double z,r,s;
+	uint16_t expsign, expt;
 
 	u.e = x;
 	expsign = u.xbits.expsign;
 	expt = expsign & 0x7fff;
-	if (expt >= BIAS) {          /* |x|>= 1 */
-		if (expt == BIAS &&
+	if (expt >= 0x3fff) {   /* |x| >= 1 or nan */
+		if (expt == 0x3fff &&
 		    ((u.bits.manh&~LDBL_NBIT)|u.bits.manl) == 0)
-			/* asin(1)=+-pi/2 with inexact */
-			return x*pio2_hi + x*pio2_lo;
-		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */
-	} else if (expt < BIAS-1) {  /* |x|<0.5 */
-		if (expt < ASIN_LINEAR) {  /* if |x| is small, asinl(x)=x */
+			/* asin(+-1)=+-pi/2 with inexact */
+			return x*pio2_hi + 0x1p-1000;
+		return 0/(x-x);
+	}
+	if (expt < 0x3fff - 1) {  /* |x| < 0.5 */
+		if (expt < 0x3fff - 32) {  /* |x|<0x1p-32, asinl(x)=x */
 			/* return x with inexact if x!=0 */
-			if (huge+x > 1.0)
-				return x;
+			FORCE_EVAL(x + 0x1p1000);
+			return x;
 		}
-		t = x*x;
-		p = P(t);
-		q = Q(t);
-		w = p/q;
-		return x + x*w;
+		return x + x*__invtrigl_R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
-	w = 1.0 - fabsl(x);
-	t = w*0.5;
-	p = P(t);
-	q = Q(t);
-	s = sqrtl(t);
+	z = (1.0 - fabsl(x))*0.5;
+	s = sqrtl(z);
+	r = __invtrigl_R(z);
 	if (u.bits.manh >= THRESH) { /* if |x| is close to 1 */
-		w = p/q;
-		t = pio2_hi-(2.0*(s+s*w)-pio2_lo);
+		x = pio2_hi - (2*(s+s*r)-pio2_lo);
 	} else {
+		long double f, c;
 		u.e = s;
 		u.bits.manl = 0;
-		w = u.e;
-		c = (t-w*w)/(s+w);
-		r = p/q;
-		p = 2.0*s*r-(pio2_lo-2.0*c);
-		q = pio4_hi-2.0*w;
-		t = pio4_hi-(p-q);
+		f = u.e;
+		c = (z-f*f)/(s+f);
+		x = 0.5*pio2_hi-(2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));
 	}
-	if (expsign > 0)
-		return t;
-	return -t;
+	if (expsign>>15)
+		return -x;
+	return x;
 }
 #endif

+ 13 - 19
src/math/atan.c

@@ -60,32 +60,26 @@ static const double aT[] = {
   1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
 };
 
-static const double huge = 1.0e300;
-
 double atan(double x)
 {
 	double w,s1,s2,z;
-	int32_t ix,hx,id;
+	uint32_t ix,sign;
+	int id;
 
-	GET_HIGH_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_HIGH_WORD(ix, x);
+	sign = ix >> 31;
+	ix &= 0x7fffffff;
 	if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
-		uint32_t low;
-
-		GET_LOW_WORD(low, x);
-		if (ix > 0x7ff00000 ||
-		    (ix == 0x7ff00000 && low != 0))  /* NaN */
-			return x+x;
-		if (hx > 0)
-			return  atanhi[3] + *(volatile double *)&atanlo[3];
-		else
-			return -atanhi[3] - *(volatile double *)&atanlo[3];
+		if (isnan(x))
+			return x;
+		z = atanhi[3] + 0x1p-1000;
+		return sign ? -z : z;
 	}
 	if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
 		if (ix < 0x3e400000) {  /* |x| < 2^-27 */
-			/* raise inexact */
-			if (huge+x > 1.0)
-				return x;
+			/* raise inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p1000);
+			return x;
 		}
 		id = -1;
 	} else {
@@ -117,5 +111,5 @@ double atan(double x)
 	if (id < 0)
 		return x - x*(s1+s2);
 	z = atanhi[id] - (x*(s1+s2) - atanlo[id] - x);
-	return hx < 0 ? -z : z;
+	return sign ? -z : z;
 }

+ 19 - 21
src/math/atan2l.c

@@ -24,8 +24,6 @@ long double atan2l(long double y, long double x)
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 #include "__invtrigl.h"
-// FIXME:
-static const volatile long double tiny = 1.0e-300;
 
 long double atan2l(long double y, long double x)
 {
@@ -40,12 +38,12 @@ long double atan2l(long double y, long double x)
 	ux.e = x;
 	expsignx = ux.xbits.expsign;
 	exptx = expsignx & 0x7fff;
-	if ((exptx==BIAS+LDBL_MAX_EXP &&
+	if ((exptx==0x7fff &&
 	     ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)!=0) || /* x is NaN */
-	    (expty==BIAS+LDBL_MAX_EXP &&
+	    (expty==0x7fff &&
 	     ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)!=0))   /* y is NaN */
 		return x+y;
-	if (expsignx==BIAS && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */
+	if (expsignx==0x3fff && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */
 		return atanl(y);
 	m = ((expsigny>>15)&1) | ((expsignx>>14)&2);  /* 2*sign(x)+sign(y) */
 
@@ -54,39 +52,39 @@ long double atan2l(long double y, long double x)
 		switch(m) {
 		case 0:
 		case 1: return y;           /* atan(+-0,+anything)=+-0 */
-		case 2: return  pi_hi+tiny; /* atan(+0,-anything) = pi */
-		case 3: return -pi_hi-tiny; /* atan(-0,-anything) =-pi */
+		case 2: return  2*pio2_hi+0x1p-1000; /* atan(+0,-anything) = pi */
+		case 3: return -2*pio2_hi-0x1p-1000; /* atan(-0,-anything) =-pi */
 		}
 	}
 	/* when x = 0 */
 	if (exptx==0 && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0)
-		return expsigny < 0 ? -pio2_hi-tiny : pio2_hi+tiny;
+		return expsigny < 0 ? -pio2_hi-0x1p-1000 : pio2_hi+0x1p-1000;
 	/* when x is INF */
-	if (exptx == BIAS+LDBL_MAX_EXP) {
-		if (expty == BIAS+LDBL_MAX_EXP) {
+	if (exptx == 0x7fff) {
+		if (expty == 0x7fff) {
 			switch(m) {
-			case 0: return  pio2_hi*0.5+tiny; /* atan(+INF,+INF) */
-			case 1: return -pio2_hi*0.5-tiny; /* atan(-INF,+INF) */
-			case 2: return  1.5*pio2_hi+tiny; /* atan(+INF,-INF) */
-			case 3: return -1.5*pio2_hi-tiny; /* atan(-INF,-INF) */
+			case 0: return  pio2_hi*0.5+0x1p-1000; /* atan(+INF,+INF) */
+			case 1: return -pio2_hi*0.5-0x1p-1000; /* atan(-INF,+INF) */
+			case 2: return  1.5*pio2_hi+0x1p-1000; /* atan(+INF,-INF) */
+			case 3: return -1.5*pio2_hi-0x1p-1000; /* atan(-INF,-INF) */
 			}
 		} else {
 			switch(m) {
 			case 0: return  0.0;        /* atan(+...,+INF) */
 			case 1: return -0.0;        /* atan(-...,+INF) */
-			case 2: return  pi_hi+tiny; /* atan(+...,-INF) */
-			case 3: return -pi_hi-tiny; /* atan(-...,-INF) */
+			case 2: return  2*pio2_hi+0x1p-1000; /* atan(+...,-INF) */
+			case 3: return -2*pio2_hi-0x1p-1000; /* atan(-...,-INF) */
 			}
 		}
 	}
 	/* when y is INF */
-	if (expty == BIAS+LDBL_MAX_EXP)
-		return expsigny < 0 ? -pio2_hi-tiny : pio2_hi+tiny;
+	if (expty == 0x7fff)
+		return expsigny < 0 ? -pio2_hi-0x1p-1000 : pio2_hi+0x1p-1000;
 
 	/* compute y/x */
 	k = expty-exptx;
 	if(k > LDBL_MANT_DIG+2) { /* |y/x| huge */
-		z = pio2_hi+pio2_lo;
+		z = pio2_hi+0x1p-1000;
 		m &= 1;
 	} else if (expsignx < 0 && k < -LDBL_MANT_DIG-2) /* |y/x| tiny, x<0 */
 		z = 0.0;
@@ -95,9 +93,9 @@ long double atan2l(long double y, long double x)
 	switch (m) {
 	case 0: return z;               /* atan(+,+) */
 	case 1: return -z;              /* atan(-,+) */
-	case 2: return pi_hi-(z-pi_lo); /* atan(+,-) */
+	case 2: return 2*pio2_hi-(z-2*pio2_lo); /* atan(+,-) */
 	default: /* case 3 */
-		return (z-pi_lo)-pi_hi; /* atan(-,-) */
+		return (z-2*pio2_lo)-2*pio2_hi; /* atan(-,-) */
 	}
 }
 #endif

+ 13 - 15
src/math/atanf.c

@@ -38,28 +38,26 @@ static const float aT[] = {
   6.1687607318e-02,
 };
 
-static const float huge = 1.0e30;
-
 float atanf(float x)
 {
 	float w,s1,s2,z;
-	int32_t ix,hx,id;
+	uint32_t ix,sign;
+	int id;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_FLOAT_WORD(ix, x);
+	sign = ix>>31;
+	ix &= 0x7fffffff;
 	if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
-		if (ix > 0x7f800000)  /* NaN */
-			return x+x;
-		if (hx > 0)
-			return  atanhi[3] + *(volatile float *)&atanlo[3];
-		else
-			return -atanhi[3] - *(volatile float *)&atanlo[3];
+		if (isnan(x))
+			return x;
+		z = atanhi[3] + 0x1p-120f;
+		return sign ? -z : z;
 	}
 	if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			/* raise inexact */
-			if(huge+x>1.0f)
-				return x;
+			/* raise inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p120f);
+			return x;
 		}
 		id = -1;
 	} else {
@@ -91,5 +89,5 @@ float atanf(float x)
 	if (id < 0)
 		return x - x*(s1+s2);
 	z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
-	return hx < 0 ? -z : z;
+	return sign ? -z : z;
 }

+ 13 - 20
src/math/atanl.c

@@ -22,11 +22,6 @@ long double atanl(long double x)
 	return atan(x);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-#include "__invtrigl.h"
-
-#define ATAN_CONST      (BIAS + 65)     /* 2**65 */
-#define ATAN_LINEAR     (BIAS - 32)     /* 2**-32 */
-static const long double huge = 1.0e300;
 
 static const long double atanhi[] = {
 	 4.63647609000806116202e-01L,
@@ -81,29 +76,27 @@ long double atanl(long double x)
 	u.e = x;
 	expsign = u.xbits.expsign;
 	expt = expsign & 0x7fff;
-	if (expt >= ATAN_CONST) { /* if |x| is large, atan(x)~=pi/2 */
-		if (expt == BIAS + LDBL_MAX_EXP &&
+	if (expt >= 0x3fff + 65) { /* if |x| is large, atan(x)~=pi/2 */
+		if (expt == 0x7fff &&
 		    ((u.bits.manh&~LDBL_NBIT)|u.bits.manl)!=0)  /* NaN */
 			return x+x;
-		if (expsign > 0)
-			return  atanhi[3]+atanlo[3];
-		else
-			return -atanhi[3]-atanlo[3];
+		z = atanhi[3] + 0x1p-1000;
+		return expsign < 0 ? -z : z;
 	}
 	/* Extract the exponent and the first few bits of the mantissa. */
 	/* XXX There should be a more convenient way to do this. */
-	expman = (expt << 8) | ((u.bits.manh >> (MANH_SIZE - 9)) & 0xff);
-	if (expman < ((BIAS - 2) << 8) + 0xc0) {  /* |x| < 0.4375 */
-		if (expt < ATAN_LINEAR) {   /* if |x| is small, atanl(x)~=x */
-			/* raise inexact */
-			if (huge+x > 1.0)
-				return x;
+	expman = (expt << 8) | ((u.bits.manh >> (LDBL_MANH_SIZE - 9)) & 0xff);
+	if (expman < ((0x3fff - 2) << 8) + 0xc0) {  /* |x| < 0.4375 */
+		if (expt < 0x3fff - 32) {   /* if |x| is small, atanl(x)~=x */
+			/* raise inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p1000);
+			return x;
 		}
 		id = -1;
 	} else {
 		x = fabsl(x);
-		if (expman < (BIAS << 8) + 0x30) {  /* |x| < 1.1875 */
-			if (expman < ((BIAS - 1) << 8) + 0x60) { /*  7/16 <= |x| < 11/16 */
+		if (expman < (0x3fff << 8) + 0x30) {  /* |x| < 1.1875 */
+			if (expman < ((0x3fff - 1) << 8) + 0x60) { /*  7/16 <= |x| < 11/16 */
 				id = 0;
 				x = (2.0*x-1.0)/(2.0+x);
 			} else {                                 /* 11/16 <= |x| < 19/16 */
@@ -111,7 +104,7 @@ long double atanl(long double x)
 				x = (x-1.0)/(x+1.0);
 			}
 		} else {
-			if (expman < ((BIAS + 1) << 8) + 0x38) { /* |x| < 2.4375 */
+			if (expman < ((0x3fff + 1) << 8) + 0x38) { /* |x| < 2.4375 */
 				id = 2;
 				x = (x-1.5)/(1.0+1.5*x);
 			} else {                                 /* 2.4375 <= |x| < 2^ATAN_CONST */