пре 11 година · 535104ab6a
--- a/src/math/cbrt.c
+++ b/src/math/cbrt.c
@@ -15,7 +15,8 @@
 
				  * Return cube root of x
			
 
				  */
			
 
				 
			
 
				-#include "libm.h"
			
 
				+#include <math.h>
			
 
				+#include <stdint.h>
			
 
				 
			
 
				 static const uint32_t
			
 
				 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
			
@@ -31,15 +32,10 @@ P4 =  0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
 
				 
			
 
				 double cbrt(double x)
			
 
				 {
			
 
				-	int32_t hx;
			
 
				-	union dshape u;
			
 
				-	double r,s,t=0.0,w;
			
 
				-	uint32_t sign;
			
 
				-	uint32_t high,low;
			
 
				+	union {double f; uint64_t i;} u = {x};
			
 
				+	double_t r,s,t,w;
			
 
				+	uint32_t hx = u.i>>32 & 0x7fffffff;
			
 
				 
			
 
				-	EXTRACT_WORDS(hx, low, x);
			
 
				-	sign = hx & 0x80000000;
			
 
				-	hx ^= sign;
			
 
				 	if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
			
 
				 		return x+x;
			
 
				 
			
@@ -59,14 +55,16 @@ double cbrt(double x)
 
				 	 * division rounds towards minus infinity; this is also efficient.
			
 
				 	 */
			
 
				 	if (hx < 0x00100000) { /* zero or subnormal? */
			
 
				-		if ((hx|low) == 0)
			
 
				+		u.f = x*0x1p54;
			
 
				+		hx = u.i>>32 & 0x7fffffff;
			
 
				+		if (hx == 0)
			
 
				 			return x;  /* cbrt(0) is itself */
			
 
				-		SET_HIGH_WORD(t, 0x43500000); /* set t = 2**54 */
			
 
				-		t *= x;
			
 
				-		GET_HIGH_WORD(high, t);
			
 
				-		INSERT_WORDS(t, sign|((high&0x7fffffff)/3+B2), 0);
			
 
				+		hx = hx/3 + B2;
			
 
				 	} else
			
 
				-		INSERT_WORDS(t, sign|(hx/3+B1), 0);
			
 
				+		hx = hx/3 + B1;
			
 
				+	u.i &= 1ULL<<63;
			
 
				+	u.i |= (uint64_t)hx << 32;
			
 
				+	t = u.f;
			
 
				 
			
 
				 	/*
			
 
				 	 * New cbrt to 23 bits:
			
@@ -76,7 +74,7 @@ double cbrt(double x)
 
				 	 * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
			
 
				 	 * gives us bounds for r = t**3/x.
			
 
				 	 *
			
 
				-	 * Try to optimize for parallel evaluation as in k_tanf.c.
			
 
				+	 * Try to optimize for parallel evaluation as in __tanf.c.
			
 
				 	 */
			
 
				 	r = (t*t)*(t/x);
			
 
				 	t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
			
@@ -91,9 +89,9 @@ double cbrt(double x)
 
				 	 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
			
 
				 	 * before the final error is larger than 0.667 ulps.
			
 
				 	 */
			
 
				-	u.value = t;
			
 
				-	u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
			
 
				-	t = u.value;
			
 
				+	u.f = t;
			
 
				+	u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
			
 
				+	t = u.f;
			
 
				 
			
 
				 	/* one step Newton iteration to 53 bits with error < 0.667 ulps */
			
 
				 	s = t*t;         /* t*t is exact */
			
--- a/src/math/cbrtf.c
+++ b/src/math/cbrtf.c
@@ -17,7 +17,8 @@
 
				  * Return cube root of x
			
 
				  */
			
 
				 
			
 
				-#include "libm.h"
			
 
				+#include <math.h>
			
 
				+#include <stdint.h>
			
 
				 
			
 
				 static const unsigned
			
 
				 B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
			
@@ -25,15 +26,10 @@ B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
 
				 
			
 
				 float cbrtf(float x)
			
 
				 {
			
 
				-	double r,T;
			
 
				-	float t;
			
 
				-	int32_t hx;
			
 
				-	uint32_t sign;
			
 
				-	uint32_t high;
			
 
				+	double_t r,T;
			
 
				+	union {float f; uint32_t i;} u = {x};
			
 
				+	uint32_t hx = u.i & 0x7fffffff;
			
 
				 
			
 
				-	GET_FLOAT_WORD(hx, x);
			
 
				-	sign = hx & 0x80000000;
			
 
				-	hx ^= sign;
			
 
				 	if (hx >= 0x7f800000)  /* cbrt(NaN,INF) is itself */
			
 
				 		return x + x;
			
 
				 
			
@@ -41,28 +37,29 @@ float cbrtf(float x)
 
				 	if (hx < 0x00800000) {  /* zero or subnormal? */
			
 
				 		if (hx == 0)
			
 
				 			return x;  /* cbrt(+-0) is itself */
			
 
				-		SET_FLOAT_WORD(t, 0x4b800000);  /* set t = 2**24 */
			
 
				-		t *= x;
			
 
				-		GET_FLOAT_WORD(high, t);
			
 
				-		SET_FLOAT_WORD(t, sign|((high&0x7fffffff)/3+B2));
			
 
				+		u.f = x*0x1p24f;
			
 
				+		hx = u.i & 0x7fffffff;
			
 
				+		hx = hx/3 + B2;
			
 
				 	} else
			
 
				-		SET_FLOAT_WORD(t, sign|(hx/3+B1));
			
 
				+		hx = hx/3 + B1;
			
 
				+	u.i &= 0x80000000;
			
 
				+	u.i |= hx;
			
 
				 
			
 
				 	/*
			
 
				 	 * First step Newton iteration (solving t*t-x/t == 0) to 16 bits.  In
			
 
				 	 * double precision so that its terms can be arranged for efficiency
			
 
				 	 * without causing overflow or underflow.
			
 
				 	 */
			
 
				-	T = t;
			
 
				+	T = u.f;
			
 
				 	r = T*T*T;
			
 
				-	T = T*((double)x+x+r)/(x+r+r);
			
 
				+	T = T*((double_t)x+x+r)/(x+r+r);
			
 
				 
			
 
				 	/*
			
 
				 	 * Second step Newton iteration to 47 bits.  In double precision for
			
 
				 	 * efficiency and accuracy.
			
 
				 	 */
			
 
				 	r = T*T*T;
			
 
				-	T = T*((double)x+x+r)/(x+r+r);
			
 
				+	T = T*((double_t)x+x+r)/(x+r+r);
			
 
				 
			
 
				 	/* rounding to 24 bits is perfect in round-to-nearest mode */
			
 
				 	return T;
			
--- a/src/math/cbrtl.c
+++ b/src/math/cbrtl.c
@@ -23,58 +23,50 @@ long double cbrtl(long double x)
 
				 	return cbrt(x);
			
 
				 }
			
 
				 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
			
 
				-
			
 
				-#define BIAS (LDBL_MAX_EXP - 1)
			
 
				 static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
			
 
				 
			
 
				 long double cbrtl(long double x)
			
 
				 {
			
 
				-	union IEEEl2bits u, v;
			
 
				+	union ldshape u = {x}, v;
			
 
				+	union {float f; uint32_t i;} uft;
			
 
				 	long double r, s, t, w;
			
 
				-	double dr, dt, dx;
			
 
				-	float ft, fx;
			
 
				-	uint32_t hx;
			
 
				-	uint16_t expsign;
			
 
				-	int k;
			
 
				-
			
 
				-	u.e = x;
			
 
				-	expsign = u.xbits.expsign;
			
 
				-	k = expsign & 0x7fff;
			
 
				+	double_t dr, dt, dx;
			
 
				+	float_t ft;
			
 
				+	int e = u.i.se & 0x7fff;
			
 
				+	int sign = u.i.se & 0x8000;
			
 
				 
			
 
				 	/*
			
 
				 	 * If x = +-Inf, then cbrt(x) = +-Inf.
			
 
				 	 * If x = NaN, then cbrt(x) = NaN.
			
 
				 	 */
			
 
				-	if (k == BIAS + LDBL_MAX_EXP)
			
 
				+	if (e == 0x7fff)
			
 
				 		return x + x;
			
 
				-
			
 
				-	if (k == 0) {
			
 
				+	if (e == 0) {
			
 
				+		/* Adjust subnormal numbers. */
			
 
				+		u.f *= 0x1p120;
			
 
				+		e = u.i.se & 0x7fff;
			
 
				 		/* If x = +-0, then cbrt(x) = +-0. */
			
 
				-		if ((u.bits.manh | u.bits.manl) == 0)
			
 
				+		if (e == 0)
			
 
				 			return x;
			
 
				-		/* Adjust subnormal numbers. */
			
 
				-		u.e *= 0x1.0p514;
			
 
				-		k = u.bits.exp;
			
 
				-		k -= BIAS + 514;
			
 
				-	} else
			
 
				-		k -= BIAS;
			
 
				-	u.xbits.expsign = BIAS;
			
 
				-	v.e = 1;
			
 
				-
			
 
				-	x = u.e;
			
 
				-	switch (k % 3) {
			
 
				+		e -= 120;
			
 
				+	}
			
 
				+	e -= 0x3fff;
			
 
				+	u.i.se = 0x3fff;
			
 
				+	x = u.f;
			
 
				+	switch (e % 3) {
			
 
				 	case 1:
			
 
				 	case -2:
			
 
				-		x = 2*x;
			
 
				-		k--;
			
 
				+		x *= 2;
			
 
				+		e--;
			
 
				 		break;
			
 
				 	case 2:
			
 
				 	case -1:
			
 
				-		x = 4*x;
			
 
				-		k -= 2;
			
 
				+		x *= 4;
			
 
				+		e -= 2;
			
 
				 		break;
			
 
				 	}
			
 
				-	v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3);
			
 
				+	v.f = 1.0;
			
 
				+	v.i.se = sign | (0x3fff + e/3);
			
 
				 
			
 
				 	/*
			
 
				 	 * The following is the guts of s_cbrtf, with the handling of
			
@@ -83,9 +75,9 @@ long double cbrtl(long double x)
 
				 	 */
			
 
				 
			
 
				 	/* ~5-bit estimate: */
			
 
				-	fx = x;
			
 
				-	GET_FLOAT_WORD(hx, fx);
			
 
				-	SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1));
			
 
				+	uft.f = x;
			
 
				+	uft.i = (uft.i & 0x7fffffff)/3 + B1;
			
 
				+	ft = uft.f;
			
 
				 
			
 
				 	/* ~16-bit estimate: */
			
 
				 	dx = x;
			
@@ -126,7 +118,7 @@ long double cbrtl(long double x)
 
				 	r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
			
 
				 	t = t+t*r;       /* error <= 0.5 + 0.5/3 + epsilon */
			
 
				 
			
 
				-	t *= v.e;
			
 
				+	t *= v.f;
			
 
				 	return t;
			
 
				 }
			
 
				 #endif