|
@@ -1,83 +1,83 @@
|
|
|
-/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
|
|
|
-/*
|
|
|
- * Conversion to float by Ian Lance Taylor, Cygnus Support, [email protected].
|
|
|
- */
|
|
|
-/*
|
|
|
- * ====================================================
|
|
|
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
|
- *
|
|
|
- * Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
|
- * Permission to use, copy, modify, and distribute this
|
|
|
- * software is freely granted, provided that this notice
|
|
|
- * is preserved.
|
|
|
- * ====================================================
|
|
|
- */
|
|
|
-
|
|
|
+#include <stdint.h>
|
|
|
+#include <math.h>
|
|
|
#include "libm.h"
|
|
|
+#include "sqrt_data.h"
|
|
|
|
|
|
-static const float tiny = 1.0e-30;
|
|
|
+#define FENV_SUPPORT 1
|
|
|
|
|
|
-float sqrtf(float x)
|
|
|
+static inline uint32_t mul32(uint32_t a, uint32_t b)
|
|
|
{
|
|
|
- float z;
|
|
|
- int32_t sign = (int)0x80000000;
|
|
|
- int32_t ix,s,q,m,t,i;
|
|
|
- uint32_t r;
|
|
|
+ return (uint64_t)a*b >> 32;
|
|
|
+}
|
|
|
|
|
|
- GET_FLOAT_WORD(ix, x);
|
|
|
+/* see sqrt.c for more detailed comments. */
|
|
|
|
|
|
- /* take care of Inf and NaN */
|
|
|
- if ((ix&0x7f800000) == 0x7f800000)
|
|
|
- return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
|
|
|
+float sqrtf(float x)
|
|
|
+{
|
|
|
+ uint32_t ix, m, m1, m0, even, ey;
|
|
|
|
|
|
- /* take care of zero */
|
|
|
- if (ix <= 0) {
|
|
|
- if ((ix&~sign) == 0)
|
|
|
- return x; /* sqrt(+-0) = +-0 */
|
|
|
- if (ix < 0)
|
|
|
- return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
|
|
|
- }
|
|
|
- /* normalize x */
|
|
|
- m = ix>>23;
|
|
|
- if (m == 0) { /* subnormal x */
|
|
|
- for (i = 0; (ix&0x00800000) == 0; i++)
|
|
|
- ix<<=1;
|
|
|
- m -= i - 1;
|
|
|
+ ix = asuint(x);
|
|
|
+ if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
|
|
+ /* x < 0x1p-126 or inf or nan. */
|
|
|
+ if (ix * 2 == 0)
|
|
|
+ return x;
|
|
|
+ if (ix == 0x7f800000)
|
|
|
+ return x;
|
|
|
+ if (ix > 0x7f800000)
|
|
|
+ return __math_invalidf(x);
|
|
|
+ /* x is subnormal, normalize it. */
|
|
|
+ ix = asuint(x * 0x1p23f);
|
|
|
+ ix -= 23 << 23;
|
|
|
}
|
|
|
- m -= 127; /* unbias exponent */
|
|
|
- ix = (ix&0x007fffff)|0x00800000;
|
|
|
- if (m&1) /* odd m, double x to make it even */
|
|
|
- ix += ix;
|
|
|
- m >>= 1; /* m = [m/2] */
|
|
|
|
|
|
- /* generate sqrt(x) bit by bit */
|
|
|
- ix += ix;
|
|
|
- q = s = 0; /* q = sqrt(x) */
|
|
|
- r = 0x01000000; /* r = moving bit from right to left */
|
|
|
+ /* x = 4^e m; with int e and m in [1, 4). */
|
|
|
+ even = ix & 0x00800000;
|
|
|
+ m1 = (ix << 8) | 0x80000000;
|
|
|
+ m0 = (ix << 7) & 0x7fffffff;
|
|
|
+ m = even ? m0 : m1;
|
|
|
|
|
|
- while (r != 0) {
|
|
|
- t = s + r;
|
|
|
- if (t <= ix) {
|
|
|
- s = t+r;
|
|
|
- ix -= t;
|
|
|
- q += r;
|
|
|
- }
|
|
|
- ix += ix;
|
|
|
- r >>= 1;
|
|
|
- }
|
|
|
+ /* 2^e is the exponent part of the return value. */
|
|
|
+ ey = ix >> 1;
|
|
|
+ ey += 0x3f800000 >> 1;
|
|
|
+ ey &= 0x7f800000;
|
|
|
+
|
|
|
+ /* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations. */
|
|
|
+ static const uint32_t three = 0xc0000000;
|
|
|
+ uint32_t r, s, d, u, i;
|
|
|
+ i = (ix >> 17) % 128;
|
|
|
+ r = (uint32_t)__rsqrt_tab[i] << 16;
|
|
|
+ /* |r*sqrt(m) - 1| < 0x1p-8 */
|
|
|
+ s = mul32(m, r);
|
|
|
+ /* |s/sqrt(m) - 1| < 0x1p-8 */
|
|
|
+ d = mul32(s, r);
|
|
|
+ u = three - d;
|
|
|
+ r = mul32(r, u) << 1;
|
|
|
+ /* |r*sqrt(m) - 1| < 0x1.7bp-16 */
|
|
|
+ s = mul32(s, u) << 1;
|
|
|
+ /* |s/sqrt(m) - 1| < 0x1.7bp-16 */
|
|
|
+ d = mul32(s, r);
|
|
|
+ u = three - d;
|
|
|
+ s = mul32(s, u);
|
|
|
+ /* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
|
|
|
+ s = (s - 1)>>6;
|
|
|
+ /* s < sqrt(m) < s + 0x1.08p-23 */
|
|
|
|
|
|
- /* use floating add to find out rounding direction */
|
|
|
- if (ix != 0) {
|
|
|
- z = 1.0f - tiny; /* raise inexact flag */
|
|
|
- if (z >= 1.0f) {
|
|
|
- z = 1.0f + tiny;
|
|
|
- if (z > 1.0f)
|
|
|
- q += 2;
|
|
|
- else
|
|
|
- q += q & 1;
|
|
|
- }
|
|
|
+ /* compute nearest rounded result. */
|
|
|
+ uint32_t d0, d1, d2;
|
|
|
+ float y, t;
|
|
|
+ d0 = (m << 16) - s*s;
|
|
|
+ d1 = s - d0;
|
|
|
+ d2 = d1 + s + 1;
|
|
|
+ s += d1 >> 31;
|
|
|
+ s &= 0x007fffff;
|
|
|
+ s |= ey;
|
|
|
+ y = asfloat(s);
|
|
|
+ if (FENV_SUPPORT) {
|
|
|
+ /* handle rounding and inexact exception. */
|
|
|
+ uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000;
|
|
|
+ tiny |= (d1^d2) & 0x80000000;
|
|
|
+ t = asfloat(tiny);
|
|
|
+ y = eval_as_float(y + t);
|
|
|
}
|
|
|
- ix = (q>>1) + 0x3f000000;
|
|
|
- SET_FLOAT_WORD(z, ix + ((uint32_t)m << 23));
|
|
|
- return z;
|
|
|
+ return y;
|
|
|
}
|