Browse Source

math: rewrite remainder functions (remainder, remquo, fmod, modf)

* results are exact
* modfl follows truncl (raises inexact flag spuriously now)
* modf and modff only had cosmetic cleanup
* remainder is just a wrapper around remquo now
* using iterative shift+subtract for remquo and fmod
* ld80 and ld128 are supported as well
Szabolcs Nagy 11 năm trước cách đây
mục cha
commit
ee2ee92d62
11 tập tin đã thay đổi với 472 bổ sung1010 xóa
  1. 51 128
      src/math/fmod.c
  2. 50 89
      src/math/fmodf.c
  3. 80 133
      src/math/fmodl.c
  4. 14 17
      src/math/modf.c
  5. 13 16
      src/math/modff.c
  6. 34 83
      src/math/modfl.c
  7. 4 63
      src/math/remainder.c
  8. 4 56
      src/math/remainderf.c
  9. 61 150
      src/math/remquo.c
  10. 61 105
      src/math/remquof.c
  11. 100 170
      src/math/remquol.c

+ 51 - 128
src/math/fmod.c

@@ -1,145 +1,68 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_fmod.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * fmod(x,y)
- * Return x mod y in exact arithmetic
- * Method: shift and subtract
- */
-
-#include "libm.h"
-
-static const double Zero[] = {0.0, -0.0,};
+#include <math.h>
+#include <stdint.h>
 
 double fmod(double x, double y)
 {
-	int32_t n,hx,hy,hz,ix,iy,sx,i;
-	uint32_t lx,ly,lz;
+	union {double f; uint64_t i;} ux = {x}, uy = {y};
+	int ex = ux.i>>52 & 0x7ff;
+	int ey = uy.i>>52 & 0x7ff;
+	int sx = ux.i>>63;
+	uint64_t i;
 
-	EXTRACT_WORDS(hx, lx, x);
-	EXTRACT_WORDS(hy, ly, y);
-	sx = hx & 0x80000000;  /* sign of x */
-	hx ^= sx;              /* |x| */
-	hy &= 0x7fffffff;      /* |y| */
+	/* in the followings uxi should be ux.i, but then gcc wrongly adds */
+	/* float load/store to inner loops ruining performance and code size */
+	uint64_t uxi = ux.i;
 
-	/* purge off exception values */
-	if ((hy|ly) == 0 || hx >= 0x7ff00000 ||  /* y=0,or x not finite */
-	    (hy|((ly|-ly)>>31)) > 0x7ff00000)    /* or y is NaN */
+	if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
 		return (x*y)/(x*y);
-	if (hx <= hy) {
-		if (hx < hy || lx < ly)  /* |x| < |y| */
-			return x;
-		if (lx == ly)            /* |x| = |y|, return x*0 */
-			return Zero[(uint32_t)sx>>31];
+	if (uxi<<1 <= uy.i<<1) {
+		if (uxi<<1 == uy.i<<1)
+			return 0*x;
+		return x;
 	}
 
-	/* determine ix = ilogb(x) */
-	if (hx < 0x00100000) {  /* subnormal x */
-		if (hx == 0) {
-			for (ix = -1043, i = lx; i > 0; i <<= 1)
-				ix -= 1;
-		} else {
-			for (ix = -1022, i = hx<<11; i > 0; i <<= 1)
-				ix -= 1;
-		}
-	} else
-		ix = (hx>>20) - 1023;
-
-	/* determine iy = ilogb(y) */
-	if (hy < 0x00100000) {  /* subnormal y */
-		if (hy == 0) {
-			for (iy = -1043, i = ly; i > 0; i <<= 1)
-				iy -= 1;
-		} else {
-			for (iy = -1022, i = hy<<11; i > 0; i <<= 1)
-				iy -= 1;
-		}
-	} else
-		iy = (hy>>20) - 1023;
-
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	if (ix >= -1022)
-		hx = 0x00100000|(0x000fffff&hx);
-	else {       /* subnormal x, shift x to normal */
-		n = -1022-ix;
-		if (n <= 31) {
-			hx = (hx<<n)|(lx>>(32-n));
-			lx <<= n;
-		} else {
-			hx = lx<<(n-32);
-			lx = 0;
-		}
+	/* normalize x and y */
+	if (!ex) {
+		for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
+		uxi <<= -ex + 1;
+	} else {
+		uxi &= -1ULL >> 12;
+		uxi |= 1ULL << 52;
 	}
-	if(iy >= -1022)
-		hy = 0x00100000|(0x000fffff&hy);
-	else {       /* subnormal y, shift y to normal */
-		n = -1022-iy;
-		if (n <= 31) {
-			hy = (hy<<n)|(ly>>(32-n));
-			ly <<= n;
-		} else {
-			hy = ly<<(n-32);
-			ly = 0;
-		}
+	if (!ey) {
+		for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
+		uy.i <<= -ey + 1;
+	} else {
+		uy.i &= -1ULL >> 12;
+		uy.i |= 1ULL << 52;
 	}
 
-	/* fix point fmod */
-	n = ix - iy;
-	while (n--) {
-		hz = hx-hy;
-		lz = lx-ly;
-		if (lx < ly)
-			hz -= 1;
-		if (hz < 0) {
-			hx = hx+hx+(lx>>31);
-			lx = lx+lx;
-		} else {
-			if ((hz|lz) == 0)   /* return sign(x)*0 */
-				return Zero[(uint32_t)sx>>31];
-			hx = hz+hz+(lz>>31);
-			lx = lz+lz;
+	/* x mod y */
+	for (; ex > ey; ex--) {
+		i = uxi - uy.i;
+		if (i >> 63 == 0) {
+			if (i == 0)
+				return 0*x;
+			uxi = i;
 		}
+		uxi <<= 1;
 	}
-	hz = hx-hy;
-	lz = lx-ly;
-	if (lx < ly)
-		hz -= 1;
-	if (hz >= 0) {
-		hx = hz;
-		lx = lz;
+	i = uxi - uy.i;
+	if (i >> 63 == 0) {
+		if (i == 0)
+			return 0*x;
+		uxi = i;
 	}
+	for (; uxi>>52 == 0; uxi <<= 1, ex--);
 
-	/* convert back to floating value and restore the sign */
-	if ((hx|lx) == 0)          /* return sign(x)*0 */
-		return Zero[(uint32_t)sx>>31];
-	while (hx < 0x00100000) {  /* normalize x */
-		hx = hx+hx+(lx>>31);
-		lx = lx+lx;
-		iy -= 1;
-	}
-	if (iy >= -1022) {         /* normalize output */
-		hx = ((hx-0x00100000)|((iy+1023)<<20));
-		INSERT_WORDS(x, hx|sx, lx);
-	} else {                   /* subnormal output */
-		n = -1022 - iy;
-		if (n <= 20) {
-			lx = (lx>>n)|((uint32_t)hx<<(32-n));
-			hx >>= n;
-		} else if (n <= 31) {
-			lx = (hx<<(32-n))|(lx>>n);
-			hx = sx;
-		} else {
-			lx = hx>>(n-32); hx = sx;
-		}
-		INSERT_WORDS(x, hx|sx, lx);
+	/* scale result */
+	if (ex > 0) {
+		uxi -= 1ULL << 52;
+		uxi |= (uint64_t)ex << 52;
+	} else {
+		uxi >>= -ex + 1;
 	}
-	return x;  /* exact output */
+	uxi |= (uint64_t)sx << 63;
+	ux.i = uxi;
+	return ux.f;
 }

+ 50 - 89
src/math/fmodf.c

@@ -1,104 +1,65 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_fmodf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, [email protected].
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * fmodf(x,y)
- * Return x mod y in exact arithmetic
- * Method: shift and subtract
- */
-
-#include "libm.h"
-
-static const float Zero[] = {0.0, -0.0,};
+#include <math.h>
+#include <stdint.h>
 
 float fmodf(float x, float y)
 {
-	int32_t n,hx,hy,hz,ix,iy,sx,i;
+	union {float f; uint32_t i;} ux = {x}, uy = {y};
+	int ex = ux.i>>23 & 0xff;
+	int ey = uy.i>>23 & 0xff;
+	uint32_t sx = ux.i & 0x80000000;
+	uint32_t i;
+	uint32_t uxi = ux.i;
 
-	GET_FLOAT_WORD(hx, x);
-	GET_FLOAT_WORD(hy, y);
-	sx = hx & 0x80000000;  /* sign of x */
-	hx ^= sx;              /* |x| */
-	hy &= 0x7fffffff;      /* |y| */
-
-	/* purge off exception values */
-	if (hy == 0 || hx >= 0x7f800000 ||  /* y=0,or x not finite */
-	    hy > 0x7f800000)                /* or y is NaN */
+	if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
 		return (x*y)/(x*y);
-	if (hx < hy)                        /* |x| < |y| */
+	if (uxi<<1 <= uy.i<<1) {
+		if (uxi<<1 == uy.i<<1)
+			return 0*x;
 		return x;
-	if (hx == hy)                       /* |x| = |y|, return x*0 */
-		return Zero[(uint32_t)sx>>31];
-
-	/* determine ix = ilogb(x) */
-	if (hx < 0x00800000) {     /* subnormal x */
-		for (ix = -126, i = hx<<8; i > 0; i <<= 1)
-			ix -= 1;
-	} else
-		ix = (hx>>23) - 127;
-
-	/* determine iy = ilogb(y) */
-	if (hy < 0x00800000) {     /* subnormal y */
-		for (iy = -126, i = hy<<8; i >= 0; i <<= 1)
-			iy -= 1;
-	} else
-		iy = (hy>>23) - 127;
+	}
 
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	if (ix >= -126)
-		hx = 0x00800000|(0x007fffff&hx);
-	else {          /* subnormal x, shift x to normal */
-		n = -126-ix;
-		hx = hx<<n;
+	/* normalize x and y */
+	if (!ex) {
+		for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
+		uxi <<= -ex + 1;
+	} else {
+		uxi &= -1U >> 9;
+		uxi |= 1U << 23;
 	}
-	if (iy >= -126)
-		hy = 0x00800000|(0x007fffff&hy);
-	else {          /* subnormal y, shift y to normal */
-		n = -126-iy;
-		hy = hy<<n;
+	if (!ey) {
+		for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
+		uy.i <<= -ey + 1;
+	} else {
+		uy.i &= -1U >> 9;
+		uy.i |= 1U << 23;
 	}
 
-	/* fix point fmod */
-	n = ix - iy;
-	while (n--) {
-		hz = hx-hy;
-		if (hz<0)
-			hx = hx+hx;
-		else {
-			if(hz == 0)   /* return sign(x)*0 */
-				return Zero[(uint32_t)sx>>31];
-			hx = hz+hz;
+	/* x mod y */
+	for (; ex > ey; ex--) {
+		i = uxi - uy.i;
+		if (i >> 31 == 0) {
+			if (i == 0)
+				return 0*x;
+			uxi = i;
 		}
+		uxi <<= 1;
 	}
-	hz = hx-hy;
-	if (hz >= 0)
-		hx = hz;
-
-	/* convert back to floating value and restore the sign */
-	if (hx == 0)               /* return sign(x)*0 */
-		return Zero[(uint32_t)sx>>31];
-	while (hx < 0x00800000) {  /* normalize x */
-		hx = hx+hx;
-		iy -= 1;
+	i = uxi - uy.i;
+	if (i >> 31 == 0) {
+		if (i == 0)
+			return 0*x;
+		uxi = i;
 	}
-	if (iy >= -126) {          /* normalize output */
-		hx = ((hx-0x00800000)|((iy+127)<<23));
-		SET_FLOAT_WORD(x, hx|sx);
-	} else {                   /* subnormal output */
-		n = -126 - iy;
-		hx >>= n;
-		SET_FLOAT_WORD(x, hx|sx);
+	for (; uxi>>23 == 0; uxi <<= 1, ex--);
+
+	/* scale result up */
+	if (ex > 0) {
+		uxi -= 1U << 23;
+		uxi |= (uint32_t)ex << 23;
+	} else {
+		uxi >>= -ex + 1;
 	}
-	return x;  /* exact output */
+	uxi |= sx;
+	ux.i = uxi;
+	return ux.f;
 }

+ 80 - 133
src/math/fmodl.c

@@ -1,15 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_fmodl.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -18,141 +6,100 @@ long double fmodl(long double x, long double y)
 	return fmod(x, y);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#define BIAS (LDBL_MAX_EXP - 1)
-
-#if LDBL_MANL_SIZE > 32
-typedef uint64_t manl_t;
-#else
-typedef uint32_t manl_t;
-#endif
-
-#if LDBL_MANH_SIZE > 32
-typedef uint64_t manh_t;
-#else
-typedef uint32_t manh_t;
-#endif
-
-/*
- * These macros add and remove an explicit integer bit in front of the
- * fractional mantissa, if the architecture doesn't have such a bit by
- * default already.
- */
-#ifdef LDBL_IMPLICIT_NBIT
-#define SET_NBIT(hx)    ((hx) | (1ULL << LDBL_MANH_SIZE))
-#define HFRAC_BITS      LDBL_MANH_SIZE
-#else
-#define SET_NBIT(hx)    (hx)
-#define HFRAC_BITS      (LDBL_MANH_SIZE - 1)
-#endif
-
-#define MANL_SHIFT      (LDBL_MANL_SIZE - 1)
-
-static const long double Zero[] = {0.0, -0.0,};
-
-/*
- * fmodl(x,y)
- * Return x mod y in exact arithmetic
- * Method: shift and subtract
- *
- * Assumptions:
- * - The low part of the mantissa fits in a manl_t exactly.
- * - The high part of the mantissa fits in an int64_t with enough room
- *   for an explicit integer bit in front of the fractional bits.
- */
 long double fmodl(long double x, long double y)
 {
-	union IEEEl2bits ux, uy;
-	int64_t hx,hz;  /* We need a carry bit even if LDBL_MANH_SIZE is 32. */
-	manh_t hy;
-	manl_t lx,ly,lz;
-	int ix,iy,n,sx;
-
-	ux.e = x;
-	uy.e = y;
-	sx = ux.bits.sign;
+	union ldshape ux = {x}, uy = {y};
+	int ex = ux.i.se & 0x7fff;
+	int ey = uy.i.se & 0x7fff;
+	int sx = ux.i.se & 0x8000;
 
-	/* purge off exception values */
-	if ((uy.bits.exp|uy.bits.manh|uy.bits.manl) == 0 || /* y=0 */
-	    ux.bits.exp == BIAS + LDBL_MAX_EXP ||           /* or x not finite */
-	    (uy.bits.exp == BIAS + LDBL_MAX_EXP &&
-	     ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl) != 0)) /* or y is NaN */
+	if (y == 0 || isnan(y) || ex == 0x7fff)
 		return (x*y)/(x*y);
-	if (ux.bits.exp <= uy.bits.exp) {
-		if (ux.bits.exp < uy.bits.exp ||
-		    (ux.bits.manh<=uy.bits.manh &&
-		     (ux.bits.manh<uy.bits.manh ||
-		      ux.bits.manl<uy.bits.manl)))  /* |x|<|y| return x or x-y */
-			return x;
-		if (ux.bits.manh==uy.bits.manh && ux.bits.manl==uy.bits.manl)
-			return Zero[sx];  /* |x| = |y| return x*0 */
+	ux.i.se = ex;
+	uy.i.se = ey;
+	if (ux.f <= uy.f) {
+		if (ux.f == uy.f)
+			return 0*x;
+		return x;
 	}
 
-	/* determine ix = ilogb(x) */
-	if (ux.bits.exp == 0) {  /* subnormal x */
-		ux.e *= 0x1.0p512;
-		ix = ux.bits.exp - (BIAS + 512);
-	} else {
-		ix = ux.bits.exp - BIAS;
+	/* normalize x and y */
+	if (!ex) {
+		ux.f *= 0x1p120f;
+		ex = ux.i.se - 120;
 	}
-
-	/* determine iy = ilogb(y) */
-	if (uy.bits.exp == 0) {  /* subnormal y */
-		uy.e *= 0x1.0p512;
-		iy = uy.bits.exp - (BIAS + 512);
-	} else {
-		iy = uy.bits.exp - BIAS;
+	if (!ey) {
+		uy.f *= 0x1p120f;
+		ey = uy.i.se - 120;
 	}
 
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	hx = SET_NBIT(ux.bits.manh);
-	hy = SET_NBIT(uy.bits.manh);
-	lx = ux.bits.manl;
-	ly = uy.bits.manl;
-
-	/* fix point fmod */
-	n = ix - iy;
-
-	while (n--) {
-		hz = hx-hy;
-		lz = lx-ly;
-		if (lx < ly)
-			hz -= 1;
-		if (hz < 0) {
-			hx = hx+hx+(lx>>MANL_SHIFT);
-			lx = lx+lx;
+	/* x mod y */
+#if LDBL_MANT_DIG == 64
+	uint64_t i, mx, my;
+	mx = ux.i.m;
+	my = uy.i.m;
+	for (; ex > ey; ex--) {
+		i = mx - my;
+		if (mx >= my) {
+			if (i == 0)
+				return 0*x;
+			mx = 2*i;
+		} else if (2*mx < mx) {
+			mx = 2*mx - my;
 		} else {
-			if ((hz|lz)==0)   /* return sign(x)*0 */
-				return Zero[sx];
-			hx = hz+hz+(lz>>MANL_SHIFT);
-			lx = lz+lz;
+			mx = 2*mx;
 		}
 	}
-	hz = hx-hy;
-	lz = lx-ly;
-	if (lx < ly)
-		hz -= 1;
-	if (hz >= 0) {
-		hx = hz;
-		lx = lz;
+	i = mx - my;
+	if (mx >= my) {
+		if (i == 0)
+			return 0*x;
+		mx = i;
 	}
-
-	/* convert back to floating value and restore the sign */
-	if ((hx|lx) == 0)   /* return sign(x)*0 */
-		return Zero[sx];
-	while (hx < (1ULL<<HFRAC_BITS)) {  /* normalize x */
-		hx = hx+hx+(lx>>MANL_SHIFT);
-		lx = lx+lx;
-		iy -= 1;
+	for (; mx >> 63 == 0; mx *= 2, ex--);
+	ux.i.m = mx;
+#elif LDBL_MANT_DIG == 113
+	uint64_t hi, lo, xhi, xlo, yhi, ylo;
+	xhi = (ux.i2.hi & -1ULL>>16) | 1ULL<<48;
+	yhi = (uy.i2.hi & -1ULL>>16) | 1ULL<<48;
+	xlo = ux.i2.lo;
+	ylo = ux.i2.lo;
+	for (; ex > ey; ex--) {
+		hi = xhi - yhi;
+		lo = xlo - ylo;
+		if (xlo < ylo)
+			hi -= 1;
+		if (hi >> 63 == 0) {
+			if ((hi|lo) == 0)
+				return 0*x;
+			xhi = 2*hi + (lo>>63);
+			xlo = 2*lo;
+		} else {
+			xhi = 2*xhi + (xlo>>63);
+			xlo = 2*xlo;
+		}
 	}
-	ux.bits.manh = hx; /* The mantissa is truncated here if needed. */
-	ux.bits.manl = lx;
-	if (iy < LDBL_MIN_EXP) {
-		ux.bits.exp = iy + (BIAS + 512);
-		ux.e *= 0x1p-512;
-	} else {
-		ux.bits.exp = iy + BIAS;
+	hi = xhi - yhi;
+	lo = xlo - ylo;
+	if (xlo < ylo)
+		hi -= 1;
+	if (hi >> 63 == 0) {
+		if ((hi|lo) == 0)
+			return 0*x;
+		xhi = hi;
+		xlo = lo;
 	}
-	return ux.e;       /* exact output */
+	for (; xhi >> 48 == 0; xhi = 2*xhi + (xlo>>63), xlo = 2*xlo, ex--);
+	ux.i2.hi = xhi;
+	ux.i2.lo = xlo;
+#endif
+
+	/* scale result */
+	if (ex <= 0) {
+		ux.i.se = (ex+120)|sx;
+		ux.f *= 0x1p-120f;
+	} else
+		ux.i.se = ex|sx;
+	return ux.f;
 }
 #endif

+ 14 - 17
src/math/modf.c

@@ -2,36 +2,33 @@
 
 double modf(double x, double *iptr)
 {
-	union {double x; uint64_t n;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	uint64_t mask;
-	int e;
-
-	e = (int)(u.n>>52 & 0x7ff) - 0x3ff;
+	int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
 
 	/* no fractional part */
 	if (e >= 52) {
 		*iptr = x;
-		if (e == 0x400 && u.n<<12 != 0) /* nan */
+		if (e == 0x400 && u.i<<12 != 0) /* nan */
 			return x;
-		u.n &= (uint64_t)1<<63;
-		return u.x;
+		u.i &= 1ULL<<63;
+		return u.f;
 	}
 
 	/* no integral part*/
 	if (e < 0) {
-		u.n &= (uint64_t)1<<63;
-		*iptr = u.x;
+		u.i &= 1ULL<<63;
+		*iptr = u.f;
 		return x;
 	}
 
-	mask = (uint64_t)-1>>12 >> e;
-	if ((u.n & mask) == 0) {
+	mask = -1ULL>>12>>e;
+	if ((u.i & mask) == 0) {
 		*iptr = x;
-		u.n &= (uint64_t)1<<63;
-		return u.x;
+		u.i &= 1ULL<<63;
+		return u.f;
 	}
-	u.n &= ~mask;
-	*iptr = u.x;
-	STRICT_ASSIGN(double, x, x - *iptr);
-	return x;
+	u.i &= ~mask;
+	*iptr = u.f;
+	return x - u.f;
 }

+ 13 - 16
src/math/modff.c

@@ -2,36 +2,33 @@
 
 float modff(float x, float *iptr)
 {
-	union {float x; uint32_t n;} u = {x};
+	union {float f; uint32_t i;} u = {x};
 	uint32_t mask;
-	int e;
-
-	e = (int)(u.n>>23 & 0xff) - 0x7f;
+	int e = (int)(u.i>>23 & 0xff) - 0x7f;
 
 	/* no fractional part */
 	if (e >= 23) {
 		*iptr = x;
-		if (e == 0x80 && u.n<<9 != 0) { /* nan */
+		if (e == 0x80 && u.i<<9 != 0) { /* nan */
 			return x;
 		}
-		u.n &= 0x80000000;
-		return u.x;
+		u.i &= 0x80000000;
+		return u.f;
 	}
 	/* no integral part */
 	if (e < 0) {
-		u.n &= 0x80000000;
-		*iptr = u.x;
+		u.i &= 0x80000000;
+		*iptr = u.f;
 		return x;
 	}
 
 	mask = 0x007fffff>>e;
-	if ((u.n & mask) == 0) {
+	if ((u.i & mask) == 0) {
 		*iptr = x;
-		u.n &= 0x80000000;
-		return u.x;
+		u.i &= 0x80000000;
+		return u.f;
 	}
-	u.n &= ~mask;
-	*iptr = u.x;
-	STRICT_ASSIGN(float, x, x - *iptr);
-	return x;
+	u.i &= ~mask;
+	*iptr = u.f;
+	return x - u.f;
 }

+ 34 - 83
src/math/modfl.c

@@ -1,40 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_modfl.c */
-/*-
- * Copyright (c) 2007 David Schultz <[email protected]>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Derived from s_modf.c, which has the following Copyright:
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -43,58 +6,46 @@ long double modfl(long double x, long double *iptr)
 	return modf(x, (double *)iptr);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#if LDBL_MANL_SIZE > 32
-#define MASK    ((uint64_t)-1)
-#else
-#define MASK    ((uint32_t)-1)
+#if LDBL_MANT_DIG == 64
+#define TOINT 0x1p63
+#elif LDBL_MANT_DIG == 113
+#define TOINT 0x1p112
 #endif
-/* Return the last n bits of a word, representing the fractional part. */
-#define GETFRAC(bits, n)        ((bits) & ~(MASK << (n)))
-/* The number of fraction bits in manh, not counting the integer bit */
-#define HIBITS  (LDBL_MANT_DIG - LDBL_MANL_SIZE)
-
-static const long double zero[] = { 0.0, -0.0 };
-
 long double modfl(long double x, long double *iptr)
 {
-	union IEEEl2bits ux;
-	int e;
+	union ldshape u = {x};
+	uint64_t mask;
+	int e = (u.i.se & 0x7fff) - 0x3fff;
+	int s = u.i.se >> 15;
+	long double absx;
+	long double y;
 
-	ux.e = x;
-	e = ux.bits.exp - LDBL_MAX_EXP + 1;
-	if (e < HIBITS) {                       /* Integer part is in manh. */
-		if (e < 0) {                    /* |x|<1 */
-			*iptr = zero[ux.bits.sign];
-			return x;
-		}
-		if ((GETFRAC(ux.bits.manh, HIBITS - 1 - e)|ux.bits.manl) == 0) {
-			/* x is an integer. */
-			*iptr = x;
-			return zero[ux.bits.sign];
-		}
-		/* Clear all but the top e+1 bits. */
-		ux.bits.manh >>= HIBITS - 1 - e;
-		ux.bits.manh <<= HIBITS - 1 - e;
-		ux.bits.manl = 0;
-		*iptr = ux.e;
-		return x - ux.e;
-	} else if (e >= LDBL_MANT_DIG - 1) {    /* x has no fraction part. */
+	/* no fractional part */
+	if (e >= LDBL_MANT_DIG-1) {
 		*iptr = x;
-		if (e == LDBL_MAX_EXP && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)) /* nan */
+		if (isnan(x))
 			return x;
-		return zero[ux.bits.sign];
-	} else {                                /* Fraction part is in manl. */
-		if (GETFRAC(ux.bits.manl, LDBL_MANT_DIG - 1 - e) == 0) {
-			/* x is integral. */
-			*iptr = x;
-			return zero[ux.bits.sign];
-		}
-		/* Clear all but the top e+1 bits. */
-		ux.bits.manl >>= LDBL_MANT_DIG - 1 - e;
-		ux.bits.manl <<= LDBL_MANT_DIG - 1 - e;
-		*iptr = ux.e;
-		return x - ux.e;
+		return s ? -0.0 : 0.0;
+	}
+
+	/* no integral part*/
+	if (e < 0) {
+		*iptr = s ? -0.0 : 0.0;
+		return x;
+	}
+
+	/* raises spurious inexact */
+	absx = s ? -x : x;
+	y = absx + TOINT - TOINT - absx;
+	if (y == 0) {
+		*iptr = x;
+		return s ? -0.0 : 0.0;
 	}
+	if (y > 0)
+		y -= 1;
+	if (s)
+		y = -y;
+	*iptr = x + y;
+	return -y;
 }
 #endif

+ 4 - 63
src/math/remainder.c

@@ -1,66 +1,7 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_remainder.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* remainder(x,p)
- * Return :
- *      returns  x REM p  =  x - [x/p]*p as if in infinite
- *      precise arithmetic, where [x/p] is the (infinite bit)
- *      integer nearest x/p (in half way case choose the even one).
- * Method :
- *      Based on fmod() return x-[x/p]chopped*p exactlp.
- */
+#include <math.h>
 
-#include "libm.h"
-
-double remainder(double x, double p)
+double remainder(double x, double y)
 {
-	int32_t hx,hp;
-	uint32_t sx,lx,lp;
-	double p_half;
-
-	EXTRACT_WORDS(hx, lx, x);
-	EXTRACT_WORDS(hp, lp, p);
-	sx = hx & 0x80000000;
-	hp &= 0x7fffffff;
-	hx &= 0x7fffffff;
-
-	/* purge off exception values */
-	if ((hp|lp) == 0 ||                                  /* p = 0 */
-	    hx >= 0x7ff00000 ||                              /* x not finite */
-	    (hp >= 0x7ff00000 && (hp-0x7ff00000 | lp) != 0)) /* p is NaN */
-		return (x*p)/(x*p);
-
-	if (hp <= 0x7fdfffff)
-		x = fmod(x, p+p);  /* now x < 2p */
-	if (((hx-hp)|(lx-lp)) == 0)
-		return 0.0*x;
-	x = fabs(x);
-	p = fabs(p);
-	if (hp < 0x00200000) {
-		if (x + x > p) {
-			x -= p;
-			if (x + x >= p)
-				x -= p;
-		}
-	} else {
-		p_half = 0.5*p;
-		if (x > p_half) {
-			x -= p;
-			if (x >= p_half)
-				x -= p;
-		}
-	}
-	GET_HIGH_WORD(hx, x);
-	if ((hx&0x7fffffff) == 0)
-		hx = 0;
-	SET_HIGH_WORD(x, hx^sx);
-	return x;
+	int q;
+	return remquo(x, y, &q);
 }

+ 4 - 56
src/math/remainderf.c

@@ -1,59 +1,7 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_remainderf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, [email protected].
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+#include <math.h>
 
-#include "libm.h"
-
-float remainderf(float x, float p)
+float remainderf(float x, float y)
 {
-	int32_t hx,hp;
-	uint32_t sx;
-	float p_half;
-
-	GET_FLOAT_WORD(hx, x);
-	GET_FLOAT_WORD(hp, p);
-	sx = hx & 0x80000000;
-	hp &= 0x7fffffff;
-	hx &= 0x7fffffff;
-
-	/* purge off exception values */
-	if (hp == 0 || hx >= 0x7f800000 || hp > 0x7f800000)  /* p = 0, x not finite, p is NaN */
-		return (x*p)/(x*p);
-
-	if (hp <= 0x7effffff)
-		x = fmodf(x, p + p);  /* now x < 2p */
-	if (hx - hp == 0)
-		return 0.0f*x;
-	x = fabsf(x);
-	p = fabsf(p);
-	if (hp < 0x01000000) {
-		if (x + x > p) {
-			x -= p;
-			if (x + x >= p)
-				x -= p;
-		}
-	} else {
-		p_half = 0.5f*p;
-		if (x > p_half) {
-			x -= p;
-			if (x >= p_half)
-				x -= p;
-		}
-	}
-	GET_FLOAT_WORD(hx, x);
-	if ((hx & 0x7fffffff) == 0)
-		hx = 0;
-	SET_FLOAT_WORD(x, hx ^ sx);
-	return x;
+	int q;
+	return remquof(x, y, &q);
 }

+ 61 - 150
src/math/remquo.c

@@ -1,171 +1,82 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_remquo.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * Return the IEEE remainder and set *quo to the last n bits of the
- * quotient, rounded to the nearest integer.  We choose n=31 because
- * we wind up computing all the integer bits of the quotient anyway as
- * a side-effect of computing the remainder by the shift and subtract
- * method.  In practice, this is far more bits than are needed to use
- * remquo in reduction algorithms.
- */
-
-#include "libm.h"
-
-static const double Zero[] = {0.0, -0.0,};
+#include <math.h>
+#include <stdint.h>
 
 double remquo(double x, double y, int *quo)
 {
-	int32_t n,hx,hy,hz,ix,iy,sx,i;
-	uint32_t lx,ly,lz,q,sxy;
-
-	EXTRACT_WORDS(hx, lx, x);
-	EXTRACT_WORDS(hy, ly, y);
-	sxy = (hx ^ hy) & 0x80000000;
-	sx = hx & 0x80000000;   /* sign of x */
-	hx ^= sx;               /* |x| */
-	hy &= 0x7fffffff;       /* |y| */
+	union {double f; uint64_t i;} ux = {x}, uy = {y};
+	int ex = ux.i>>52 & 0x7ff;
+	int ey = uy.i>>52 & 0x7ff;
+	int sx = ux.i>>63;
+	int sy = uy.i>>63;
+	uint32_t q;
+	uint64_t i;
+	uint64_t uxi = ux.i;
 
-	/* purge off exception values */
-	if ((hy|ly) == 0 || hx >= 0x7ff00000 ||  /* y = 0, or x not finite */
-	    (hy|((ly|-ly)>>31)) > 0x7ff00000)    /* or y is NaN */
+	*quo = 0;
+	if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
 		return (x*y)/(x*y);
-	if (hx <= hy) {
-		if (hx < hy || lx < ly) {  /* |x| < |y| return x or x-y */
-			q = 0;
-			goto fixup;
-		}
-		if (lx == ly) {            /* |x| = |y| return x*0 */
-			*quo = sxy ? -1 : 1;
-			return Zero[(uint32_t)sx>>31];
-		}
-	}
+	if (ux.i<<1 == 0)
+		return x;
 
-	// FIXME: use ilogb?
-
-	/* determine ix = ilogb(x) */
-	if (hx < 0x00100000) {  /* subnormal x */
-		if (hx == 0) {
-			for (ix = -1043, i=lx; i>0; i<<=1) ix--;
-		} else {
-			for (ix = -1022, i=hx<<11; i>0; i<<=1) ix--;
-		}
-	} else
-		ix = (hx>>20) - 1023;
-
-	/* determine iy = ilogb(y) */
-	if (hy < 0x00100000) {  /* subnormal y */
-		if (hy == 0) {
-			for (iy = -1043, i=ly; i>0; i<<=1) iy--;
-		} else {
-			for (iy = -1022, i=hy<<11; i>0; i<<=1) iy--;
-		}
-	} else
-		iy = (hy>>20) - 1023;
-
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	if (ix >= -1022)
-		hx = 0x00100000|(0x000fffff&hx);
-	else {  /* subnormal x, shift x to normal */
-		n = -1022 - ix;
-		if (n <= 31) {
-			hx = (hx<<n)|(lx>>(32-n));
-			lx <<= n;
-		} else {
-			hx = lx<<(n-32);
-			lx = 0;
-		}
+	/* normalize x and y */
+	if (!ex) {
+		for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
+		uxi <<= -ex + 1;
+	} else {
+		uxi &= -1ULL >> 12;
+		uxi |= 1ULL << 52;
 	}
-	if (iy >= -1022)
-		hy = 0x00100000|(0x000fffff&hy);
-	else {  /* subnormal y, shift y to normal */
-		n = -1022 - iy;
-		if (n <= 31) {
-			hy = (hy<<n)|(ly>>(32-n));
-			ly <<= n;
-		} else {
-			hy = ly<<(n-32);
-			ly = 0;
-		}
+	if (!ey) {
+		for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
+		uy.i <<= -ey + 1;
+	} else {
+		uy.i &= -1ULL >> 12;
+		uy.i |= 1ULL << 52;
 	}
 
-	/* fix point fmod */
-	n = ix - iy;
 	q = 0;
-	while (n--) {
-		hz = hx - hy;
-		lz = lx - ly;
-		if (lx < ly)
-			hz--;
-		if (hz < 0) {
-			hx = hx + hx + (lx>>31);
-			lx = lx + lx;
-		} else {
-			hx = hz + hz + (lz>>31);
-			lx = lz + lz;
+	if (ex < ey) {
+		if (ex+1 == ey)
+			goto end;
+		return x;
+	}
+
+	/* x mod y */
+	for (; ex > ey; ex--) {
+		i = uxi - uy.i;
+		if (i >> 63 == 0) {
+			uxi = i;
 			q++;
 		}
+		uxi <<= 1;
 		q <<= 1;
 	}
-	hz = hx - hy;
-	lz = lx - ly;
-	if (lx < ly)
-		hz--;
-	if (hz >= 0) {
-		hx = hz;
-		lx = lz;
+	i = uxi - uy.i;
+	if (i >> 63 == 0) {
+		uxi = i;
 		q++;
 	}
-
-	/* convert back to floating value and restore the sign */
-	if ((hx|lx) == 0) {  /* return sign(x)*0 */
-		q &= 0x7fffffff;
-		*quo = sxy ? -q : q;
-		return Zero[(uint32_t)sx>>31];
-	}
-	while (hx < 0x00100000) {  /* normalize x */
-		hx = hx + hx + (lx>>31);
-		lx = lx + lx;
-		iy--;
+	if (uxi == 0)
+		ex = -60;
+	else
+		for (; uxi>>52 == 0; uxi <<= 1, ex--);
+end:
+	/* scale result and decide between |x| and |x|-|y| */
+	if (ex > 0) {
+		uxi -= 1ULL << 52;
+		uxi |= (uint64_t)ex << 52;
+	} else {
+		uxi >>= -ex + 1;
 	}
-	if (iy >= -1022) {         /* normalize output */
-		hx = (hx-0x00100000)|((iy+1023)<<20);
-	} else {                   /* subnormal output */
-		n = -1022 - iy;
-		if (n <= 20) {
-			lx = (lx>>n)|((uint32_t)hx<<(32-n));
-			hx >>= n;
-		} else if (n <= 31) {
-			lx = (hx<<(32-n))|(lx>>n);
-			hx = 0;
-		} else {
-			lx = hx>>(n-32);
-			hx = 0;
-		}
-	}
-fixup:
-	INSERT_WORDS(x, hx, lx);
-	y = fabs(y);
-	if (y < 0x1p-1021) {
-		if (x + x > y || (x + x == y && (q & 1))) {
-			q++;
-			x -= y;
-		}
-	} else if (x > 0.5*y || (x == 0.5*y && (q & 1))) {
-		q++;
+	ux.i = uxi;
+	x = ux.f;
+	if (sy)
+		y = -y;
+	if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
 		x -= y;
+		q++;
 	}
-	GET_HIGH_WORD(hx, x);
-	SET_HIGH_WORD(x, hx ^ sx);
 	q &= 0x7fffffff;
-	*quo = sxy ? -q : q;
-	return x;
+	*quo = sx^sy ? -(int)q : (int)q;
+	return sx ? -x : x;
 }

+ 61 - 105
src/math/remquof.c

@@ -1,126 +1,82 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_remquof.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * Return the IEEE remainder and set *quo to the last n bits of the
- * quotient, rounded to the nearest integer.  We choose n=31 because
- * we wind up computing all the integer bits of the quotient anyway as
- * a side-effect of computing the remainder by the shift and subtract
- * method.  In practice, this is far more bits than are needed to use
- * remquo in reduction algorithms.
- */
-
-#include "libm.h"
-
-static const float Zero[] = {0.0, -0.0,};
+#include <math.h>
+#include <stdint.h>
 
 float remquof(float x, float y, int *quo)
 {
-	int32_t n,hx,hy,hz,ix,iy,sx,i;
-	uint32_t q,sxy;
+	union {float f; uint32_t i;} ux = {x}, uy = {y};
+	int ex = ux.i>>23 & 0xff;
+	int ey = uy.i>>23 & 0xff;
+	int sx = ux.i>>31;
+	int sy = uy.i>>31;
+	uint32_t q;
+	uint32_t i;
+	uint32_t uxi = ux.i;
 
-	GET_FLOAT_WORD(hx, x);
-	GET_FLOAT_WORD(hy, y);
-	sxy = (hx ^ hy) & 0x80000000;
-	sx = hx & 0x80000000;   /* sign of x */
-	hx ^= sx;               /* |x| */
-	hy &= 0x7fffffff;       /* |y| */
-
-	/* purge off exception values */
-	if (hy == 0 || hx >= 0x7f800000 || hy > 0x7f800000) /* y=0,NaN;or x not finite */
+	*quo = 0;
+	if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
 		return (x*y)/(x*y);
-	if (hx < hy) {       /* |x| < |y| return x or x-y */
-		q = 0;
-		goto fixup;
-	} else if(hx==hy) {  /* |x| = |y| return x*0*/
-		*quo = sxy ? -1 : 1;
-		return Zero[(uint32_t)sx>>31];
-	}
+	if (ux.i<<1 == 0)
+		return x;
 
-	/* determine ix = ilogb(x) */
-	if (hx < 0x00800000) {  /* subnormal x */
-		for (ix = -126, i=hx<<8; i>0; i<<=1) ix--;
-	} else
-		ix = (hx>>23) - 127;
-
-	/* determine iy = ilogb(y) */
-	if (hy < 0x00800000) {  /* subnormal y */
-		for (iy = -126, i=hy<<8; i>0; i<<=1) iy--;
-	} else
-		iy = (hy>>23) - 127;
-
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	if (ix >= -126)
-		hx = 0x00800000|(0x007fffff&hx);
-	else {  /* subnormal x, shift x to normal */
-		n = -126 - ix;
-		hx <<= n;
+	/* normalize x and y */
+	if (!ex) {
+		for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
+		uxi <<= -ex + 1;
+	} else {
+		uxi &= -1U >> 9;
+		uxi |= 1U << 23;
 	}
-	if (iy >= -126)
-		hy = 0x00800000|(0x007fffff&hy);
-	else {  /* subnormal y, shift y to normal */
-		n = -126 - iy;
-		hy <<= n;
+	if (!ey) {
+		for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
+		uy.i <<= -ey + 1;
+	} else {
+		uy.i &= -1U >> 9;
+		uy.i |= 1U << 23;
 	}
 
-	/* fix point fmod */
-	n = ix - iy;
 	q = 0;
-	while (n--) {
-		hz = hx - hy;
-		if (hz < 0)
-			hx = hx << 1;
-		else {
-			hx = hz << 1;
+	if (ex < ey) {
+		if (ex+1 == ey)
+			goto end;
+		return x;
+	}
+
+	/* x mod y */
+	for (; ex > ey; ex--) {
+		i = uxi - uy.i;
+		if (i >> 31 == 0) {
+			uxi = i;
 			q++;
 		}
+		uxi <<= 1;
 		q <<= 1;
 	}
-	hz = hx - hy;
-	if (hz >= 0) {
-		hx = hz;
+	i = uxi - uy.i;
+	if (i >> 31 == 0) {
+		uxi = i;
 		q++;
 	}
-
-	/* convert back to floating value and restore the sign */
-	if (hx == 0) {                             /* return sign(x)*0 */
-		q &= 0x7fffffff;
-		*quo = sxy ? -q : q;
-		return Zero[(uint32_t)sx>>31];
-	}
-	while (hx < 0x00800000) {  /* normalize x */
-		hx <<= 1;
-		iy--;
+	if (uxi == 0)
+		ex = -30;
+	else
+		for (; uxi>>23 == 0; uxi <<= 1, ex--);
+end:
+	/* scale result and decide between |x| and |x|-|y| */
+	if (ex > 0) {
+		uxi -= 1U << 23;
+		uxi |= (uint32_t)ex << 23;
+	} else {
+		uxi >>= -ex + 1;
 	}
-	if (iy >= -126) {          /* normalize output */
-		hx = (hx-0x00800000)|((iy+127)<<23);
-	} else {                   /* subnormal output */
-		n = -126 - iy;
-		hx >>= n;
-	}
-fixup:
-	SET_FLOAT_WORD(x,hx);
-	y = fabsf(y);
-	if (y < 0x1p-125f) {
-		if (x + x > y || (x + x == y && (q & 1))) {
-			q++;
-			x -= y;
-		}
-	} else if (x > 0.5f*y || (x == 0.5f*y && (q & 1))) {
-		q++;
+	ux.i = uxi;
+	x = ux.f;
+	if (sy)
+		y = -y;
+	if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
 		x -= y;
+		q++;
 	}
-	GET_FLOAT_WORD(hx, x);
-	SET_FLOAT_WORD(x, hx ^ sx);
 	q &= 0x7fffffff;
-	*quo = sxy ? -q : q;
-	return x;
+	*quo = sx^sy ? -(int)q : (int)q;
+	return sx ? -x : x;
 }

+ 100 - 170
src/math/remquol.c

@@ -1,15 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_remquol.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -18,177 +6,119 @@ long double remquol(long double x, long double y, int *quo)
 	return remquo(x, y, quo);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#define BIAS (LDBL_MAX_EXP - 1)
-
-#if LDBL_MANL_SIZE > 32
-typedef uint64_t manl_t;
-#else
-typedef uint32_t manl_t;
-#endif
-
-#if LDBL_MANH_SIZE > 32
-typedef uint64_t manh_t;
-#else
-typedef uint32_t manh_t;
-#endif
-
-/*
- * These macros add and remove an explicit integer bit in front of the
- * fractional mantissa, if the architecture doesn't have such a bit by
- * default already.
- */
-#ifdef LDBL_IMPLICIT_NBIT
-#define SET_NBIT(hx)    ((hx) | (1ULL << LDBL_MANH_SIZE))
-#define HFRAC_BITS      LDBL_MANH_SIZE
-#else
-#define SET_NBIT(hx)    (hx)
-#define HFRAC_BITS      (LDBL_MANH_SIZE - 1)
-#endif
-
-#define MANL_SHIFT      (LDBL_MANL_SIZE - 1)
-
-static const long double Zero[] = {0.0, -0.0};
-
-/*
- * Return the IEEE remainder and set *quo to the last n bits of the
- * quotient, rounded to the nearest integer.  We choose n=31 because
- * we wind up computing all the integer bits of the quotient anyway as
- * a side-effect of computing the remainder by the shift and subtract
- * method.  In practice, this is far more bits than are needed to use
- * remquo in reduction algorithms.
- *
- * Assumptions:
- * - The low part of the mantissa fits in a manl_t exactly.
- * - The high part of the mantissa fits in an int64_t with enough room
- *   for an explicit integer bit in front of the fractional bits.
- */
 long double remquol(long double x, long double y, int *quo)
 {
-	union IEEEl2bits ux, uy;
-	int64_t hx,hz;  /* We need a carry bit even if LDBL_MANH_SIZE is 32. */
-	manh_t hy;
-	manl_t lx,ly,lz;
-	int ix,iy,n,q,sx,sxy;
-
-	ux.e = x;
-	uy.e = y;
-	sx = ux.bits.sign;
-	sxy = sx ^ uy.bits.sign;
-	ux.bits.sign = 0;       /* |x| */
-	uy.bits.sign = 0;       /* |y| */
-	x = ux.e;
-
-	/* purge off exception values */
-	if ((uy.bits.exp|uy.bits.manh|uy.bits.manl)==0 || /* y=0 */
-	    (ux.bits.exp == BIAS + LDBL_MAX_EXP) ||       /* or x not finite */
-	    (uy.bits.exp == BIAS + LDBL_MAX_EXP &&
-		((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)!=0)) /* or y is NaN */
+	union ldshape ux = {x}, uy = {y};
+	int ex = ux.i.se & 0x7fff;
+	int ey = uy.i.se & 0x7fff;
+	int sx = ux.i.se >> 15;
+	int sy = uy.i.se >> 15;
+	uint32_t q;
+
+	*quo = 0;
+	if (y == 0 || isnan(y) || ex == 0x7fff)
 		return (x*y)/(x*y);
-	if (ux.bits.exp <= uy.bits.exp) {
-		if ((ux.bits.exp < uy.bits.exp) ||
-		    (ux.bits.manh <= uy.bits.manh &&
-		     (ux.bits.manh < uy.bits.manh ||
-		      ux.bits.manl < uy.bits.manl))) {
-			q = 0;
-			goto fixup;       /* |x|<|y| return x or x-y */
-		}
-		if (ux.bits.manh == uy.bits.manh && ux.bits.manl == uy.bits.manl) {
-			*quo = sxy ? -1 : 1;
-			return Zero[sx];  /* |x|=|y| return x*0*/
-		}
-	}
-
-	/* determine ix = ilogb(x) */
-	if (ux.bits.exp == 0) {  /* subnormal x */
-		ux.e *= 0x1.0p512;
-		ix = ux.bits.exp - (BIAS + 512);
-	} else {
-		ix = ux.bits.exp - BIAS;
+	if (x == 0)
+		return x;
+
+	/* normalize x and y */
+	if (!ex) {
+		ux.i.se = ex;
+		ux.f *= 0x1p120f;
+		ex = ux.i.se - 120;
 	}
-
-	/* determine iy = ilogb(y) */
-	if (uy.bits.exp == 0) {  /* subnormal y */
-		uy.e *= 0x1.0p512;
-		iy = uy.bits.exp - (BIAS + 512);
-	} else {
-		iy = uy.bits.exp - BIAS;
+	if (!ey) {
+		uy.i.se = ey;
+		uy.f *= 0x1p120f;
+		ey = uy.i.se - 120;
 	}
 
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	hx = SET_NBIT(ux.bits.manh);
-	hy = SET_NBIT(uy.bits.manh);
-	lx = ux.bits.manl;
-	ly = uy.bits.manl;
-
-	/* fix point fmod */
-	n = ix - iy;
 	q = 0;
-
-	while (n--) {
-		hz = hx - hy;
-		lz = lx - ly;
-		if (lx < ly)
-			hz -= 1;
-		if (hz < 0) {
-			hx = hx + hx + (lx>>MANL_SHIFT);
-			lx = lx + lx;
-		} else {
-			hx = hz + hz + (lz>>MANL_SHIFT);
-			lx = lz + lz;
+	if (ex >= ey) {
+		/* x mod y */
+#if LDBL_MANT_DIG == 64
+		uint64_t i, mx, my;
+		mx = ux.i.m;
+		my = uy.i.m;
+		for (; ex > ey; ex--) {
+			i = mx - my;
+			if (mx >= my) {
+				mx = 2*i;
+				q++;
+				q <<= 1;
+			} else if (2*mx < mx) {
+				mx = 2*mx - my;
+				q <<= 1;
+				q++;
+			} else {
+				mx = 2*mx;
+				q <<= 1;
+			}
+		}
+		i = mx - my;
+		if (mx >= my) {
+			mx = i;
 			q++;
 		}
-		q <<= 1;
-	}
-	hz = hx - hy;
-	lz = lx - ly;
-	if (lx < ly)
-		hz -= 1;
-	if (hz >= 0) {
-		hx = hz;
-		lx = lz;
-		q++;
-	}
-
-	/* convert back to floating value and restore the sign */
-	if ((hx|lx) == 0) {  /* return sign(x)*0 */
-		q &= 0x7fffffff;
-		*quo = sxy ? -q : q;
-		return Zero[sx];
-	}
-	while (hx < (1ULL<<HFRAC_BITS)) {  /* normalize x */
-		hx = hx + hx + (lx>>MANL_SHIFT);
-		lx = lx + lx;
-		iy -= 1;
-	}
-	ux.bits.manh = hx; /* The integer bit is truncated here if needed. */
-	ux.bits.manl = lx;
-	if (iy < LDBL_MIN_EXP) {
-		ux.bits.exp = iy + (BIAS + 512);
-		ux.e *= 0x1p-512;
-	} else {
-		ux.bits.exp = iy + BIAS;
-	}
-	ux.bits.sign = 0;
-	x = ux.e;
-fixup:
-	y = fabsl(y);
-	if (y < LDBL_MIN * 2) {
-		if (x + x > y || (x + x == y && (q & 1))) {
+		if (mx == 0)
+			ex = -120;
+		else
+			for (; mx >> 63 == 0; mx *= 2, ex--);
+		ux.i.m = mx;
+#elif LDBL_MANT_DIG == 113
+		uint64_t hi, lo, xhi, xlo, yhi, ylo;
+		xhi = (ux.i2.hi & -1ULL>>16) | 1ULL<<48;
+		yhi = (uy.i2.hi & -1ULL>>16) | 1ULL<<48;
+		xlo = ux.i2.lo;
+		ylo = ux.i2.lo;
+		for (; ex > ey; ex--) {
+			hi = xhi - yhi;
+			lo = xlo - ylo;
+			if (xlo < ylo)
+				hi -= 1;
+			if (hi >> 63 == 0) {
+				xhi = 2*hi + (lo>>63);
+				xlo = 2*lo;
+				q++;
+			} else {
+				xhi = 2*xhi + (xlo>>63);
+				xlo = 2*xlo;
+			}
+			q <<= 1;
+		}
+		hi = xhi - yhi;
+		lo = xlo - ylo;
+		if (xlo < ylo)
+			hi -= 1;
+		if (hi >> 63 == 0) {
+			xhi = hi;
+			xlo = lo;
 			q++;
-			x-=y;
 		}
-	} else if (x > 0.5*y || (x == 0.5*y && (q & 1))) {
-		q++;
-		x-=y;
+		if ((xhi|xlo) == 0)
+			ex = -120;
+		else
+			for (; xhi >> 48 == 0; xhi = 2*xhi + (xlo>>63), xlo = 2*xlo, ex--);
+		ux.i2.hi = xhi;
+		ux.i2.lo = xlo;
+#endif
 	}
 
-	ux.e = x;
-	ux.bits.sign ^= sx;
-	x = ux.e;
-
+	/* scale result and decide between |x| and |x|-|y| */
+	if (ex <= 0) {
+		ux.i.se = ex + 120;
+		ux.f *= 0x1p-120f;
+	} else
+		ux.i.se = ex;
+	x = ux.f;
+	if (sy)
+		y = -y;
+	if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
+		x -= y;
+		q++;
+	}
 	q &= 0x7fffffff;
-	*quo = sxy ? -q : q;
-	return x;
+	*quo = sx^sy ? -(int)q : (int)q;
+	return sx ? -x : x;
 }
 #endif