Преглед на файлове

math: add fma TODO comments about the underflow issue

The underflow exception is not raised correctly in some
cornercases (see previous fma commit), added comments
with examples for fmaf, fmal and non-x86 fma.

In fmaf store the result before returning so it has the
correct precision when FLT_EVAL_METHOD!=0
Szabolcs Nagy преди 11 години
родител
ревизия
1e5eb73545
променени са 3 файла, в които са добавени 14 реда и са изтрити 2 реда
  1. 2 0
      src/math/fma.c
  2. 10 2
      src/math/fmaf.c
  3. 2 0
      src/math/fmal.c

+ 2 - 0
src/math/fma.c

@@ -441,6 +441,8 @@ double fma(double x, double y, double z)
 		/*
 		 * There is no need to worry about double rounding in directed
 		 * rounding modes.
+		 * TODO: underflow is not raised properly, example in downward rounding:
+		 * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066)
 		 */
 		fesetround(oround);
 		adj = r.lo + xy.lo;

+ 10 - 2
src/math/fmaf.c

@@ -49,7 +49,14 @@ float fmaf(float x, float y, float z)
 		(hr & 0x7ff00000) == 0x7ff00000 ||  /* NaN */
 		result - xy == z ||                 /* exact */
 		fegetround() != FE_TONEAREST)       /* not round-to-nearest */
-		return (result);
+	{
+		/*
+		TODO: underflow is not raised correctly, example in
+		downward rouding: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
+		*/
+		z = result;
+		return z;
+	}
 
 	/*
 	 * If result is inexact, and exactly halfway between two float values,
@@ -63,5 +70,6 @@ float fmaf(float x, float y, float z)
 	fesetround(FE_TONEAREST);
 	if (result == adjusted_result)
 		SET_LOW_WORD(adjusted_result, lr + 1);
-	return (adjusted_result);
+	z = adjusted_result;
+	return z;
 }

+ 2 - 0
src/math/fmal.c

@@ -262,6 +262,8 @@ long double fmal(long double x, long double y, long double z)
 		/*
 		 * There is no need to worry about double rounding in directed
 		 * rounding modes.
+		 * TODO: underflow is not raised correctly, example in downward rounding:
+		 * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L)
 		 */
 		fesetround(oround);
 		adj = r.lo + xy.lo;