fma.c

#include <fenv.h>
#include "libm.h"

#if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384
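/*
 * Access to the x87 80-bit format: m is the full 64-bit mantissa with an
 * explicit leading integer bit (unlike IEEE double there is no implicit
 * msb), e is the 15-bit biased exponent and s is the sign.
 */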
union ld80 {
	long double x;
	struct {
		uint64_t m;
		uint16_t e : 15;
		uint16_t s : 1;
		uint16_t pad;
	} bits;
};
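
/*
 * The error-free transformations below follow Dekker, "A Floating-Point
 * Technique for Extending the Available Precision" (1971): add() is
 * Fast2Sum, which recovers the exact rounding error of x + y provided
 * the exponent of x is not smaller than that of y, so hi + lo == x + y.
 */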
/* exact add, assumes exponent_x >= exponent_y */
static void add(long double *hi, long double *lo, long double x, long double y)
{
	long double r;

	r = x + y;
	*hi = r;
	r -= x;
	*lo = y - r;
}
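
/*
 * mul() uses Veltkamp splitting: multiplying by c = 2^32 + 1 and
 * cancelling splits the 64-bit ld80 mantissa into two 32-bit halves,
 * so every partial product below is exact and hi + lo == x*y exactly.
 */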
/* exact mul, assumes no over/underflow */
static void mul(long double *hi, long double *lo, long double x, long double y)
{
	static const long double c = 1.0 + 0x1p32L;
	long double cx, xh, xl, cy, yh, yl;

	cx = c*x;
	xh = (x - cx) + cx;
	xl = x - xh;
	cy = c*y;
	yh = (y - cy) + cy;
	yl = y - yh;
	*hi = x*y;
	*lo = (xh*yh - *hi) + xh*yl + xl*yh + xl*yl;
}
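
/*
 * Rounding the 64-bit ld80 mantissa to the 53 bits of a double drops the
 * low 11 bits; whether lo can influence that rounding is only in doubt
 * when the 10 bits below the round bit are all zero. In that case adjust()
 * nudges the mantissa by one ulp towards lo, turning those bits into a
 * sticky bit that makes the final conversion round correctly.
 */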
/*
assume (long double)(hi+lo) == hi
return an adjusted hi so that rounding it to double (or less) precision is correct
*/
static long double adjust(long double hi, long double lo)
{
	union ld80 uhi, ulo;

	if (lo == 0)
		return hi;
	uhi.x = hi;
	if (uhi.bits.m & 0x3ff)
		return hi;
	ulo.x = lo;
	if (uhi.bits.s == ulo.bits.s)
		uhi.bits.m++;
	else {
		uhi.bits.m--;
		/* handle underflow and take care of ld80 implicit msb */
		if (uhi.bits.m == (uint64_t)-1/2) {
			uhi.bits.m *= 2;
			uhi.bits.e--;
		}
	}
	return uhi.x;
}

/* adjusted add so the result is correct when rounded to double (or less) precision */
static long double dadd(long double x, long double y)
{
	add(&x, &y, x, y);
	return adjust(x, y);
}

/* adjusted mul so the result is correct when rounded to double (or less) precision */
static long double dmul(long double x, long double y)
{
	mul(&x, &y, x, y);
	return adjust(x, y);
}

static int getexp(long double x)
{
	union ld80 u;
	u.x = x;
	return u.bits.e;
}
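
/*
 * The strategy below: under round-to-nearest, compute x*y exactly as
 * xy + lo1, add z exactly where the exponents allow it, and combine the
 * partial results with the adjusted operations so that the single final
 * conversion to double rounds correctly. In directed rounding modes
 * double rounding is benign, so the partial results are simply summed
 * in the restored mode.
 */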
double fma(double x, double y, double z)
{
	long double hi, lo1, lo2, xy;
	int round, ez, exy;

	/* handle +-inf,nan */
	if (!isfinite(x) || !isfinite(y))
		return x*y + z;
	if (!isfinite(z))
		return z;
	/* handle +-0 */
	if (x == 0.0 || y == 0.0)
		return x*y + z;
	round = fegetround();
	if (z == 0.0) {
		if (round == FE_TONEAREST)
			return dmul(x, y);
		return x*y;
	}
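
	/*
	 * Note: with z == 0 the result is just x*y, but on this (ld80) path
	 * a double multiply may be evaluated in extended precision and then
	 * rounded again on conversion; dmul() above pre-adjusts the extended
	 * product so that the final rounding to double is correct to nearest.
	 */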

	/* exact mul and add require nearest rounding */
	/* spurious inexact exceptions may be raised */
	fesetround(FE_TONEAREST);
	mul(&xy, &lo1, x, y);
	exy = getexp(xy);
	ez = getexp(z);
	if (ez > exy) {
		add(&hi, &lo2, z, xy);
	} else if (ez > exy - 12) {
		add(&hi, &lo2, xy, z);
		if (hi == 0) {
			fesetround(round);
			/* make sure that the sign of 0 is correct */
			return (xy + z) + lo1;
		}
	} else {
		/*
		ez <= exy - 12
		the 12 extra bits (1 guard, 11 round+sticky) are needed so with
		lo = dadd(lo1, lo2)
		elo <= ehi - 11, and we use the last 10 bits in adjust so
		dadd(hi, lo)
		gives correct result when rounded to double
		*/
		hi = xy;
		lo2 = z;
	}
	fesetround(round);
	if (round == FE_TONEAREST)
		return dadd(hi, dadd(lo1, lo2));
	return hi + (lo1 + lo2);
}
#else
/* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
/*-
 * Copyright (c) 2005-2011 David Schultz <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * A struct dd represents a floating-point number with twice the precision
 * of a double.  We maintain the invariant that "hi" stores the 53 high-order
 * bits of the result.
 */
struct dd {
	double hi;
	double lo;
};
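
/*
 * dd_add() below is the branch-free 2Sum of Knuth and Moller: unlike the
 * Fast2Sum pattern used elsewhere, it makes no assumption about which
 * operand is larger, at the cost of a few extra operations;
 * ret.hi + ret.lo == a + b holds exactly.
 */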

/*
 * Compute a+b exactly, returning the exact result in a struct dd.  We assume
 * that both a and b are finite, but make no assumptions about their relative
 * magnitudes.
 */
static inline struct dd dd_add(double a, double b)
{
	struct dd ret;
	double s;

	ret.hi = a + b;
	s = ret.hi - a;
	ret.lo = (a - (ret.hi - s)) + (b - s);
	return (ret);
}

/*
 * Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline double add_adjusted(double a, double b)
{
	struct dd sum;
	uint64_t hibits, lobits;

	sum = dd_add(a, b);
	if (sum.lo != 0) {
		EXTRACT_WORD64(hibits, sum.hi);
		if ((hibits & 1) == 0) {
			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - ((hibits ^ lobits) >> 62);
			INSERT_WORD64(sum.hi, hibits);
		}
	}
	return (sum.hi);
}
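
/*
 * Concretely: sum.hi will later be rounded to fewer effective bits. If
 * its trailing bits happen to sit exactly on a tie, round-to-even could
 * ignore the nonzero sum.lo; moving the last (even) bit one ulp in the
 * direction of sum.lo breaks the tie the way the exact value demands.
 */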

/*
 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static inline double add_and_denormalize(double a, double b, int scale)
{
	struct dd sum;
	uint64_t hibits, lobits;
	int bits_lost;

	sum = dd_add(a, b);
	/*
	 * If we are losing at least two bits of accuracy to denormalization,
	 * then the first lost bit becomes a round bit, and we adjust the
	 * lowest bit of sum.hi to make it a sticky bit summarizing all the
	 * bits in sum.lo.  With the sticky bit adjusted, the hardware will
	 * break any ties in the correct direction.
	 *
	 * If we are losing only one bit to denormalization, however, we must
	 * break the ties manually.
	 */
	if (sum.lo != 0) {
		EXTRACT_WORD64(hibits, sum.hi);
		/* bits_lost = 1 - (biased exponent + scale): how many low-order
		   mantissa bits are shifted out when the result goes subnormal */
		bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
		if ((bits_lost != 1) ^ (int)(hibits & 1)) {
			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
			INSERT_WORD64(sum.hi, hibits);
		}
	}
	return scalbn(sum.hi, scale);
}

/*
 * Compute a*b exactly, returning the exact result in a struct dd.  We assume
 * that both a and b are normalized, so no underflow or overflow will occur.
 * The current rounding mode must be round-to-nearest.
 */
static inline struct dd dd_mul(double a, double b)
{
	static const double split = 0x1p27 + 1.0;
	struct dd ret;
	double ha, hb, la, lb, p, q;

	p = a * split;
	ha = a - p;
	ha += p;
	la = a - ha;
	p = b * split;
	hb = b - p;
	hb += p;
	lb = b - hb;
	p = ha * hb;
	q = ha * lb + la * hb;
	ret.hi = p + q;
	ret.lo = p - ret.hi + q + la * lb;
	return (ret);
}
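
/*
 * The constant 2^27 + 1 is the Veltkamp splitter for doubles: it cuts the
 * 53-bit mantissa into two halves of at most 26 bits each (the low half
 * may be negative), so every partial product above is exact in double
 * precision.
 */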

/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *	Dekker, T.  A Floating-Point Technique for Extending the
 *	Available Precision.  Numer. Math. 18, 224-242 (1971).
 *
 * This algorithm is sensitive to the rounding precision.  FPUs such
 * as the i387 must be set in double-precision mode if variables are
 * to be stored in FP registers in order to avoid incorrect results.
 * This is the default on FreeBSD, but not on many other systems.
 *
 * Hardware instructions should be used on architectures that support it,
 * since this implementation will likely be several times slower.
 */
double fma(double x, double y, double z)
{
	double xs, ys, zs, adj;
	struct dd xy, r;
	int oround;
	int ex, ey, ez;
	int spread;

	/*
	 * Handle special cases. The order of operations and the particular
	 * return values here are crucial in handling special cases involving
	 * infinities, NaNs, overflows, and signed zeroes correctly.
	 */
	if (!isfinite(x) || !isfinite(y))
		return (x * y + z);
	if (!isfinite(z))
		return (z);
	if (x == 0.0 || y == 0.0)
		return (x * y + z);
	if (z == 0.0)
		return (x * y);

	xs = frexp(x, &ex);
	ys = frexp(y, &ey);
	zs = frexp(z, &ez);
	oround = fegetround();
	spread = ex + ey - ez;

	/*
	 * If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially.  Rounding
	 * modes other than FE_TONEAREST are painful.
	 */
	if (spread < -DBL_MANT_DIG) {
#ifdef FE_INEXACT
		feraiseexcept(FE_INEXACT);
#endif
#ifdef FE_UNDERFLOW
		if (!isnormal(z))
			feraiseexcept(FE_UNDERFLOW);
#endif
		switch (oround) {
		default: /* FE_TONEAREST */
			return (z);
#ifdef FE_TOWARDZERO
		case FE_TOWARDZERO:
			if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0))
				return (z);
			else
				return (nextafter(z, 0));
#endif
#ifdef FE_DOWNWARD
		case FE_DOWNWARD:
			if ((x > 0.0) ^ (y < 0.0))
				return (z);
			else
				return (nextafter(z, -INFINITY));
#endif
#ifdef FE_UPWARD
		case FE_UPWARD:
			if ((x > 0.0) ^ (y < 0.0))
				return (nextafter(z, INFINITY));
			else
				return (z);
#endif
		}
	}
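
	/*
	 * If z is more than 2*DBL_MANT_DIG exponent steps below x*y, it can
	 * only affect the result as a sticky bit, so it is replaced by the
	 * smallest normal with the same sign, which rounds identically and
	 * cannot underflow when scaled.
	 */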
	if (spread <= DBL_MANT_DIG * 2)
		zs = scalbn(zs, -spread);
	else
		zs = copysign(DBL_MIN, zs);

	fesetround(FE_TONEAREST);

	/*
	 * Basic approach for round-to-nearest:
	 *
	 *     (xy.hi, xy.lo) = x * y		(exact)
	 *     (r.hi, r.lo)   = xy.hi + zs	(exact)
	 *     adj = xy.lo + r.lo		(inexact; low bit is sticky)
	 *     result = r.hi + adj		(correctly rounded)
	 */
	xy = dd_mul(xs, ys);
	r = dd_add(xy.hi, zs);

	spread = ex + ey;

	if (r.hi == 0.0) {
		/*
		 * When the addends cancel to 0, ensure that the result has
		 * the correct sign.
		 */
		fesetround(oround);
		volatile double vzs = zs; /* XXX gcc CSE bug workaround */
		return xy.hi + vzs + scalbn(xy.lo, spread);
	}

	if (oround != FE_TONEAREST) {
		/*
		 * There is no need to worry about double rounding in directed
		 * rounding modes.
		 */
		fesetround(oround);
		adj = r.lo + xy.lo;
		return scalbn(r.hi + adj, spread);
	}

	adj = add_adjusted(r.lo, xy.lo);
	if (spread + ilogb(r.hi) > -1023)
		return scalbn(r.hi + adj, spread);
	else
		return add_and_denormalize(r.hi, adj, spread);
}
#endif