fma.c — musl libm fused multiply-add implementation (x87 80-bit path and FreeBSD double-double fallback).
  1. #include <fenv.h>
  2. #include "libm.h"
  3. #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384
/*
 * Bit-level view of an x87 80-bit extended-precision long double.
 * m holds the full 64-bit significand (the integer bit is explicit in
 * this format), e the 15-bit biased exponent, s the sign.
 * NOTE(review): field order assumes a little-endian layout — fine for
 * the x86 targets where this #if branch is compiled, but verify if
 * ever reused elsewhere.
 */
union ld80 {
	long double x;
	struct {
		uint64_t m;       /* significand, incl. explicit leading bit */
		uint16_t e : 15;  /* biased exponent */
		uint16_t s : 1;   /* sign bit */
		uint16_t pad;
	} bits;
};
  13. /* exact add, assumes exponent_x >= exponent_y */
  14. static void add(long double *hi, long double *lo, long double x, long double y)
  15. {
  16. long double r;
  17. r = x + y;
  18. *hi = r;
  19. r -= x;
  20. *lo = y - r;
  21. }
  22. /* exact mul, assumes no over/underflow */
  23. static void mul(long double *hi, long double *lo, long double x, long double y)
  24. {
  25. static const long double c = 1.0 + 0x1p32L;
  26. long double cx, xh, xl, cy, yh, yl;
  27. cx = c*x;
  28. xh = (x - cx) + cx;
  29. xl = x - xh;
  30. cy = c*y;
  31. yh = (y - cy) + cy;
  32. yl = y - yh;
  33. *hi = x*y;
  34. *lo = (xh*yh - *hi) + xh*yl + xl*yh + xl*yl;
  35. }
/*
 * Assume (long double)(hi+lo) == hi, i.e. lo lies entirely below hi's
 * rounding precision. Return an adjusted hi such that rounding it to
 * double gives the correctly rounded value of hi+lo (defeats the
 * double rounding through 64-bit extended precision).
 */
static long double adjust(long double hi, long double lo)
{
	union ld80 uhi, ulo;

	if (lo == 0)
		return hi;
	uhi.x = hi;
	/* if any of the low 10 significand bits are set, rounding hi to
	   double (53 bits) cannot land on a tie that lo would break */
	if (uhi.bits.m & 0x3ff)
		return hi;
	ulo.x = lo;
	/* nudge hi's last bit toward lo so it acts as a sticky bit */
	if (uhi.bits.s == ulo.bits.s)
		uhi.bits.m++;   /* same sign: bump magnitude up */
	else
		uhi.bits.m--;   /* opposite sign: bump magnitude down */
	return uhi.x;
}
  55. static long double dadd(long double x, long double y)
  56. {
  57. add(&x, &y, x, y);
  58. return adjust(x, y);
  59. }
  60. static long double dmul(long double x, long double y)
  61. {
  62. mul(&x, &y, x, y);
  63. return adjust(x, y);
  64. }
  65. static int getexp(long double x)
  66. {
  67. union ld80 u;
  68. u.x = x;
  69. return u.bits.e;
  70. }
/*
 * fma(x, y, z): compute x*y + z with a single rounding, using 80-bit
 * extended precision for the exact intermediate products/sums and the
 * adjust() sticky-bit trick to avoid double rounding.
 */
double fma(double x, double y, double z)
{
	long double hi, lo1, lo2, xy;
	int round, ez, exy;

	/* handle +-inf,nan: defer to hardware semantics for NaN/inf mixes */
	if (!isfinite(x) || !isfinite(y))
		return x*y + z;
	if (!isfinite(z))
		return z;
	/* handle +-0: x*y + z gets the sign of a zero result right */
	if (x == 0.0 || y == 0.0)
		return x*y + z;
	round = fegetround();
	if (z == 0.0) {
		if (round == FE_TONEAREST)
			return dmul(x, y);
		/* directed rounding: plain product is already correct */
		return x*y;
	}

	/* exact mul and add require nearest rounding */
	/* spurious inexact exceptions may be raised */
	fesetround(FE_TONEAREST);
	mul(&xy, &lo1, x, y);
	exy = getexp(xy);
	ez = getexp(z);
	if (ez > exy) {
		/* z dominates: add() needs the larger exponent first */
		add(&hi, &lo2, z, xy);
	} else if (ez > exy - 12) {
		add(&hi, &lo2, xy, z);
		if (hi == 0) {
			fesetround(round);
			/* make sure that the sign of 0 is correct */
			return (xy + z) + lo1;
		}
	} else {
		/*
		 ez <= exy - 12
		 the 12 extra bits (1guard, 11round+sticky) are needed so with
		 lo = dadd(lo1, lo2)
		 elo <= ehi - 11, and we use the last 10 bits in adjust so
		 dadd(hi, lo)
		 gives correct result when rounded to double
		 */
		hi = xy;
		lo2 = z;
	}
	fesetround(round);
	if (round == FE_TONEAREST)
		return dadd(hi, dadd(lo1, lo2));
	/* directed rounding: no double-rounding hazard, plain sums suffice */
	return hi + (lo1 + lo2);
}
  121. #else
  122. /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
  123. /*-
  124. * Copyright (c) 2005-2011 David Schultz <[email protected]>
  125. * All rights reserved.
  126. *
  127. * Redistribution and use in source and binary forms, with or without
  128. * modification, are permitted provided that the following conditions
  129. * are met:
  130. * 1. Redistributions of source code must retain the above copyright
  131. * notice, this list of conditions and the following disclaimer.
  132. * 2. Redistributions in binary form must reproduce the above copyright
  133. * notice, this list of conditions and the following disclaimer in the
  134. * documentation and/or other materials provided with the distribution.
  135. *
  136. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  137. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  138. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  139. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  140. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  141. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  142. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  143. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  144. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  145. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  146. * SUCH DAMAGE.
  147. */
  148. /*
  149. * A struct dd represents a floating-point number with twice the precision
  150. * of a double. We maintain the invariant that "hi" stores the 53 high-order
  151. * bits of the result.
  152. */
  153. struct dd {
  154. double hi;
  155. double lo;
  156. };
  157. /*
  158. * Compute a+b exactly, returning the exact result in a struct dd. We assume
  159. * that both a and b are finite, but make no assumptions about their relative
  160. * magnitudes.
  161. */
  162. static inline struct dd dd_add(double a, double b)
  163. {
  164. struct dd ret;
  165. double s;
  166. ret.hi = a + b;
  167. s = ret.hi - a;
  168. ret.lo = (a - (ret.hi - s)) + (b - s);
  169. return (ret);
  170. }
/*
 * Compute a+b, with a small tweak: The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding. This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent. For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 * J. Coonen. An Implementation Guide to a Proposed Standard for
 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
 */
static inline double add_adjusted(double a, double b)
{
	struct dd sum;
	uint64_t hibits, lobits;

	sum = dd_add(a, b);
	if (sum.lo != 0) {
		EXTRACT_WORD64(hibits, sum.hi);
		/* only when hi's LSB is already 0 may it be turned into a
		   sticky bit; an odd LSB already summarizes lost bits */
		if ((hibits & 1) == 0) {
			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - ((hibits ^ lobits) >> 62);
			INSERT_WORD64(sum.hi, hibits);
		}
	}
	return (sum.hi);
}
/*
 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static inline double add_and_denormalize(double a, double b, int scale)
{
	struct dd sum;
	uint64_t hibits, lobits;
	int bits_lost;

	sum = dd_add(a, b);

	/*
	 * If we are losing at least two bits of accuracy to denormalization,
	 * then the first lost bit becomes a round bit, and we adjust the
	 * lowest bit of sum.hi to make it a sticky bit summarizing all the
	 * bits in sum.lo. With the sticky bit adjusted, the hardware will
	 * break any ties in the correct direction.
	 *
	 * If we are losing only one bit to denormalization, however, we must
	 * break the ties manually.
	 */
	if (sum.lo != 0) {
		EXTRACT_WORD64(hibits, sum.hi);
		bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
		/* precedence note: != binds tighter than ^, so this is
		   (bits_lost != 1) ^ (lsb of hi) — intentional upstream idiom */
		if (bits_lost != 1 ^ (int)(hibits & 1)) {
			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
			INSERT_WORD64(sum.hi, hibits);
		}
	}
	return scalbn(sum.hi, scale);
}
  231. /*
  232. * Compute a*b exactly, returning the exact result in a struct dd. We assume
  233. * that both a and b are normalized, so no underflow or overflow will occur.
  234. * The current rounding mode must be round-to-nearest.
  235. */
  236. static inline struct dd dd_mul(double a, double b)
  237. {
  238. static const double split = 0x1p27 + 1.0;
  239. struct dd ret;
  240. double ha, hb, la, lb, p, q;
  241. p = a * split;
  242. ha = a - p;
  243. ha += p;
  244. la = a - ha;
  245. p = b * split;
  246. hb = b - p;
  247. hb += p;
  248. lb = b - hb;
  249. p = ha * hb;
  250. q = ha * lb + la * hb;
  251. ret.hi = p + q;
  252. ret.lo = p - ret.hi + q + la * lb;
  253. return (ret);
  254. }
/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 * Dekker, T. A Floating-Point Technique for Extending the
 * Available Precision. Numer. Math. 18, 224-242 (1971).
 *
 * This algorithm is sensitive to the rounding precision. FPUs such
 * as the i387 must be set in double-precision mode if variables are
 * to be stored in FP registers in order to avoid incorrect results.
 * This is the default on FreeBSD, but not on many other systems.
 *
 * Hardware instructions should be used on architectures that support it,
 * since this implementation will likely be several times slower.
 */
double fma(double x, double y, double z)
{
	double xs, ys, zs, adj;
	struct dd xy, r;
	int oround;
	int ex, ey, ez;
	int spread;

	/*
	 * Handle special cases. The order of operations and the particular
	 * return values here are crucial in handling special cases involving
	 * infinities, NaNs, overflows, and signed zeroes correctly.
	 */
	if (!isfinite(x) || !isfinite(y))
		return (x * y + z);
	if (!isfinite(z))
		return (z);
	if (x == 0.0 || y == 0.0)
		return (x * y + z);
	if (z == 0.0)
		return (x * y);

	/* work with significands in [0.5, 1); exponents kept separately */
	xs = frexp(x, &ex);
	ys = frexp(y, &ey);
	zs = frexp(z, &ez);
	oround = fegetround();
	spread = ex + ey - ez;

	/*
	 * If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially. Rounding
	 * modes other than FE_TONEAREST are painful.
	 */
	if (spread < -DBL_MANT_DIG) {
#ifdef FE_INEXACT
		feraiseexcept(FE_INEXACT);
#endif
#ifdef FE_UNDERFLOW
		if (!isnormal(z))
			feraiseexcept(FE_UNDERFLOW);
#endif
		switch (oround) {
		default: /* FE_TONEAREST */
			return (z);
#ifdef FE_TOWARDZERO
		case FE_TOWARDZERO:
			/* relational ops bind tighter than ^: this xors the
			   three sign tests to get the sign of x*y+z */
			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				return (z);
			else
				return (nextafter(z, 0));
#endif
#ifdef FE_DOWNWARD
		case FE_DOWNWARD:
			if (x > 0.0 ^ y < 0.0)
				return (z);
			else
				return (nextafter(z, -INFINITY));
#endif
#ifdef FE_UPWARD
		case FE_UPWARD:
			if (x > 0.0 ^ y < 0.0)
				return (nextafter(z, INFINITY));
			else
				return (z);
#endif
		}
	}
	/* bring z into x*y's scale; if z is far smaller it only matters as
	   a sticky contribution, so clamp it to the smallest normal */
	if (spread <= DBL_MANT_DIG * 2)
		zs = scalbn(zs, -spread);
	else
		zs = copysign(DBL_MIN, zs);

	fesetround(FE_TONEAREST);

	/*
	 * Basic approach for round-to-nearest:
	 *
	 * (xy.hi, xy.lo) = x * y (exact)
	 * (r.hi, r.lo) = xy.hi + z (exact)
	 * adj = xy.lo + r.lo (inexact; low bit is sticky)
	 * result = r.hi + adj (correctly rounded)
	 */
	xy = dd_mul(xs, ys);
	r = dd_add(xy.hi, zs);
	spread = ex + ey;

	if (r.hi == 0.0) {
		/*
		 * When the addends cancel to 0, ensure that the result has
		 * the correct sign.
		 */
		fesetround(oround);
		volatile double vzs = zs; /* XXX gcc CSE bug workaround */
		return xy.hi + vzs + scalbn(xy.lo, spread);
	}

	if (oround != FE_TONEAREST) {
		/*
		 * There is no need to worry about double rounding in directed
		 * rounding modes.
		 */
		fesetround(oround);
		adj = r.lo + xy.lo;
		return scalbn(r.hi + adj, spread);
	}

	adj = add_adjusted(r.lo, xy.lo);
	/* normal result: one scalbn rounds once; subnormal result needs the
	   careful denormalization path to avoid a second rounding */
	if (spread + ilogb(r.hi) > -1023)
		return scalbn(r.hi + adj, spread);
	else
		return add_and_denormalize(r.hi, adj, spread);
}
  376. #endif