diff options
Diffstat (limited to 'arch/mips/math-emu/dp_sqrt.c')
-rw-r--r-- | arch/mips/math-emu/dp_sqrt.c | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c new file mode 100644 index 00000000..a2a51b87 --- /dev/null +++ b/arch/mips/math-emu/dp_sqrt.c @@ -0,0 +1,164 @@ +/* IEEE754 floating point arithmetic + * double precision square root + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * + * ######################################################################## + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * ######################################################################## + */ + + +#include "ieee754dp.h" + +static const unsigned table[] = { + 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, + 29598, 36145, 43202, 50740, 58733, 67158, 75992, + 85215, 83599, 71378, 60428, 50647, 41945, 34246, + 27478, 21581, 16499, 12183, 8588, 5674, 3403, + 1742, 661, 130 +}; + +ieee754dp ieee754dp_sqrt(ieee754dp x) +{ + struct _ieee754_csr oldcsr; + ieee754dp y, z, t; + unsigned scalx, yh; + COMPXDP; + + EXPLODEXDP; + CLEARCX; + FLUSHXDP; + + /* x == INF or NAN? */ + switch (xc) { + case IEEE754_CLASS_QNAN: + /* sqrt(Nan) = Nan */ + return ieee754dp_nanxcpt(x, "sqrt"); + case IEEE754_CLASS_SNAN: + SETCX(IEEE754_INVALID_OPERATION); + return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); + case IEEE754_CLASS_ZERO: + /* sqrt(0) = 0 */ + return x; + case IEEE754_CLASS_INF: + if (xs) { + /* sqrt(-Inf) = Nan */ + SETCX(IEEE754_INVALID_OPERATION); + return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); + } + /* sqrt(+Inf) = Inf */ + return x; + case IEEE754_CLASS_DNORM: + DPDNORMX; + /* fall through */ + case IEEE754_CLASS_NORM: + if (xs) { + /* sqrt(-x) = Nan */ + SETCX(IEEE754_INVALID_OPERATION); + return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); + } + break; + } + + /* save old csr; switch off INX enable & flag; set RN rounding */ + oldcsr = ieee754_csr; + ieee754_csr.mx &= ~IEEE754_INEXACT; + ieee754_csr.sx &= ~IEEE754_INEXACT; + ieee754_csr.rm = IEEE754_RN; + + /* adjust exponent to prevent overflow */ + scalx = 0; + if (xe > 512) { /* x > 2**-512? */ + xe -= 512; /* x = x / 2**512 */ + scalx += 256; + } else if (xe < -512) { /* x < 2**-512? */ + xe += 512; /* x = x * 2**512 */ + scalx -= 256; + } + + y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + + /* magic initial approximation to almost 8 sig. bits */ + yh = y.bits >> 32; + yh = (yh >> 1) + 0x1ff80000; + yh = yh - table[(yh >> 15) & 31]; + y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff); + + /* Heron's rule once with correction to improve to ~18 sig. bits */ + /* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */ + t = ieee754dp_div(x, y); + y = ieee754dp_add(y, t); + y.bits -= 0x0010000600000000LL; + y.bits &= 0xffffffff00000000LL; + + /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */ + /* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */ + z = t = ieee754dp_mul(y, y); + t.parts.bexp += 0x001; + t = ieee754dp_add(t, z); + z = ieee754dp_mul(ieee754dp_sub(x, z), y); + + /* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */ + t = ieee754dp_div(z, ieee754dp_add(t, x)); + t.parts.bexp += 0x001; + y = ieee754dp_add(y, t); + + /* twiddle last bit to force y correctly rounded */ + + /* set RZ, clear INEX flag */ + ieee754_csr.rm = IEEE754_RZ; + ieee754_csr.sx &= ~IEEE754_INEXACT; + + /* t=x/y; ...chopped quotient, possibly inexact */ + t = ieee754dp_div(x, y); + + if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) { + + if (!(ieee754_csr.sx & IEEE754_INEXACT)) + /* t = t-ulp */ + t.bits -= 1; + + /* add inexact to result status */ + oldcsr.cx |= IEEE754_INEXACT; + oldcsr.sx |= IEEE754_INEXACT; + + switch (oldcsr.rm) { + case IEEE754_RP: + y.bits += 1; + /* drop through */ + case IEEE754_RN: + t.bits += 1; + break; + } + + /* y=y+t; ...chopped sum */ + y = ieee754dp_add(y, t); + + /* adjust scalx for correctly rounded sqrt(x) */ + scalx -= 1; + } + + /* py[n0]=py[n0]+scalx; ...scale back y */ + y.parts.bexp += scalx; + + /* restore rounding mode, possibly set inexact */ + ieee754_csr = oldcsr; + + return y; +} |