// Excerpts from: // doubledouble.h // Excerpted and modified by Steve Vavasis, 2/00 for use in QMG 2.0. // doubledouble is originally written by Keith Briggs. // These excerpts are covered by the // QMG license, with permission from Keith Briggs. // The entire doubledouble package is available on line at // http://www-epidem.plantsci.cam.ac.uk/~kbriggs/doubledouble.html // and is covered by the Gnu Public License. // Keith Briggs. Last revised 98 Feb 09 /* Copyright (C) 1997 Keith Martin Briggs Except where otherwise indicated, this program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ // Change log: (See doubledouble.cc also) // 97 Dec 22 KMB added x86_FIX // Eliminates -ffloat-store requirement // 97 Aug 04 KMB added ldexp // 97Jul25 (WH) - fixed Qrand48 to return drand48 + (2^-47)*drand48 // 97Jul11 (WH) added qtoa declaration. // - made all constants (Pi etc) member constants, moved to quad.cc // - added doubledoubleRand48(). // - cleaned up constructors (fewer, with default arguments). // - added some code for integer exponent, but commented it out. It // looks hard. eg, adding numbers with different exponents. Almost // always the smaller would be essentially zero, but near boundaries... // 96Nov20 added normalizing constructor doubledouble(double,double) (needed by floor) #ifndef __QUAD_H__ #define __QUAD_H__ #define DEBUG_QUAD 0 /* C++ functions for doubledouble (i.e. double+double) precision. These functions use techniques due to Dekker, Linnainmaa, Kahan, Knuth and Priest. I credit Kahan with the addition algorithm (the simplification which permits the elimination of the tests and branches is due to Knuth); Dekker and Linnainmaa with the multiply, divide, and square root routines, and Priest for the initial transcription into C++. A doubledouble x is represented as a pair of doubles, x.hi and x.lo, such that the number represented by x is x.hi + x.lo, where |x.lo| <= 0.5*ulp(x.hi), (*) and ulp(y) means `unit in the last place of y'. For the software to work correctly, IEEE Standard Arithmetic is sufficient. That includes just about every modern workstation. Also sufficient is any platform that implements arithmetic with correct rounding, i.e., given double floating point numbers a and b, a op b is computed exactly and then rounded to the nearest double. The tie-breaking rule is not important. See: T. J. Dekker Point Technique for Extending the Available Precision, Numer. Math. 18 (1971), pp. 224-242. S. Linnainmaa Software for doubled-precision floating point computations ACM TOMS 7, 172-283 (1081). D. Priest On properties of floating point arithmetics: numerical stability and the cost of accurate computations. Ph.D. Diss, Berkeley 1992. and more references in http://www.cs.wisc.edu/~shoup/ntl/quad_float.txt. */ #include #include #include class doubledouble { protected: double hi, lo; public: // Next four statements inserted by Steve Vavasis. // Set control words for Intel. typedef short int FloatControlType; class Domain_Error {}; static FloatControlType set_float_control_to_53_bits(); static void restore_float_control(FloatControlType w); // Public data members, initialized in doubledouble.cc static const double Split; // cannot be initialized here; see ARM 9.4 // Public access to hi and lo inline double h() const { return hi; } inline double l() const { return lo; } // Constructors inline doubledouble():hi(0.0),lo(0.0) {} inline doubledouble(const int n) { hi=double(n); lo=0.0; } inline doubledouble(const double x, const double y); inline doubledouble(const doubledouble&); // Operators doubledouble& operator +=(const doubledouble&); doubledouble& operator +=(const double&); doubledouble& operator +=(const int); doubledouble& operator -=(const doubledouble&); doubledouble& operator -=(const double&); doubledouble& operator -=(const int); doubledouble& operator *=(const doubledouble&); doubledouble& operator *=(const double&); doubledouble& operator *=(const int); doubledouble& operator /=(const doubledouble&); doubledouble& operator /=(const double&); doubledouble& operator /=(const int); doubledouble& operator=(const doubledouble&); doubledouble& operator=(const double&); doubledouble& operator=(const int); // Get funny errors without this doubledouble& operator=(const char*); // Type conversion operator. Not really necessary... operator double() const { return hi+lo; } operator int() const { return (int)(hi+lo); } // inline doubledouble normalize(void) { double h=hi+lo; lo=lo+(hi-h); hi=h; return *this; } // Friends inline friend doubledouble operator -(const doubledouble& x); // Unary - friend doubledouble operator +(const doubledouble&, const doubledouble& ); friend doubledouble operator +(const double&, const doubledouble& ); friend doubledouble operator +(const int, const doubledouble& ); friend doubledouble operator +(const doubledouble&, const double& ); friend doubledouble operator +(const doubledouble&, const int ); friend doubledouble operator -(const doubledouble&, const doubledouble& ); friend doubledouble operator -(const double&, const doubledouble& ); friend doubledouble operator -(const int, const doubledouble& ); friend doubledouble operator -(const doubledouble&, const double& ); friend doubledouble operator -(const doubledouble&, const int ); friend doubledouble operator *(const doubledouble&, const doubledouble& ); friend doubledouble operator *(const double&, const doubledouble& ); friend doubledouble operator *(const int, const doubledouble& ); friend doubledouble operator *(const doubledouble&, const double& ); friend doubledouble operator *(const doubledouble&, const int ); friend doubledouble operator /(const doubledouble&, const doubledouble& ); friend doubledouble operator /(const doubledouble&, const double& ); friend doubledouble operator /(const doubledouble&, const int ); friend doubledouble operator /(const double&, const doubledouble& ); friend doubledouble operator /(const int, const doubledouble& ); friend doubledouble recip(const doubledouble &); friend doubledouble operator |(const doubledouble&, const doubledouble& ); // member functions }; // end class doubledouble void base_and_prec(void); // inline members inline doubledouble::doubledouble(const double x, const double y = 0.0) { hi=x+y; lo=y+(x-hi); // normalize } inline doubledouble::doubledouble(const doubledouble& x):hi(x.hi),lo(x.lo) {} inline doubledouble& doubledouble::operator=(const doubledouble& x){ hi=x.hi; lo=x.lo; return *this;} inline doubledouble& doubledouble::operator=(const double& x){ hi=x; lo=0.0; return *this;} inline doubledouble& doubledouble::operator=(const int x){ hi=x; lo=0.0; return *this;} // inline functions inline doubledouble operator-(const doubledouble& x) { return doubledouble(-x.hi, -x.lo); } doubledouble sqrt(const doubledouble&); doubledouble sqr(const doubledouble&); #endif // __QUAD_H__