00001 #ifndef __STDAIR_BAS_FLOAT_UTILS_GOOGLE_HPP 00002 #define __STDAIR_BAS_FLOAT_UTILS_GOOGLE_HPP 00003 00004 // Redistribution and use in source and binary forms, with or without 00005 // modification, are permitted provided that the following conditions are 00006 // met: 00007 // 00008 // * Redistributions of source code must retain the above copyright 00009 // notice, this list of conditions and the following disclaimer. 00010 // * Redistributions in binary form must reproduce the above 00011 // copyright notice, this list of conditions and the following disclaimer 00012 // in the documentation and/or other materials provided with the 00013 // distribution. 00014 // * Neither the name of Google Inc. nor the names of its 00015 // contributors may be used to endorse or promote products derived from 00016 // this software without specific prior written permission. 00017 // 00018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 00022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 // 00030 // Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) 00031 // 00032 // The Google C++ Testing Framework (Google Test) 00033 00034 00035 // This template class serves as a compile-time function from size to 00036 // type. It maps a size in bytes to a primitive type with that 00037 // size. e.g. 00038 // 00039 // TypeWithSize<4>::UInt 00040 // 00041 // is typedef-ed to be unsigned int (unsigned integer made up of 4 00042 // bytes). 00043 // 00044 // Such functionality should belong to STL, but I cannot find it 00045 // there. 00046 // 00047 // Google Test uses this class in the implementation of floating-point 00048 // comparison. 00049 // 00050 // For now it only handles UInt (unsigned int) as that's all Google Test 00051 // needs. Other types can be easily added in the future if need 00052 // arises. 00053 template <size_t size> 00054 class TypeWithSize { 00055 public: 00056 // This prevents the user from using TypeWithSize<N> with incorrect 00057 // values of N. 00058 typedef void UInt; 00059 }; 00060 00061 // The specialization for size 4. 00062 template <> 00063 class TypeWithSize<4> { 00064 public: 00065 // unsigned int has size 4 in both gcc and MSVC. 00066 // 00067 // As base/basictypes.h doesn't compile on Windows, we cannot use 00068 // uint32, uint64, and etc here. 00069 typedef int Int; 00070 typedef unsigned int UInt; 00071 }; 00072 00073 // The specialization for size 8. 00074 template <> 00075 class TypeWithSize<8> { 00076 public: 00077 #if GTEST_OS_WINDOWS 00078 typedef __int64 Int; 00079 typedef unsigned __int64 UInt; 00080 #else 00081 typedef long long Int; // NOLINT 00082 typedef unsigned long long UInt; // NOLINT 00083 #endif // GTEST_OS_WINDOWS 00084 }; 00085 00086 00087 // This template class represents an IEEE floating-point number 00088 // (either single-precision or double-precision, depending on the 00089 // template parameters). 00090 // 00091 // The purpose of this class is to do more sophisticated number 00092 // comparison. (Due to round-off error, etc, it's very unlikely that 00093 // two floating-points will be equal exactly. Hence a naive 00094 // comparison by the == operation often doesn't work.) 00095 // 00096 // Format of IEEE floating-point: 00097 // 00098 // The most-significant bit being the leftmost, an IEEE 00099 // floating-point looks like 00100 // 00101 // sign_bit exponent_bits fraction_bits 00102 // 00103 // Here, sign_bit is a single bit that designates the sign of the 00104 // number. 00105 // 00106 // For float, there are 8 exponent bits and 23 fraction bits. 00107 // 00108 // For double, there are 11 exponent bits and 52 fraction bits. 00109 // 00110 // More details can be found at 00111 // http://en.wikipedia.org/wiki/IEEE_floating-point_standard. 00112 // 00113 // Template parameter: 00114 // 00115 // RawType: the raw floating-point type (either float or double) 00116 template <typename RawType> 00117 class FloatingPoint { 00118 public: 00119 // Defines the unsigned integer type that has the same size as the 00120 // floating point number. 00121 typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits; 00122 00123 // Constants. 00124 00125 // # of bits in a number. 00126 static const size_t kBitCount = 8*sizeof(RawType); 00127 00128 // # of fraction bits in a number. 00129 static const size_t kFractionBitCount = 00130 std::numeric_limits<RawType>::digits - 1; 00131 00132 // # of exponent bits in a number. 00133 static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount; 00134 00135 // The mask for the sign bit. 00136 static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1); 00137 00138 // The mask for the fraction bits. 00139 static const Bits kFractionBitMask = 00140 ~static_cast<Bits>(0) >> (kExponentBitCount + 1); 00141 00142 // The mask for the exponent bits. 00143 static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask); 00144 00145 // How many ULP's (Units in the Last Place) we want to tolerate when 00146 // comparing two numbers. The larger the value, the more error we 00147 // allow. A 0 value means that two numbers must be exactly the same 00148 // to be considered equal. 00149 // 00150 // The maximum error of a single floating-point operation is 0.5 00151 // units in the last place. On Intel CPU's, all floating-point 00152 // calculations are done with 80-bit precision, while double has 64 00153 // bits. Therefore, 4 should be enough for ordinary use. 00154 // 00155 // See the following article for more details on ULP: 00156 // http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm. 00157 static const size_t kMaxUlps = 4; 00158 00159 // Constructs a FloatingPoint from a raw floating-point number. 00160 // 00161 // On an Intel CPU, passing a non-normalized NAN (Not a Number) 00162 // around may change its bits, although the new value is guaranteed 00163 // to be also a NAN. Therefore, don't expect this constructor to 00164 // preserve the bits in x when x is a NAN. 00165 explicit FloatingPoint(const RawType& x) { u_.value_ = x; } 00166 00167 // Static methods 00168 00169 // Reinterprets a bit pattern as a floating-point number. 00170 // 00171 // This function is needed to test the AlmostEquals() method. 00172 static RawType ReinterpretBits(const Bits bits) { 00173 FloatingPoint fp(0); 00174 fp.u_.bits_ = bits; 00175 return fp.u_.value_; 00176 } 00177 00178 // Returns the floating-point number that represent positive infinity. 00179 static RawType Infinity() { 00180 return ReinterpretBits(kExponentBitMask); 00181 } 00182 00183 // Non-static methods 00184 00185 // Returns the bits that represents this number. 00186 const Bits &bits() const { return u_.bits_; } 00187 00188 // Returns the exponent bits of this number. 00189 Bits exponent_bits() const { return kExponentBitMask & u_.bits_; } 00190 00191 // Returns the fraction bits of this number. 00192 Bits fraction_bits() const { return kFractionBitMask & u_.bits_; } 00193 00194 // Returns the sign bit of this number. 00195 Bits sign_bit() const { return kSignBitMask & u_.bits_; } 00196 00197 // Returns true iff this is NAN (not a number). 00198 bool is_nan() const { 00199 // It's a NAN if the exponent bits are all ones and the fraction 00200 // bits are not entirely zeros. 00201 return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0); 00202 } 00203 00204 // Returns true iff this number is at most kMaxUlps ULP's away from 00205 // rhs. In particular, this function: 00206 // 00207 // - returns false if either number is (or both are) NAN. 00208 // - treats really large numbers as almost equal to infinity. 00209 // - thinks +0.0 and -0.0 are 0 DLP's apart. 00210 bool AlmostEquals(const FloatingPoint& rhs) const { 00211 // The IEEE standard says that any comparison operation involving 00212 // a NAN must return false. 00213 if (is_nan() || rhs.is_nan()) return false; 00214 00215 return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_) 00216 <= kMaxUlps; 00217 } 00218 00219 private: 00220 // The data type used to store the actual floating-point number. 00221 union FloatingPointUnion { 00222 RawType value_; // The raw floating-point number. 00223 Bits bits_; // The bits that represent the number. 00224 }; 00225 00226 // Converts an integer from the sign-and-magnitude representation to 00227 // the biased representation. More precisely, let N be 2 to the 00228 // power of (kBitCount - 1), an integer x is represented by the 00229 // unsigned number x + N. 00230 // 00231 // For instance, 00232 // 00233 // -N + 1 (the most negative number representable using 00234 // sign-and-magnitude) is represented by 1; 00235 // 0 is represented by N; and 00236 // N - 1 (the biggest number representable using 00237 // sign-and-magnitude) is represented by 2N - 1. 00238 // 00239 // Read http://en.wikipedia.org/wiki/Signed_number_representations 00240 // for more details on signed number representations. 00241 static Bits SignAndMagnitudeToBiased(const Bits &sam) { 00242 if (kSignBitMask & sam) { 00243 // sam represents a negative number. 00244 return ~sam + 1; 00245 } else { 00246 // sam represents a positive number. 00247 return kSignBitMask | sam; 00248 } 00249 } 00250 00251 // Given two numbers in the sign-and-magnitude representation, 00252 // returns the distance between them as an unsigned number. 00253 static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1, 00254 const Bits &sam2) { 00255 const Bits biased1 = SignAndMagnitudeToBiased(sam1); 00256 const Bits biased2 = SignAndMagnitudeToBiased(sam2); 00257 return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1); 00258 } 00259 00260 FloatingPointUnion u_; 00261 }; 00262 00263 #endif // __STDAIR_BAS_FLOAT_UTILS_GOOGLE_HPP