// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "big-radix-floating-point.h" #include "binary-floating-point.h" #include "decimal.h" #include "../common/bit-population-count.h" #include "../common/leading-zero-bit-count.h" #include #include #include namespace Fortran::decimal { template bool BigRadixFloatingPointNumber::ParseNumber( const char *&p, bool &inexact) { SetToZero(); while (*p == ' ') { ++p; } const char *q{p}; isNegative_ = *q == '-'; if (*q == '-' || *q == '+') { ++q; } const char *start{q}; while (*q == '0') { ++q; } const char *first{q}; for (; *q >= '0' && *q <= '9'; ++q) { } const char *point{nullptr}; if (*q == '.') { point = q; for (++q; *q >= '0' && *q <= '9'; ++q) { } } if (q == start || (q == start + 1 && *start == '.')) { return false; // require at least one digit } // There's a valid number here; set the reference argument to point to // the first character afterward. p = q; // Strip off trailing zeroes if (point != nullptr) { while (q[-1] == '0') { --q; } if (q[-1] == '.') { point = nullptr; --q; } } if (point == nullptr) { while (q > first && q[-1] == '0') { --q; ++exponent_; } } // Trim any excess digits const char *limit{first + maxDigits * log10Radix + (point != nullptr)}; if (q > limit) { inexact = true; if (point >= limit) { q = point; point = nullptr; } if (point == nullptr) { exponent_ += q - limit; } q = limit; } if (point != nullptr) { exponent_ -= static_cast(q - point - 1); } if (q == first) { exponent_ = 0; // all zeros } // Rack the decimal digits up into big Digits. for (auto times{radix}; q-- > first;) { if (*q != '.') { if (times == radix) { digit_[digits_++] = *q - '0'; times = 10; } else { digit_[digits_ - 1] += times * (*q - '0'); times *= 10; } } } // Look for an optional exponent field. q = p; switch (*q) { case 'e': case 'E': case 'd': case 'D': case 'q': case 'Q': { bool negExpo{*++q == '-'}; if (*q == '-' || *q == '+') { ++q; } if (*q >= '0' && *q <= '9') { int expo{0}; while (*q == '0') { ++q; } const char *expDig{q}; while (*q >= '0' && *q <= '9') { expo = 10 * expo + *q++ - '0'; } if (q >= expDig + 8) { // There's a ridiculous number of nonzero exponent digits. // The decimal->binary conversion routine will cope with // returning 0 or Inf, but we must ensure that "expo" didn't // overflow back around to something legal. expo = 10 * Real::RANGE; exponent_ = 0; } p = q; // exponent was valid if (negExpo) { exponent_ -= expo; } else { exponent_ += expo; } } } break; default: break; } return true; } // This local utility class represents an unrounded nonnegative // binary floating-point value with an unbiased (i.e., signed) // binary exponent, an integer value (not a fraction) with an implied // binary point to its *right*, and some guard bits for rounding. template class IntermediateFloat { public: static constexpr int precision{PREC}; using IntType = HostUnsignedIntType; static constexpr IntType topBit{IntType{1} << (precision - 1)}; static constexpr IntType mask{topBit + (topBit - 1)}; IntermediateFloat() {} IntermediateFloat(const IntermediateFloat &) = default; // Assumes that exponent_ is valid on entry, and may increment it. // Returns the number of guard_ bits that have been determined. template bool SetTo(UINT n) { static constexpr int nBits{CHAR_BIT * sizeof n}; if constexpr (precision >= nBits) { value_ = n; guard_ = 0; return 0; } else { int shift{common::BitsNeededFor(n) - precision}; if (shift <= 0) { value_ = n; guard_ = 0; return 0; } else { value_ = n >> shift; exponent_ += shift; n <<= nBits - shift; guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); return shift; } } } void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } bool IsFull() const { return value_ >= topBit; } void AdjustExponent(int by) { exponent_ += by; } void SetGuard(int g) { guard_ |= (static_cast(g & 6) << (guardBits - 3)) | (g & 1); } ConversionToBinaryResult ToBinary( bool isNegative, FortranRounding) const; private: static constexpr int guardBits{3}; // guard, round, sticky using GuardType = int; static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; IntType value_{0}; GuardType guard_{0}; int exponent_{0}; }; template ConversionToBinaryResult IntermediateFloat::ToBinary( bool isNegative, FortranRounding rounding) const { using Binary = BinaryFloatingPointNumber; // Create a fraction with a binary point to the left of the integer // value_, and bias the exponent. IntType fraction{value_}; GuardType guard{guard_}; int expo{exponent_ + Binary::exponentBias + (precision - 1)}; while (expo < 1 && (fraction > 0 || guard > oneHalf)) { guard = (guard & 1) | (guard >> 1) | ((static_cast(fraction) & 1) << (guardBits - 1)); fraction >>= 1; ++expo; } int flags{Exact}; if (guard != 0) { flags |= Inexact; } if (fraction == 0 && guard <= oneHalf) { return {Binary{}, static_cast(flags)}; } // The value is nonzero; normalize it. while (fraction < topBit && expo > 1) { --expo; fraction = 2 * fraction + (guard >> (guardBits - 2)); guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); } // Apply rounding bool incr{false}; switch (rounding) { case RoundNearest: case RoundDefault: incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); break; case RoundUp: incr = guard != 0 && !isNegative; break; case RoundDown: incr = guard != 0 && isNegative; break; case RoundToZero: break; case RoundCompatible: incr = guard >= oneHalf; break; } if (incr) { if (fraction == mask) { // rounding causes a carry ++expo; fraction = topBit; } else { ++fraction; } } if (expo == 1 && fraction < topBit) { expo = 0; // subnormal } if (expo >= Binary::maxExponent) { expo = Binary::maxExponent; // Inf flags |= Overflow; fraction = 0; } using Raw = typename Binary::RawType; Raw raw = static_cast(isNegative) << (Binary::bits - 1); raw |= static_cast(expo) << Binary::significandBits; if constexpr (Binary::implicitMSB) { fraction &= ~topBit; } raw |= fraction; return {Binary(raw), static_cast(flags)}; } template ConversionToBinaryResult BigRadixFloatingPointNumber::ConvertToBinary() { // On entry, *this holds a multi-precision integer value in a radix of a // large power of ten. Its radix point is defined to be to the right of its // digits, and "exponent_" is the power of ten by which it is to be scaled. Normalize(); if (digits_ == 0) { // zero value return {Real{SignBit()}}; } // The value is not zero: x = D. * 10.**E // Shift our perspective on the radix (& decimal) point so that // it sits to the *left* of the digits: i.e., x = .D * 10.**E exponent_ += digits_ * log10Radix; // Sanity checks for ridiculous exponents static constexpr int crazy{2 * Real::RANGE + log10Radix}; if (exponent_ < -crazy) { // underflow to +/-0. return {Real{SignBit()}, Inexact}; } else if (exponent_ > crazy) { // overflow to +/-Inf. return {Real{Infinity()}, Overflow}; } // Apply any negative decimal exponent by multiplication // by a power of two, adjusting the binary exponent to compensate. IntermediateFloat f; while (exponent_ < log10Radix) { // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) f.AdjustExponent(-9); digitLimit_ = digits_; if (int carry{MultiplyWithoutNormalization<512>()}) { // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) PushCarry(carry); exponent_ += log10Radix; } } // Apply any positive decimal exponent greater than // is needed to treat the topmost digit as an integer // part by multiplying by 10 or 10000 repeatedly. while (exponent_ > log10Radix) { digitLimit_ = digits_; int carry; if (exponent_ >= log10Radix + 4) { // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) exponent_ -= 4; carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); f.AdjustExponent(4); } else { // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) --exponent_; carry = MultiplyWithoutNormalization<5>(); f.AdjustExponent(1); } if (carry != 0) { // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) PushCarry(carry); exponent_ += log10Radix; } } // So exponent_ is now log10Radix, meaning that the // MSD can be taken as an integer part and transferred // to the binary result. // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) int guardShift{f.SetTo(digit_[--digits_])}; // Transfer additional bits until the result is normal. digitLimit_ = digits_; while (!f.IsFull()) { // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) f.AdjustExponent(-1); std::uint32_t carry = MultiplyWithoutNormalization<2>(); f.ShiftIn(carry); } // Get the next few bits for rounding. Allow for some guard bits // that may have already been set in f.SetTo() above. int guard{0}; if (guardShift == 0) { guard = MultiplyWithoutNormalization<4>(); } else if (guardShift == 1) { guard = MultiplyWithoutNormalization<2>(); } guard = guard + guard + !IsZero(); f.SetGuard(guard); return f.ToBinary(isNegative_, rounding_); } template ConversionToBinaryResult BigRadixFloatingPointNumber::ConvertToBinary(const char *&p) { bool inexact{false}; if (ParseNumber(p, inexact)) { auto result{ConvertToBinary()}; if (inexact) { result.flags = static_cast(result.flags | Inexact); } return result; } else { // Could not parse a decimal floating-point number. p has been // advanced over any leading spaces. if (toupper(p[0]) == 'N' && toupper(p[1]) == 'A' && toupper(p[2]) == 'N') { // NaN p += 3; return {Real{NaN()}}; } else { // Try to parse Inf, maybe with a sign const char *q{p}; isNegative_ = *q == '-'; if (*q == '-' || *q == '+') { ++q; } if (toupper(q[0]) == 'I' && toupper(q[1]) == 'N' && toupper(q[2]) == 'F') { p = q + 3; return {Real{Infinity()}}; } else { // Invalid input return {Real{NaN()}, Invalid}; } } } } template ConversionToBinaryResult ConvertToBinary( const char *&p, enum FortranRounding rounding) { return BigRadixFloatingPointNumber{rounding}.ConvertToBinary(p); } template ConversionToBinaryResult<8> ConvertToBinary<8>( const char *&, enum FortranRounding); template ConversionToBinaryResult<11> ConvertToBinary<11>( const char *&, enum FortranRounding); template ConversionToBinaryResult<24> ConvertToBinary<24>( const char *&, enum FortranRounding); template ConversionToBinaryResult<53> ConvertToBinary<53>( const char *&, enum FortranRounding); template ConversionToBinaryResult<64> ConvertToBinary<64>( const char *&, enum FortranRounding); template ConversionToBinaryResult<112> ConvertToBinary<112>( const char *&, enum FortranRounding); extern "C" { enum ConversionResultFlags ConvertDecimalToFloat( const char **p, float *f, enum FortranRounding rounding) { auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; std::memcpy(reinterpret_cast(f), reinterpret_cast(&result.binary), sizeof *f); return result.flags; } enum ConversionResultFlags ConvertDecimalToDouble( const char **p, double *d, enum FortranRounding rounding) { auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; std::memcpy(reinterpret_cast(d), reinterpret_cast(&result.binary), sizeof *d); return result.flags; } #if __x86_64__ enum ConversionResultFlags ConvertDecimalToLongDouble( const char **p, long double *ld, enum FortranRounding rounding) { auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; std::memcpy(reinterpret_cast(ld), reinterpret_cast(&result.binary), sizeof *ld); return result.flags; } #endif } }