llvm/flang/lib/evaluate/real.h

608 lines
20 KiB
C
Raw Normal View History

// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef FORTRAN_EVALUATE_REAL_H_
#define FORTRAN_EVALUATE_REAL_H_
#include "common.h"
#include "integer.h"
#include <cinttypes>
#include <limits>
namespace Fortran::evaluate {
// Models IEEE-754 floating-point numbers. The first argument to this
// class template must be (or look like) an instance of Integer.
template<typename WORD, int PRECISION, bool IMPLICIT_MSB = true> class Real {
public:
using Word = WORD;
static constexpr int bits{Word::bits};
static constexpr int precision{PRECISION};
static constexpr bool implicitMSB{IMPLICIT_MSB};
static constexpr int significandBits{precision - implicitMSB};
static constexpr int exponentBits{bits - significandBits - 1 /*sign*/};
static_assert(precision > 0);
static_assert(exponentBits > 1);
static constexpr std::uint64_t maxExponent{(1 << exponentBits) - 1};
static constexpr std::uint64_t exponentBias{maxExponent / 2};
constexpr Real() {} // +0.0
constexpr Real(const Real &) = default;
constexpr Real(const Word &bits) : word_{bits} {}
constexpr Real &operator=(const Real &) = default;
template<typename INT>
static constexpr ValueWithRealFlags<Real> ConvertSigned(
const INT &n, Rounding rounding = Rounding::TiesToEven) {
bool isNegative{n.IsNegative()};
INT absN{n};
if (isNegative) {
absN = n.Negate().value; // overflow is safe to ignore
}
int leadz{absN.LEADZ()};
if (leadz >= absN.bits) {
return {}; // all bits zero -> +0.0
}
ValueWithRealFlags<Real> result;
std::uint64_t exponent{exponentBias + absN.bits - leadz - 1};
int bitsNeeded{absN.bits - (leadz + implicitMSB)};
int bitsLost{bitsNeeded - significandBits};
if (bitsLost <= 0) {
Fraction fraction{Fraction::Convert(absN).value};
result.flags |= result.value.Normalize(
isNegative, exponent, fraction.SHIFTL(-bitsLost));
} else {
Fraction fraction{Fraction::Convert(absN.SHIFTR(bitsLost)).value};
result.flags |= result.value.Normalize(isNegative, exponent, fraction);
RoundingBits roundingBits{absN, bitsLost};
result.flags |= result.value.Round(rounding, roundingBits);
}
return result;
}
// TODO conversion from (or to?) (other) real types
// TODO AINT/ANINT, CEILING, FLOOR, DIM, MAX, MIN, DPROD, FRACTION
// HUGE, INT/NINT, MAXEXPONENT, MINEXPONENT, NEAREST, OUT_OF_RANGE,
// PRECISION, HUGE, TINY, RRSPACING/SPACING, SCALE, SET_EXPONENT, SIGN
constexpr Word RawBits() const { return word_; }
constexpr std::uint64_t Exponent() const {
return word_.IBITS(significandBits, exponentBits).ToUInt64();
}
constexpr bool IsNegative() const {
return !IsNotANumber() && word_.BTEST(bits - 1);
}
constexpr bool IsNotANumber() const {
return Exponent() == maxExponent && !GetSignificand().IsZero();
}
constexpr bool IsInfinite() const {
return Exponent() == maxExponent && GetSignificand().IsZero();
}
constexpr bool IsZero() const {
return Exponent() == 0 && GetSignificand().IsZero();
}
constexpr bool IsDenormal() const {
return Exponent() == 0 && !GetSignificand().IsZero();
}
constexpr Real ABS() const { // non-arithmetic, no flags returned
Real result;
result.word_ = word_.IBCLR(bits - 1);
return result;
}
constexpr Real Negate() const {
Real result;
result.word_ = word_.IEOR(word_.MASKL(1));
return result;
}
constexpr DefaultIntrinsicInteger EXPONENT() const {
std::uint64_t exponent{Exponent()};
if (exponent == maxExponent) {
return DefaultIntrinsicInteger::HUGE();
} else {
return {static_cast<std::int64_t>(exponent - exponentBias)};
}
}
static constexpr Real EPSILON() {
Real epsilon;
epsilon.Normalize(false, exponentBias - precision, Fraction::MASKL(1));
return epsilon;
}
template<typename INT> constexpr ValueWithRealFlags<INT> ToInteger() const {
bool isNegative{IsNegative()};
std::uint64_t exponent{Exponent()};
Fraction fraction{GetFraction()};
ValueWithRealFlags<INT> result;
if (exponent == maxExponent && !fraction.IsZero()) { // NaN
result.flags.set(RealFlag::InvalidArgument);
result.value = result.value.HUGE();
} else if (exponent >= maxExponent || // +/-Inf
exponent >= exponentBias + result.value.bits) { // too big
if (isNegative) {
result.value = result.value.MASKL(1);
} else {
result.value = result.value.HUGE();
}
result.flags.set(RealFlag::Overflow);
} else if (exponent < exponentBias) { // |x| < 1.0 -> 0
if (!fraction.IsZero()) {
result.flags.set(RealFlag::Underflow);
result.flags.set(RealFlag::Inexact);
}
} else {
// finite number |x| >= 1.0
constexpr std::uint64_t noShiftExponent{exponentBias + precision - 1};
if (exponent < noShiftExponent) {
int rshift = noShiftExponent - exponent;
if (!fraction.IBITS(0, rshift).IsZero()) {
result.flags.set(RealFlag::Inexact);
}
auto truncated = result.value.Convert(fraction.SHIFTR(rshift));
if (truncated.overflow) {
result.flags.set(RealFlag::Overflow);
} else {
result.value = truncated.value;
}
} else {
int lshift = exponent - noShiftExponent;
if (lshift + precision >= result.value.bits) {
result.flags.set(RealFlag::Overflow);
} else {
result.value = result.value.Convert(fraction).value.SHIFTL(lshift);
}
}
if (result.flags.test(RealFlag::Overflow)) {
result.value = result.value.HUGE();
} else if (isNegative) {
auto negated = result.value.Negate();
if (negated.overflow) {
result.flags.set(RealFlag::Overflow);
result.value = result.value.HUGE();
} else {
result.value = negated.value;
}
}
}
return result;
}
constexpr Relation Compare(const Real &y) const {
if (IsNotANumber() || y.IsNotANumber()) { // NaN vs x, x vs NaN
return Relation::Unordered;
} else if (IsInfinite()) {
if (y.IsInfinite()) {
if (IsNegative()) { // -Inf vs +/-Inf
return y.IsNegative() ? Relation::Equal : Relation::Less;
} else { // +Inf vs +/-Inf
return y.IsNegative() ? Relation::Greater : Relation::Equal;
}
} else { // +/-Inf vs finite
return IsNegative() ? Relation::Less : Relation::Greater;
}
} else if (y.IsInfinite()) { // finite vs +/-Inf
return y.IsNegative() ? Relation::Greater : Relation::Less;
} else { // two finite numbers
bool isNegative{IsNegative()};
if (isNegative != y.IsNegative()) {
if (word_.IOR(y.word_).IBCLR(bits - 1).IsZero()) {
return Relation::Equal; // +/-0.0 == -/+0.0
} else {
return isNegative ? Relation::Less : Relation::Greater;
}
} else {
// same sign
Ordering order{CompareUnsigned(Exponent(), y.Exponent())};
if (order == Ordering::Equal) {
order = GetSignificand().CompareUnsigned(y.GetSignificand());
}
if (isNegative) {
order = Reverse(order);
}
return RelationFromOrdering(order);
}
}
}
constexpr ValueWithRealFlags<Real> Add(
const Real &y, Rounding rounding = Rounding::TiesToEven) const {
ValueWithRealFlags<Real> result;
if (IsNotANumber() || y.IsNotANumber()) {
result.value.word_ = NaNWord(); // NaN + x -> NaN
result.flags.set(RealFlag::InvalidArgument);
return result;
}
bool isNegative{IsNegative()};
bool yIsNegative{y.IsNegative()};
if (IsInfinite()) {
if (y.IsInfinite()) {
if (isNegative == yIsNegative) {
result.value = *this; // +/-Inf + +/-Inf -> +/-Inf
} else {
result.value.word_ = NaNWord(); // +/-Inf + -/+Inf -> NaN
result.flags.set(RealFlag::InvalidArgument);
}
} else {
result.value = *this; // +/-Inf + x -> +/-Inf
}
return result;
}
if (y.IsInfinite()) {
result.value = y; // x + +/-Inf -> +/-Inf
return result;
}
std::uint64_t exponent{Exponent()};
std::uint64_t yExponent{y.Exponent()};
if (exponent < yExponent) {
// y is larger in magnitude; simplify by reversing operands
return y.Add(*this, rounding);
}
if (exponent == yExponent && isNegative != yIsNegative) {
Ordering order{GetSignificand().CompareUnsigned(y.GetSignificand())};
if (order == Ordering::Less) {
// Same exponent, opposite signs, and y is larger in magnitude
return y.Add(*this, rounding);
}
if (order == Ordering::Equal) {
// x + (-x) -> +0.0, never -0.0
return {};
}
}
// Our exponent is greater than y's, or the exponents match and y is not
// of the opposite sign and greater magnitude. So (x+y) will have the
// same sign as x.
Fraction yFraction{y.GetFraction()};
Fraction fraction{GetFraction()};
int rshift = exponent - yExponent;
if (exponent > 0 && yExponent == 0) {
--rshift; // correct overshift when only y is denormal
}
RoundingBits roundingBits{yFraction, rshift};
yFraction = yFraction.SHIFTR(rshift);
bool carry{false};
if (isNegative != yIsNegative) {
// Opposite signs: subtract via addition of two's complement of y and
// the rounding bits.
yFraction = yFraction.NOT();
carry = roundingBits.Negate();
}
auto sum = fraction.AddUnsigned(yFraction, carry);
fraction = sum.value;
if (isNegative == yIsNegative && sum.carry) {
roundingBits.ShiftRight(sum.value.BTEST(0));
fraction = fraction.SHIFTR(1).IBSET(fraction.bits - 1);
++exponent;
}
result.flags |=
result.value.Normalize(isNegative, exponent, fraction, &roundingBits);
result.flags |= result.value.Round(rounding, roundingBits);
return result;
}
constexpr ValueWithRealFlags<Real> Subtract(
const Real &y, Rounding rounding = Rounding::TiesToEven) const {
return Add(y.Negate(), rounding);
}
constexpr ValueWithRealFlags<Real> Multiply(
const Real &y, Rounding rounding = Rounding::TiesToEven) const {
ValueWithRealFlags<Real> result;
if (IsNotANumber() || y.IsNotANumber()) {
result.value.word_ = NaNWord(); // NaN * x -> NaN
result.flags.set(RealFlag::InvalidArgument);
} else {
bool isNegative{IsNegative() != y.IsNegative()};
if (IsInfinite() || y.IsInfinite()) {
if (IsZero() || y.IsZero()) {
result.value.word_ = NaNWord(); // 0 * Inf -> NaN
result.flags.set(RealFlag::InvalidArgument);
} else {
result.value.Normalize(isNegative, maxExponent, Fraction{});
}
} else {
auto product = GetFraction().MultiplyUnsigned(y.GetFraction());
std::int64_t exponent = Exponent(), yExponent = y.Exponent();
// A zero exponent field value has the same weight as 1.
exponent += !exponent;
yExponent += !yExponent;
exponent += yExponent;
exponent -= exponentBias;
++exponent;
if (exponent < 1) {
int rshift = 1 - exponent;
exponent = 1;
bool sticky{false};
if (rshift >= product.upper.bits + product.lower.bits) {
sticky = !product.lower.IsZero() || !product.upper.IsZero();
} else if (rshift >= product.lower.bits) {
sticky = !product.lower.IsZero();
} else {
sticky = !product.lower.IAND(product.lower.MASKR(rshift)).IsZero();
}
product.lower = product.lower.DSHIFTR(product.upper, rshift);
product.upper = product.upper.SHIFTR(rshift);
if (sticky) {
product.lower = product.lower.IBSET(0);
}
}
int leadz{product.upper.LEADZ()};
if (leadz >= product.upper.bits) {
leadz += product.lower.LEADZ();
}
int lshift{leadz};
if (lshift > exponent - 1) {
lshift = exponent - 1;
}
exponent -= lshift;
product.upper = product.upper.DSHIFTL(product.lower, lshift);
product.lower = product.lower.SHIFTL(lshift);
RoundingBits roundingBits{product.lower, product.upper.bits};
result.flags |= result.value.Normalize(
isNegative, exponent, product.upper, &roundingBits);
result.flags |= result.value.Round(rounding, roundingBits);
}
}
return result;
}
constexpr ValueWithRealFlags<Real> Divide(
const Real &y, Rounding rounding = Rounding::TiesToEven) const {
ValueWithRealFlags<Real> result;
if (IsNotANumber() || y.IsNotANumber()) {
result.value.word_ = NaNWord(); // NaN / x -> NaN, x / NaN -> NaN
result.flags.set(RealFlag::InvalidArgument);
} else {
bool isNegative{IsNegative() != y.IsNegative()};
if (IsInfinite()) {
if (y.IsInfinite() || y.IsZero()) {
result.value.word_ = NaNWord(); // Inf/Inf -> NaN, Inf/0 -> Nan
result.flags.set(RealFlag::InvalidArgument);
} else {
result.value.Normalize(isNegative, maxExponent, Fraction{});
}
} else if (y.IsInfinite()) {
result.value.word_ = NaNWord(); // x/Inf -> NaN
result.flags.set(RealFlag::InvalidArgument);
} else {
auto qr = GetFraction().DivideUnsigned(y.GetFraction());
if (qr.divisionByZero) {
result.value.Normalize(isNegative, maxExponent, Fraction{});
result.flags.set(RealFlag::DivideByZero);
} else {
// To round, double the remainder and compare it to the divisor.
auto doubled = qr.remainder.AddUnsigned(qr.remainder);
Ordering drcmp{doubled.value.CompareUnsigned(y.GetFraction())};
RoundingBits roundingBits{
drcmp != Ordering::Less, drcmp != Ordering::Equal};
std::uint64_t exponent{Exponent() - y.Exponent() + exponentBias};
result.flags |=
result.value.Normalize(isNegative, exponent, qr.quotient);
result.flags |= result.value.Round(rounding, roundingBits);
}
}
}
return result;
}
private:
using Fraction = Integer<precision>; // all bits made explicit
using Significand = Integer<significandBits>; // no implicit bit
class RoundingBits {
public:
constexpr RoundingBits(
bool guard = false, bool round = false, bool sticky = false)
: guard_{guard}, round_{round}, sticky_{sticky} {}
template<typename FRACTION>
constexpr RoundingBits(const FRACTION &fraction, int rshift) {
if (rshift > 0 && rshift < fraction.bits + 1) {
guard_ = fraction.BTEST(rshift - 1);
}
if (rshift > 1 && rshift < fraction.bits + 2) {
round_ = fraction.BTEST(rshift - 2);
}
if (rshift > 2) {
if (rshift >= fraction.bits + 2) {
sticky_ = !fraction.IsZero();
} else {
auto mask = fraction.MASKR(rshift - 2);
sticky_ = !fraction.IAND(mask).IsZero();
}
}
}
constexpr bool Zero() const { return !(guard_ | round_ | sticky_); }
constexpr bool Negate() {
bool carry{!sticky_};
if (carry) {
carry = !round_;
} else {
round_ = !round_;
}
if (carry) {
carry = !guard_;
} else {
guard_ = !guard_;
}
return carry;
}
constexpr bool ShiftLeft() {
bool oldGuard{guard_};
guard_ = round_;
round_ = sticky_;
return oldGuard;
}
constexpr void ShiftRight(bool newGuard) {
sticky_ |= round_;
round_ = guard_;
guard_ = newGuard;
}
// Determines whether a value should be rounded by increasing its
// fraction, given a rounding mode and a summary of the lost bits.
constexpr bool MustRound(Rounding rounding, const Real &real) const {
bool round{false}; // to dodge bogus g++ warning about missing return
switch (rounding) {
case Rounding::TiesToEven:
round = guard_ && (round_ | sticky_ | real.RawBits().BTEST(0));
break;
case Rounding::ToZero: break;
case Rounding::Down: round = real.IsNegative() && !Zero(); break;
case Rounding::Up: round = !real.IsNegative() && !Zero(); break;
case Rounding::TiesAwayFromZero: round = guard_; break;
}
return round;
}
private:
bool guard_{false};
bool round_{false};
bool sticky_{false};
};
constexpr Significand GetSignificand() const {
return Significand::Convert(word_).value;
}
constexpr Fraction GetFraction() const {
Fraction result{Fraction::Convert(word_).value};
if constexpr (!implicitMSB) {
return result;
} else {
std::uint64_t exponent{Exponent()};
if (exponent > 0 && exponent < maxExponent) {
return result.IBSET(significandBits);
} else {
return result.IBCLR(significandBits);
}
}
}
// TODO: Configurable NaN representations
static constexpr Word NaNWord() {
return Word{maxExponent}
.SHIFTL(significandBits)
.IBSET(significandBits - 1)
.IBSET(significandBits - 2);
}
constexpr RealFlags Normalize(bool negative, std::uint64_t exponent,
const Fraction &fraction, RoundingBits *roundingBits = nullptr) {
if (exponent >= maxExponent) {
word_ = Word{maxExponent}.SHIFTL(significandBits); // Inf
if (negative) {
word_ = word_.IBSET(bits - 1);
}
return {RealFlag::Overflow};
}
if (fraction.BTEST(fraction.bits - 1)) {
// fraction is normalized
word_ = Word::Convert(fraction).value;
if (exponent == 0) {
exponent = 1;
}
} else {
std::uint64_t lshift = fraction.LEADZ();
if (lshift >= fraction.bits) {
// +/-0.0
word_ = Word{};
exponent = 0;
} else {
word_ = Word::Convert(fraction).value;
if (lshift < exponent) {
exponent -= lshift;
} else if (exponent > 0) {
lshift = exponent - 1;
exponent = 0;
} else if (lshift == 0) {
exponent = 1;
} else {
lshift = 0;
}
if (lshift > 0) {
word_ = word_.SHIFTL(lshift);
if (roundingBits != nullptr) {
for (; lshift > 0; --lshift) {
if (roundingBits->ShiftLeft()) {
word_ = word_.IBSET(lshift - 1);
}
}
}
}
}
}
if (implicitMSB) {
word_ = word_.IBCLR(significandBits);
}
word_ = word_.IOR(Word{exponent}.SHIFTL(significandBits));
if (negative) {
word_ = word_.IBSET(bits - 1);
}
return {};
}
// Rounds a result, if necessary.
RealFlags Round(Rounding rounding, const RoundingBits &bits) {
std::uint64_t exponent{Exponent()};
RealFlags flags;
if (!bits.Zero()) {
flags.set(RealFlag::Inexact);
}
if (exponent < maxExponent && bits.MustRound(rounding, *this)) {
typename Fraction::ValueWithCarry sum{
GetFraction().AddUnsigned(Fraction{}, true)};
if (sum.carry) {
// The fraction was all ones before rounding; sum.value is now zero
if (++exponent < maxExponent) {
sum.value = sum.value.IBSET(precision - 1);
} else {
// rounded away to an infinity
flags.set(RealFlag::Overflow);
}
}
flags |= Normalize(IsNegative(), exponent, sum.value);
}
return flags;
}
Word word_{}; // an Integer<>
};
using RealKind2 = Real<Integer<16>, 11>;
extern template class Real<Integer<16>, 11>;
using RealKind4 = Real<Integer<32>, 24>;
extern template class Real<Integer<32>, 24>;
using RealKind8 = Real<Integer<64>, 53>;
extern template class Real<Integer<64>, 53>;
using RealKind10 = Real<Integer<80>, 64, false>; // 80387
extern template class Real<Integer<80>, 64, false>;
using RealKind16 = Real<Integer<128>, 112>;
extern template class Real<Integer<128>, 112>;
// N.B. No "double-double" support.
} // namespace Fortran::evaluate
#endif // FORTRAN_EVALUATE_REAL_H_