Skip to content

Commit f494131

Browse files
committed
[APFloat] Fix getExactInverse for DoubleAPFloat
Some background: callers of getExactInverse() expect the result not to be subnormal. DoubleAPFloat implemented getExactInverse() by going through semPPCDoubleDoubleLegacy. As a result, numbers like 0x1p1022, which have a normal inverse in semPPCDoubleDouble, were treated as having no normal inverse because they do not have one in semPPCDoubleDoubleLegacy. This commit refactors the logic into a single method on APFloat that uses getExactLog2Abs() and scalbn() to compute the inverse directly, without having to perform a division and test whether the result is inexact. This approach works for both IEEEFloat and DoubleAPFloat.
1 parent 4db8b64 commit f494131

File tree

3 files changed

+66
-52
lines changed

3 files changed

+66
-52
lines changed

llvm/include/llvm/ADT/APFloat.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -605,10 +605,6 @@ class IEEEFloat final {
605605
unsigned FormatMaxPadding = 3,
606606
bool TruncateZero = true) const;
607607

608-
/// If this value has an exact multiplicative inverse, store it in inv and
609-
/// return true.
610-
LLVM_ABI bool getExactInverse(APFloat *inv) const;
611-
612608
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const;
613609

614610
LLVM_ABI friend int ilogb(const IEEEFloat &Arg);
@@ -886,8 +882,6 @@ class DoubleAPFloat final {
886882
unsigned FormatMaxPadding,
887883
bool TruncateZero = true) const;
888884

889-
LLVM_ABI bool getExactInverse(APFloat *inv) const;
890-
891885
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const;
892886

893887
LLVM_ABI friend int ilogb(const DoubleAPFloat &X);
@@ -1500,9 +1494,9 @@ class APFloat : public APFloatBase {
15001494
LLVM_DUMP_METHOD void dump() const;
15011495
#endif
15021496

1503-
bool getExactInverse(APFloat *inv) const {
1504-
APFLOAT_DISPATCH_ON_SEMANTICS(getExactInverse(inv));
1505-
}
1497+
/// If this value has an exact, normal, multiplicative inverse, store it in
1498+
/// Inv (if non-null) and return true; otherwise return false.
1499+
bool getExactInverse(APFloat *Inv) const;
15061500

15071501
// If this is an exact power of two, return the exponent while ignoring the
15081502
// sign bit. If it's not an exact power of 2, return INT_MIN

llvm/lib/Support/APFloat.cpp

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4575,35 +4575,6 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
45754575

45764576
}
45774577

4578-
bool IEEEFloat::getExactInverse(APFloat *inv) const {
4579-
// Special floats and denormals have no exact inverse.
4580-
if (!isFiniteNonZero())
4581-
return false;
4582-
4583-
// Check that the number is a power of two by making sure that only the
4584-
// integer bit is set in the significand.
4585-
if (significandLSB() != semantics->precision - 1)
4586-
return false;
4587-
4588-
// Get the inverse.
4589-
IEEEFloat reciprocal(*semantics, 1ULL);
4590-
if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4591-
return false;
4592-
4593-
// Avoid multiplication with a denormal, it is not safe on all platforms and
4594-
// may be slower than a normal division.
4595-
if (reciprocal.isDenormal())
4596-
return false;
4597-
4598-
assert(reciprocal.isFiniteNonZero() &&
4599-
reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4600-
4601-
if (inv)
4602-
*inv = APFloat(reciprocal, *semantics);
4603-
4604-
return true;
4605-
}
4606-
46074578
int IEEEFloat::getExactLog2Abs() const {
46084579
if (!isFinite() || isZero())
46094580
return INT_MIN;
@@ -5731,17 +5702,6 @@ void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
57315702
.toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
57325703
}
57335704

5734-
bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5735-
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5736-
APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5737-
if (!inv)
5738-
return Tmp.getExactInverse(nullptr);
5739-
APFloat Inv(semPPCDoubleDoubleLegacy);
5740-
auto Ret = Tmp.getExactInverse(&Inv);
5741-
*inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5742-
return Ret;
5743-
}
5744-
57455705
int DoubleAPFloat::getExactLog2Abs() const {
57465706
// In order for Hi + Lo to be a power of two, the following must be true:
57475707
// 1. Hi must be a power of two.
@@ -5926,6 +5886,58 @@ FPClassTest APFloat::classify() const {
59265886
return isSignaling() ? fcSNan : fcQNan;
59275887
}
59285888

5889+
bool APFloat::getExactInverse(APFloat *Inv) const {
5890+
// Only finite, non-zero numbers can have a useful, representable inverse.
5891+
// This check filters out +/- zero, +/- infinity, and NaN.
5892+
if (!isFiniteNonZero())
5893+
return false;
5894+
5895+
// A number can have an exact, representable inverse only if it is a power
5896+
// of two (necessary, but not sufficient: the range is checked below).
5897+
//
5898+
// Mathematical Rationale:
5899+
// 1. A binary floating-point number x is a dyadic rational, meaning it can
5900+
// be written as x = M / 2^k for integers M (the significand) and k.
5901+
// 2. The inverse is 1/x = 2^k / M.
5902+
// 3. For 1/x to also be a dyadic rational (and thus exactly representable
5903+
// in binary), its denominator M must also be a power of two.
5904+
// Let's say M = 2^m.
5905+
// 4. Substituting this back into the formula for x, we get
5906+
// x = (2^m) / (2^k) = 2^(m-k).
5907+
//
5908+
// This proves that x must be a power of two.
5909+
5910+
// getExactLog2Abs() returns the integer exponent if the number is a power of
5911+
// two or INT_MIN if it is not.
5912+
const int Exp = getExactLog2Abs();
5913+
if (Exp == INT_MIN)
5914+
return false;
5915+
5916+
// The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5917+
// scaling 1.0 by the negated exponent.
5918+
APFloat Reciprocal =
5919+
scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
5920+
rmTowardZero);
5921+
5922+
// scalbn might round if the resulting exponent -Exp is outside the
5923+
// representable range, causing overflow (to infinity) or underflow. We
5924+
// must verify that the result is still the exact power of two we expect.
5925+
if (Reciprocal.getExactLog2Abs() != -Exp)
5926+
return false;
5927+
5928+
// Avoid multiplication with a subnormal; it is not safe on all platforms and
5929+
// may be slower than a normal division.
5930+
if (Reciprocal.isDenormal())
5931+
return false;
5932+
5933+
assert(Reciprocal.isFiniteNonZero());
5934+
5935+
if (Inv)
5936+
*Inv = std::move(Reciprocal);
5937+
5938+
return true;
5939+
}
5940+
59295941
APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
59305942
roundingMode RM, bool *losesInfo) {
59315943
if (&getSemantics() == &ToSemantics) {

llvm/unittests/ADT/APFloatTest.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1918,6 +1918,15 @@ TEST(APFloatTest, exactInverse) {
19181918
EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble(), "0.5")));
19191919
EXPECT_TRUE(APFloat(APFloat::x87DoubleExtended(), "2.0").getExactInverse(&inv));
19201920
EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::x87DoubleExtended(), "0.5")));
1921+
// 0x1p1022 has a normal inverse for IEEE 754 binary64: 0x1p-1022.
1922+
EXPECT_TRUE(APFloat(0x1p1022).getExactInverse(&inv));
1923+
EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0x1p-1022)));
1924+
// With regard to getExactInverse, IEEEdouble and PPCDoubleDouble should
1925+
// behave the same.
1926+
EXPECT_TRUE(
1927+
APFloat(APFloat::PPCDoubleDouble(), "0x1p1022").getExactInverse(&inv));
1928+
EXPECT_TRUE(
1929+
inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble(), "0x1p-1022")));
19211930

19221931
// FLT_MIN
19231932
EXPECT_TRUE(APFloat(1.17549435e-38f).getExactInverse(&inv));
@@ -6661,13 +6670,12 @@ TEST_P(PPCDoubleDoubleFrexpValueTest, PPCDoubleDoubleFrexp) {
66616670

66626671
int ActualExponent;
66636672
const APFloat ActualFraction = frexp(Input, ActualExponent, RM);
6664-
if (ExpectedFraction.isNaN()) {
6673+
if (ExpectedFraction.isNaN())
66656674
EXPECT_TRUE(ActualFraction.isNaN());
6666-
} else {
6675+
else
66676676
EXPECT_EQ(ActualFraction.compare(ExpectedFraction), APFloat::cmpEqual)
66686677
<< ActualFraction << " vs " << ExpectedFraction << " for input "
66696678
<< Params.Input.Hi << " + " << Params.Input.Lo << " RM " << RM;
6670-
}
66716679
EXPECT_EQ(ActualExponent, Expected.Exponent)
66726680
<< "for input " << Params.Input.Hi << " + " << Params.Input.Lo
66736681
<< " RM " << RM;

0 commit comments

Comments
 (0)