Reapply "[APFloat] Fix getExactInverse for DoubleAPFloat"

majnemer · majnemer · commit 0a7eabcc56c6 · 2025-08-20T14:02:36.000-07:00
The previous implementation of getExactInverse used the following check to identify powers of two:

    // Check that the number is a power of two by making sure that only the
    // integer bit is set in the significand.
    if (significandLSB() != semantics->precision - 1)
      return false;

This condition verifies that the only set bit in the significand is the integer bit, which is correct for normal numbers. However, this logic is not correct for subnormal values. APFloat represents subnormal numbers by shifting the significand right while holding the exponent at its minimum value. For a power of two in the subnormal range, its single set bit will therefore be at a position lower than precision - 1. The original check would consequently fail, causing the function to determine that these numbers do not have an exact multiplicative inverse.

The new logic calculated this correctly, but it turned out that test/CodeGen/Thumb2/mve-vcvt-fixed-to-float.ll expected the old behavior. Since getExactInverse does not have tests or documentation, we conservatively maintain (and document) the old behavior of rejecting subnormal inputs.

This reverts commit 47e62e8.
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
@@ -605,10 +605,6 @@ class IEEEFloat final {
                          unsigned FormatMaxPadding = 3,
                          bool TruncateZero = true) const;
 
-  /// If this value has an exact multiplicative inverse, store it in inv and
-  /// return true.
-  LLVM_ABI bool getExactInverse(APFloat *inv) const;
-
   LLVM_ABI LLVM_READONLY int getExactLog2Abs() const;
 
   LLVM_ABI friend int ilogb(const IEEEFloat &Arg);
@@ -886,8 +882,6 @@ class DoubleAPFloat final {
                          unsigned FormatMaxPadding,
                          bool TruncateZero = true) const;
 
-  LLVM_ABI bool getExactInverse(APFloat *inv) const;
-
   LLVM_ABI LLVM_READONLY int getExactLog2Abs() const;
 
   LLVM_ABI friend int ilogb(const DoubleAPFloat &X);
@@ -1500,9 +1494,9 @@ class APFloat : public APFloatBase {
   LLVM_DUMP_METHOD void dump() const;
 #endif
 
-  bool getExactInverse(APFloat *inv) const {
-    APFLOAT_DISPATCH_ON_SEMANTICS(getExactInverse(inv));
-  }
+  /// If this value is normal and has an exact, normal, multiplicative inverse,
+  /// store it in Inv (when non-null) and return true; otherwise return false.
+  bool getExactInverse(APFloat *Inv) const;
 
   // If this is an exact power of two, return the exponent while ignoring the
   // sign bit. If it's not an exact power of 2, return INT_MIN
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
@@ -4575,35 +4575,6 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
 
 }
 
-bool IEEEFloat::getExactInverse(APFloat *inv) const {
-  // Special floats and denormals have no exact inverse.
-  if (!isFiniteNonZero())
-    return false;
-
-  // Check that the number is a power of two by making sure that only the
-  // integer bit is set in the significand.
-  if (significandLSB() != semantics->precision - 1)
-    return false;
-
-  // Get the inverse.
-  IEEEFloat reciprocal(*semantics, 1ULL);
-  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
-    return false;
-
-  // Avoid multiplication with a denormal, it is not safe on all platforms and
-  // may be slower than a normal division.
-  if (reciprocal.isDenormal())
-    return false;
-
-  assert(reciprocal.isFiniteNonZero() &&
-         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
-
-  if (inv)
-    *inv = APFloat(reciprocal, *semantics);
-
-  return true;
-}
-
 int IEEEFloat::getExactLog2Abs() const {
   if (!isFinite() || isZero())
     return INT_MIN;
@@ -5731,17 +5702,6 @@ void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
 }
 
-bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
-  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
-  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
-  if (!inv)
-    return Tmp.getExactInverse(nullptr);
-  APFloat Inv(semPPCDoubleDoubleLegacy);
-  auto Ret = Tmp.getExactInverse(&Inv);
-  *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
-  return Ret;
-}
-
 int DoubleAPFloat::getExactLog2Abs() const {
   // In order for Hi + Lo to be a power of two, the following must be true:
   // 1. Hi must be a power of two.
@@ -5926,6 +5886,64 @@ FPClassTest APFloat::classify() const {
   return isSignaling() ? fcSNan : fcQNan;
 }
 
+bool APFloat::getExactInverse(APFloat *Inv) const {
+  // Only finite, non-zero numbers can have a useful, representable inverse.
+  // This check filters out +/- zero, +/- infinity, and NaN.
+  if (!isFiniteNonZero())
+    return false;
+
+  // Historically, this function rejects subnormal inputs. One reason this
+  // might matter is that subnormals may behave differently at runtime when
+  // FTZ/DAZ (flush-to-zero / denormals-are-zero) modes are enabled.
+  if (isDenormal())
+    return false;
+
+  // A number can have an exact, representable inverse only if it is a power
+  // of two (the range checks further down cover the converse).
+  //
+  // Mathematical Rationale:
+  // 1. A binary floating-point number x is a dyadic rational, meaning it can
+  //    be written as x = M / 2^k for integers M (the significand) and k.
+  // 2. The inverse is 1/x = 2^k / M.
+  // 3. For 1/x to also be a dyadic rational (and thus exactly representable
+  //    in binary), its denominator M must also be a power of two.
+  //    Let's say M = 2^m.
+  // 4. Substituting this back into the formula for x, we get
+  //    x = (2^m) / (2^k) = 2^(m-k).
+  //
+  // This proves that x must be a power of two.
+
+  // getExactLog2Abs() returns the integer exponent if the number is a power of
+  // two (ignoring the sign bit) or INT_MIN if it is not.
+  const int Exp = getExactLog2Abs();
+  if (Exp == INT_MIN)
+    return false;
+
+  // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
+  // scaling a same-signed 1.0 by the negated exponent.
+  APFloat Reciprocal =
+      scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
+             rmTowardZero);
+
+  // scalbn might round if the resulting exponent -Exp is outside the
+  // representable range (overflow or underflow). We must verify that the
+  // result is still the exact power of two we expect.
+  if (Reciprocal.getExactLog2Abs() != -Exp)
+    return false;
+
+  // Avoid multiplication with a subnormal; it is not safe on all platforms and
+  // may be slower than a normal division.
+  if (Reciprocal.isDenormal())
+    return false;
+
+  assert(Reciprocal.isFiniteNonZero());
+  // Inv is optional: callers may pass nullptr to only query invertibility.
+  if (Inv)
+    *Inv = std::move(Reciprocal);
+
+  return true;
+}
+
 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
                                    roundingMode RM, bool *losesInfo) {
   if (&getSemantics() == &ToSemantics) {
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
@@ -1918,6 +1918,15 @@ TEST(APFloatTest, exactInverse) {
   EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble(), "0.5")));
   EXPECT_TRUE(APFloat(APFloat::x87DoubleExtended(), "2.0").getExactInverse(&inv));
   EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::x87DoubleExtended(), "0.5")));
+  // 0x1p1022 has a normal inverse for IEEE 754 binary64: 0x1p-1022.
+  EXPECT_TRUE(APFloat(0x1p1022).getExactInverse(&inv));
+  EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0x1p-1022)));
+  // With regard to getExactInverse, IEEEdouble and PPCDoubleDouble should
+  // behave the same.
+  EXPECT_TRUE(
+      APFloat(APFloat::PPCDoubleDouble(), "0x1p1022").getExactInverse(&inv));
+  EXPECT_TRUE(
+      inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble(), "0x1p-1022")));
 
   // FLT_MIN
   EXPECT_TRUE(APFloat(1.17549435e-38f).getExactInverse(&inv));
@@ -1929,6 +1938,8 @@ TEST(APFloatTest, exactInverse) {
   EXPECT_FALSE(APFloat(0.0).getExactInverse(nullptr));
   // Denormalized float
   EXPECT_FALSE(APFloat(1.40129846e-45f).getExactInverse(nullptr));
+  // Largest subnormal power of two
+  EXPECT_FALSE(APFloat(0x1p-127f).getExactInverse(nullptr));
 }
 
 TEST(APFloatTest, roundToIntegral) {
@@ -6661,13 +6672,12 @@ TEST_P(PPCDoubleDoubleFrexpValueTest, PPCDoubleDoubleFrexp) {
 
     int ActualExponent;
     const APFloat ActualFraction = frexp(Input, ActualExponent, RM);
-    if (ExpectedFraction.isNaN()) {
+    if (ExpectedFraction.isNaN())
       EXPECT_TRUE(ActualFraction.isNaN());
-    } else {
+    else
       EXPECT_EQ(ActualFraction.compare(ExpectedFraction), APFloat::cmpEqual)
           << ActualFraction << " vs " << ExpectedFraction << " for input "
           << Params.Input.Hi << " + " << Params.Input.Lo << " RM " << RM;
-    }
     EXPECT_EQ(ActualExponent, Expected.Exponent)
         << "for input " << Params.Input.Hi << " + " << Params.Input.Lo
         << " RM " << RM;