GH-48123 [C++][Float16] Reimplement arrow::WithinUlp and Enable it for float16 (#48224)

andishgar · pitrou · web-flow · commit f40d62e4ef3c · 2025-12-08T14:27:40.000+01:00
### Rationale for this change Refer to [this comment](#48123 (comment)). Additionally, this change enables `arrow::WithinUlp` for `float16`. ### What changes are included in this PR? Re-implement `arrow::WithinUlp` and enable it for `float16`, including relevant tests for corner cases around powers of two and `Float16`. ### Are these changes tested? Yes, I ran the relevant unit tests. ### Are there any user-facing changes? No. * GitHub Issue: #48123 Lead-authored-by: arash andishgar <arashandishgar1@gmail.com> Co-authored-by: Antoine Pitrou <antoine@python.org> Signed-off-by: Antoine Pitrou <antoine@python.org>
diff --git a/cpp/src/arrow/testing/gtest_util_test.cc b/cpp/src/arrow/testing/gtest_util_test.cc
@@ -16,6 +16,9 @@
 // under the License.
 
 #include <cmath>
+#include <memory>
+#include <type_traits>
+#include <vector>
 
 #include <gtest/gtest-spi.h>
 #include <gtest/gtest.h>
@@ -32,8 +35,9 @@
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/float16.h"
 
-namespace arrow {
+namespace arrow::util {
 
 // Test basic cases for contains NaN.
 class TestAssertContainsNaN : public ::testing::Test {};
@@ -198,8 +202,15 @@ void CheckWithinUlp(Float x, Float y, int n_ulp) {
   CheckWithinUlpSingle(-y, -x, n_ulp);
 
   for (int exp : {1, -1, 10, -10}) {
-    Float x_scaled = std::ldexp(x, exp);
-    Float y_scaled = std::ldexp(y, exp);
+    Float x_scaled(0);
+    Float y_scaled(0);
+    if constexpr (std::is_same_v<Float, Float16>) {
+      x_scaled = Float16(std::ldexp(x.ToFloat(), exp));
+      y_scaled = Float16(std::ldexp(y.ToFloat(), exp));
+    } else {
+      x_scaled = std::ldexp(x, exp);
+      y_scaled = std::ldexp(y, exp);
+    }
     CheckWithinUlpSingle(x_scaled, y_scaled, n_ulp);
     CheckWithinUlpSingle(y_scaled, x_scaled, n_ulp);
   }
@@ -219,8 +230,15 @@ void CheckNotWithinUlp(Float x, Float y, int n_ulp) {
   }
 
   for (int exp : {1, -1, 10, -10}) {
-    Float x_scaled = std::ldexp(x, exp);
-    Float y_scaled = std::ldexp(y, exp);
+    Float x_scaled(0);
+    Float y_scaled(0);
+    if constexpr (std::is_same_v<Float, Float16>) {
+      x_scaled = Float16(std::ldexp(x.ToFloat(), exp));
+      y_scaled = Float16(std::ldexp(y.ToFloat(), exp));
+    } else {
+      x_scaled = std::ldexp(x, exp);
+      y_scaled = std::ldexp(y, exp);
+    }
     CheckNotWithinUlpSingle(x_scaled, y_scaled, n_ulp);
     CheckNotWithinUlpSingle(y_scaled, x_scaled, n_ulp);
   }
@@ -242,6 +260,10 @@ TEST(TestWithinUlp, Double) {
   CheckWithinUlp(1.0, 0.9999999999999999, 1);
   CheckWithinUlp(1.0, 0.9999999999999988, 11);
   CheckNotWithinUlp(1.0, 0.9999999999999988, 10);
+  CheckWithinUlp(1.0000000000000002, 0.9999999999999999, 2);
+  CheckNotWithinUlp(1.0000000000000002, 0.9999999999999999, 1);
+  CheckWithinUlp(0.9999999999999988, 1.0000000000000007, 14);
+  CheckNotWithinUlp(0.9999999999999988, 1.0000000000000007, 13);
 
   CheckWithinUlp(123.4567, 123.45670000000015, 11);
   CheckNotWithinUlp(123.4567, 123.45670000000015, 10);
@@ -271,6 +293,10 @@ TEST(TestWithinUlp, Float) {
   CheckWithinUlp(1.0f, 0.99999994f, 1);
   CheckWithinUlp(1.0f, 0.99999934f, 11);
   CheckNotWithinUlp(1.0f, 0.99999934f, 10);
+  CheckWithinUlp(1.0000001f, 0.99999994f, 2);
+  CheckNotWithinUlp(1.0000001f, 0.99999994f, 1);
+  CheckWithinUlp(1.0000013f, 0.99999934f, 22);
+  CheckNotWithinUlp(1.0000013f, 0.99999934f, 21);
 
   CheckWithinUlp(123.456f, 123.456085f, 11);
   CheckNotWithinUlp(123.456f, 123.456085f, 10);
@@ -284,15 +310,65 @@ TEST(TestWithinUlp, Float) {
   CheckNotWithinUlp(12.34f, -12.34f, 10);
 }
 
+std::vector<Float16> ConvertToFloat16Vector(const std::vector<float>& float_values) {
+  std::vector<Float16> float16_vector;
+  float16_vector.reserve(float_values.size());
+  for (auto& value : float_values) {
+    float16_vector.emplace_back(value);
+  }
+  return float16_vector;
+}
+
+TEST(TestWithinUlp, Float16) {
+  for (Float16 f : ConvertToFloat16Vector({0.0f, 1e-8f, 1.0f, 123.456f})) {
+    CheckWithinUlp(f, f, 0);
+    CheckWithinUlp(f, f, 1);
+    CheckWithinUlp(f, f, 42);
+  }
+  CheckWithinUlp(Float16(-0.0f), Float16(0.0f), 1);
+  CheckWithinUlp(Float16(1.0f), Float16(1.00097656f), 1);
+  CheckWithinUlp(Float16(1.0f), Float16(1.01074219f), 11);
+  CheckNotWithinUlp(Float16(1.0f), Float16(1.00097656f), 0);
+  CheckNotWithinUlp(Float16(1.0f), Float16(1.01074219f), 10);
+  // left and right have a different exponent but are still very close
+  CheckWithinUlp(Float16(1.0f), Float16(0.999511719f), 1);
+  CheckWithinUlp(Float16(1.0f), Float16(0.994628906f), 11);
+  CheckNotWithinUlp(Float16(1.0f), Float16(0.994628906f), 10);
+  CheckWithinUlp(Float16(1.00097656), Float16(0.999511719f), 2);
+  CheckNotWithinUlp(Float16(1.00097656), Float16(0.999511719f), 1);
+  CheckWithinUlp(Float16(1.01074219f), Float16(0.994628906f), 22);
+  CheckNotWithinUlp(Float16(1.01074219f), Float16(0.994628906f), 21);
+
+  CheckWithinUlp(Float16(123.456f), Float16(124.143501f), 11);
+  // The assertion below does not work because ldexp(Float16(124.143501f), 10)
+  // results in inf in Float16.
+  // CheckNotWithinUlp(Float16(123.456f), Float16(124.143501f), 10);
+
+  CheckWithinUlp(std::numeric_limits<Float16>::infinity(),
+                 std::numeric_limits<Float16>::infinity(), 10);
+  CheckWithinUlp(-std::numeric_limits<Float16>::infinity(),
+                 -std::numeric_limits<Float16>::infinity(), 10);
+  CheckWithinUlp(std::numeric_limits<Float16>::quiet_NaN(),
+                 std::numeric_limits<Float16>::quiet_NaN(), 10);
+  CheckNotWithinUlp(std::numeric_limits<Float16>::infinity(),
+                    -std::numeric_limits<Float16>::infinity(), 10);
+  CheckNotWithinUlp(Float16(12.34f), -std::numeric_limits<Float16>::infinity(), 10);
+  CheckNotWithinUlp(Float16(12.34f), std::numeric_limits<Float16>::quiet_NaN(), 10);
+  CheckNotWithinUlp(Float16(12.34f), Float16(-12.34f), 10);
+}
+
 TEST(AssertTestWithinUlp, Basics) {
   AssertWithinUlp(123.4567, 123.45670000000015, 11);
   AssertWithinUlp(123.456f, 123.456085f, 11);
+  AssertWithinUlp(Float16(123.456f), Float16(124.143501f), 11);
 #ifndef _WIN32
   // GH-47442
   EXPECT_FATAL_FAILURE(AssertWithinUlp(123.4567, 123.45670000000015, 10),
                        "not within 10 ulps");
   EXPECT_FATAL_FAILURE(AssertWithinUlp(123.456f, 123.456085f, 10), "not within 10 ulps");
+  EXPECT_FATAL_FAILURE(AssertWithinUlp(Float16(123.456f), Float16(124.143501f), 10),
+                       "not within 10 ulps");
 #endif
 }
 
-}  // namespace arrow
+}  // namespace arrow::util
diff --git a/cpp/src/arrow/testing/math.cc b/cpp/src/arrow/testing/math.cc
@@ -17,53 +17,94 @@
 
 #include "arrow/testing/math.h"
 
+#include <algorithm>
 #include <cmath>
 #include <limits>
+#include <type_traits>
 
 #include <gtest/gtest.h>
 
+#include "arrow/util/float16.h"
 #include "arrow/util/logging_internal.h"
+#include "arrow/util/ubsan.h"
 
 namespace arrow {
 namespace {
 
 template <typename Float>
-bool WithinUlpOneWay(Float left, Float right, int n_ulps) {
-  // The delta between 1.0 and the FP value immediately before it.
-  // We're using this value because `frexp` returns a mantissa between 0.5 and 1.0.
-  static const Float kOneUlp = Float(1.0) - std::nextafter(Float(1.0), Float(0.0));
+struct FloatToUInt;
 
-  DCHECK_GE(n_ulps, 1);
+template <>
+struct FloatToUInt<double> {
+  using Type = uint64_t;
+};
 
-  if (left == 0) {
-    return left == right;
-  }
-  if (left < 0) {
-    left = -left;
-    right = -right;
+template <>
+struct FloatToUInt<float> {
+  using Type = uint32_t;
+};
+
+template <>
+struct FloatToUInt<util::Float16> {
+  using Type = uint16_t;
+};
+
+template <typename Float>
+struct UlpDistanceUtil {
+ public:
+  using UIntType = typename FloatToUInt<Float>::Type;
+  static constexpr UIntType kNumberOfBits = sizeof(Float) * 8;
+  static constexpr UIntType kSignMask = static_cast<UIntType>(1) << (kNumberOfBits - 1);
+
+  // This implementation is inspired by:
+  // https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+  static UIntType UlpDistance(Float left, Float right) {
+    auto unsigned_left = util::SafeCopy<UIntType>(left);
+    auto unsigned_right = util::SafeCopy<UIntType>(right);
+    auto biased_left = ConvertSignAndMagnitudeToBiased(unsigned_left);
+    auto biased_right = ConvertSignAndMagnitudeToBiased(unsigned_right);
+    if (biased_left > biased_right) {
+      std::swap(biased_left, biased_right);
+    }
+    return biased_right - biased_left;
   }
 
-  int left_exp;
-  Float left_mant = std::frexp(left, &left_exp);
-  Float delta = static_cast<Float>(n_ulps) * kOneUlp;
-  Float lower_bound = std::ldexp(left_mant - delta, left_exp);
-  Float upper_bound = std::ldexp(left_mant + delta, left_exp);
-  return right >= lower_bound && right <= upper_bound;
-}
+ private:
+  // Source reference (GoogleTest):
+  // https://github.com/google/googletest/blob/1b96fa13f549387b7549cc89e1a785cf143a1a50/googletest/include/gtest/internal/gtest-internal.h#L345-L368
+  static UIntType ConvertSignAndMagnitudeToBiased(UIntType value) {
+    if (value & kSignMask) {
+      return ~value + 1;
+    } else {
+      return value | kSignMask;
+    }
+  }
+};
 
 template <typename Float>
 bool WithinUlpGeneric(Float left, Float right, int n_ulps) {
-  if (std::isnan(left) || std::isnan(right)) {
-    return std::isnan(left) == std::isnan(right);
-  }
-  if (!std::isfinite(left) || !std::isfinite(right)) {
-    return left == right;
+  if constexpr (std::is_same_v<Float, util::Float16>) {
+    if (left.is_nan() || right.is_nan()) {
+      return left.is_nan() == right.is_nan();
+    } else if (left.is_infinity() || right.is_infinity()) {
+      return left == right;
+    }
+  } else {
+    if (std::isnan(left) || std::isnan(right)) {
+      return std::isnan(left) == std::isnan(right);
+    }
+    if (!std::isfinite(left) || !std::isfinite(right)) {
+      return left == right;
+    }
   }
+
   if (n_ulps == 0) {
     return left == right;
   }
-  return (std::abs(left) <= std::abs(right)) ? WithinUlpOneWay(left, right, n_ulps)
-                                             : WithinUlpOneWay(right, left, n_ulps);
+
+  DCHECK_GE(n_ulps, 1);
+  return UlpDistanceUtil<Float>::UlpDistance(left, right) <=
+         static_cast<uint64_t>(n_ulps);
 }
 
 template <typename Float>
@@ -75,6 +116,10 @@ void AssertWithinUlpGeneric(Float left, Float right, int n_ulps) {
 
 }  // namespace
 
+bool WithinUlp(util::Float16 left, util::Float16 right, int n_ulps) {
+  return WithinUlpGeneric(left, right, n_ulps);
+}
+
 bool WithinUlp(float left, float right, int n_ulps) {
   return WithinUlpGeneric(left, right, n_ulps);
 }
@@ -83,6 +128,10 @@ bool WithinUlp(double left, double right, int n_ulps) {
   return WithinUlpGeneric(left, right, n_ulps);
 }
 
+void AssertWithinUlp(util::Float16 left, util::Float16 right, int n_ulps) {
+  AssertWithinUlpGeneric(left, right, n_ulps);
+}
+
 void AssertWithinUlp(float left, float right, int n_ulps) {
   AssertWithinUlpGeneric(left, right, n_ulps);
 }
diff --git a/cpp/src/arrow/testing/math.h b/cpp/src/arrow/testing/math.h
@@ -18,14 +18,19 @@
 #pragma once
 
 #include "arrow/testing/visibility.h"
+#include "arrow/type_fwd.h"
 
 namespace arrow {
 
+ARROW_TESTING_EXPORT
+bool WithinUlp(util::Float16 left, util::Float16 right, int n_ulps);
 ARROW_TESTING_EXPORT
 bool WithinUlp(float left, float right, int n_ulps);
 ARROW_TESTING_EXPORT
 bool WithinUlp(double left, double right, int n_ulps);
 
+ARROW_TESTING_EXPORT
+void AssertWithinUlp(util::Float16 left, util::Float16 right, int n_ulps);
 ARROW_TESTING_EXPORT
 void AssertWithinUlp(float left, float right, int n_ulps);
 ARROW_TESTING_EXPORT