From c102dcdb83ea6ca9cacb7610023a77f7b54a25e6 Mon Sep 17 00:00:00 2001
From: Nitish Naineni <naineninitish@gmail.com>
Date: Sun, 22 Mar 2026 00:36:56 -0500
Subject: [PATCH 1/8] unified float comparison helpers with a single isclose
 function

Replace six different float comparison implementations (fptest_close, is_about, almost_equal, CompareResults, WithinRel, REQUIRE_APPROX_EQ macros) with one canonical isclose function using the symmetric formula from PEP 485. Closes #7662
---
 c2h/include/c2h/catch2_test_helper.h          | 35 ++++++++++-----
 c2h/include/c2h/check_results.cuh             | 26 ++++++-----
 c2h/include/c2h/isclose.h                     | 45 +++++++++++++++++++
 cub/test/catch2_test_device_segmented_scan.cu | 25 +++--------
 cub/test/test_util.h                          | 39 +++++++---------
 .../catch2_test_thread_reduce.cu              | 16 +++----
 .../catch2_test_thread_reduce_check_sass.cu   | 13 ++----
 thrust/testing/catch2_test_complex.cu         | 13 ++----
 8 files changed, 117 insertions(+), 95 deletions(-)
 create mode 100644 c2h/include/c2h/isclose.h
diff --git a/c2h/include/c2h/catch2_test_helper.h b/c2h/include/c2h/catch2_test_helper.h
index c0637fd4e6a..7529a167e3f 100644
--- a/c2h/include/c2h/catch2_test_helper.h
+++ b/c2h/include/c2h/catch2_test_helper.h
@@ -22,6 +22,7 @@
 #include <c2h/checked_allocator.cuh>
 #include <c2h/device_policy.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 #include <c2h/test_util_vec.h>
 #include <c2h/utility.h>
 #include <c2h/vector.h>
@@ -214,25 +215,37 @@ std::vector<T> to_vec(std::vector<T> const& vec)
 }
 } // namespace detail
 
-#define REQUIRE_APPROX_EQ(ref, out)                          \
-  {                                                          \
-    auto vec_ref = detail::to_vec(ref);                      \
-    auto vec_out = detail::to_vec(out);                      \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out)); \
+#define REQUIRE_APPROX_EQ(ref, out)                                      \
+  {                                                                      \
+    auto vec_ref = detail::to_vec(ref);                                  \
+    auto vec_out = detail::to_vec(out);                                  \
+    for (size_t i = 0; i < vec_ref.size(); i++)                          \
+    {                                                                    \
+      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
+      REQUIRE(isclose(vec_ref[i], vec_out[i]));                          \
+    }                                                                    \
   }
 
-#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps)                          \
-  {                                                                       \
-    auto vec_ref = detail::to_vec(ref);                                   \
-    auto vec_out = detail::to_vec(out);                                   \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).epsilon(eps)); \
+#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps)                         \
+  {                                                                      \
+    auto vec_ref = detail::to_vec(ref);                                  \
+    auto vec_out = detail::to_vec(out);                                  \
+    for (size_t i = 0; i < vec_ref.size(); i++)                          \
+    {                                                                    \
+      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
+      REQUIRE(isclose(vec_ref[i], vec_out[i], eps));                     \
+    }                                                                    \
   }
 
 #define REQUIRE_APPROX_EQ_ABS(ref, out, abs)                             \
   {                                                                      \
     auto vec_ref = detail::to_vec(ref);                                  \
     auto vec_out = detail::to_vec(out);                                  \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).margin(abs)); \
+    for (size_t i = 0; i < vec_ref.size(); i++)                          \
+    {                                                                    \
+      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
+      REQUIRE(isclose(vec_ref[i], vec_out[i], 0 * vec_ref[i], abs));     \
+    }                                                                    \
   }
 
 namespace c2h::detail
diff --git a/c2h/include/c2h/check_results.cuh b/c2h/include/c2h/check_results.cuh
index 6176b8231c5..00cf98f15d9 100644
--- a/c2h/include/c2h/check_results.cuh
+++ b/c2h/include/c2h/check_results.cuh
@@ -11,7 +11,7 @@
 
 #include <test_util.h>
 
-#include <catch2/matchers/catch_matchers_floating_point.hpp>
+#include <c2h/isclose.h>
 
 template <typename T>
 void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_vector<T>& test_results)
@@ -42,8 +42,9 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
   {
     for (size_t i = 0; i < test_results.size(); ++i)
     {
-      REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, 0.01f));
-      REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, 0.01f));
+      INFO("index " << i);
+      REQUIRE(isclose(expected_data[i].x, test_results[i].x, 0.01f));
+      REQUIRE(isclose(expected_data[i].y, test_results[i].y, 0.01f));
     }
   }
   else if constexpr (cuda::std::is_same_v<T, __nv_bfloat162> || cuda::std::is_same_v<T, __half2>)
@@ -51,8 +52,9 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
     constexpr auto rel_err = cuda::std::is_same_v<T, __half2> ? 0.08f : 0.2f;
     for (size_t i = 0; i < test_results.size(); ++i)
     {
-      REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, rel_err));
-      REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, rel_err));
+      INFO("index " << i);
+      REQUIRE(isclose(static_cast<float>(expected_data[i].x), static_cast<float>(test_results[i].x), rel_err));
+      REQUIRE(isclose(static_cast<float>(expected_data[i].y), static_cast<float>(test_results[i].y), rel_err));
     }
   }
   else if constexpr (cuda::std::is_same_v<T, cuda::std::complex<__nv_bfloat16>>
@@ -62,11 +64,12 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
     for (size_t i = 0; i < test_results.size(); ++i)
     {
       auto expected_real = static_cast<float>(expected_data[i].real());
-      auto test_real     = test_results[i].real();
+      auto test_real     = static_cast<float>(test_results[i].real());
       auto expected_imag = static_cast<float>(expected_data[i].imag());
-      auto test_imag     = test_results[i].imag();
-      REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real, rel_err));
-      REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag, rel_err));
+      auto test_imag     = static_cast<float>(test_results[i].imag());
+      INFO("index " << i);
+      REQUIRE(isclose(expected_real, test_real, rel_err));
+      REQUIRE(isclose(expected_imag, test_imag, rel_err));
     }
   }
   else if constexpr (cuda::std::__is_cuda_std_complex_v<T>)
@@ -77,8 +80,9 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
       auto test_real     = test_results[i].real();
       auto expected_imag = expected_data[i].imag();
       auto test_imag     = test_results[i].imag();
-      REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real));
-      REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag));
+      INFO("index " << i);
+      REQUIRE(isclose(expected_real, test_real));
+      REQUIRE(isclose(expected_imag, test_imag));
     }
   }
   else
diff --git a/c2h/include/c2h/isclose.h b/c2h/include/c2h/isclose.h
new file mode 100644
index 00000000000..4ed29703485
--- /dev/null
+++ b/c2h/include/c2h/isclose.h
@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+// SPDX-License-Identifier: BSD-3
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <type_traits>
+
+template <typename T>
+bool isclose(T a, T b, T r_tol, T a_tol) // true when |a - b| <= max(a_tol, r_tol * max(|a|, |b|))
+{
+  if constexpr (std::is_floating_point_v<T>)
+  {
+    if (a == b || std::isinf(a) || std::isinf(b)) // exact match, or an infinity is involved
+    {
+      return a == b; // an infinity is close only to itself; otherwise inf <= r_tol * inf would pass
+    }
+    return std::abs(a - b) <= std::max(a_tol, r_tol * std::max(std::abs(a), std::abs(b))); // NaN operand -> false
+  }
+  else
+  {
+    return a == b;
+  }
+}
+
+template <typename T>
+bool isclose(T a, T b, T r_tol) // relative-tolerance-only convenience overload
+{
+  return isclose(a, b, r_tol, T(0)); // forwards to the four-argument overload with a_tol = 0
+}
+
+template <typename T>
+bool isclose(T a, T b) // default overload: epsilon-scaled r_tol and a_tol = 0 for floating point, exact == otherwise
+{
+  if constexpr (std::is_floating_point_v<T>)
+  {
+    return isclose(a, b, T(1000) * std::numeric_limits<T>::epsilon(), T(0));
+  }
+  else
+  {
+    return a == b;
+  }
+}
diff --git a/cub/test/catch2_test_device_segmented_scan.cu b/cub/test/catch2_test_device_segmented_scan.cu
index 30c2770456f..2d481eda20e 100644
--- a/cub/test/catch2_test_device_segmented_scan.cu
+++ b/cub/test/catch2_test_device_segmented_scan.cu
@@ -16,6 +16,7 @@
 #include <c2h/catch2_test_helper.h>
 #include <c2h/custom_type.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 
 DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::InclusiveSegmentedSum, device_inclusive_segmented_sum);
 DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::ExclusiveSegmentedSum, device_exclusive_segmented_sum);
@@ -73,34 +74,20 @@ bool check_segment(const c2h::host_vector<ValueT>& h_output,
   {
     if constexpr (cuda::std::is_floating_point_v<ValueT>)
     {
-      ValueT ref_v  = h_ref[pos];
-      ValueT act_v  = h_output[pos];
-      ValueT diff   = (ref_v - act_v);
-      ValueT adiff  = (diff > ValueT{0}) ? diff : -diff;
-      ValueT ref_av = (ref_v > ValueT{0}) ? ref_v : -ref_v;
-      ValueT act_av = (act_v > ValueT{0}) ? act_v : -act_v;
-
-      ValueT eps = ::cuda::std::numeric_limits<ValueT>::epsilon();
-      correct    = correct && (adiff < 3 * eps + 2 * eps * (::cuda::std::max(ref_av, act_av)));
+      correct = correct && isclose(h_ref[pos], h_output[pos]);
     }
     else if constexpr (cuda::std::is_same_v<ValueT, half_t> || cuda::std::is_same_v<ValueT, bfloat16_t>)
     {
-      float ref_v = h_ref[pos];
-      float act_v = h_output[pos];
+      float ref_v = static_cast<float>(h_ref[pos]);
+      float act_v = static_cast<float>(h_output[pos]);
       if (cuda::std::isfinite(ref_v) && cuda::std::isfinite(act_v))
       {
-        float diff   = (ref_v - act_v);
-        float adiff  = (diff > float{0}) ? diff : -diff;
-        float ref_av = (ref_v > float{0}) ? ref_v : -ref_v;
-        float act_av = (act_v > float{0}) ? act_v : -act_v;
-
-        float eps = float{1} / float{128};
-        correct   = correct && (adiff < 3 * eps + 5 * eps * (::cuda::std::max(ref_av, act_av)));
+        correct = correct && isclose(ref_v, act_v);
       }
     }
     else
     {
-      correct = correct && (h_ref[pos] == h_output[pos]);
+      correct = correct && isclose(h_ref[pos], h_output[pos]);
     }
     if (!correct)
     {
diff --git a/cub/test/test_util.h b/cub/test/test_util.h
index 8ac38867f49..89b3a7379e9 100644
--- a/cub/test/test_util.h
+++ b/cub/test/test_util.h
@@ -38,6 +38,7 @@
 #include "mersenne.h"
 #include <c2h/catch2_test_helper.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 #include <c2h/test_util_vec.h>
 
 /******************************************************************************
@@ -1074,22 +1075,17 @@ int CompareResults(float* computed, float* reference, OffsetT len, bool verbose
 {
   for (OffsetT i = 0; i < len; i++)
   {
-    if (computed[i] != reference[i])
+    if (!isclose(computed[i], reference[i]))
     {
-      float difference = std::abs(computed[i] - reference[i]);
-      float fraction   = difference / std::abs(reference[i]);
-
-      if (fraction > 0.00015)
+      if (verbose)
       {
-        if (verbose)
-        {
-          std::cout
-            << "INCORRECT: [" << i << "]: "
-            << "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
-            << " (difference:" << difference << ", fraction: " << fraction << ")";
-        }
-        return 1;
+        float difference = std::abs(computed[i] - reference[i]);
+        std::cout
+          << "INCORRECT: [" << i << "]: "
+          << "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i]) << " (difference:" << difference
+          << ")";
       }
+      return 1;
     }
   }
   return 0;
@@ -1113,20 +1109,15 @@ int CompareResults(double* computed, double* reference, OffsetT len, bool verbos
 {
   for (OffsetT i = 0; i < len; i++)
   {
-    if (computed[i] != reference[i])
+    if (!isclose(computed[i], reference[i]))
     {
-      double difference = std::abs(computed[i] - reference[i]);
-      double fraction   = difference / std::abs(reference[i]);
-
-      if (fraction > 0.00015)
+      if (verbose)
       {
-        if (verbose)
-        {
-          std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
-                    << " (difference:" << difference << ", fraction: " << fraction << ")";
-        }
-        return 1;
+        double difference = std::abs(computed[i] - reference[i]);
+        std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
+                  << " (difference:" << difference << ")";
       }
+      return 1;
     }
   }
   return 0;
diff --git a/cub/test/thread_reduce/catch2_test_thread_reduce.cu b/cub/test/thread_reduce/catch2_test_thread_reduce.cu
index 4650cb99775..49119f687e9 100644
--- a/cub/test/thread_reduce/catch2_test_thread_reduce.cu
+++ b/cub/test/thread_reduce/catch2_test_thread_reduce.cu
@@ -20,6 +20,7 @@
 #include "c2h/catch2_test_helper.h"
 #include "c2h/extended_types.h"
 #include "c2h/generators.h"
+#include <c2h/isclose.h>
 #include <catch2/matchers/catch_matchers_floating_point.hpp>
 
 /***********************************************************************************************************************
@@ -170,18 +171,10 @@ using cub_operator_fp_list =
  * Verify results and kernel launch
  **********************************************************************************************************************/
 
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((cuda::std::is_floating_point_v<T>) )
-void verify_results(const T& expected_data, const T& test_results)
-{
-  REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05}));
-}
-
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((!cuda::std::is_floating_point_v<T>) )
+template <typename T>
 void verify_results(const T& expected_data, const T& test_results)
 {
-  REQUIRE(expected_data == test_results);
+  REQUIRE(isclose(expected_data, test_results));
 }
 
 template <typename T, typename ReduceOperator>
@@ -330,7 +323,8 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests",
     auto reference_result =
       std::accumulate(h_in_float.begin(), h_in_float.begin() + num_items, operator_identity, std_reduce_op);
     run_thread_reduce_kernel(num_items, d_in, d_out, reduce_op);
-    verify_results(reference_result, float{c2h::host_vector<value_t>(d_out)[0]});
+    float test_result = float{c2h::host_vector<value_t>(d_out)[0]};
+    REQUIRE(isclose(reference_result, test_result, 0.05f));
   }
 }
 
diff --git a/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu b/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu
index d428f21af1a..90c85bbf84d 100644
--- a/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu
+++ b/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu
@@ -23,6 +23,7 @@
 #  include "c2h/catch2_test_helper.h"
 #  include "c2h/extended_types.h"
 #  include "c2h/generators.h"
+#  include <c2h/isclose.h>
 #  include <catch2/matchers/catch_matchers_floating_point.hpp>
 
 /***********************************************************************************************************************
@@ -110,18 +111,10 @@ using cub_operator_fp_list = c2h::type_list<cuda::std::plus<>, cuda::minimum<>>;
  * Verify results and kernel launch
  **********************************************************************************************************************/
 
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((cuda::std::is_floating_point<T>::value))
-void verify_results(const T& expected_data, const T& test_results)
-{
-  REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05}));
-}
-
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((!cuda::std::is_floating_point<T>::value))
+template <typename T>
 void verify_results(const T& expected_data, const T& test_results)
 {
-  REQUIRE(expected_data == test_results);
+  REQUIRE(isclose(expected_data, test_results));
 }
 
 template <typename T, typename ReduceOperator>
diff --git a/thrust/testing/catch2_test_complex.cu b/thrust/testing/catch2_test_complex.cu
index 2efa429af24..b30ebc37287 100644
--- a/thrust/testing/catch2_test_complex.cu
+++ b/thrust/testing/catch2_test_complex.cu
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "catch2_test_helper.h"
+#include <c2h/isclose.h>
 #include <unittest/random.h>
 #include <unittest/testframework.h>
 
@@ -42,11 +43,6 @@ struct other_floating_point_type<double>
 template <typename T>
 using other_floating_point_type_t = typename other_floating_point_type<T>::type;
 
-// Helper to compare complex numbers with approximate equality
-// Supports both scalar and thrust::complex<T> types
-double const DEFAULT_RELATIVE_TOL = 1e-4;
-double const DEFAULT_ABSOLUTE_TOL = 1e-4;
-
 template <typename T>
 inline constexpr bool is_complex = false;
 template <typename T>
@@ -54,18 +50,17 @@ inline constexpr bool is_complex<thrust::complex<T>> = true;
 template <typename T>
 inline constexpr bool is_complex<std::complex<T>> = true;
 
-// Overload for complex types
 template <typename T1, typename T2>
 ::cuda::std::enable_if_t<is_complex<T1> && is_complex<T2>> require_almost_equal(const T1& a, const T2& b)
 {
-  CHECK(a.real() == Catch::Approx(b.real()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
-  CHECK(a.imag() == Catch::Approx(b.imag()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
+  CHECK(isclose(static_cast<double>(a.real()), static_cast<double>(b.real())));
+  CHECK(isclose(static_cast<double>(a.imag()), static_cast<double>(b.imag())));
 }
 
 template <typename T1, typename T2>
 ::cuda::std::enable_if_t<!is_complex<T1> && !is_complex<T2>> require_almost_equal(const T1& a, const T2& b)
 {
-  CHECK(a == Catch::Approx(b).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
+  CHECK(isclose(static_cast<double>(a), static_cast<double>(b)));
 }
 } // anonymous namespace
 

From 97f7b855aaed292180c761c27ae960b626322515 Mon Sep 17 00:00:00 2001
From: Nitish Naineni <96668765+NitishNaineni@users.noreply.github.com>
Date: Mon, 23 Mar 2026 09:23:37 -0500
Subject: [PATCH 2/8] Update c2h/include/c2h/isclose.h

Update the SPDX license identifier to BSD-3-Clause.

Co-authored-by: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
---
 c2h/include/c2h/isclose.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/c2h/include/c2h/isclose.h b/c2h/include/c2h/isclose.h
index 4ed29703485..b540767822e 100644
--- a/c2h/include/c2h/isclose.h
+++ b/c2h/include/c2h/isclose.h
@@ -1,5 +1,5 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
-// SPDX-License-Identifier: BSD-3
+// SPDX-License-Identifier: BSD-3-Clause
 
 #pragma once
 

From 9a658be8f0006c616c3bd574a4d4709d53c4b916 Mon Sep 17 00:00:00 2001
From: Nitish Naineni <96668765+NitishNaineni@users.noreply.github.com>
Date: Mon, 23 Mar 2026 10:33:13 -0500
Subject: [PATCH 3/8] Update c2h/include/c2h/isclose.h

Update the SPDX license identifier to Apache-2.0 WITH LLVM-exception and the copyright holder to NVIDIA CORPORATION & AFFILIATES.

Co-authored-by: Bernhard Manfred Gruber <bernhardmgruber@gmail.com>
---
 c2h/include/c2h/isclose.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/c2h/include/c2h/isclose.h b/c2h/include/c2h/isclose.h
index b540767822e..891b678f7f2 100644
--- a/c2h/include/c2h/isclose.h
+++ b/c2h/include/c2h/isclose.h
@@ -1,5 +1,5 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
-// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 #pragma once
 

From b5fc9a5b02dac379cc87a10b40f11ef3d4d68b63 Mon Sep 17 00:00:00 2001
From: Nitish Naineni <naineninitish@gmail.com>
Date: Mon, 23 Mar 2026 10:35:02 -0500
Subject: [PATCH 4/8] add static_assert for unsupported types in isclose

---
 c2h/include/c2h/isclose.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/c2h/include/c2h/isclose.h b/c2h/include/c2h/isclose.h
index 891b678f7f2..bd7a9c7578e 100644
--- a/c2h/include/c2h/isclose.h
+++ b/c2h/include/c2h/isclose.h
@@ -21,6 +21,7 @@ bool isclose(T a, T b, T r_tol, T a_tol)
   }
   else
   {
+    static_assert(std::is_integral_v<T>, "isclose: unsupported type, expected floating point or integral");
     return a == b;
   }
 }
@@ -40,6 +41,7 @@ bool isclose(T a, T b)
   }
   else
   {
+    static_assert(std::is_integral_v<T>, "isclose: unsupported type, expected floating point or integral");
     return a == b;
   }
 }

From 32b5698fa0b1c77b0c821608828c44d098d3f25f Mon Sep 17 00:00:00 2001
From: Nitish Naineni <naineninitish@gmail.com>
Date: Mon, 23 Mar 2026 10:50:44 -0500
Subject: [PATCH 5/8] use T(1 << 8) as default tolerance multiplier

---
 c2h/include/c2h/isclose.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/c2h/include/c2h/isclose.h b/c2h/include/c2h/isclose.h
index bd7a9c7578e..feaf56cd414 100644
--- a/c2h/include/c2h/isclose.h
+++ b/c2h/include/c2h/isclose.h
@@ -37,7 +37,7 @@ bool isclose(T a, T b)
 {
   if constexpr (std::is_floating_point_v<T>)
   {
-    return isclose(a, b, T(1000) * std::numeric_limits<T>::epsilon(), T(0));
+    return isclose(a, b, T(1 << 8) * std::numeric_limits<T>::epsilon(), T(0));
   }
   else
   {

From 526c801b8d7923848449237f5cfb8bfdf9a4c695 Mon Sep 17 00:00:00 2001
From: Nitish Naineni <naineninitish@gmail.com>
Date: Mon, 23 Mar 2026 11:06:50 -0500
Subject: [PATCH 6/8] defer INFO formatting to only run on failure

---
 c2h/include/c2h/catch2_test_helper.h | 66 ++++++++++++++++------------
 c2h/include/c2h/check_results.cuh    | 46 ++++++++++++++-----
 2 files changed, 73 insertions(+), 39 deletions(-)

diff --git a/c2h/include/c2h/catch2_test_helper.h b/c2h/include/c2h/catch2_test_helper.h
index 7529a167e3f..d7f149812f8 100644
--- a/c2h/include/c2h/catch2_test_helper.h
+++ b/c2h/include/c2h/catch2_test_helper.h
@@ -215,37 +215,49 @@ std::vector<T> to_vec(std::vector<T> const& vec)
 }
 } // namespace detail
 
-#define REQUIRE_APPROX_EQ(ref, out)                                      \
-  {                                                                      \
-    auto vec_ref = detail::to_vec(ref);                                  \
-    auto vec_out = detail::to_vec(out);                                  \
-    for (size_t i = 0; i < vec_ref.size(); i++)                          \
-    {                                                                    \
-      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
-      REQUIRE(isclose(vec_ref[i], vec_out[i]));                          \
-    }                                                                    \
+#define REQUIRE_APPROX_EQ(ref, out) /* element-wise isclose, default tolerance */  \
+  {                                                                                \
+    auto vec_ref = detail::to_vec(ref);                                            \
+    auto vec_out = detail::to_vec(out);                                            \
+    REQUIRE(vec_ref.size() == vec_out.size()); /* the loop indexes both vectors */ \
+    for (size_t i = 0; i < vec_ref.size(); i++)                                    \
+    {                                                                              \
+      /* FAIL formats its message only when an element mismatches */               \
+      if (!isclose(vec_ref[i], vec_out[i]))                                        \
+      {                                                                            \
+        FAIL("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]);         \
+      }                                                                            \
+    }                                                                              \
   }
 
-#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps)                         \
-  {                                                                      \
-    auto vec_ref = detail::to_vec(ref);                                  \
-    auto vec_out = detail::to_vec(out);                                  \
-    for (size_t i = 0; i < vec_ref.size(); i++)                          \
-    {                                                                    \
-      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
-      REQUIRE(isclose(vec_ref[i], vec_out[i], eps));                     \
-    }                                                                    \
+#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps) /* eps = relative tolerance */    \
+  {                                                                                \
+    auto vec_ref = detail::to_vec(ref);                                            \
+    auto vec_out = detail::to_vec(out);                                            \
+    REQUIRE(vec_ref.size() == vec_out.size()); /* the loop indexes both vectors */ \
+    for (size_t i = 0; i < vec_ref.size(); i++)                                    \
+    {                                                                              \
+      /* FAIL formats its message only when an element mismatches */               \
+      if (!isclose(vec_ref[i], vec_out[i], eps))                                   \
+      {                                                                            \
+        FAIL("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]);         \
+      }                                                                            \
+    }                                                                              \
   }
 
-#define REQUIRE_APPROX_EQ_ABS(ref, out, abs)                             \
-  {                                                                      \
-    auto vec_ref = detail::to_vec(ref);                                  \
-    auto vec_out = detail::to_vec(out);                                  \
-    for (size_t i = 0; i < vec_ref.size(); i++)                          \
-    {                                                                    \
-      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
-      REQUIRE(isclose(vec_ref[i], vec_out[i], 0 * vec_ref[i], abs));     \
-    }                                                                    \
+#define REQUIRE_APPROX_EQ_ABS(ref, out, abs) /* abs = absolute tolerance */        \
+  {                                                                                \
+    auto vec_ref = detail::to_vec(ref);                                            \
+    auto vec_out = detail::to_vec(out);                                            \
+    REQUIRE(vec_ref.size() == vec_out.size()); /* the loop indexes both vectors */ \
+    for (size_t i = 0; i < vec_ref.size(); i++)                                    \
+    {                                                                              \
+      /* FAIL formats its message only when an element mismatches */               \
+      if (!isclose(vec_ref[i], vec_out[i], 0 * vec_ref[i], abs))                   \
+      {                                                                            \
+        FAIL("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]);         \
+      }                                                                            \
+    }                                                                              \
   }
 
 namespace c2h::detail
diff --git a/c2h/include/c2h/check_results.cuh b/c2h/include/c2h/check_results.cuh
index 00cf98f15d9..c04911f2a89 100644
--- a/c2h/include/c2h/check_results.cuh
+++ b/c2h/include/c2h/check_results.cuh
@@ -42,9 +42,14 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
   {
     for (size_t i = 0; i < test_results.size(); ++i)
     {
-      INFO("index " << i);
-      REQUIRE(isclose(expected_data[i].x, test_results[i].x, 0.01f));
-      REQUIRE(isclose(expected_data[i].y, test_results[i].y, 0.01f));
+      bool close_x = isclose(expected_data[i].x, test_results[i].x, 0.01f);
+      bool close_y = isclose(expected_data[i].y, test_results[i].y, 0.01f);
+      if (!close_x || !close_y)
+      {
+        INFO("index " << i);
+      }
+      REQUIRE(close_x);
+      REQUIRE(close_y);
     }
   }
   else if constexpr (cuda::std::is_same_v<T, __nv_bfloat162> || cuda::std::is_same_v<T, __half2>)
@@ -52,9 +57,16 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
     constexpr auto rel_err = cuda::std::is_same_v<T, __half2> ? 0.08f : 0.2f;
     for (size_t i = 0; i < test_results.size(); ++i)
     {
-      INFO("index " << i);
-      REQUIRE(isclose(static_cast<float>(expected_data[i].x), static_cast<float>(test_results[i].x), rel_err));
-      REQUIRE(isclose(static_cast<float>(expected_data[i].y), static_cast<float>(test_results[i].y), rel_err));
+      bool close_x =
+        isclose(static_cast<float>(expected_data[i].x), static_cast<float>(test_results[i].x), rel_err);
+      bool close_y =
+        isclose(static_cast<float>(expected_data[i].y), static_cast<float>(test_results[i].y), rel_err);
+      if (!close_x || !close_y)
+      {
+        INFO("index " << i);
+      }
+      REQUIRE(close_x);
+      REQUIRE(close_y);
     }
   }
   else if constexpr (cuda::std::is_same_v<T, cuda::std::complex<__nv_bfloat16>>
@@ -67,9 +79,14 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
       auto test_real     = static_cast<float>(test_results[i].real());
       auto expected_imag = static_cast<float>(expected_data[i].imag());
       auto test_imag     = static_cast<float>(test_results[i].imag());
-      INFO("index " << i);
-      REQUIRE(isclose(expected_real, test_real, rel_err));
-      REQUIRE(isclose(expected_imag, test_imag, rel_err));
+      bool close_real    = isclose(expected_real, test_real, rel_err);
+      bool close_imag    = isclose(expected_imag, test_imag, rel_err);
+      if (!close_real || !close_imag)
+      {
+        INFO("index " << i);
+      }
+      REQUIRE(close_real);
+      REQUIRE(close_imag);
     }
   }
   else if constexpr (cuda::std::__is_cuda_std_complex_v<T>)
@@ -80,9 +97,14 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
       auto test_real     = test_results[i].real();
       auto expected_imag = expected_data[i].imag();
       auto test_imag     = test_results[i].imag();
-      INFO("index " << i);
-      REQUIRE(isclose(expected_real, test_real));
-      REQUIRE(isclose(expected_imag, test_imag));
+      bool close_real    = isclose(expected_real, test_real);
+      bool close_imag    = isclose(expected_imag, test_imag);
+      if (!close_real || !close_imag)
+      {
+        INFO("index " << i);
+      }
+      REQUIRE(close_real);
+      REQUIRE(close_imag);
     }
   }
   else

From 1130c28802aaaa7e6e6655acecff2e5cb50764bf Mon Sep 17 00:00:00 2001
From: Nitish Naineni <naineninitish@gmail.com>
Date: Mon, 23 Mar 2026 11:14:09 -0500
Subject: [PATCH 7/8] use brace initialization for float test_result

---
 cub/test/thread_reduce/catch2_test_thread_reduce.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cub/test/thread_reduce/catch2_test_thread_reduce.cu b/cub/test/thread_reduce/catch2_test_thread_reduce.cu
index 49119f687e9..a71717af7d2 100644
--- a/cub/test/thread_reduce/catch2_test_thread_reduce.cu
+++ b/cub/test/thread_reduce/catch2_test_thread_reduce.cu
@@ -323,7 +323,7 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests",
     auto reference_result =
       std::accumulate(h_in_float.begin(), h_in_float.begin() + num_items, operator_identity, std_reduce_op);
     run_thread_reduce_kernel(num_items, d_in, d_out, reduce_op);
-    float test_result = float{c2h::host_vector<value_t>(d_out)[0]};
+    float test_result{c2h::host_vector<value_t>(d_out)[0]};
     REQUIRE(isclose(reference_result, test_result, 0.05f));
   }
 }

From 6eb6f001d026d1406a860f76ecd57506f98ea4c5 Mon Sep 17 00:00:00 2001
From: Nitish Naineni <naineninitish@gmail.com>
Date: Mon, 23 Mar 2026 11:27:21 -0500
Subject: [PATCH 8/8] revert thrust complex test changes, isclose not
 accessible from thrust build

---
 thrust/testing/catch2_test_complex.cu | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/thrust/testing/catch2_test_complex.cu b/thrust/testing/catch2_test_complex.cu
index b30ebc37287..2efa429af24 100644
--- a/thrust/testing/catch2_test_complex.cu
+++ b/thrust/testing/catch2_test_complex.cu
@@ -8,7 +8,6 @@
 #include <vector>
 
 #include "catch2_test_helper.h"
-#include <c2h/isclose.h>
 #include <unittest/random.h>
 #include <unittest/testframework.h>
 
@@ -43,6 +42,11 @@ struct other_floating_point_type<double>
 template <typename T>
 using other_floating_point_type_t = typename other_floating_point_type<T>::type;
 
+// Helper to compare complex numbers with approximate equality
+// Supports both scalar and thrust::complex<T> types
+double const DEFAULT_RELATIVE_TOL = 1e-4;
+double const DEFAULT_ABSOLUTE_TOL = 1e-4;
+
 template <typename T>
 inline constexpr bool is_complex = false;
 template <typename T>
@@ -50,17 +54,18 @@ inline constexpr bool is_complex<thrust::complex<T>> = true;
 template <typename T>
 inline constexpr bool is_complex<std::complex<T>> = true;
 
+// Overload for complex types
 template <typename T1, typename T2>
 ::cuda::std::enable_if_t<is_complex<T1> && is_complex<T2>> require_almost_equal(const T1& a, const T2& b)
 {
-  CHECK(isclose(static_cast<double>(a.real()), static_cast<double>(b.real())));
-  CHECK(isclose(static_cast<double>(a.imag()), static_cast<double>(b.imag())));
+  CHECK(a.real() == Catch::Approx(b.real()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
+  CHECK(a.imag() == Catch::Approx(b.imag()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
 }
 
 template <typename T1, typename T2>
 ::cuda::std::enable_if_t<!is_complex<T1> && !is_complex<T2>> require_almost_equal(const T1& a, const T2& b)
 {
-  CHECK(isclose(static_cast<double>(a), static_cast<double>(b)));
+  CHECK(a == Catch::Approx(b).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
 }
 } // anonymous namespace