NVIDIA · NitishNaineni · Mar 22, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
@@ -22,6 +22,7 @@
 #include <c2h/checked_allocator.cuh>
 #include <c2h/device_policy.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 #include <c2h/test_util_vec.h>
 #include <c2h/utility.h>
 #include <c2h/vector.h>
@@ -214,25 +215,37 @@ std::vector<T> to_vec(std::vector<T> const& vec)
 }
 } // namespace detail
 
-#define REQUIRE_APPROX_EQ(ref, out)                          \
-  {                                                          \
-    auto vec_ref = detail::to_vec(ref);                      \
-    auto vec_out = detail::to_vec(out);                      \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out)); \
+// Element-wise approximate comparison of two ranges using isclose() with its
+// default tolerance. The sizes are compared first because the loop below is
+// bounded by vec_ref.size() only: without the check a shorter vec_out would be
+// indexed past its end and the tail of a longer vec_out would be ignored.
+#define REQUIRE_APPROX_EQ(ref, out)                                      \
+  {                                                                      \
+    auto vec_ref = detail::to_vec(ref);                                  \
+    auto vec_out = detail::to_vec(out);                                  \
+    REQUIRE(vec_ref.size() == vec_out.size());                           \
+    for (size_t i = 0; i < vec_ref.size(); i++)                          \
+    {                                                                    \
+      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
+      REQUIRE(isclose(vec_ref[i], vec_out[i]));                          \
+    }                                                                    \
   }
 
-#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps)                          \
-  {                                                                       \
-    auto vec_ref = detail::to_vec(ref);                                   \
-    auto vec_out = detail::to_vec(out);                                   \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).epsilon(eps)); \
+// Element-wise approximate comparison of two ranges using isclose() with the
+// relative tolerance `eps`. The sizes are compared first because the loop
+// below is bounded by vec_ref.size() only: without the check a shorter vec_out
+// would be indexed past its end and the tail of a longer one would be ignored.
+#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps)                         \
+  {                                                                      \
+    auto vec_ref = detail::to_vec(ref);                                  \
+    auto vec_out = detail::to_vec(out);                                  \
+    REQUIRE(vec_ref.size() == vec_out.size());                           \
+    for (size_t i = 0; i < vec_ref.size(); i++)                          \
+    {                                                                    \
+      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
+      REQUIRE(isclose(vec_ref[i], vec_out[i], eps));                     \
+    }                                                                    \
   }
 
 #define REQUIRE_APPROX_EQ_ABS(ref, out, abs)                             \
   {                                                                      \
     auto vec_ref = detail::to_vec(ref);                                  \
     auto vec_out = detail::to_vec(out);                                  \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).margin(abs)); \
+    for (size_t i = 0; i < vec_ref.size(); i++)                          \
+    {                                                                    \
+      INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
+      REQUIRE(isclose(vec_ref[i], vec_out[i], 0 * vec_ref[i], abs));     \
+    }                                                                    \
   }
 
 namespace c2h::detail

@@ -11,7 +11,7 @@
 
 #include <test_util.h>
 
-#include <catch2/matchers/catch_matchers_floating_point.hpp>
+#include <c2h/isclose.h>
 
 template <typename T>
 void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_vector<T>& test_results)
@@ -42,17 +42,19 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
   {
     for (size_t i = 0; i < test_results.size(); ++i)
     {
-      REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, 0.01f));
-      REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, 0.01f));
+      INFO("index " << i);
+      REQUIRE(isclose(expected_data[i].x, test_results[i].x, 0.01f));
+      REQUIRE(isclose(expected_data[i].y, test_results[i].y, 0.01f));
     }
   }
   else if constexpr (cuda::std::is_same_v<T, __nv_bfloat162> || cuda::std::is_same_v<T, __half2>)
   {
     constexpr auto rel_err = cuda::std::is_same_v<T, __half2> ? 0.08f : 0.2f;
     for (size_t i = 0; i < test_results.size(); ++i)
     {
-      REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, rel_err));
-      REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, rel_err));
+      INFO("index " << i);
+      REQUIRE(isclose(static_cast<float>(expected_data[i].x), static_cast<float>(test_results[i].x), rel_err));
+      REQUIRE(isclose(static_cast<float>(expected_data[i].y), static_cast<float>(test_results[i].y), rel_err));
     }
   }
   else if constexpr (cuda::std::is_same_v<T, cuda::std::complex<__nv_bfloat16>>
@@ -62,11 +64,12 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
     for (size_t i = 0; i < test_results.size(); ++i)
     {
       auto expected_real = static_cast<float>(expected_data[i].real());
-      auto test_real     = test_results[i].real();
+      auto test_real     = static_cast<float>(test_results[i].real());
       auto expected_imag = static_cast<float>(expected_data[i].imag());
-      auto test_imag     = test_results[i].imag();
-      REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real, rel_err));
-      REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag, rel_err));
+      auto test_imag     = static_cast<float>(test_results[i].imag());
+      INFO("index " << i);
+      REQUIRE(isclose(expected_real, test_real, rel_err));
+      REQUIRE(isclose(expected_imag, test_imag, rel_err));
     }
   }
   else if constexpr (cuda::std::__is_cuda_std_complex_v<T>)
@@ -77,8 +80,9 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
       auto test_real     = test_results[i].real();
       auto expected_imag = expected_data[i].imag();
       auto test_imag     = test_results[i].imag();
-      REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real));
-      REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag));
+      INFO("index " << i);
+      REQUIRE(isclose(expected_real, test_real));
+      REQUIRE(isclose(expected_imag, test_imag));
     }
   }
   else

@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <type_traits>
+
+// Approximate equality used by the test helpers.
+//
+// For floating-point T, returns true when a == b, or when
+//   |a - b| <= max(a_tol, r_tol * max(|a|, |b|)).
+// An infinite operand only compares close to an identical infinity: without
+// that guard r_tol * inf == inf would make the inequality hold for any pair
+// containing an infinity. NaN operands never compare close because every
+// comparison involving NaN is false.
+// For every other T the tolerances are ignored and the result is a == b.
+template <typename T>
+bool isclose(T a, T b, T r_tol, T a_tol)
+{
+  if constexpr (std::is_floating_point_v<T>)
+  {
+    if (a == b)
+    {
+      return true;
+    }
+    // Reached only when a != b, so an infinity here can never be "close".
+    if (std::isinf(a) || std::isinf(b))
+    {
+      return false;
+    }
+    return std::abs(a - b) <= std::max(a_tol, r_tol * std::max(std::abs(a), std::abs(b)));
+  }
+  else
+  {
+    return a == b;
+  }
+}
+
+// Relative-tolerance-only overload: forwards to the four-argument isclose
+// with an absolute tolerance of T(0).
+template <typename T>
+bool isclose(T a, T b, T r_tol)
+{
+  const T no_abs_tol = T(0);
+  return isclose(a, b, r_tol, no_abs_tol);
+}
+
+// Default-tolerance overload. Non-floating-point types are compared with
+// operator==; floating-point types use a relative tolerance of
+// 1000 * numeric_limits<T>::epsilon() and no absolute tolerance.
+template <typename T>
+bool isclose(T a, T b)
+{
+  if constexpr (!std::is_floating_point_v<T>)
+  {
+    return a == b;
+  }
+  else
+  {
+    const T default_r_tol = T(1000) * std::numeric_limits<T>::epsilon();
+    return isclose(a, b, default_r_tol, T(0));
+  }
+}
@@ -16,6 +16,7 @@
 #include <c2h/catch2_test_helper.h>
 #include <c2h/custom_type.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 
 DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::InclusiveSegmentedSum, device_inclusive_segmented_sum);
 DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::ExclusiveSegmentedSum, device_exclusive_segmented_sum);
@@ -73,34 +74,20 @@ bool check_segment(const c2h::host_vector<ValueT>& h_output,
   {
     if constexpr (cuda::std::is_floating_point_v<ValueT>)
     {
-      ValueT ref_v  = h_ref[pos];
-      ValueT act_v  = h_output[pos];
-      ValueT diff   = (ref_v - act_v);
-      ValueT adiff  = (diff > ValueT{0}) ? diff : -diff;
-      ValueT ref_av = (ref_v > ValueT{0}) ? ref_v : -ref_v;
-      ValueT act_av = (act_v > ValueT{0}) ? act_v : -act_v;
-
-      ValueT eps = ::cuda::std::numeric_limits<ValueT>::epsilon();
-      correct    = correct && (adiff < 3 * eps + 2 * eps * (::cuda::std::max(ref_av, act_av)));
+      correct = correct && isclose(h_ref[pos], h_output[pos]);
     }
     else if constexpr (cuda::std::is_same_v<ValueT, half_t> || cuda::std::is_same_v<ValueT, bfloat16_t>)
     {
-      float ref_v = h_ref[pos];
-      float act_v = h_output[pos];
+      float ref_v = static_cast<float>(h_ref[pos]);
+      float act_v = static_cast<float>(h_output[pos]);
       if (cuda::std::isfinite(ref_v) && cuda::std::isfinite(act_v))
       {
-        float diff   = (ref_v - act_v);
-        float adiff  = (diff > float{0}) ? diff : -diff;
-        float ref_av = (ref_v > float{0}) ? ref_v : -ref_v;
-        float act_av = (act_v > float{0}) ? act_v : -act_v;
-
-        float eps = float{1} / float{128};
-        correct   = correct && (adiff < 3 * eps + 5 * eps * (::cuda::std::max(ref_av, act_av)));
+        correct = correct && isclose(ref_v, act_v);
       }
     }
     else
     {
-      correct = correct && (h_ref[pos] == h_output[pos]);
+      correct = correct && isclose(h_ref[pos], h_output[pos]);
     }
     if (!correct)
     {

@@ -38,6 +38,7 @@
 #include "mersenne.h"
 #include <c2h/catch2_test_helper.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 #include <c2h/test_util_vec.h>
 
 /******************************************************************************
@@ -1074,22 +1075,17 @@ int CompareResults(float* computed, float* reference, OffsetT len, bool verbose
 {
   for (OffsetT i = 0; i < len; i++)
   {
-    if (computed[i] != reference[i])
+    if (!isclose(computed[i], reference[i]))
     {
-      float difference = std::abs(computed[i] - reference[i]);
-      float fraction   = difference / std::abs(reference[i]);
-
-      if (fraction > 0.00015)
+      if (verbose)
       {
-        if (verbose)
-        {
-          std::cout
-            << "INCORRECT: [" << i << "]: "
-            << "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
-            << " (difference:" << difference << ", fraction: " << fraction << ")";
-        }
-        return 1;
+        float difference = std::abs(computed[i] - reference[i]);
+        std::cout
+          << "INCORRECT: [" << i << "]: "
+          << "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i]) << " (difference:" << difference
+          << ")";
       }
+      return 1;
     }
   }
   return 0;
@@ -1113,20 +1109,15 @@ int CompareResults(double* computed, double* reference, OffsetT len, bool verbos
 {
   for (OffsetT i = 0; i < len; i++)
   {
-    if (computed[i] != reference[i])
+    if (!isclose(computed[i], reference[i]))
     {
-      double difference = std::abs(computed[i] - reference[i]);
-      double fraction   = difference / std::abs(reference[i]);
-
-      if (fraction > 0.00015)
+      if (verbose)
       {
-        if (verbose)
-        {
-          std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
-                    << " (difference:" << difference << ", fraction: " << fraction << ")";
-        }
-        return 1;
+        double difference = std::abs(computed[i] - reference[i]);
+        std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
+                  << " (difference:" << difference << ")";
       }
+      return 1;
     }
   }
   return 0;

@@ -20,6 +20,7 @@
 #include "c2h/catch2_test_helper.h"
 #include "c2h/extended_types.h"
 #include "c2h/generators.h"
+#include <c2h/isclose.h>
 #include <catch2/matchers/catch_matchers_floating_point.hpp>
 
 /***********************************************************************************************************************
@@ -170,18 +171,10 @@ using cub_operator_fp_list =
  * Verify results and kernel launch
  **********************************************************************************************************************/
 
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((cuda::std::is_floating_point_v<T>) )
-void verify_results(const T& expected_data, const T& test_results)
-{
-  REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05}));
-}
-
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((!cuda::std::is_floating_point_v<T>) )
+// Checks one result against its reference value via the two-argument
+// isclose().
+// NOTE(review): the removed floating-point overload compared with
+// WithinRel(..., T{0.05}); this version relies on isclose's default tolerance
+// instead - confirm the tighter bound is intended for floating-point types.
+template <typename T>
 void verify_results(const T& expected_data, const T& test_results)
 {
-  REQUIRE(expected_data == test_results);
+  REQUIRE(isclose(expected_data, test_results));
 }
 
 template <typename T, typename ReduceOperator>
@@ -330,7 +323,8 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests",
     auto reference_result =
       std::accumulate(h_in_float.begin(), h_in_float.begin() + num_items, operator_identity, std_reduce_op);
     run_thread_reduce_kernel(num_items, d_in, d_out, reduce_op);
-    verify_results(reference_result, float{c2h::host_vector<value_t>(d_out)[0]});
+    float test_result = float{c2h::host_vector<value_t>(d_out)[0]};
-    float test_result = float{c2h::host_vector<value_t>(d_out)[0]};
+    float test_result{c2h::host_vector<value_t>(d_out)[0]};
-    float test_result = float{c2h::host_vector<value_t>(d_out)[0]};
+    float test_result{c2h::host_vector<value_t>(d_out)[0]};
+    REQUIRE(isclose(reference_result, test_result, 0.05f));
   }
 }
 

@@ -23,6 +23,7 @@
 #  include "c2h/catch2_test_helper.h"
 #  include "c2h/extended_types.h"
 #  include "c2h/generators.h"
+#  include <c2h/isclose.h>
 #  include <catch2/matchers/catch_matchers_floating_point.hpp>
 
 /***********************************************************************************************************************
@@ -110,18 +111,10 @@ using cub_operator_fp_list = c2h::type_list<cuda::std::plus<>, cuda::minimum<>>;
  * Verify results and kernel launch
  **********************************************************************************************************************/
 
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((cuda::std::is_floating_point<T>::value))
-void verify_results(const T& expected_data, const T& test_results)
-{
-  REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05}));
-}
-
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((!cuda::std::is_floating_point<T>::value))
+// Checks one result against its reference value via the two-argument
+// isclose().
+// NOTE(review): the removed floating-point overload compared with
+// WithinRel(..., T{0.05}); this version relies on isclose's default tolerance
+// instead - confirm the tighter bound is intended for floating-point types.
+template <typename T>
 void verify_results(const T& expected_data, const T& test_results)
 {
-  REQUIRE(expected_data == test_results);
+  REQUIRE(isclose(expected_data, test_results));
 }
 
 template <typename T, typename ReduceOperator>

diff --git a/thrust/testing/catch2_test_complex.cu b/thrust/testing/catch2_test_complex.cu
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "catch2_test_helper.h"
+#include <c2h/isclose.h>
 #include <unittest/random.h>
 #include <unittest/testframework.h>
 
@@ -42,30 +43,24 @@ struct other_floating_point_type<double>
 template <typename T>
 using other_floating_point_type_t = typename other_floating_point_type<T>::type;
 
-// Helper to compare complex numbers with approximate equality
-// Supports both scalar and thrust::complex<T> types
-double const DEFAULT_RELATIVE_TOL = 1e-4;
-double const DEFAULT_ABSOLUTE_TOL = 1e-4;
-
+// Trait: true for thrust::complex<T> and std::complex<T>, false for every
+// other type. Used to select the matching require_almost_equal overload.
 template <typename T>
 inline constexpr bool is_complex = false;
 template <typename T>
 inline constexpr bool is_complex<thrust::complex<T>> = true;
 template <typename T>
 inline constexpr bool is_complex<std::complex<T>> = true;
 
-// Overload for complex types
 template <typename T1, typename T2>
 ::cuda::std::enable_if_t<is_complex<T1> && is_complex<T2>> require_almost_equal(const T1& a, const T2& b)
 {
-  CHECK(a.real() == Catch::Approx(b.real()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
-  CHECK(a.imag() == Catch::Approx(b.imag()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
+  CHECK(isclose(static_cast<double>(a.real()), static_cast<double>(b.real())));
+  CHECK(isclose(static_cast<double>(a.imag()), static_cast<double>(b.imag())));
 }
 
 template <typename T1, typename T2>
 ::cuda::std::enable_if_t<!is_complex<T1> && !is_complex<T2>> require_almost_equal(const T1& a, const T2& b)
 {
-  CHECK(a == Catch::Approx(b).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
+  CHECK(isclose(static_cast<double>(a), static_cast<double>(b)));
 }
 } // anonymous namespace