diff --git a/c2h/include/c2h/catch2_test_helper.h b/c2h/include/c2h/catch2_test_helper.h
index c0637fd4e6a..d7f149812f8 100644
--- a/c2h/include/c2h/catch2_test_helper.h
+++ b/c2h/include/c2h/catch2_test_helper.h
@@ -22,6 +22,7 @@
 #include <c2h/checked_allocator.cuh>
 #include <c2h/device_policy.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 #include <c2h/test_util_vec.h>
 #include <c2h/utility.h>
 #include <c2h/vector.h>
@@ -214,25 +215,49 @@ std::vector<T> to_vec(std::vector<T> const& vec)
 }
 } // namespace detail
 
-#define REQUIRE_APPROX_EQ(ref, out)                          \
-  {                                                          \
-    auto vec_ref = detail::to_vec(ref);                      \
-    auto vec_out = detail::to_vec(out);                      \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out)); \
+#define REQUIRE_APPROX_EQ(ref, out)                                                \
+  {                                                                                \
+    auto vec_ref = detail::to_vec(ref);                                            \
+    auto vec_out = detail::to_vec(out);                                            \
+    for (size_t i = 0; i < vec_ref.size(); i++)                                    \
+    {                                                                              \
+      bool close = isclose(vec_ref[i], vec_out[i]);                                \
+      if (!close)                                                                  \
+      {                                                                            \
+        INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]);         \
+      }                                                                            \
+      REQUIRE(close);                                                              \
+    }                                                                              \
   }
 
-#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps)                          \
-  {                                                                       \
-    auto vec_ref = detail::to_vec(ref);                                   \
-    auto vec_out = detail::to_vec(out);                                   \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).epsilon(eps)); \
+#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps)                                   \
+  {                                                                                \
+    auto vec_ref = detail::to_vec(ref);                                            \
+    auto vec_out = detail::to_vec(out);                                            \
+    for (size_t i = 0; i < vec_ref.size(); i++)                                    \
+    {                                                                              \
+      bool close = isclose(vec_ref[i], vec_out[i], eps);                           \
+      if (!close)                                                                  \
+      {                                                                            \
+        INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]);         \
+      }                                                                            \
+      REQUIRE(close);                                                              \
+    }                                                                              \
   }
 
-#define REQUIRE_APPROX_EQ_ABS(ref, out, abs)                             \
-  {                                                                      \
-    auto vec_ref = detail::to_vec(ref);                                  \
-    auto vec_out = detail::to_vec(out);                                  \
-    REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).margin(abs)); \
+#define REQUIRE_APPROX_EQ_ABS(ref, out, abs)                                       \
+  {                                                                                \
+    auto vec_ref = detail::to_vec(ref);                                            \
+    auto vec_out = detail::to_vec(out);                                            \
+    for (size_t i = 0; i < vec_ref.size(); i++)                                    \
+    {                                                                              \
+      bool close = isclose(vec_ref[i], vec_out[i], 0 * vec_ref[i], abs);           \
+      if (!close)                                                                  \
+      {                                                                            \
+        INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]);         \
+      }                                                                            \
+      REQUIRE(close);                                                              \
+    }                                                                              \
   }
 
 namespace c2h::detail
diff --git a/c2h/include/c2h/check_results.cuh b/c2h/include/c2h/check_results.cuh
index 6176b8231c5..c04911f2a89 100644
--- a/c2h/include/c2h/check_results.cuh
+++ b/c2h/include/c2h/check_results.cuh
@@ -11,7 +11,7 @@
 
 #include <test_util.h>
 
-#include <catch2/matchers/catch_matchers_floating_point.hpp>
+#include <c2h/isclose.h>
 
 template <typename T>
 void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_vector<T>& test_results)
@@ -42,8 +42,14 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
   {
     for (size_t i = 0; i < test_results.size(); ++i)
     {
-      REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, 0.01f));
-      REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, 0.01f));
+      bool close_x = isclose(expected_data[i].x, test_results[i].x, 0.01f);
+      bool close_y = isclose(expected_data[i].y, test_results[i].y, 0.01f);
+      if (!close_x || !close_y)
+      {
+        INFO("index " << i);
+      }
+      REQUIRE(close_x);
+      REQUIRE(close_y);
     }
   }
   else if constexpr (cuda::std::is_same_v<T, __nv_bfloat162> || cuda::std::is_same_v<T, __half2>)
@@ -51,8 +57,16 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
     constexpr auto rel_err = cuda::std::is_same_v<T, __half2> ? 0.08f : 0.2f;
     for (size_t i = 0; i < test_results.size(); ++i)
     {
-      REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, rel_err));
-      REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, rel_err));
+      bool close_x =
+        isclose(static_cast<float>(expected_data[i].x), static_cast<float>(test_results[i].x), rel_err);
+      bool close_y =
+        isclose(static_cast<float>(expected_data[i].y), static_cast<float>(test_results[i].y), rel_err);
+      if (!close_x || !close_y)
+      {
+        INFO("index " << i);
+      }
+      REQUIRE(close_x);
+      REQUIRE(close_y);
     }
   }
   else if constexpr (cuda::std::is_same_v<T, cuda::std::complex<__nv_bfloat16>>
@@ -62,11 +76,17 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
     for (size_t i = 0; i < test_results.size(); ++i)
     {
       auto expected_real = static_cast<float>(expected_data[i].real());
-      auto test_real     = test_results[i].real();
+      auto test_real     = static_cast<float>(test_results[i].real());
       auto expected_imag = static_cast<float>(expected_data[i].imag());
-      auto test_imag     = test_results[i].imag();
-      REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real, rel_err));
-      REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag, rel_err));
+      auto test_imag     = static_cast<float>(test_results[i].imag());
+      bool close_real    = isclose(expected_real, test_real, rel_err);
+      bool close_imag    = isclose(expected_imag, test_imag, rel_err);
+      if (!close_real || !close_imag)
+      {
+        INFO("index " << i);
+      }
+      REQUIRE(close_real);
+      REQUIRE(close_imag);
     }
   }
   else if constexpr (cuda::std::__is_cuda_std_complex_v<T>)
@@ -77,8 +97,14 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
       auto test_real     = test_results[i].real();
       auto expected_imag = expected_data[i].imag();
       auto test_imag     = test_results[i].imag();
-      REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real));
-      REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag));
+      bool close_real    = isclose(expected_real, test_real);
+      bool close_imag    = isclose(expected_imag, test_imag);
+      if (!close_real || !close_imag)
+      {
+        INFO("index " << i);
+      }
+      REQUIRE(close_real);
+      REQUIRE(close_imag);
     }
   }
   else
diff --git a/c2h/include/c2h/isclose.h b/c2h/include/c2h/isclose.h
new file mode 100644
index 00000000000..feaf56cd414
--- /dev/null
+++ b/c2h/include/c2h/isclose.h
@@ -0,0 +1,47 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <type_traits>
+
+template <typename T>
+bool isclose(T a, T b, T r_tol, T a_tol)
+{
+  if constexpr (std::is_floating_point_v<T>)
+  {
+    if (a == b)
+    {
+      return true;
+    }
+    return std::abs(a - b) <= std::max(a_tol, r_tol * std::max(std::abs(a), std::abs(b)));
+  }
+  else
+  {
+    static_assert(std::is_integral_v<T>, "isclose: unsupported type, expected floating point or integral");
+    return a == b;
+  }
+}
+
+template <typename T>
+bool isclose(T a, T b, T r_tol)
+{
+  return isclose(a, b, r_tol, T(0));
+}
+
+template <typename T>
+bool isclose(T a, T b)
+{
+  if constexpr (std::is_floating_point_v<T>)
+  {
+    return isclose(a, b, T(1 << 8) * std::numeric_limits<T>::epsilon(), T(0));
+  }
+  else
+  {
+    static_assert(std::is_integral_v<T>, "isclose: unsupported type, expected floating point or integral");
+    return a == b;
+  }
+}
diff --git a/cub/test/catch2_test_device_segmented_scan.cu b/cub/test/catch2_test_device_segmented_scan.cu
index 30c2770456f..2d481eda20e 100644
--- a/cub/test/catch2_test_device_segmented_scan.cu
+++ b/cub/test/catch2_test_device_segmented_scan.cu
@@ -16,6 +16,7 @@
 #include <c2h/catch2_test_helper.h>
 #include <c2h/custom_type.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 
 DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::InclusiveSegmentedSum, device_inclusive_segmented_sum);
 DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::ExclusiveSegmentedSum, device_exclusive_segmented_sum);
@@ -73,34 +74,20 @@ bool check_segment(const c2h::host_vector<ValueT>& h_output,
   {
     if constexpr (cuda::std::is_floating_point_v<ValueT>)
     {
-      ValueT ref_v  = h_ref[pos];
-      ValueT act_v  = h_output[pos];
-      ValueT diff   = (ref_v - act_v);
-      ValueT adiff  = (diff > ValueT{0}) ? diff : -diff;
-      ValueT ref_av = (ref_v > ValueT{0}) ? ref_v : -ref_v;
-      ValueT act_av = (act_v > ValueT{0}) ? act_v : -act_v;
-
-      ValueT eps = ::cuda::std::numeric_limits<ValueT>::epsilon();
-      correct    = correct && (adiff < 3 * eps + 2 * eps * (::cuda::std::max(ref_av, act_av)));
+      correct = correct && isclose(h_ref[pos], h_output[pos]);
     }
     else if constexpr (cuda::std::is_same_v<ValueT, half_t> || cuda::std::is_same_v<ValueT, bfloat16_t>)
     {
-      float ref_v = h_ref[pos];
-      float act_v = h_output[pos];
+      float ref_v = static_cast<float>(h_ref[pos]);
+      float act_v = static_cast<float>(h_output[pos]);
       if (cuda::std::isfinite(ref_v) && cuda::std::isfinite(act_v))
       {
-        float diff   = (ref_v - act_v);
-        float adiff  = (diff > float{0}) ? diff : -diff;
-        float ref_av = (ref_v > float{0}) ? ref_v : -ref_v;
-        float act_av = (act_v > float{0}) ? act_v : -act_v;
-
-        float eps = float{1} / float{128};
-        correct   = correct && (adiff < 3 * eps + 5 * eps * (::cuda::std::max(ref_av, act_av)));
+        correct = correct && isclose(ref_v, act_v);
       }
     }
     else
     {
-      correct = correct && (h_ref[pos] == h_output[pos]);
+      correct = correct && isclose(h_ref[pos], h_output[pos]);
     }
     if (!correct)
     {
diff --git a/cub/test/test_util.h b/cub/test/test_util.h
index 8ac38867f49..89b3a7379e9 100644
--- a/cub/test/test_util.h
+++ b/cub/test/test_util.h
@@ -38,6 +38,7 @@
 #include "mersenne.h"
 #include <c2h/catch2_test_helper.h>
 #include <c2h/extended_types.h>
+#include <c2h/isclose.h>
 #include <c2h/test_util_vec.h>
 
 /******************************************************************************
@@ -1074,22 +1075,17 @@ int CompareResults(float* computed, float* reference, OffsetT len, bool verbose
 {
   for (OffsetT i = 0; i < len; i++)
   {
-    if (computed[i] != reference[i])
+    if (!isclose(computed[i], reference[i]))
     {
-      float difference = std::abs(computed[i] - reference[i]);
-      float fraction   = difference / std::abs(reference[i]);
-
-      if (fraction > 0.00015)
+      if (verbose)
       {
-        if (verbose)
-        {
-          std::cout
-            << "INCORRECT: [" << i << "]: "
-            << "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
-            << " (difference:" << difference << ", fraction: " << fraction << ")";
-        }
-        return 1;
+        float difference = std::abs(computed[i] - reference[i]);
+        std::cout
+          << "INCORRECT: [" << i << "]: "
+          << "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i]) << " (difference:" << difference
+          << ")";
       }
+      return 1;
     }
   }
   return 0;
@@ -1113,20 +1109,15 @@ int CompareResults(double* computed, double* reference, OffsetT len, bool verbos
 {
   for (OffsetT i = 0; i < len; i++)
   {
-    if (computed[i] != reference[i])
+    if (!isclose(computed[i], reference[i]))
     {
-      double difference = std::abs(computed[i] - reference[i]);
-      double fraction   = difference / std::abs(reference[i]);
-
-      if (fraction > 0.00015)
+      if (verbose)
       {
-        if (verbose)
-        {
-          std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
-                    << " (difference:" << difference << ", fraction: " << fraction << ")";
-        }
-        return 1;
+        double difference = std::abs(computed[i] - reference[i]);
+        std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
+                  << " (difference:" << difference << ")";
       }
+      return 1;
     }
   }
   return 0;
diff --git a/cub/test/thread_reduce/catch2_test_thread_reduce.cu b/cub/test/thread_reduce/catch2_test_thread_reduce.cu
index 4650cb99775..a71717af7d2 100644
--- a/cub/test/thread_reduce/catch2_test_thread_reduce.cu
+++ b/cub/test/thread_reduce/catch2_test_thread_reduce.cu
@@ -20,6 +20,7 @@
 #include "c2h/catch2_test_helper.h"
 #include "c2h/extended_types.h"
 #include "c2h/generators.h"
+#include <c2h/isclose.h>
 #include <catch2/matchers/catch_matchers_floating_point.hpp>
 
 /***********************************************************************************************************************
@@ -170,18 +171,10 @@ using cub_operator_fp_list =
  * Verify results and kernel launch
  **********************************************************************************************************************/
 
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((cuda::std::is_floating_point_v<T>) )
-void verify_results(const T& expected_data, const T& test_results)
-{
-  REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05}));
-}
-
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((!cuda::std::is_floating_point_v<T>) )
+template <typename T>
 void verify_results(const T& expected_data, const T& test_results)
 {
-  REQUIRE(expected_data == test_results);
+  REQUIRE(isclose(expected_data, test_results));
 }
 
 template <typename T, typename ReduceOperator>
@@ -330,7 +323,8 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests",
     auto reference_result =
       std::accumulate(h_in_float.begin(), h_in_float.begin() + num_items, operator_identity, std_reduce_op);
     run_thread_reduce_kernel(num_items, d_in, d_out, reduce_op);
-    verify_results(reference_result, float{c2h::host_vector<value_t>(d_out)[0]});
+    float test_result{c2h::host_vector<value_t>(d_out)[0]};
+    REQUIRE(isclose(reference_result, test_result, 0.05f));
   }
 }
 
diff --git a/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu b/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu
index d428f21af1a..90c85bbf84d 100644
--- a/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu
+++ b/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu
@@ -23,6 +23,7 @@
 #  include "c2h/catch2_test_helper.h"
 #  include "c2h/extended_types.h"
 #  include "c2h/generators.h"
+#  include <c2h/isclose.h>
 #  include <catch2/matchers/catch_matchers_floating_point.hpp>
 
 /***********************************************************************************************************************
@@ -110,18 +111,10 @@ using cub_operator_fp_list = c2h::type_list<cuda::std::plus<>, cuda::minimum<>>;
  * Verify results and kernel launch
  **********************************************************************************************************************/
 
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((cuda::std::is_floating_point<T>::value))
-void verify_results(const T& expected_data, const T& test_results)
-{
-  REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05}));
-}
-
-_CCCL_TEMPLATE(typename T)
-_CCCL_REQUIRES((!cuda::std::is_floating_point<T>::value))
+template <typename T>
 void verify_results(const T& expected_data, const T& test_results)
 {
-  REQUIRE(expected_data == test_results);
+  REQUIRE(isclose(expected_data, test_results));
 }
 
 template <typename T, typename ReduceOperator>