diff --git a/c2h/include/c2h/catch2_test_helper.h b/c2h/include/c2h/catch2_test_helper.h index c0637fd4e6a..d7f149812f8 100644 --- a/c2h/include/c2h/catch2_test_helper.h +++ b/c2h/include/c2h/catch2_test_helper.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -214,25 +215,49 @@ std::vector to_vec(std::vector const& vec) } } // namespace detail -#define REQUIRE_APPROX_EQ(ref, out) \ - { \ - auto vec_ref = detail::to_vec(ref); \ - auto vec_out = detail::to_vec(out); \ - REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out)); \ +#define REQUIRE_APPROX_EQ(ref, out) \ + { \ + auto vec_ref = detail::to_vec(ref); \ + auto vec_out = detail::to_vec(out); \ + for (size_t i = 0; i < vec_ref.size(); i++) \ + { \ + bool close = isclose(vec_ref[i], vec_out[i]); \ + if (!close) \ + { \ + INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \ + } \ + REQUIRE(close); \ + } \ } -#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps) \ - { \ - auto vec_ref = detail::to_vec(ref); \ - auto vec_out = detail::to_vec(out); \ - REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).epsilon(eps)); \ +#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps) \ + { \ + auto vec_ref = detail::to_vec(ref); \ + auto vec_out = detail::to_vec(out); \ + for (size_t i = 0; i < vec_ref.size(); i++) \ + { \ + bool close = isclose(vec_ref[i], vec_out[i], eps); \ + if (!close) \ + { \ + INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \ + } \ + REQUIRE(close); \ + } \ } -#define REQUIRE_APPROX_EQ_ABS(ref, out, abs) \ - { \ - auto vec_ref = detail::to_vec(ref); \ - auto vec_out = detail::to_vec(out); \ - REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).margin(abs)); \ +#define REQUIRE_APPROX_EQ_ABS(ref, out, abs) \ + { \ + auto vec_ref = detail::to_vec(ref); \ + auto vec_out = detail::to_vec(out); \ + for (size_t i = 0; i < vec_ref.size(); i++) \ + { \ + bool close = isclose(vec_ref[i], vec_out[i], 0 * vec_ref[i], abs); \ + if (!close) \ + { \ + INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \ + } \ + REQUIRE(close); \ + } \ } namespace c2h::detail diff --git a/c2h/include/c2h/check_results.cuh b/c2h/include/c2h/check_results.cuh index 6176b8231c5..c04911f2a89 100644 --- a/c2h/include/c2h/check_results.cuh +++ b/c2h/include/c2h/check_results.cuh @@ -11,7 +11,7 @@ #include -#include +#include template void verify_results(const c2h::host_vector& expected_data, const c2h::host_vector& test_results) @@ -42,8 +42,14 @@ void verify_results(const c2h::host_vector& expected_data, const c2h::host_ve { for (size_t i = 0; i < test_results.size(); ++i) { - REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, 0.01f)); - REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, 0.01f)); + bool close_x = isclose(expected_data[i].x, test_results[i].x, 0.01f); + bool close_y = isclose(expected_data[i].y, test_results[i].y, 0.01f); + if (!close_x || !close_y) + { + INFO("index " << i); + } + REQUIRE(close_x); + REQUIRE(close_y); } } else if constexpr (cuda::std::is_same_v || cuda::std::is_same_v) @@ -51,8 +57,16 @@ void verify_results(const c2h::host_vector& expected_data, const c2h::host_ve constexpr auto rel_err = cuda::std::is_same_v ? 0.08f : 0.2f; for (size_t i = 0; i < test_results.size(); ++i) { - REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, rel_err)); - REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, rel_err)); + bool close_x = + isclose(static_cast(expected_data[i].x), static_cast(test_results[i].x), rel_err); + bool close_y = + isclose(static_cast(expected_data[i].y), static_cast(test_results[i].y), rel_err); + if (!close_x || !close_y) + { + INFO("index " << i); + } + REQUIRE(close_x); + REQUIRE(close_y); } } else if constexpr (cuda::std::is_same_v> @@ -62,11 +76,17 @@ void verify_results(const c2h::host_vector& expected_data, const c2h::host_ve for (size_t i = 0; i < test_results.size(); ++i) { auto expected_real = static_cast(expected_data[i].real()); - auto test_real = test_results[i].real(); + auto test_real = static_cast(test_results[i].real()); auto expected_imag = static_cast(expected_data[i].imag()); - auto test_imag = test_results[i].imag(); - REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real, rel_err)); - REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag, rel_err)); + auto test_imag = static_cast(test_results[i].imag()); + bool close_real = isclose(expected_real, test_real, rel_err); + bool close_imag = isclose(expected_imag, test_imag, rel_err); + if (!close_real || !close_imag) + { + INFO("index " << i); + } + REQUIRE(close_real); + REQUIRE(close_imag); } } else if constexpr (cuda::std::__is_cuda_std_complex_v) @@ -77,8 +97,14 @@ void verify_results(const c2h::host_vector& expected_data, const c2h::host_ve auto test_real = test_results[i].real(); auto expected_imag = expected_data[i].imag(); auto test_imag = test_results[i].imag(); - REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real)); - REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag)); + bool close_real = isclose(expected_real, test_real); + bool close_imag = isclose(expected_imag, test_imag); + if (!close_real || !close_imag) + { + INFO("index " << i); + } + REQUIRE(close_real); + REQUIRE(close_imag); } } else diff --git a/c2h/include/c2h/isclose.h b/c2h/include/c2h/isclose.h new file mode 100644 index 00000000000..feaf56cd414 --- /dev/null +++ b/c2h/include/c2h/isclose.h @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include +#include +#include +#include + +template +bool isclose(T a, T b, T r_tol, T a_tol) +{ + if constexpr (std::is_floating_point_v) + { + if (a == b) + { + return true; + } + return std::abs(a - b) <= std::max(a_tol, r_tol * std::max(std::abs(a), std::abs(b))); + } + else + { + static_assert(std::is_integral_v, "isclose: unsupported type, expected floating point or integral"); + return a == b; + } +} + +template +bool isclose(T a, T b, T r_tol) +{ + return isclose(a, b, r_tol, T(0)); +} + +template +bool isclose(T a, T b) +{ + if constexpr (std::is_floating_point_v) + { + return isclose(a, b, T(1 << 8) * std::numeric_limits::epsilon(), T(0)); + } + else + { + static_assert(std::is_integral_v, "isclose: unsupported type, expected floating point or integral"); + return a == b; + } +} diff --git a/cub/test/catch2_test_device_segmented_scan.cu b/cub/test/catch2_test_device_segmented_scan.cu index 30c2770456f..2d481eda20e 100644 --- a/cub/test/catch2_test_device_segmented_scan.cu +++ b/cub/test/catch2_test_device_segmented_scan.cu @@ -16,6 +16,7 @@ #include #include #include +#include DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::InclusiveSegmentedSum, device_inclusive_segmented_sum); DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::ExclusiveSegmentedSum, device_exclusive_segmented_sum); @@ -73,34 +74,20 @@ bool check_segment(const c2h::host_vector& h_output, { if constexpr (cuda::std::is_floating_point_v) { - ValueT ref_v = h_ref[pos]; - ValueT act_v = h_output[pos]; - ValueT diff = (ref_v - act_v); - ValueT adiff = (diff > ValueT{0}) ? diff : -diff; - ValueT ref_av = (ref_v > ValueT{0}) ? ref_v : -ref_v; - ValueT act_av = (act_v > ValueT{0}) ? act_v : -act_v; - - ValueT eps = ::cuda::std::numeric_limits::epsilon(); - correct = correct && (adiff < 3 * eps + 2 * eps * (::cuda::std::max(ref_av, act_av))); + correct = correct && isclose(h_ref[pos], h_output[pos]); } else if constexpr (cuda::std::is_same_v || cuda::std::is_same_v) { - float ref_v = h_ref[pos]; - float act_v = h_output[pos]; + float ref_v = static_cast(h_ref[pos]); + float act_v = static_cast(h_output[pos]); if (cuda::std::isfinite(ref_v) && cuda::std::isfinite(act_v)) { - float diff = (ref_v - act_v); - float adiff = (diff > float{0}) ? diff : -diff; - float ref_av = (ref_v > float{0}) ? ref_v : -ref_v; - float act_av = (act_v > float{0}) ? act_v : -act_v; - - float eps = float{1} / float{128}; - correct = correct && (adiff < 3 * eps + 5 * eps * (::cuda::std::max(ref_av, act_av))); + correct = correct && isclose(ref_v, act_v); } } else { - correct = correct && (h_ref[pos] == h_output[pos]); + correct = correct && isclose(h_ref[pos], h_output[pos]); } if (!correct) { diff --git a/cub/test/test_util.h b/cub/test/test_util.h index 8ac38867f49..89b3a7379e9 100644 --- a/cub/test/test_util.h +++ b/cub/test/test_util.h @@ -38,6 +38,7 @@ #include "mersenne.h" #include #include +#include #include /****************************************************************************** @@ -1074,22 +1075,17 @@ int CompareResults(float* computed, float* reference, OffsetT len, bool verbose { for (OffsetT i = 0; i < len; i++) { - if (computed[i] != reference[i]) + if (!isclose(computed[i], reference[i])) { - float difference = std::abs(computed[i] - reference[i]); - float fraction = difference / std::abs(reference[i]); - - if (fraction > 0.00015) + if (verbose) { - if (verbose) - { - std::cout - << "INCORRECT: [" << i << "]: " - << "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i]) - << " (difference:" << difference << ", fraction: " << fraction << ")"; - } - return 1; + float difference = std::abs(computed[i] - reference[i]); + std::cout + << "INCORRECT: [" << i << "]: " + << "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i]) << " (difference:" << difference + << ")"; } + return 1; } } return 0; @@ -1113,20 +1109,15 @@ int CompareResults(double* computed, double* reference, OffsetT len, bool verbos { for (OffsetT i = 0; i < len; i++) { - if (computed[i] != reference[i]) + if (!isclose(computed[i], reference[i])) { - double difference = std::abs(computed[i] - reference[i]); - double fraction = difference / std::abs(reference[i]); - - if (fraction > 0.00015) + if (verbose) { - if (verbose) - { - std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i]) - << " (difference:" << difference << ", fraction: " << fraction << ")"; - } - return 1; + double difference = std::abs(computed[i] - reference[i]); + std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i]) + << " (difference:" << difference << ")"; } + return 1; } } return 0; diff --git a/cub/test/thread_reduce/catch2_test_thread_reduce.cu b/cub/test/thread_reduce/catch2_test_thread_reduce.cu index 4650cb99775..a71717af7d2 100644 --- a/cub/test/thread_reduce/catch2_test_thread_reduce.cu +++ b/cub/test/thread_reduce/catch2_test_thread_reduce.cu @@ -20,6 +20,7 @@ #include "c2h/catch2_test_helper.h" #include "c2h/extended_types.h" #include "c2h/generators.h" +#include #include /*********************************************************************************************************************** @@ -170,18 +171,10 @@ using cub_operator_fp_list = * Verify results and kernel launch **********************************************************************************************************************/ -_CCCL_TEMPLATE(typename T) -_CCCL_REQUIRES((cuda::std::is_floating_point_v) ) -void verify_results(const T& expected_data, const T& test_results) -{ - REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05})); -} - -_CCCL_TEMPLATE(typename T) -_CCCL_REQUIRES((!cuda::std::is_floating_point_v) ) +template void verify_results(const T& expected_data, const T& test_results) { - REQUIRE(expected_data == test_results); + REQUIRE(isclose(expected_data, test_results)); } template @@ -330,7 +323,8 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests", auto reference_result = std::accumulate(h_in_float.begin(), h_in_float.begin() + num_items, operator_identity, std_reduce_op); run_thread_reduce_kernel(num_items, d_in, d_out, reduce_op); - verify_results(reference_result, float{c2h::host_vector(d_out)[0]}); + float test_result{c2h::host_vector(d_out)[0]}; + REQUIRE(isclose(reference_result, test_result, 0.05f)); } } diff --git a/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu b/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu index d428f21af1a..90c85bbf84d 100644 --- a/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu +++ b/cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu @@ -23,6 +23,7 @@ # include "c2h/catch2_test_helper.h" # include "c2h/extended_types.h" # include "c2h/generators.h" +# include # include /*********************************************************************************************************************** @@ -110,18 +111,10 @@ using cub_operator_fp_list = c2h::type_list, cuda::minimum<>>; * Verify results and kernel launch **********************************************************************************************************************/ -_CCCL_TEMPLATE(typename T) -_CCCL_REQUIRES((cuda::std::is_floating_point::value)) -void verify_results(const T& expected_data, const T& test_results) -{ - REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05})); -} - -_CCCL_TEMPLATE(typename T) -_CCCL_REQUIRES((!cuda::std::is_floating_point::value)) +template void verify_results(const T& expected_data, const T& test_results) { - REQUIRE(expected_data == test_results); + REQUIRE(isclose(expected_data, test_results)); } template