Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions c2h/include/c2h/catch2_test_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <c2h/checked_allocator.cuh>
#include <c2h/device_policy.h>
#include <c2h/extended_types.h>
#include <c2h/isclose.h>
#include <c2h/test_util_vec.h>
#include <c2h/utility.h>
#include <c2h/vector.h>
Expand Down Expand Up @@ -214,25 +215,37 @@ std::vector<T> to_vec(std::vector<T> const& vec)
}
} // namespace detail

#define REQUIRE_APPROX_EQ(ref, out) \
{ \
auto vec_ref = detail::to_vec(ref); \
auto vec_out = detail::to_vec(out); \
REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out)); \
#define REQUIRE_APPROX_EQ(ref, out) \
{ \
auto vec_ref = detail::to_vec(ref); \
auto vec_out = detail::to_vec(out); \
for (size_t i = 0; i < vec_ref.size(); i++) \
{ \
INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
REQUIRE(isclose(vec_ref[i], vec_out[i])); \
} \
}

#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps) \
{ \
auto vec_ref = detail::to_vec(ref); \
auto vec_out = detail::to_vec(out); \
REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).epsilon(eps)); \
#define REQUIRE_APPROX_EQ_EPSILON(ref, out, eps) \
{ \
auto vec_ref = detail::to_vec(ref); \
auto vec_out = detail::to_vec(out); \
for (size_t i = 0; i < vec_ref.size(); i++) \
{ \
INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
REQUIRE(isclose(vec_ref[i], vec_out[i], eps)); \
} \
}

#define REQUIRE_APPROX_EQ_ABS(ref, out, abs) \
{ \
auto vec_ref = detail::to_vec(ref); \
auto vec_out = detail::to_vec(out); \
REQUIRE_THAT(vec_ref, Catch::Matchers::Approx(vec_out).margin(abs)); \
for (size_t i = 0; i < vec_ref.size(); i++) \
{ \
INFO("index " << i << ": " << vec_ref[i] << " vs " << vec_out[i]); \
REQUIRE(isclose(vec_ref[i], vec_out[i], 0 * vec_ref[i], abs)); \
} \
}

namespace c2h::detail
Expand Down
26 changes: 15 additions & 11 deletions c2h/include/c2h/check_results.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#include <test_util.h>

#include <catch2/matchers/catch_matchers_floating_point.hpp>
#include <c2h/isclose.h>

template <typename T>
void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_vector<T>& test_results)
Expand Down Expand Up @@ -42,17 +42,19 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
{
for (size_t i = 0; i < test_results.size(); ++i)
{
REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, 0.01f));
REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, 0.01f));
INFO("index " << i);
REQUIRE(isclose(expected_data[i].x, test_results[i].x, 0.01f));
REQUIRE(isclose(expected_data[i].y, test_results[i].y, 0.01f));
}
}
else if constexpr (cuda::std::is_same_v<T, __nv_bfloat162> || cuda::std::is_same_v<T, __half2>)
{
constexpr auto rel_err = cuda::std::is_same_v<T, __half2> ? 0.08f : 0.2f;
for (size_t i = 0; i < test_results.size(); ++i)
{
REQUIRE_THAT(expected_data[i].x, Catch::Matchers::WithinRel(test_results[i].x, rel_err));
REQUIRE_THAT(expected_data[i].y, Catch::Matchers::WithinRel(test_results[i].y, rel_err));
INFO("index " << i);
REQUIRE(isclose(static_cast<float>(expected_data[i].x), static_cast<float>(test_results[i].x), rel_err));
REQUIRE(isclose(static_cast<float>(expected_data[i].y), static_cast<float>(test_results[i].y), rel_err));
}
}
else if constexpr (cuda::std::is_same_v<T, cuda::std::complex<__nv_bfloat16>>
Expand All @@ -62,11 +64,12 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
for (size_t i = 0; i < test_results.size(); ++i)
{
auto expected_real = static_cast<float>(expected_data[i].real());
auto test_real = test_results[i].real();
auto test_real = static_cast<float>(test_results[i].real());
auto expected_imag = static_cast<float>(expected_data[i].imag());
auto test_imag = test_results[i].imag();
REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real, rel_err));
REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag, rel_err));
auto test_imag = static_cast<float>(test_results[i].imag());
INFO("index " << i);
REQUIRE(isclose(expected_real, test_real, rel_err));
REQUIRE(isclose(expected_imag, test_imag, rel_err));
}
}
else if constexpr (cuda::std::__is_cuda_std_complex_v<T>)
Expand All @@ -77,8 +80,9 @@ void verify_results(const c2h::host_vector<T>& expected_data, const c2h::host_ve
auto test_real = test_results[i].real();
auto expected_imag = expected_data[i].imag();
auto test_imag = test_results[i].imag();
REQUIRE_THAT(expected_real, Catch::Matchers::WithinRel(test_real));
REQUIRE_THAT(expected_imag, Catch::Matchers::WithinRel(test_imag));
INFO("index " << i);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a lot of INFO added which would litter the output on failure in comparisons of large arrays.

I think it would be useful to save values of isclose checks, and place INFO("index" << i); in the branch executed only if isclose check is not met. REQUIRE would reuse the result of testing.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

made INFO conditional on failed check

REQUIRE(isclose(expected_real, test_real));
REQUIRE(isclose(expected_imag, test_imag));
}
}
else
Expand Down
45 changes: 45 additions & 0 deletions c2h/include/c2h/isclose.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
// SPDX-License-Identifier: BSD-3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// SPDX-License-Identifier: BSD-3
// SPDX-License-Identifier: BSD-3-Clause

I see that some headers in c2h use BSD-3, but the documented identifier is BSD-3-Clause.


#pragma once

#include <algorithm>
#include <cmath>
#include <limits>
#include <type_traits>

template <typename T>
bool isclose(T a, T b, T r_tol, T a_tol)
{
if constexpr (std::is_floating_point_v<T>)
{
if (a == b)
{
return true;
}
return std::abs(a - b) <= std::max(a_tol, r_tol * std::max(std::abs(a), std::abs(b)));
}
else
{
return a == b;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If isclose is used on a complex type, it would use a == b branch, perhaps unexpectedly to the user. Perhaps it is worth adding static_assert(std::is_integral_v<T>, "Non-integral type using exact comparison");, or perhaps a static assertion that type T does not expose real and imag methods.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i have added an assert

}
}

// Relative-tolerance-only comparison: forwards to the four-argument isclose
// overload with the absolute tolerance fixed at T(0).
template <typename T>
bool isclose(T a, T b, T r_tol)
{
  const T zero_abs_tol = T(0);
  return isclose(a, b, r_tol, zero_abs_tol);
}

template <typename T>
bool isclose(T a, T b)
{
if constexpr (std::is_floating_point_v<T>)
{
return isclose(a, b, T(1000) * std::numeric_limits<T>::epsilon(), T(0));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use powers of 2 here. T(1 << 10) would mean that a discrepancy in roughly the 10 lowest mantissa bits is tolerated.

Ten bits is a little under half of the 23 explicit mantissa bits of a single-precision floating-point number, but almost all of the 11 mantissa bits (10 explicit plus the implicit leading one) of a half-precision number.

Given that std::is_floating_point_v<std::float16_t> is true, perhaps we should use T(1 << 8) as default multiplier instead?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, switched to T(1 << 8)

}
else
{
return a == b;
}
}
25 changes: 6 additions & 19 deletions cub/test/catch2_test_device_segmented_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <c2h/catch2_test_helper.h>
#include <c2h/custom_type.h>
#include <c2h/extended_types.h>
#include <c2h/isclose.h>

DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::InclusiveSegmentedSum, device_inclusive_segmented_sum);
DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedScan::ExclusiveSegmentedSum, device_exclusive_segmented_sum);
Expand Down Expand Up @@ -73,34 +74,20 @@ bool check_segment(const c2h::host_vector<ValueT>& h_output,
{
if constexpr (cuda::std::is_floating_point_v<ValueT>)
{
ValueT ref_v = h_ref[pos];
ValueT act_v = h_output[pos];
ValueT diff = (ref_v - act_v);
ValueT adiff = (diff > ValueT{0}) ? diff : -diff;
ValueT ref_av = (ref_v > ValueT{0}) ? ref_v : -ref_v;
ValueT act_av = (act_v > ValueT{0}) ? act_v : -act_v;

ValueT eps = ::cuda::std::numeric_limits<ValueT>::epsilon();
correct = correct && (adiff < 3 * eps + 2 * eps * (::cuda::std::max(ref_av, act_av)));
correct = correct && isclose(h_ref[pos], h_output[pos]);
}
else if constexpr (cuda::std::is_same_v<ValueT, half_t> || cuda::std::is_same_v<ValueT, bfloat16_t>)
{
float ref_v = h_ref[pos];
float act_v = h_output[pos];
float ref_v = static_cast<float>(h_ref[pos]);
float act_v = static_cast<float>(h_output[pos]);
if (cuda::std::isfinite(ref_v) && cuda::std::isfinite(act_v))
{
float diff = (ref_v - act_v);
float adiff = (diff > float{0}) ? diff : -diff;
float ref_av = (ref_v > float{0}) ? ref_v : -ref_v;
float act_av = (act_v > float{0}) ? act_v : -act_v;

float eps = float{1} / float{128};
correct = correct && (adiff < 3 * eps + 5 * eps * (::cuda::std::max(ref_av, act_av)));
correct = correct && isclose(ref_v, act_v);
}
}
else
{
correct = correct && (h_ref[pos] == h_output[pos]);
correct = correct && isclose(h_ref[pos], h_output[pos]);
}
if (!correct)
{
Expand Down
39 changes: 15 additions & 24 deletions cub/test/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "mersenne.h"
#include <c2h/catch2_test_helper.h>
#include <c2h/extended_types.h>
#include <c2h/isclose.h>
#include <c2h/test_util_vec.h>

/******************************************************************************
Expand Down Expand Up @@ -1074,22 +1075,17 @@ int CompareResults(float* computed, float* reference, OffsetT len, bool verbose
{
for (OffsetT i = 0; i < len; i++)
{
if (computed[i] != reference[i])
if (!isclose(computed[i], reference[i]))
{
float difference = std::abs(computed[i] - reference[i]);
float fraction = difference / std::abs(reference[i]);

if (fraction > 0.00015)
if (verbose)
{
if (verbose)
{
std::cout
<< "INCORRECT: [" << i << "]: "
<< "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
<< " (difference:" << difference << ", fraction: " << fraction << ")";
}
return 1;
float difference = std::abs(computed[i] - reference[i]);
std::cout
<< "INCORRECT: [" << i << "]: "
<< "(computed) " << CoutCast(computed[i]) << " != " << CoutCast(reference[i]) << " (difference:" << difference
<< ")";
}
return 1;
}
}
return 0;
Expand All @@ -1113,20 +1109,15 @@ int CompareResults(double* computed, double* reference, OffsetT len, bool verbos
{
for (OffsetT i = 0; i < len; i++)
{
if (computed[i] != reference[i])
if (!isclose(computed[i], reference[i]))
{
double difference = std::abs(computed[i] - reference[i]);
double fraction = difference / std::abs(reference[i]);

if (fraction > 0.00015)
if (verbose)
{
if (verbose)
{
std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
<< " (difference:" << difference << ", fraction: " << fraction << ")";
}
return 1;
double difference = std::abs(computed[i] - reference[i]);
std::cout << "INCORRECT: [" << i << "]: " << CoutCast(computed[i]) << " != " << CoutCast(reference[i])
<< " (difference:" << difference << ")";
}
return 1;
}
}
return 0;
Expand Down
16 changes: 5 additions & 11 deletions cub/test/thread_reduce/catch2_test_thread_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "c2h/catch2_test_helper.h"
#include "c2h/extended_types.h"
#include "c2h/generators.h"
#include <c2h/isclose.h>
#include <catch2/matchers/catch_matchers_floating_point.hpp>

/***********************************************************************************************************************
Expand Down Expand Up @@ -170,18 +171,10 @@ using cub_operator_fp_list =
* Verify results and kernel launch
**********************************************************************************************************************/

_CCCL_TEMPLATE(typename T)
_CCCL_REQUIRES((cuda::std::is_floating_point_v<T>) )
void verify_results(const T& expected_data, const T& test_results)
{
REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05}));
}

_CCCL_TEMPLATE(typename T)
_CCCL_REQUIRES((!cuda::std::is_floating_point_v<T>) )
template <typename T>
void verify_results(const T& expected_data, const T& test_results)
{
REQUIRE(expected_data == test_results);
REQUIRE(isclose(expected_data, test_results));
}

template <typename T, typename ReduceOperator>
Expand Down Expand Up @@ -330,7 +323,8 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests",
auto reference_result =
std::accumulate(h_in_float.begin(), h_in_float.begin() + num_items, operator_identity, std_reduce_op);
run_thread_reduce_kernel(num_items, d_in, d_out, reduce_op);
verify_results(reference_result, float{c2h::host_vector<value_t>(d_out)[0]});
float test_result = float{c2h::host_vector<value_t>(d_out)[0]};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit:

Suggested change
float test_result = float{c2h::host_vector<value_t>(d_out)[0]};
float test_result{c2h::host_vector<value_t>(d_out)[0]};

REQUIRE(isclose(reference_result, test_result, 0.05f));
}
}

Expand Down
13 changes: 3 additions & 10 deletions cub/test/thread_reduce/catch2_test_thread_reduce_check_sass.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
# include "c2h/catch2_test_helper.h"
# include "c2h/extended_types.h"
# include "c2h/generators.h"
# include <c2h/isclose.h>
# include <catch2/matchers/catch_matchers_floating_point.hpp>

/***********************************************************************************************************************
Expand Down Expand Up @@ -110,18 +111,10 @@ using cub_operator_fp_list = c2h::type_list<cuda::std::plus<>, cuda::minimum<>>;
* Verify results and kernel launch
**********************************************************************************************************************/

_CCCL_TEMPLATE(typename T)
_CCCL_REQUIRES((cuda::std::is_floating_point<T>::value))
void verify_results(const T& expected_data, const T& test_results)
{
REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05}));
}

_CCCL_TEMPLATE(typename T)
_CCCL_REQUIRES((!cuda::std::is_floating_point<T>::value))
template <typename T>
void verify_results(const T& expected_data, const T& test_results)
{
REQUIRE(expected_data == test_results);
REQUIRE(isclose(expected_data, test_results));
}

template <typename T, typename ReduceOperator>
Expand Down
13 changes: 4 additions & 9 deletions thrust/testing/catch2_test_complex.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <vector>

#include "catch2_test_helper.h"
#include <c2h/isclose.h>
#include <unittest/random.h>
#include <unittest/testframework.h>

Expand Down Expand Up @@ -42,30 +43,24 @@ struct other_floating_point_type<double>
// Shorthand for the nested `type` member of other_floating_point_type<T>.
template <typename T>
using other_floating_point_type_t = typename other_floating_point_type<T>::type;

// Helper to compare complex numbers with approximate equality
// Supports both scalar and thrust::complex<T> types
double const DEFAULT_RELATIVE_TOL = 1e-4;
double const DEFAULT_ABSOLUTE_TOL = 1e-4;

// Compile-time flag telling whether a type is a complex number type.
// Primary template: false for an arbitrary T.
template <typename T>
inline constexpr bool is_complex = false;
// Specialization: true for thrust::complex<T>.
template <typename T>
inline constexpr bool is_complex<thrust::complex<T>> = true;
// Specialization: true for std::complex<T>.
template <typename T>
inline constexpr bool is_complex<std::complex<T>> = true;

// Overload for complex types
template <typename T1, typename T2>
::cuda::std::enable_if_t<is_complex<T1> && is_complex<T2>> require_almost_equal(const T1& a, const T2& b)
{
CHECK(a.real() == Catch::Approx(b.real()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
CHECK(a.imag() == Catch::Approx(b.imag()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
CHECK(isclose(static_cast<double>(a.real()), static_cast<double>(b.real())));
CHECK(isclose(static_cast<double>(a.imag()), static_cast<double>(b.imag())));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why cast to double? The cast causes the relative tolerance specific to double precision to be used even when comparing complex values whose real/imaginary types are less precise.

Is anything wrong with checking isclose(a.real(), b.real()) and isclose(a.imag(), b.imag())?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the issue is that a common type is needed, perhaps use std::common_type<T1, T2>: https://en.cppreference.com/w/cpp/types/common_type.html

}

template <typename T1, typename T2>
::cuda::std::enable_if_t<!is_complex<T1> && !is_complex<T2>> require_almost_equal(const T1& a, const T2& b)
{
CHECK(a == Catch::Approx(b).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL));
CHECK(isclose(static_cast<double>(a), static_cast<double>(b)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question here.

}
} // anonymous namespace

Expand Down
Loading