-
Notifications
You must be signed in to change notification settings - Fork 367
unify float comparison helpers with isclose #8131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
c102dcd
97f7b85
9a658be
b5fc9a5
32b5698
526c801
1130c28
6eb6f00
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,45 @@ | ||||||
| // SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. | ||||||
| // SPDX-License-Identifier: BSD-3 | ||||||
|
||||||
| // SPDX-License-Identifier: BSD-3 | |
| // SPDX-License-Identifier: BSD-3-Clause |
I see that some headers in c2h use BSD-3, but the documented identifier is BSD-3-Clause.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If isclose is used on a complex type, it would use a == b branch, perhaps unexpectedly to the user. Perhaps it is worth adding static_assert(std::is_integral_v<T>, "Non-integral type using exact comparison");, or perhaps a static assertion that type T does not expose real and imag methods.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i have added an assert
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would use powers of 2 here. T( 1 << 10 ) would mean discrepancy of 10 binary ULPs is tolerated.
10 binary units is about half of 23 total explicit mantissa units for single precision floating point numbers, but is all most of 11 total explicit half-precision explicit mantissa units.
Given that std::is_floating_point_v<std::float16_t> is true, perhaps we should use T(1 << 8) as default multiplier instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good point, switched to T(1 << 8)
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |||||
| #include "c2h/catch2_test_helper.h" | ||||||
| #include "c2h/extended_types.h" | ||||||
| #include "c2h/generators.h" | ||||||
| #include <c2h/isclose.h> | ||||||
| #include <catch2/matchers/catch_matchers_floating_point.hpp> | ||||||
|
|
||||||
| /*********************************************************************************************************************** | ||||||
|
|
@@ -170,18 +171,10 @@ using cub_operator_fp_list = | |||||
| * Verify results and kernel launch | ||||||
| **********************************************************************************************************************/ | ||||||
|
|
||||||
| _CCCL_TEMPLATE(typename T) | ||||||
| _CCCL_REQUIRES((cuda::std::is_floating_point_v<T>) ) | ||||||
| void verify_results(const T& expected_data, const T& test_results) | ||||||
| { | ||||||
| REQUIRE_THAT(expected_data, Catch::Matchers::WithinRel(test_results, T{0.05})); | ||||||
| } | ||||||
|
|
||||||
| _CCCL_TEMPLATE(typename T) | ||||||
| _CCCL_REQUIRES((!cuda::std::is_floating_point_v<T>) ) | ||||||
| template <typename T> | ||||||
| void verify_results(const T& expected_data, const T& test_results) | ||||||
| { | ||||||
| REQUIRE(expected_data == test_results); | ||||||
| REQUIRE(isclose(expected_data, test_results)); | ||||||
| } | ||||||
|
|
||||||
| template <typename T, typename ReduceOperator> | ||||||
|
|
@@ -330,7 +323,8 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests", | |||||
| auto reference_result = | ||||||
| std::accumulate(h_in_float.begin(), h_in_float.begin() + num_items, operator_identity, std_reduce_op); | ||||||
| run_thread_reduce_kernel(num_items, d_in, d_out, reduce_op); | ||||||
| verify_results(reference_result, float{c2h::host_vector<value_t>(d_out)[0]}); | ||||||
| float test_result = float{c2h::host_vector<value_t>(d_out)[0]}; | ||||||
|
||||||
| float test_result = float{c2h::host_vector<value_t>(d_out)[0]}; | |
| float test_result{c2h::host_vector<value_t>(d_out)[0]}; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| #include <vector> | ||
|
|
||
| #include "catch2_test_helper.h" | ||
| #include <c2h/isclose.h> | ||
| #include <unittest/random.h> | ||
| #include <unittest/testframework.h> | ||
|
|
||
|
|
@@ -42,30 +43,24 @@ struct other_floating_point_type<double> | |
| template <typename T> | ||
| using other_floating_point_type_t = typename other_floating_point_type<T>::type; | ||
|
|
||
| // Helper to compare complex numbers with approximate equality | ||
| // Supports both scalar and thrust::complex<T> types | ||
| double const DEFAULT_RELATIVE_TOL = 1e-4; | ||
| double const DEFAULT_ABSOLUTE_TOL = 1e-4; | ||
|
|
||
| template <typename T> | ||
| inline constexpr bool is_complex = false; | ||
| template <typename T> | ||
| inline constexpr bool is_complex<thrust::complex<T>> = true; | ||
| template <typename T> | ||
| inline constexpr bool is_complex<std::complex<T>> = true; | ||
|
|
||
| // Overload for complex types | ||
| template <typename T1, typename T2> | ||
| ::cuda::std::enable_if_t<is_complex<T1> && is_complex<T2>> require_almost_equal(const T1& a, const T2& b) | ||
| { | ||
| CHECK(a.real() == Catch::Approx(b.real()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL)); | ||
| CHECK(a.imag() == Catch::Approx(b.imag()).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL)); | ||
| CHECK(isclose(static_cast<double>(a.real()), static_cast<double>(b.real()))); | ||
| CHECK(isclose(static_cast<double>(a.imag()), static_cast<double>(b.imag()))); | ||
|
||
| } | ||
|
|
||
| template <typename T1, typename T2> | ||
| ::cuda::std::enable_if_t<!is_complex<T1> && !is_complex<T2>> require_almost_equal(const T1& a, const T2& b) | ||
| { | ||
| CHECK(a == Catch::Approx(b).margin(DEFAULT_ABSOLUTE_TOL).epsilon(DEFAULT_RELATIVE_TOL)); | ||
| CHECK(isclose(static_cast<double>(a), static_cast<double>(b))); | ||
|
||
| } | ||
| } // anonymous namespace | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a lot of
INFOadded which would litter the output on failure in comparisons of large arrays.I think it would be useful to save values of
isclosechecks, and placeINFO("index" << i);in the branch executed only ifisclosecheck is not met.REQUIREwould reuse the result of testing.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
made INFO conditional on failed check