Skip to content
127 changes: 70 additions & 57 deletions cpp/tests/groupby/groupby_test_util.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -17,112 +17,125 @@
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>

void test_single_agg(cudf::column_view const& keys,
cudf::column_view const& values,
cudf::column_view const& expect_keys,
cudf::column_view const& expect_vals,
std::unique_ptr<cudf::groupby_aggregation>&& agg,
namespace cudf ::test {

// Runs one groupby aggregation over `values` grouped by `keys` and checks the
// produced keys/values against `expect_keys` / `expect_vals`.
//
// keys, values               input key column and value column
// expect_keys, expect_vals   reference result; sorted here by expect_keys unless
//                            reference_keys_are_sorted is sorted::YES
// agg                        the aggregation under test (moved into the request)
// use_sort                   YES appends an extra nth_element(0) aggregation as a
//                            workaround to force the sort-based implementation
// include_null_keys, keys_are_sorted, column_order, null_precedence
//                            forwarded to the groupby object; column_order and
//                            null_precedence are also used when sorting
// location                   file/line reported through SCOPED_TRACE on failure
//
// Only results[0] of the single request is compared, so the extra nth_element
// result added for use_sort == YES is never inspected.
//
// NOTE(review): this span appears to be a rendered diff -- removed and added
// lines are interleaved without +/- markers (e.g. the `cudf::`-qualified
// parameter lines below are followed by their unqualified replacements).
void test_single_agg(column_view const& keys,
column_view const& values,
column_view const& expect_keys,
column_view const& expect_vals,
std::unique_ptr<groupby_aggregation>&& agg,
force_use_sort_impl use_sort,
cudf::null_policy include_null_keys,
cudf::sorted keys_are_sorted,
std::vector<cudf::order> const& column_order,
std::vector<cudf::null_order> const& null_precedence,
cudf::sorted reference_keys_are_sorted)
null_policy include_null_keys,
sorted keys_are_sorted,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
sorted reference_keys_are_sorted,
std::source_location const& location)
{
// Attach the supplied source location to any failure raised inside this helper.
SCOPED_TRACE("Original failure location: " + std::string{location.file_name()} + ":" +
std::to_string(location.line()));

// Build owning copies of the reference keys/values, ordered by expect_keys when
// the caller did not declare the reference as already sorted.
auto const [sorted_expect_keys, sorted_expect_vals] = [&]() {
if (reference_keys_are_sorted == cudf::sorted::NO) {
if (reference_keys_are_sorted == sorted::NO) {
auto const sort_expect_order =
cudf::sorted_order(cudf::table_view{{expect_keys}}, column_order, null_precedence);
auto sorted_expect_keys = cudf::gather(cudf::table_view{{expect_keys}}, *sort_expect_order);
auto sorted_expect_vals = cudf::gather(cudf::table_view{{expect_vals}}, *sort_expect_order);
return std::make_pair(std::move(sorted_expect_keys), std::move(sorted_expect_vals));
} else {
auto sorted_expect_keys = std::make_unique<cudf::table>(cudf::table_view{{expect_keys}});
auto sorted_expect_vals = std::make_unique<cudf::table>(cudf::table_view{{expect_vals}});
sorted_order(table_view{{expect_keys}}, column_order, null_precedence);
auto sorted_expect_keys = gather(table_view{{expect_keys}}, *sort_expect_order);
auto sorted_expect_vals = gather(table_view{{expect_vals}}, *sort_expect_order);
return std::make_pair(std::move(sorted_expect_keys), std::move(sorted_expect_vals));
}
auto sorted_expect_keys = std::make_unique<table>(table_view{{expect_keys}});
auto sorted_expect_vals = std::make_unique<table>(table_view{{expect_vals}});
return std::make_pair(std::move(sorted_expect_keys), std::move(sorted_expect_vals));
}();

// A single request: `values` with `agg` as its first aggregation.
std::vector<cudf::groupby::aggregation_request> requests;
std::vector<groupby::aggregation_request> requests;
requests.emplace_back();
requests[0].values = values;

requests[0].aggregations.push_back(std::move(agg));

if (use_sort == force_use_sort_impl::YES) {
// WAR to force cudf::groupby to use sort implementation
requests[0].aggregations.push_back(
cudf::make_nth_element_aggregation<cudf::groupby_aggregation>(0));
// WAR to force groupby to use sort implementation
requests[0].aggregations.push_back(make_nth_element_aggregation<groupby_aggregation>(0));
}

// since the default behavior of cudf::groupby(...) for an empty null_precedence vector is
// null_order::AFTER whereas for cudf::sorted_order(...) it's null_order::BEFORE
auto const precedence = null_precedence.empty()
? std::vector<cudf::null_order>(1, cudf::null_order::BEFORE)
: null_precedence;
// since the default behavior of groupby(...) for an empty null_precedence vector is
// null_order::AFTER whereas for sorted_order(...) it's null_order::BEFORE
auto const precedence =
null_precedence.empty() ? std::vector<null_order>(1, null_order::BEFORE) : null_precedence;

cudf::groupby::groupby gb_obj(
cudf::table_view({keys}), include_null_keys, keys_are_sorted, column_order, precedence);
groupby::groupby gb_obj(
table_view({keys}), include_null_keys, keys_are_sorted, column_order, precedence);

auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream());
auto result = gb_obj.aggregate(requests, test::get_default_stream());

// Forced sort path on unsorted keys: the result is compared as returned
// (presumably because that path already yields keys in sorted order -- confirm).
// Every other combination re-sorts the result by its keys before comparing.
if (use_sort == force_use_sort_impl::YES && keys_are_sorted == cudf::sorted::NO) {
if (use_sort == force_use_sort_impl::YES && keys_are_sorted == sorted::NO) {
CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_expect_keys, result.first->view());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sorted_expect_vals->get_column(0),
*result.second[0].results[0]);

} else {
auto const sort_order = cudf::sorted_order(result.first->view(), column_order, precedence);
auto const sorted_keys = cudf::gather(result.first->view(), *sort_order);
auto const sorted_vals =
cudf::gather(cudf::table_view({result.second[0].results[0]->view()}), *sort_order);
auto const sort_order = sorted_order(result.first->view(), column_order, precedence);
auto const sorted_keys = gather(result.first->view(), *sort_order);
auto const sorted_vals = gather(table_view({result.second[0].results[0]->view()}), *sort_order);

CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_expect_keys, *sorted_keys);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sorted_expect_vals->get_column(0),
sorted_vals->get_column(0));
}
}

// Checks a groupby sum aggregation by calling test_single_agg twice: once with
// force_use_sort_impl::YES and once with force_use_sort_impl::NO. Both calls
// use null_policy::INCLUDE for the keys.
//
// keys, values                    input key column and value column
// expected_keys, expected_values  reference result passed to test_single_agg
// location                        (newer signature only) forwarded to
//                                 test_single_agg
//
// NOTE(review): this span appears to be a rendered diff -- the first signature
// and the `cudf::`-qualified argument lines are the older text, each followed
// by its replacement. The newer call spells out sorted::NO, {}, {}, sorted::NO
// for the remaining test_single_agg arguments so that `location` can be passed.
void test_sum_agg(cudf::column_view const& keys,
cudf::column_view const& values,
cudf::column_view const& expected_keys,
cudf::column_view const& expected_values)
void test_sum_agg(column_view const& keys,
column_view const& values,
column_view const& expected_keys,
column_view const& expected_values,
std::source_location const& location)
{
// A fresh sum aggregation is created per call because test_single_agg takes
// the aggregation by rvalue reference.
auto const do_test = [&](auto const use_sort_option) {
test_single_agg(keys,
values,
expected_keys,
expected_values,
cudf::make_sum_aggregation<cudf::groupby_aggregation>(),
make_sum_aggregation<groupby_aggregation>(),
use_sort_option,
cudf::null_policy::INCLUDE);
null_policy::INCLUDE,
sorted::NO,
{},
{},
sorted::NO,
location);
};
do_test(force_use_sort_impl::YES);
do_test(force_use_sort_impl::NO);
}

// Runs one groupby scan aggregation over `values` grouped by `keys` and checks
// the produced keys/values against `expect_keys` / `expect_vals`.
//
// keys, values               input key column and value column
// expect_keys, expect_vals   reference result, compared exactly as given (no
//                            re-sorting of either the reference or the result)
// agg                        the scan aggregation under test (moved into the request)
// include_null_keys, keys_are_sorted, column_order, null_precedence
//                            forwarded unchanged to the groupby object
// location                   (newer signature only) file/line reported through
//                            SCOPED_TRACE on failure
//
// NOTE(review): this span appears to be a rendered diff -- the first signature
// and the `cudf::`-qualified lines are the older text, each followed by its
// replacement.
void test_single_scan(cudf::column_view const& keys,
cudf::column_view const& values,
cudf::column_view const& expect_keys,
cudf::column_view const& expect_vals,
std::unique_ptr<cudf::groupby_scan_aggregation>&& agg,
cudf::null_policy include_null_keys,
cudf::sorted keys_are_sorted,
std::vector<cudf::order> const& column_order,
std::vector<cudf::null_order> const& null_precedence)
void test_single_scan(column_view const& keys,
column_view const& values,
column_view const& expect_keys,
column_view const& expect_vals,
std::unique_ptr<groupby_scan_aggregation>&& agg,
null_policy include_null_keys,
sorted keys_are_sorted,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
std::source_location const& location)
{
std::vector<cudf::groupby::scan_request> requests;
// Attach the supplied source location to any failure raised inside this helper.
SCOPED_TRACE("Original failure location: " + std::string{location.file_name()} + ":" +
std::to_string(location.line()));

// A single request: `values` with `agg` as its only aggregation.
std::vector<groupby::scan_request> requests;
requests.emplace_back();
requests[0].values = values;

requests[0].aggregations.push_back(std::move(agg));

cudf::groupby::groupby gb_obj(
cudf::table_view({keys}), include_null_keys, keys_are_sorted, column_order, null_precedence);
groupby::groupby gb_obj(
table_view({keys}), include_null_keys, keys_are_sorted, column_order, null_precedence);

// cudf::groupby scan uses sort implementation
// groupby scan uses sort implementation
// NOTE(review): no stream argument is passed here, whereas test_single_agg
// passes test::get_default_stream() to aggregate() -- confirm the default
// stream used by scan() is the one intended for tests.
auto result = gb_obj.scan(requests);

CUDF_TEST_EXPECT_TABLES_EQUAL(cudf::table_view({expect_keys}), result.first->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(table_view({expect_keys}), result.first->view());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expect_vals, *result.second[0].results[0]);
}

} // namespace cudf::test
62 changes: 35 additions & 27 deletions cpp/tests/groupby/groupby_test_util.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -8,31 +8,39 @@
#include <cudf/groupby.hpp>
#include <cudf/types.hpp>

#include <source_location>

// Flag for test_single_agg: YES makes the helper append an extra nth_element
// aggregation as a workaround that forces the sort-based groupby implementation.
enum class force_use_sort_impl : bool { NO, YES };

// Runs a single groupby aggregation `agg` on `values` grouped by `keys` and
// compares the result with `expect_keys` / `expect_vals`. The expected data is
// sorted by key first unless reference_keys_are_sorted is cudf::sorted::YES.
// NOTE(review): in this diff rendering this looks like the pre-change,
// global-scope declaration (no source-location parameter).
void test_single_agg(cudf::column_view const& keys,
cudf::column_view const& values,
cudf::column_view const& expect_keys,
cudf::column_view const& expect_vals,
std::unique_ptr<cudf::groupby_aggregation>&& agg,
force_use_sort_impl use_sort = force_use_sort_impl::NO,
cudf::null_policy include_null_keys = cudf::null_policy::EXCLUDE,
cudf::sorted keys_are_sorted = cudf::sorted::NO,
std::vector<cudf::order> const& column_order = {},
std::vector<cudf::null_order> const& null_precedence = {},
cudf::sorted reference_keys_are_sorted = cudf::sorted::NO);

// Checks a groupby sum aggregation through test_single_agg, once with and once
// without forcing the sort-based implementation.
// NOTE(review): in this diff rendering this looks like the pre-change,
// global-scope declaration (no source-location parameter).
void test_sum_agg(cudf::column_view const& keys,
cudf::column_view const& values,
cudf::column_view const& expected_keys,
cudf::column_view const& expected_values);

// Runs a single groupby scan aggregation `agg` on `values` grouped by `keys`
// and compares the result directly with `expect_keys` / `expect_vals`.
// NOTE(review): in this diff rendering this looks like the pre-change,
// global-scope declaration (no source-location parameter).
void test_single_scan(cudf::column_view const& keys,
cudf::column_view const& values,
cudf::column_view const& expect_keys,
cudf::column_view const& expect_vals,
std::unique_ptr<cudf::groupby_scan_aggregation>&& agg,
cudf::null_policy include_null_keys = cudf::null_policy::EXCLUDE,
cudf::sorted keys_are_sorted = cudf::sorted::NO,
std::vector<cudf::order> const& column_order = {},
std::vector<cudf::null_order> const& null_precedence = {});
// Shared helpers for the groupby tests. Each helper takes a trailing
// std::source_location that defaults to the call site, which the definitions
// report through SCOPED_TRACE when a check fails.
namespace cudf::test {

// Runs a single groupby aggregation and compares it with a reference result.
//
// keys, values               input key column and value column
// expect_keys, expect_vals   reference result
// agg                        aggregation under test
// use_sort                   YES forces the sort-based groupby implementation
// include_null_keys, keys_are_sorted, column_order, null_precedence
//                            forwarded to the groupby object
// reference_keys_are_sorted  sorted::NO (default) means the reference is sorted
//                            by key before comparison
// location                   call site to report on failure
void test_single_agg(column_view const& keys,
column_view const& values,
column_view const& expect_keys,
column_view const& expect_vals,
std::unique_ptr<groupby_aggregation>&& agg,
force_use_sort_impl use_sort = force_use_sort_impl::NO,
null_policy include_null_keys = null_policy::EXCLUDE,
sorted keys_are_sorted = sorted::NO,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
sorted reference_keys_are_sorted = sorted::NO,
std::source_location const& location = std::source_location::current());
// Checks a groupby sum aggregation via test_single_agg, once with and once
// without forcing the sort-based implementation.
void test_sum_agg(column_view const& keys,
column_view const& values,
column_view const& expected_keys,
column_view const& expected_values,
std::source_location const& location = std::source_location::current());

// Runs a single groupby scan aggregation and compares the result directly with
// expect_keys / expect_vals (no re-sorting is applied).
void test_single_scan(column_view const& keys,
column_view const& values,
column_view const& expect_keys,
column_view const& expect_vals,
std::unique_ptr<groupby_scan_aggregation>&& agg,
null_policy include_null_keys = null_policy::EXCLUDE,
sorted keys_are_sorted = sorted::NO,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
std::source_location const& location = std::source_location::current());

} // namespace cudf::test
58 changes: 29 additions & 29 deletions cpp/tests/groupby/rank_scan_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -28,34 +28,34 @@ void test_rank_scans(cudf::column_view const& keys,
cudf::column_view const& expected_rank,
cudf::column_view const& expected_percent_rank)
{
test_single_scan(keys,
order,
keys,
expected_dense,
cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
cudf::rank_method::DENSE, {}, cudf::null_policy::INCLUDE),
cudf::null_policy::INCLUDE,
cudf::sorted::YES);
test_single_scan(keys,
order,
keys,
expected_rank,
cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE),
cudf::null_policy::INCLUDE,
cudf::sorted::YES);
test_single_scan(keys,
order,
keys,
expected_percent_rank,
cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
cudf::rank_method::MIN,
{},
cudf::null_policy::INCLUDE,
{},
cudf::rank_percentage::ONE_NORMALIZED),
cudf::null_policy::INCLUDE,
cudf::sorted::YES);
cudf::test::test_single_scan(keys,
order,
keys,
expected_dense,
cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
cudf::rank_method::DENSE, {}, cudf::null_policy::INCLUDE),
cudf::null_policy::INCLUDE,
cudf::sorted::YES);
cudf::test::test_single_scan(keys,
order,
keys,
expected_rank,
cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE),
cudf::null_policy::INCLUDE,
cudf::sorted::YES);
cudf::test::test_single_scan(keys,
order,
keys,
expected_percent_rank,
cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
cudf::rank_method::MIN,
{},
cudf::null_policy::INCLUDE,
{},
cudf::rank_percentage::ONE_NORMALIZED),
cudf::null_policy::INCLUDE,
cudf::sorted::YES);
}

// Test fixture for the groupby rank scan tests; adds nothing on top of
// cudf::test::BaseFixture (presumably used by the TEST_F cases that follow,
// which are not visible here).
struct groupby_rank_scan_test : public cudf::test::BaseFixture {};
Expand Down