Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 14 additions & 128 deletions sycl/test-e2e/GroupAlgorithm/root_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,136 +18,22 @@
#include <sycl/group_barrier.hpp>
#include <sycl/kernel_bundle.hpp>

static constexpr int WorkGroupSize = 32;

void testFeatureMacro() {
static_assert(SYCL_EXT_ONEAPI_ROOT_GROUP == 1,
"SYCL_EXT_ONEAPI_ROOT_GROUP must have a value of 1");
}

void testQueriesAndProperties() {
sycl::queue q;
const auto bundle =
sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context());
const auto kernel = bundle.get_kernel<class QueryKernel>();
const auto local_range = sycl::range<1>(1);
const auto maxWGs =
kernel
.ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
kernel_queue_specific::max_num_work_groups>(
q, local_range, 0);
const auto wgRange = sycl::range<3>{WorkGroupSize, 1, 1};
const auto maxWGsWithLimits =
kernel
.ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
kernel_queue_specific::max_num_work_groups>(
q, wgRange, wgRange.size() * sizeof(int));
const auto props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::use_root_sync};
q.single_task<class QueryKernel>(props, []() {});

static auto check_max_num_work_group_sync = [](auto Result) {
static_assert(std::is_same_v<std::remove_cv_t<decltype(Result)>, size_t>,
"max_num_work_group_sync query must return size_t");
assert(Result >= 1 && "max_num_work_group_sync query failed");
};
check_max_num_work_group_sync(maxWGs);
check_max_num_work_group_sync(maxWGsWithLimits);
}

void testRootGroup() {
sycl::queue q;
const auto bundle =
sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context());
const auto kernel = bundle.get_kernel<class RootGroupKernel>();
const auto maxWGs =
kernel
.ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
kernel_queue_specific::max_num_work_groups>(
q, WorkGroupSize, 0);
const auto props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::use_root_sync};
sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
q.submit([&](sycl::handler &h) {
sycl::accessor data{dataBuf, h};
h.parallel_for<
class RootGroupKernel>(range, props, [=](sycl::nd_item<1> it) {
volatile float X = 1.0f;
volatile float Y = 1.0f;
auto root = it.ext_oneapi_get_root_group();
data[root.get_local_id()] = root.get_local_id();
sycl::group_barrier(root);
// Delay half of the workgroups with extra work to check that the barrier
// synchronizes the whole device.
if (it.get_group(0) % 2 == 0) {
X += sycl::sin(X);
Y += sycl::cos(Y);
}
root =
sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
int sum = data[root.get_local_id()] +
data[root.get_local_range() - root.get_local_id() - 1];
sycl::group_barrier(root);
data[root.get_local_id()] = sum;
});
});
sycl::host_accessor data{dataBuf};
const int workItemCount = static_cast<int>(range.get_global_range().size());
for (int i = 0; i < workItemCount; i++) {
assert(data[i] == (workItemCount - 1));
struct RootGroupKernel {
RootGroupKernel() {}
void operator()(sycl::nd_item<1> it) const {
auto root = it.ext_oneapi_get_root_group();
sycl::group_barrier(root);
}
}

void testRootGroupFunctions() {
sycl::queue q;
const auto bundle =
sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context());
const auto kernel = bundle.get_kernel<class RootGroupFunctionsKernel>();
const auto maxWGs =
kernel
.ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
kernel_queue_specific::max_num_work_groups>(
q, WorkGroupSize, 0);
const auto props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::use_root_sync};

constexpr int testCount = 9;
sycl::buffer<bool> testResultsBuf{sycl::range{testCount}};
const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
q.submit([&](sycl::handler &h) {
sycl::accessor testResults{testResultsBuf, h};
h.parallel_for<class RootGroupFunctionsKernel>(
range, props, [=](sycl::nd_item<1> it) {
const auto root = it.ext_oneapi_get_root_group();
if (root.leader() || root.get_local_id() == 3) {
testResults[0] = root.get_group_id() == sycl::id<1>(0);
testResults[1] = root.leader()
? root.get_local_id() == sycl::id<1>(0)
: root.get_local_id() == sycl::id<1>(3);
testResults[2] = root.get_group_range() == sycl::range<1>(1);
testResults[3] = root.get_local_range() == it.get_global_range();
testResults[4] =
root.get_max_local_range() == root.get_local_range();
testResults[5] = root.get_group_linear_id() == 0;
testResults[6] =
root.get_local_linear_id() == root.get_local_id().get(0);
testResults[7] = root.get_group_linear_range() == 1;
testResults[8] =
root.get_local_linear_range() == root.get_local_range().size();
}
});
});
sycl::host_accessor testResults{testResultsBuf};
for (int i = 0; i < testCount; i++) {
assert(testResults[i]);
auto get(sycl::ext::oneapi::experimental::properties_tag) {
return sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::use_root_sync};
}
}
};

int main() {
testFeatureMacro();
testQueriesAndProperties();
testRootGroup();
testRootGroupFunctions();
sycl::queue q;
sycl::range<1> R1{1};
sycl::nd_range<1> NDR1{R1, R1};
q.submit([&](sycl::handler &h) { h.parallel_for(NDR1, RootGroupKernel()); });
return EXIT_SUCCESS;
}
}
Loading