-
Notifications
You must be signed in to change notification settings - Fork 32
[SYClomatic-test] Add help function for Block Load header #619
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
ShengchenJ
merged 38 commits into
oneapi-src:SYCLomatic
from
abhilash1910:block_load_test
May 13, 2024
Merged
Changes from all commits
Commits
Show all changes
38 commits
Select commit
Hold shift + click to select a range
c08ae48
block load tests
abhilash1910 513b103
update test
abhilash1910 d8b7cb7
use onedpl iterator
abhilash1910 be5eefe
rm duplicate function & iterator
abhilash1910 66ec770
Update help_function/src/onedpl_test_group_load.cpp
abhilash1910 146c0d1
Update help_function/src/onedpl_test_group_load.cpp
abhilash1910 ec5d9a8
Update help_function/src/onedpl_test_group_load.cpp
abhilash1910 400f48d
rm unwanted test
abhilash1910 5878cb7
update review comments
abhilash1910 022e628
Merge branch 'SYCLomatic' into block_load_test
abhilash1910 ae0e1d6
rebase from upstream
abhilash1910 d2ae0ee
update tests
abhilash1910 23b7023
update template param
abhilash1910 7c2ff7e
fix compile issues
abhilash1910 fe51693
update & fix all tests
abhilash1910 c5447c0
revert accidental deletion
abhilash1910 d99a828
replace striped expected output
abhilash1910 5ddb779
update striped test case
abhilash1910 ab0277a
use subgroup size
abhilash1910 300ac08
re-add to prev line
abhilash1910 9c21a36
review commits
abhilash1910 7cd159c
refactor tests
abhilash1910 2b54c4c
Merge branch 'SYCLomatic' into block_load_test
abhilash1910 24e80cc
update test
abhilash1910 0f4c9ee
fix return no code exec
abhilash1910 890d4ff
fix compile issues and pass test
abhilash1910 6ec45aa
add comments
abhilash1910 3a5c119
apply barrier for sync and update test case
abhilash1910 8b42494
review comments
abhilash1910 e79a7fc
update
abhilash1910 68618bc
rename file
abhilash1910 1905ed9
remove barrier and use output buffer
abhilash1910 cf9fdd9
rm unused variables
abhilash1910 31382c1
fix test case
abhilash1910 5a5881b
add local changes and run tests
abhilash1910 7f6ed89
format changes
abhilash1910 92650b8
revert rename
abhilash1910 1a9d205
clang format
abhilash1910 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
// ====------ onedpl_test_group_load.cpp------------ *- C++ -* ----===// | ||
|
||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// | ||
// ===----------------------------------------------------------------------===// | ||
#include <dpct/dpct.hpp> | ||
#include <dpct/dpl_utils.hpp> | ||
#include <iostream> | ||
#include <oneapi/dpl/iterator> | ||
#include <sycl/sycl.hpp> | ||
|
||
template <dpct::group::load_algorithm T> | ||
bool helper_validation_function(const int *ptr, const char *func_name) { | ||
if constexpr (T == dpct::group::load_algorithm::BLOCK_LOAD_DIRECT) { | ||
for (int i = 0; i < 512; ++i) { | ||
if (ptr[i] != i) { | ||
std::cout << func_name << "_blocked" | ||
<< " failed\n"; | ||
std::ostream_iterator<int> Iter(std::cout, ", "); | ||
std::copy(ptr, ptr + 512, Iter); | ||
std::cout << std::endl; | ||
return false; | ||
} | ||
} | ||
std::cout << func_name << "_blocked" | ||
<< " pass\n"; | ||
} else { | ||
int expected[512]; | ||
int num_threads = 128; | ||
int items_per_thread = 4; | ||
for (int i = 0; i < num_threads; ++i) { | ||
for (int j = 0; j < items_per_thread; ++j) { | ||
expected[i * items_per_thread + j] = j * num_threads + i; | ||
} | ||
} | ||
for (int i = 0; i < 512; ++i) { | ||
if (ptr[i] != expected[i]) { | ||
std::cout << func_name << "_striped" | ||
<< " failed\n"; | ||
std::ostream_iterator<int> Iter(std::cout, ", "); | ||
std::copy(ptr, ptr + 512, Iter); | ||
std::cout << std::endl; | ||
return false; | ||
} | ||
} | ||
std::cout << func_name << "_striped" | ||
<< " pass\n"; | ||
} | ||
return true; | ||
} | ||
|
||
bool subgroup_helper_validation_function(const int *ptr, const uint32_t *sg_sz, | ||
const char *func_name) { | ||
int expected[512]; | ||
danhoeflinger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
int num_threads = 128; | ||
int items_per_thread = 4; | ||
uint32_t sg_sz_val = *sg_sz; | ||
for (int i = 0; i < num_threads; ++i) { | ||
for (int j = 0; j < items_per_thread; ++j) { | ||
expected[items_per_thread * i + j] = | ||
(i / sg_sz_val) * sg_sz_val * items_per_thread + sg_sz_val * j + | ||
i % sg_sz_val; | ||
} | ||
} | ||
for (int i = 0; i < 512; ++i) { | ||
if (ptr[i] != expected[i]) { | ||
danhoeflinger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
std::cout << " failed\n"; | ||
std::ostream_iterator<int> Iter(std::cout, ", "); | ||
std::copy(ptr, ptr + 512, Iter); | ||
std::cout << std::endl; | ||
return false; | ||
} | ||
} | ||
|
||
std::cout << func_name << " pass\n"; | ||
return true; | ||
} | ||
|
||
template <dpct::group::load_algorithm T> bool test_group_load() { | ||
// Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT & | ||
// dpct::group::load_algorithm::BLOCK_LOAD_STRIPED in its entirety as API | ||
// functions | ||
sycl::queue q(dpct::get_default_queue()); | ||
oneapi::dpl::counting_iterator<int> count_it(0); | ||
sycl::buffer<int, 1> buffer(count_it, count_it + 512); | ||
int data_out[512]; | ||
for (int i = 0; i < 512; i++) | ||
data_out[i] = 0; | ||
sycl::buffer<int, 1> buffer_out(data_out, 512); | ||
|
||
q.submit([&](sycl::handler &h) { | ||
using group_load = | ||
dpct::group::workgroup_load<4, T, int, const int *, sycl::nd_item<3>>; | ||
size_t temp_storage_size = group_load::get_local_memory_size(128); | ||
sycl::local_accessor<uint8_t, 1> tacc(sycl::range<1>(temp_storage_size), h); | ||
sycl::accessor data_accessor_read(buffer, h, sycl::read_only); | ||
sycl::accessor data_accessor_write(buffer_out, h, sycl::write_only); | ||
h.parallel_for( | ||
sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), | ||
[=](sycl::nd_item<3> item) { | ||
int thread_data[4]; | ||
auto *d_r = | ||
data_accessor_read.get_multi_ptr<sycl::access::decorated::yes>() | ||
.get(); | ||
auto *tmp = tacc.get_multi_ptr<sycl::access::decorated::yes>().get(); | ||
group_load(tmp).load(item, d_r, thread_data); | ||
// Write thread_data of each work item at index to the global buffer | ||
int global_index = | ||
item.get_group(2) * item.get_local_range().get(2) + | ||
item.get_local_id(2); // Each thread_data has 4 elements | ||
#pragma unroll | ||
for (int i = 0; i < 4; ++i) { | ||
data_accessor_write[global_index * 4 + i] = thread_data[i]; | ||
} | ||
}); | ||
}); | ||
q.wait_and_throw(); | ||
|
||
sycl::host_accessor data_accessor(buffer_out, sycl::read_only); | ||
const int *ptr = data_accessor.get_multi_ptr<sycl::access::decorated::yes>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a big deal from my side, but |
||
return helper_validation_function<T>(ptr, "test_group_load"); | ||
} | ||
|
||
bool test_load_subgroup_striped_standalone() { | ||
// Tests dpct::group::load_subgroup_striped as standalone method | ||
sycl::queue q(dpct::get_default_queue()); | ||
int data[512]; | ||
for (int i = 0; i < 512; i++) | ||
data[i] = i; | ||
sycl::buffer<int, 1> buffer(data, 512); | ||
sycl::buffer<uint32_t, 1> sg_sz_buf{sycl::range<1>(1)}; | ||
int data_out[512]; | ||
for (int i = 0; i < 512; i++) | ||
data_out[i] = 0; | ||
sycl::buffer<int, 1> buffer_out(data_out, 512); | ||
|
||
q.submit([&](sycl::handler &h) { | ||
sycl::accessor dacc_read(buffer, h, sycl::read_only); | ||
sycl::accessor dacc_write(buffer_out, h, sycl::write_only); | ||
sycl::accessor sg_sz_dacc(sg_sz_buf, h, sycl::read_write); | ||
h.parallel_for( | ||
sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), | ||
[=](sycl::nd_item<3> item) { | ||
int thread_data[4]; | ||
auto *d_r = | ||
dacc_read.get_multi_ptr<sycl::access::decorated::yes>().get(); | ||
auto *sg_sz_acc = | ||
sg_sz_dacc.get_multi_ptr<sycl::access::decorated::yes>().get(); | ||
size_t gid = item.get_global_linear_id(); | ||
if (gid == 0) { | ||
sg_sz_acc[0] = item.get_sub_group().get_local_linear_range(); | ||
} | ||
dpct::group::uninitialized_load_subgroup_striped<4, int>(item, d_r, | ||
thread_data); | ||
// Write thread_data of each work item at index to the global buffer | ||
int global_index = | ||
(item.get_group(2) * item.get_local_range().get(2)) + | ||
item.get_local_id(2); // Each thread_data has 4 elements | ||
#pragma unroll | ||
for (int i = 0; i < 4; ++i) { | ||
dacc_write[global_index * 4 + i] = thread_data[i]; | ||
} | ||
}); | ||
}); | ||
q.wait_and_throw(); | ||
|
||
sycl::host_accessor data_accessor(buffer_out, sycl::read_only); | ||
const int *ptr = data_accessor.get_multi_ptr<sycl::access::decorated::yes>(); | ||
sycl::host_accessor data_accessor_sg(sg_sz_buf, sycl::read_only); | ||
const uint32_t *ptr_sg = | ||
data_accessor_sg.get_multi_ptr<sycl::access::decorated::yes>(); | ||
return subgroup_helper_validation_function( | ||
ptr, ptr_sg, "test_subgroup_striped_standalone"); | ||
} | ||
|
||
template <dpct::group::load_algorithm T> bool test_group_load_standalone() { | ||
// Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT & | ||
// dpct::group::load_algorithm::BLOCK_LOAD_STRIPED as standalone methods | ||
sycl::queue q(dpct::get_default_queue()); | ||
int data[512]; | ||
for (int i = 0; i < 512; i++) | ||
data[i] = i; | ||
sycl::buffer<int, 1> buffer(data, 512); | ||
int data_out[512]; | ||
for (int i = 0; i < 512; i++) | ||
data_out[i] = 0; | ||
sycl::buffer<int, 1> buffer_out(data_out, 512); | ||
|
||
q.submit([&](sycl::handler &h) { | ||
sycl::accessor dacc_read(buffer, h, sycl::read_only); | ||
sycl::accessor dacc_write(buffer_out, h, sycl::write_only); | ||
h.parallel_for( | ||
sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), | ||
[=](sycl::nd_item<3> item) { | ||
int thread_data[4]; | ||
auto *d_r = | ||
dacc_read.get_multi_ptr<sycl::access::decorated::yes>().get(); | ||
if (T == dpct::group::load_algorithm::BLOCK_LOAD_DIRECT) { | ||
dpct::group::load_blocked<4, int>(item, d_r, thread_data); | ||
} else { | ||
dpct::group::load_striped<4, int>(item, d_r, thread_data); | ||
} | ||
// Write thread_data of each work item at index to the global buffer | ||
int global_index = | ||
item.get_group(2) * item.get_local_range().get(2) + | ||
item.get_local_id(2); // Each thread_data has 4 elements | ||
#pragma unroll | ||
for (int i = 0; i < 4; ++i) { | ||
dacc_write[global_index * 4 + i] = thread_data[i]; | ||
} | ||
}); | ||
}); | ||
q.wait_and_throw(); | ||
|
||
sycl::host_accessor data_accessor(buffer_out, sycl::read_only); | ||
const int *ptr = data_accessor.get_multi_ptr<sycl::access::decorated::yes>(); | ||
return helper_validation_function<T>(ptr, "test_group_load"); | ||
} | ||
|
||
int main() { | ||
|
||
return !( | ||
// Calls test_group_load with blocked and striped strategies , should pass | ||
// both results. | ||
test_group_load<dpct::group::load_algorithm::BLOCK_LOAD_DIRECT>() && | ||
test_group_load<dpct::group::load_algorithm::BLOCK_LOAD_STRIPED>() && | ||
// Calls test_load_subgroup_striped_standalone and should pass | ||
test_load_subgroup_striped_standalone() && | ||
// Calls test_group_load_standalone with blocked and striped strategies as | ||
// free functions, should pass both results. | ||
test_group_load_standalone< | ||
dpct::group::load_algorithm::BLOCK_LOAD_STRIPED>() && | ||
test_group_load_standalone< | ||
dpct::group::load_algorithm::BLOCK_LOAD_DIRECT>()); | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.