-
Notifications
You must be signed in to change notification settings - Fork 794
[SYCL][COMPAT][cuda] Add "ptr_to_integer" syclcompat functions. #14283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
9e77065
3dcd427
e5e3183
a054077
5b8b643
c2d2a50
054e90e
054baf7
ea085b3
0d2064a
18137d4
7ea41d8
888f0d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -947,6 +947,42 @@ public: | |
| }; | ||
| ``` | ||
|
|
||
| ### ptr_to_int | ||
|
|
||
| The following CUDA backend-specific function is introduced in order to | ||
| translate local memory pointers into `uint32_t` or `size_t` variables that | ||
| contain a byte address in the local (local refers to `.shared` in NVPTX) memory | ||
| state space. | ||
|
|
||
| ``` c++ | ||
| namespace syclcompat { | ||
| template <typename T> | ||
| __syclcompat_inline__ | ||
| std::enable_if_t<std::is_same_v<T, uint32_t> || std::is_same_v<T, size_t>, | ||
| T> | ||
| ptr_to_int(void *ptr) | ||
| } // syclcompat | ||
| ``` | ||
|
|
||
| These variables can be used in inline PTX instructions that take address | ||
| operands. Such inline PTX instructions are commonly used in optimized | ||
| libraries. A simplified example usage of the above function is as follows: | ||
|
|
||
| ``` c++ | ||
| half *data = syclcompat::local_mem<half[NUM_ELEMENTS]>(); | ||
| // ... | ||
| // ... | ||
| T addr = | ||
| syclcompat::ptr_to_int<T>(reinterpret_cast<char *>(data) + (id % 8) * 16); | ||
|
|
||
| uint32_t fragment; | ||
| #if defined(__NVPTX__) | ||
| asm volatile("ldmatrix.sync.aligned.m8n8.x1.shared.b16 {%0}, [%1];\n" | ||
| : "=r"(fragment) | ||
| : "r"(addr)); | ||
| #endif | ||
| ``` | ||
|
||
|
|
||
| ### Device Management | ||
|
|
||
| Multiple SYCL functionalities are exposed through utility functions to manage | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| // REQUIRES: cuda | ||
| // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_75 -o %t.out | ||
| // RUN: %{run} %t.out | ||
| #include <sycl/detail/core.hpp> | ||
| #include <sycl/group_barrier.hpp> | ||
| #include <syclcompat/memory.hpp> | ||
|
|
||
| using namespace sycl; | ||
| #define NUM_ELEMENTS 64 | ||
|
|
||
| template <class T> void test(queue stream) { | ||
| half *res = malloc_shared<half>(NUM_ELEMENTS, stream); | ||
|
|
||
| for (int i = 0; i < NUM_ELEMENTS; ++i) { | ||
| res[i] = 0.5; | ||
| } | ||
|
|
||
| sycl::nd_range<1> global_range{sycl::range{32}, sycl::range{32}}; | ||
|
|
||
| stream | ||
| .submit([&](handler &h) { | ||
| h.parallel_for<T>(global_range, [=](nd_item<1> item) { | ||
| sycl::group work_group = item.get_group(); | ||
| int id = item.get_global_linear_id(); | ||
| half *data = syclcompat::local_mem<half[NUM_ELEMENTS]>(); | ||
|
|
||
| data[id * 2] = id; | ||
| data[id * 2 + 1] = id + 0.5; | ||
|
|
||
| T addr = | ||
| syclcompat::ptr_to_int<T>(reinterpret_cast<char *>(data) + (id % 8) * 16); | ||
|
|
||
| uint32_t fragment; | ||
| #if defined(__NVPTX__) | ||
| asm volatile("ldmatrix.sync.aligned.m8n8.x1.shared.b16 {%0}, [%1];\n" | ||
| : "=r"(fragment) | ||
| : "r"(addr)); | ||
| #endif | ||
| sycl::group_barrier(work_group); | ||
|
|
||
| half *data_ptr = reinterpret_cast<half *>(&fragment); | ||
| res[id * 2] = data_ptr[0]; | ||
| res[id * 2 + 1] = data_ptr[1]; | ||
| }); | ||
| }) | ||
| .wait(); | ||
|
|
||
| for (int i = 0; i < NUM_ELEMENTS; i++) { | ||
| assert(res[i] == static_cast<half>(i / 2.0)); | ||
| } | ||
|
|
||
| free(res, stream); | ||
| }; | ||
|
|
||
| int main() { | ||
|
|
||
| queue stream{property::queue::in_order{}}; | ||
| test<size_t>(stream); | ||
| test<uint32_t>(stream); | ||
|
|
||
| std::cout << "PASS" << std::endl; | ||
| return 0; | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.