-
Notifications
You must be signed in to change notification settings - Fork 796
[SYCL] Implement work group memory extension #15178
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 37 commits
652caa8
76daf77
21e082b
025cbc4
b94f7c9
0d6d694
448071f
9f2973a
852315f
4234022
ae5eb7e
8ce0280
4ee31a5
7b1b90b
cf7476e
50c0954
44811b8
ad1046f
d343a2e
3f1bc30
103e233
bfa5830
2031478
76f0acc
e0ad435
3513251
2cec997
4c8b196
a0b70e2
ed8f125
9460876
ac7130a
e2889b3
d6c78b9
8f7a07b
ae59899
8cff603
3ceead1
3228aeb
0e95ee5
52f13f0
71d1013
d48bc42
f6515bc
3e4c73c
c84229e
d2fddd8
0f677c2
6ef823e
6dc262a
4de6d50
5653f04
40eb63e
f6a0df7
026501c
2ce21b3
a9b2875
3821df4
d73b0b1
c1087ad
31481b8
396169f
dc37b2c
236139f
2beda8e
dbafe31
1b968df
e6b66c3
84ef6a8
f24af09
7dfa80b
3957cb5
91820d8
34bc23d
3acf835
604c640
5510208
d9418f9
e90a3b7
3b9a55a
b9ed6f4
af08c19
b2a97a2
77a6de1
3cb0ba4
1783f75
5a6085f
6affbc3
ed3c60f
fd89473
24f87b0
cba30a3
3b10242
f38f400
38a8d79
4df2f48
9a7e3f1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| //===-------------------- work_group_memory.hpp ---------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <type_traits> | ||
|
|
||
| namespace sycl { | ||
| inline namespace _V1 { | ||
| namespace detail { | ||
| template <typename T> struct is_unbounded_array : std::false_type {}; | ||
|
|
||
| template <typename T> struct is_unbounded_array<T[]> : std::true_type {}; | ||
|
|
||
| template <typename T> | ||
| inline constexpr bool is_unbounded_array_v = is_unbounded_array<T>::value; | ||
|
|
||
| class work_group_memory_impl { | ||
| public: | ||
| work_group_memory_impl() : wgm_size{0}, buffer_size{0} {} | ||
| work_group_memory_impl(const work_group_memory_impl &rhs) = default; | ||
| work_group_memory_impl & | ||
| operator=(const work_group_memory_impl &rhs) = default; | ||
| work_group_memory_impl(size_t wgm_size, size_t buffer_size) | ||
| : wgm_size{wgm_size}, buffer_size{buffer_size} {} | ||
| size_t wgm_size; | ||
| size_t buffer_size; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I haven't looked at this closely, but this looks suspicious. The It looks like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Initial testing shows that the L0 kernel only ends up with one parameter, but I will make this into a proper test. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder why there is only one Level Zero parameter. Is it because When I look at the code, I don't see any uses of If all we need is Or, you could use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, my understanding is that the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The relevant lines are 793-798 in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, thanks for the explanation! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @lbushi25 is right that this will only result in a single kernel argument, which is due to it being a "SYCL special class" so the arguments of its All that said however, I also have some concerns here:
#ifdef __SYCL_DEVICE_ONLY__
decoratedPtr ptr;
// To ensure we have the same object size on host and device, we add padding.
[[maybe_unused]] char padding[sizeof(std::shared_ptr<work_group_memory_impl>) - sizeof(decoratedPtr)];
#else
std::shared_ptr<work_group_memory_impl> impl;
#endifNote that if we're sure we only ever need the two There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
But I was also confused at first, so I think it worth explicitly declaring members of the base class as |
||
| }; | ||
|
|
||
| inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl *wgm) { | ||
| return wgm->wgm_size; | ||
| } | ||
|
|
||
| // The following 3 functions help us get the address of the first element of a | ||
| // multi-dimensional array, be it bounded or unbounded. A scalar is also | ||
| // included. In that case, it just returns the address of the scalar. | ||
| template <typename DataT> auto getData(DataT &scalar) { return &scalar; } | ||
|
|
||
| template <typename DataT, size_t N> auto getData(DataT (&bounded_arr)[N]) { | ||
| return getData(bounded_arr[0]); | ||
| } | ||
|
|
||
| template <typename DataT> auto getData(DataT (&unbounded_arr)[]) { | ||
| return getData(unbounded_arr[0]); | ||
| } | ||
|
|
||
| } // namespace detail | ||
|
|
||
| namespace ext::oneapi::experimental { | ||
| template <typename DataT, typename PropertyListT = empty_properties_t> | ||
| class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory | ||
| : sycl::detail::work_group_memory_impl { | ||
| public: | ||
| using value_type = std::remove_all_extents_t<DataT>; | ||
|
|
||
| private: | ||
| using decoratedPtr = typename sycl::detail::DecoratedType< | ||
| DataT, access::address_space::local_space>::type *; | ||
|
||
|
|
||
| public: | ||
| work_group_memory() = default; | ||
| work_group_memory(const work_group_memory &rhs) = default; | ||
| work_group_memory &operator=(const work_group_memory &rhs) = default; | ||
| template <typename T = DataT, | ||
| typename = std::enable_if_t<!sycl::detail::is_unbounded_array_v<T>>> | ||
| work_group_memory(handler &) | ||
| : sycl::detail::work_group_memory_impl(sizeof(work_group_memory), | ||
| sizeof(DataT)) {} | ||
| template <typename T = DataT, | ||
| typename = std::enable_if_t<sycl::detail::is_unbounded_array_v<T>>> | ||
| work_group_memory(size_t num, handler &) | ||
| : sycl::detail::work_group_memory_impl( | ||
| sizeof(work_group_memory), | ||
| num * sizeof(std::remove_extent_t<DataT>)) {} | ||
| template <access::decorated IsDecorated = access::decorated::no> | ||
| multi_ptr<value_type, access::address_space::local_space, IsDecorated> | ||
| get_multi_ptr() const { | ||
| return sycl::address_space_cast<access::address_space::local_space, | ||
| IsDecorated, value_type>( | ||
| sycl::detail::getData(*ptr)); | ||
|
||
| } | ||
| DataT *operator&() const { return ptr; } | ||
| operator DataT &() const { return *(this->operator&()); } | ||
|
||
| template <typename T = DataT, | ||
| typename = std::enable_if_t<!std::is_array_v<T>>> | ||
| const work_group_memory &operator=(const DataT &value) const { | ||
| *ptr = value; | ||
| return *this; | ||
| } | ||
| #ifdef __SYCL_DEVICE_ONLY__ | ||
| void __init(decoratedPtr ptr) { this->ptr = ptr; } | ||
| #endif | ||
| private: | ||
| decoratedPtr ptr; | ||
| }; | ||
| } // namespace ext::oneapi::experimental | ||
| } // namespace _V1 | ||
| } // namespace sycl | ||
Uh oh!
There was an error while loading. Please reload this page.