-
Notifications
You must be signed in to change notification settings - Fork 790
[SYCL] Postpone creation of HostKernel copy #20240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 18 commits
cc71f38
c917410
941a5ff
6258fed
5e89d2f
a90a0ea
f65d5ba
91deab6
cf65f74
4c85aa5
63b7572
855d6a2
cb688cd
b81d48b
a64c17c
8de865a
79d19a9
a091ac4
5de3ae6
06e3a92
6d38d49
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -235,8 +235,64 @@ class HostKernel : public HostKernelBase { | |
#endif | ||
}; | ||
|
||
// the class keeps reference to a lambda allocated externally on stack | ||
class HostKernelRefBase : public HostKernelBase { | ||
public: | ||
HostKernelRefBase &operator=(const HostKernelRefBase &) = delete; | ||
|
||
virtual std::unique_ptr<HostKernelBase> takeOrCopyOwnership() const = 0; | ||
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES | ||
// The kernels that are passed via HostKernelRefBase are instantiated along | ||
// ctor call with GetInstantiateKernelOnHostPtr(). | ||
void InstantiateKernelOnHost() override {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we put |
||
#endif | ||
}; | ||
|
||
// Primary template for movable objects. | ||
template <class KernelType, class KernelTypeUniversalRef, class KernelArgType, | ||
int Dims> | ||
class HostKernelRef : public HostKernelRefBase { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd prefer to have Maybe adding template <typename KernelType>
static HostKernelRef HostKernelRef::create(KernelType &&Kernel) {} and avoiding using the @vinser52 , WDYT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I did not get you idea and why do we need one more derived class (
Are you trying to avoid There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Most APIs will be accepting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh, now I got your point.
This means rename. For me |
||
KernelType &&MKernel; | ||
|
||
public: | ||
HostKernelRef(KernelType &&Kernel) : MKernel(std::move(Kernel)) {} | ||
vinser52 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
HostKernelRef(const KernelType &Kernel) = delete; | ||
|
||
virtual char *getPtr() override { return reinterpret_cast<char *>(&MKernel); } | ||
virtual std::unique_ptr<HostKernelBase> takeOrCopyOwnership() const override { | ||
std::unique_ptr<HostKernelBase> Kernel; | ||
Kernel.reset( | ||
new HostKernel<KernelType, KernelArgType, Dims>(std::move(MKernel))); | ||
return Kernel; | ||
} | ||
|
||
~HostKernelRef() noexcept override = default; | ||
}; | ||
|
||
// Specialization for copyable objects. | ||
template <class KernelType, class KernelTypeUniversalRef, class KernelArgType, | ||
int Dims> | ||
class HostKernelRef<KernelType, KernelTypeUniversalRef &, KernelArgType, Dims> | ||
: public HostKernelRefBase { | ||
const KernelType &MKernel; | ||
|
||
public: | ||
HostKernelRef(const KernelType &Kernel) : MKernel(Kernel) {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we delete copy ctor here as well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to create HostKernelRef from constant reference, as in sycl/include/sycl/queue.hpp, so we can't.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we explicitly delete copy ctor from |
||
|
||
virtual char *getPtr() override { | ||
return const_cast<char *>(reinterpret_cast<const char *>(&MKernel)); | ||
} | ||
virtual std::unique_ptr<HostKernelBase> takeOrCopyOwnership() const override { | ||
std::unique_ptr<HostKernelBase> Kernel; | ||
Kernel.reset(new HostKernel<KernelType, KernelArgType, Dims>(MKernel)); | ||
return Kernel; | ||
} | ||
|
||
~HostKernelRef() noexcept override = default; | ||
}; | ||
|
||
// This function is needed for host-side compilation to keep kernels | ||
// instantitated. This is important for debuggers to be able to associate | ||
// instantiated. This is important for debuggers to be able to associate | ||
// kernel code instructions with source code lines. | ||
template <class KernelType, class KernelArgType, int Dims> | ||
constexpr void *GetInstantiateKernelOnHostPtr() { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -149,44 +149,48 @@ void launch_grouped(handler &h, range<3> r, range<3> size, | |
} | ||
|
||
template <typename KernelType> | ||
void launch_grouped(const queue &q, range<1> r, range<1> size, | ||
const KernelType &k, | ||
constexpr bool enable_kernel_function_overload = | ||
!std::is_same_v<typename std::decay_t<KernelType>, sycl::kernel>; | ||
|
||
template <typename KernelType, typename = typename std::enable_if_t< | ||
enable_kernel_function_overload<KernelType>>> | ||
void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k, | ||
const sycl::detail::code_location &codeLoc = | ||
sycl::detail::code_location::current()) { | ||
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT | ||
detail::submit_kernel_direct(q, | ||
ext::oneapi::experimental::empty_properties_t{}, | ||
nd_range<1>(r, size), k); | ||
detail::submit_kernel_direct( | ||
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<1>(r, size), | ||
std::forward<KernelType>(k)); | ||
#else | ||
submit( | ||
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); }, | ||
codeLoc); | ||
#endif | ||
} | ||
template <typename KernelType> | ||
void launch_grouped(const queue &q, range<2> r, range<2> size, | ||
const KernelType &k, | ||
template <typename KernelType, typename = typename std::enable_if_t< | ||
enable_kernel_function_overload<KernelType>>> | ||
void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the change to rvalue ref strictly related to the HostKernel delayed allocation optimization, or is this an additional one? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change is needed to test that the move semantic is supported. It can be even dropped, if we wish. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The launch_grouped functions are compliant with the sycl_khr_free_function_commands extension PR, so if we change the implementation, I believe we need to update that PR. If it is not necessary to update these functions here, I would rather leave them as they are defined in the doc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @slawekptak, this change introduces the ability to pass a kernel functor as an rvalue. The ability to pass it as an lvalue is still there. What is your concern? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm thinking about compliance with the extension documentation. Once we change in the header, we probably need to update the extension PR. It's not a big deal - I can update it. |
||
const sycl::detail::code_location &codeLoc = | ||
sycl::detail::code_location::current()) { | ||
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT | ||
detail::submit_kernel_direct(q, | ||
ext::oneapi::experimental::empty_properties_t{}, | ||
nd_range<2>(r, size), k); | ||
detail::submit_kernel_direct( | ||
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<2>(r, size), | ||
std::forward<KernelType>(k)); | ||
#else | ||
submit( | ||
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); }, | ||
codeLoc); | ||
#endif | ||
} | ||
template <typename KernelType> | ||
void launch_grouped(const queue &q, range<3> r, range<3> size, | ||
const KernelType &k, | ||
template <typename KernelType, typename = typename std::enable_if_t< | ||
enable_kernel_function_overload<KernelType>>> | ||
void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k, | ||
const sycl::detail::code_location &codeLoc = | ||
sycl::detail::code_location::current()) { | ||
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT | ||
detail::submit_kernel_direct(q, | ||
ext::oneapi::experimental::empty_properties_t{}, | ||
nd_range<3>(r, size), k); | ||
detail::submit_kernel_direct( | ||
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<3>(r, size), | ||
std::forward<KernelType>(k)); | ||
#else | ||
submit( | ||
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); }, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s | ||
// REQUIRES: linux | ||
// UNSUPPORTED: libcxx | ||
|
||
// clang-format off | ||
|
||
#include <sycl/detail/cg_types.hpp> | ||
|
||
void foo(sycl::detail::HostKernelRefBase *) {} | ||
|
||
// CHECK: 0 | class sycl::detail::HostKernelRefBase | ||
// CHECK-NEXT: 0 | class sycl::detail::HostKernelBase (primary base) | ||
// CHECK-NEXT: 0 | (HostKernelBase vtable pointer) | ||
// CHECK-NEXT: | [sizeof=8, dsize=8, align=8, | ||
// CHECK-NEXT: | nvsize=8, nvalign=8] |
Uh oh!
There was an error while loading. Please reload this page.