-
Notifications
You must be signed in to change notification settings - Fork 790
[SYCL] Postpone creation of HostKernel copy #20240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
cc71f38
c917410
941a5ff
6258fed
5e89d2f
a90a0ea
f65d5ba
91deab6
cf65f74
4c85aa5
63b7572
855d6a2
cb688cd
b81d48b
a64c17c
8de865a
79d19a9
a091ac4
5de3ae6
06e3a92
6d38d49
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -235,6 +235,93 @@ class HostKernel : public HostKernelBase { | |
#endif | ||
}; | ||
|
||
// Abstract interface for host kernel wrappers that only keep a reference to a | |
// kernel lambda allocated externally on the stack. | |
class HostKernelRefBase : public HostKernelBase { | |
public: | |
  // Returns a shared_ptr-owned HostKernelBase for the referenced kernel. | |
  // Whether ownership is taken or a copy is made is up to the implementing | |
  // class. | |
  virtual std::shared_ptr<HostKernelBase> takeOrCopyOwnership() const = 0; | |
}; | |
|
||
template <class KernelType, class KernelArgType, int Dims> | ||
class HostKernelRef : public HostKernelRefBase { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd prefer to have Maybe adding template <typename KernelType>
static HostKernelRef HostKernelRef::create(KernelType &&Kernel) {} and avoiding using the @vinser52, WDYT? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I did not get your idea and why do we need one more derived class (
Are you trying to avoid There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Most APIs will be accepting There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ahh, now I got your point.
This means rename. For me |
||
  // Non-owning reference to the caller's kernel object. Nothing is copied | |
  // here, so the referenced object must outlive this wrapper. | |
  const KernelType &MKernel; | |
|
||
public: | |
  // Binds the wrapper to Kernel by reference (no copy is made). | |
  HostKernelRef(const KernelType &Kernel) : MKernel(Kernel) {} | |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we delete copy ctor here as well? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to create HostKernelRef from a constant reference, as in sycl/include/sycl/queue.hpp, so we can't.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I mean add There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we explicitly delete copy ctor from |
||
|
||
virtual char *getPtr() override { | ||
return const_cast<char *>(reinterpret_cast<const char *>(&MKernel)); | ||
} | ||
virtual std::shared_ptr<HostKernelBase> takeOrCopyOwnership() const override { | ||
|
||
std::shared_ptr<HostKernelBase> Kernel; | ||
Kernel.reset(new HostKernel<KernelType, KernelArgType, Dims>(MKernel)); | ||
return Kernel; | ||
} | ||
|
||
  // Defaulted: the wrapper only holds a reference to the kernel, so it has | |
  // nothing of its own to release. | |
  ~HostKernelRef() noexcept override = default; | |
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES | ||
  // This function is needed for host-side compilation to keep kernels | |
  // instantiated. This is important for debuggers to be able to associate | |
  // kernel code instructions with source code lines. | |
  // NOTE: InstantiateKernelOnHost() should not be called. | |
  // | |
  // Each branch below builds a placeholder argument matching KernelArgType | |
  // and forwards it, together with MKernel, to the runKernelWith*Arg helpers. | |
  void InstantiateKernelOnHost() override { | |
|
||
    using IDBuilder = sycl::detail::Builder; | |
    // Selects the helper overload for kernels that also take a kernel_handler. | |
    constexpr bool HasKernelHandlerArg = | |
        KernelLambdaHasKernelHandlerArgT<KernelType, KernelArgType>::value; | |
    if constexpr (std::is_same_v<KernelArgType, void>) { | |
      // Kernel takes no index-space argument. | |
      runKernelWithoutArg(MKernel, std::bool_constant<HasKernelHandlerArg>()); | |
    } else if constexpr (std::is_same_v<KernelArgType, sycl::id<Dims>>) { | |
      // Kernel takes a sycl::id. | |
      sycl::id ID = InitializedVal<Dims, id>::template get<0>(); | |
      runKernelWithArg<const KernelArgType &>( | |
          MKernel, ID, std::bool_constant<HasKernelHandlerArg>()); | |
    } else if constexpr (std::is_same_v<KernelArgType, item<Dims, true>> || | |
                         std::is_same_v<KernelArgType, item<Dims, false>>) { | |
      // Kernel takes a sycl::item, with or without an offset. | |
      constexpr bool HasOffset = | |
          std::is_same_v<KernelArgType, item<Dims, true>>; | |
      if constexpr (!HasOffset) { | |
        KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>( | |
            InitializedVal<Dims, range>::template get<1>(), | |
            InitializedVal<Dims, id>::template get<0>()); | |
        runKernelWithArg<KernelArgType>( | |
            MKernel, Item, std::bool_constant<HasKernelHandlerArg>()); | |
      } else { | |
        // The offset variant passes one extra id argument to createItem. | |
        KernelArgType Item = IDBuilder::createItem<Dims, HasOffset>( | |
            InitializedVal<Dims, range>::template get<1>(), | |
            InitializedVal<Dims, id>::template get<0>(), | |
            InitializedVal<Dims, id>::template get<0>()); | |
        runKernelWithArg<KernelArgType>( | |
            MKernel, Item, std::bool_constant<HasKernelHandlerArg>()); | |
      } | |
    } else if constexpr (std::is_same_v<KernelArgType, nd_item<Dims>>) { | |
      // Kernel takes a sycl::nd_item, assembled from a group plus a global and | |
      // a local item that all share the same placeholder range and id. | |
      sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>(); | |
      sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>(); | |
      sycl::group<Dims> Group = | |
          IDBuilder::createGroup<Dims>(Range, Range, Range, ID); | |
      sycl::item<Dims, true> GlobalItem = | |
          IDBuilder::createItem<Dims, true>(Range, ID, ID); | |
      sycl::item<Dims, false> LocalItem = | |
          IDBuilder::createItem<Dims, false>(Range, ID); | |
      KernelArgType NDItem = | |
          IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group); | |
      runKernelWithArg<const KernelArgType>( | |
          MKernel, NDItem, std::bool_constant<HasKernelHandlerArg>()); | |
    } else if constexpr (std::is_same_v<KernelArgType, sycl::group<Dims>>) { | |
      // Kernel takes a sycl::group. | |
      sycl::range<Dims> Range = InitializedVal<Dims, range>::template get<1>(); | |
      sycl::id<Dims> ID = InitializedVal<Dims, id>::template get<0>(); | |
      KernelArgType Group = | |
          IDBuilder::createGroup<Dims>(Range, Range, Range, ID); | |
      runKernelWithArg<KernelArgType>( | |
          MKernel, Group, std::bool_constant<HasKernelHandlerArg>()); | |
    } else { | |
      // Assume that anything else can be default-constructed. If not, this | |
      // should fail to compile and the implementor should implement a generic | |
      // case for the new argument type. | |
      runKernelWithArg<KernelArgType>( | |
          MKernel, KernelArgType{}, std::bool_constant<HasKernelHandlerArg>()); | |
    } | |
  } | |
#endif | ||
}; | ||
|
||
// This function is needed for host-side compilation to keep kernels | ||
// instantiated. This is important for debuggers to be able to associate | |
// kernel code instructions with source code lines. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,14 +65,14 @@ auto get_native(const SyclObjectT &Obj) | |
template <int Dims> | ||
event __SYCL_EXPORT submit_kernel_direct_with_event_impl( | ||
const queue &Queue, const nd_range<Dims> &Range, | ||
std::shared_ptr<detail::HostKernelBase> &HostKernel, | ||
detail::HostKernelRefBase &HostKernel, | ||
vinser52 marked this conversation as resolved.
Show resolved
Hide resolved
vinser52 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
detail::DeviceKernelInfo *DeviceKernelInfo, | ||
const detail::code_location &CodeLoc, bool IsTopCodeLoc); | ||
|
||
template <int Dims> | ||
void __SYCL_EXPORT submit_kernel_direct_without_event_impl( | ||
const queue &Queue, const nd_range<Dims> &Range, | ||
std::shared_ptr<detail::HostKernelBase> &HostKernel, | ||
detail::HostKernelRefBase &HostKernel, | ||
detail::DeviceKernelInfo *DeviceKernelInfo, | ||
const detail::code_location &CodeLoc, bool IsTopCodeLoc); | ||
|
||
|
@@ -180,8 +180,15 @@ auto submit_kernel_direct( | |
"must be either sycl::nd_item or be convertible from sycl::nd_item"); | ||
using TransformedArgType = sycl::nd_item<Dims>; | ||
|
||
std::shared_ptr<detail::HostKernelBase> HostKernel = std::make_shared< | ||
detail::HostKernel<KernelType, TransformedArgType, Dims>>(KernelFunc); | ||
HostKernelRef<KernelType, TransformedArgType, Dims> HostKernel(KernelFunc); | ||
|
||
|
||
// Instantiating the kernel on the host improves debugging. | ||
// Passing this pointer to another translation unit prevents optimization. | ||
#ifndef NDEBUG | ||
// TODO: call library to prevent dropping call due to optimization | ||
(void) | ||
detail::GetInstantiateKernelOnHostPtr<KernelType, LambdaArgType, Dims>(); | ||
#endif | ||
|
||
detail::DeviceKernelInfo *DeviceKernelInfoPtr = | ||
&detail::getDeviceKernelInfo<NameT>(); | ||
|
Uh oh!
There was an error while loading. Please reload this page.