Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sycl/include/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ class HostKernel : public HostKernelBase {
KernelType MKernel;

public:
HostKernel(KernelType Kernel) : MKernel(Kernel) {}
HostKernel(const KernelType &Kernel) : MKernel(Kernel) {}
HostKernel(KernelType &&Kernel) : MKernel(std::move(Kernel)) {}

char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }

Expand Down
47 changes: 26 additions & 21 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,17 +715,18 @@ class __SYCL_EXPORT handler {
/// \param KernelFunc is a SYCL kernel function
/// \param ParamDescs is the vector of kernel parameter descriptors.
template <typename KernelName, typename KernelType, int Dims,
typename LambdaArgType>
void StoreLambda(KernelType KernelFunc) {
typename LambdaArgType, typename KernelTypeUniversalRef>
void StoreLambda(KernelTypeUniversalRef &&KernelFunc) {
constexpr bool IsCallableWithKernelHandler =
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
LambdaArgType>::value;

// Not using `std::make_unique` to avoid unnecessary instantiations of
// `std::unique_ptr<HostKernel<...>>`. Only
// `std::unique_ptr<HostKernelBase>` is necessary.
MHostKernel.reset(
new detail::HostKernel<KernelType, LambdaArgType, Dims>(KernelFunc));

MHostKernel.reset(new detail::HostKernel<KernelType, LambdaArgType, Dims>(
std::forward<KernelTypeUniversalRef>(KernelFunc)));

constexpr bool KernelHasName =
detail::getKernelName<KernelName>() != nullptr &&
Expand All @@ -739,7 +740,7 @@ class __SYCL_EXPORT handler {
#ifdef __INTEL_SYCL_USE_INTEGRATION_HEADERS
static_assert(
!KernelHasName ||
sizeof(KernelFunc) == detail::getKernelSize<KernelName>(),
sizeof(KernelType) == detail::getKernelSize<KernelName>(),
"Unexpected kernel lambda size. This can be caused by an "
"external host compiler producing a lambda with an "
"unexpected layout. This is a limitation of the compiler."
Expand Down Expand Up @@ -1133,7 +1134,7 @@ class __SYCL_EXPORT handler {
typename KernelName, typename KernelType, int Dims,
typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
void parallel_for_lambda_impl(range<Dims> UserRange, PropertiesT Props,
KernelType KernelFunc) {
const KernelType &KernelFunc) {
#ifndef __SYCL_DEVICE_ONLY__
throwIfActionIsCreated();
throwOnKernelParameterMisuse<KernelName, KernelType>();
Expand Down Expand Up @@ -1545,19 +1546,21 @@ class __SYCL_EXPORT handler {
// methods side.

template <typename... TypesToForward, typename... ArgsTy>
static void kernel_single_task_unpack(handler *h, ArgsTy... Args) {
h->kernel_single_task<TypesToForward..., Props...>(Args...);
static void kernel_single_task_unpack(handler *h, ArgsTy&&... Args) {
h->kernel_single_task<TypesToForward..., Props...>(std::forward<ArgsTy>(Args)...);
}

template <typename... TypesToForward, typename... ArgsTy>
static void kernel_parallel_for_unpack(handler *h, ArgsTy... Args) {
h->kernel_parallel_for<TypesToForward..., Props...>(Args...);
static void kernel_parallel_for_unpack(handler *h, ArgsTy &&...Args) {
h->kernel_parallel_for<TypesToForward..., Props...>(
std::forward<ArgsTy>(Args)...);
}

template <typename... TypesToForward, typename... ArgsTy>
static void kernel_parallel_for_work_group_unpack(handler *h,
ArgsTy... Args) {
h->kernel_parallel_for_work_group<TypesToForward..., Props...>(Args...);
ArgsTy &&...Args) {
h->kernel_parallel_for_work_group<TypesToForward..., Props...>(
std::forward<ArgsTy>(Args)...);
}
};

Expand Down Expand Up @@ -1622,9 +1625,9 @@ class __SYCL_EXPORT handler {
void kernel_single_task_wrapper(const KernelType &KernelFunc) {
unpack<KernelName, KernelType, PropertiesT,
detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>(
KernelFunc, [&](auto Unpacker, auto... args) {
KernelFunc, [&](auto Unpacker, auto &&...args) {
Unpacker.template kernel_single_task_unpack<KernelName, KernelType>(
args...);
std::forward<decltype(args)>(args)...);
});
}

Expand All @@ -1635,9 +1638,10 @@ class __SYCL_EXPORT handler {
unpack<KernelName, KernelType, PropertiesT,
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
ElementType>::value>(
KernelFunc, [&](auto Unpacker, auto... args) {
KernelFunc, [&](auto Unpacker, auto &&...args) {
Unpacker.template kernel_parallel_for_unpack<KernelName, ElementType,
KernelType>(args...);
KernelType>(
std::forward<decltype(args)>(args)...);
});
}

Expand All @@ -1648,9 +1652,10 @@ class __SYCL_EXPORT handler {
unpack<KernelName, KernelType, PropertiesT,
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
ElementType>::value>(
KernelFunc, [&](auto Unpacker, auto... args) {
KernelFunc, [&](auto Unpacker, auto &&...args) {
Unpacker.template kernel_parallel_for_work_group_unpack<
KernelName, ElementType, KernelType>(args...);
KernelName, ElementType, KernelType>(
std::forward<decltype(args)>(args)...);
});
}

Expand Down Expand Up @@ -1900,21 +1905,21 @@ class __SYCL_EXPORT handler {
void parallel_for(range<1> NumWorkItems, const KernelType &KernelFunc) {
parallel_for_lambda_impl<KernelName>(
NumWorkItems, ext::oneapi::experimental::empty_properties_t{},
std::move(KernelFunc));
KernelFunc);
}

template <typename KernelName = detail::auto_name, typename KernelType>
void parallel_for(range<2> NumWorkItems, const KernelType &KernelFunc) {
parallel_for_lambda_impl<KernelName>(
NumWorkItems, ext::oneapi::experimental::empty_properties_t{},
std::move(KernelFunc));
KernelFunc);
}

template <typename KernelName = detail::auto_name, typename KernelType>
void parallel_for(range<3> NumWorkItems, const KernelType &KernelFunc) {
parallel_for_lambda_impl<KernelName>(
NumWorkItems, ext::oneapi::experimental::empty_properties_t{},
std::move(KernelFunc));
KernelFunc);
}

/// Enqueues a command to the SYCL runtime to invoke \p Func once.
Expand Down
43 changes: 43 additions & 0 deletions sycl/test-e2e/Basic/test_num_kernel_copies.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <sycl/detail/core.hpp>

// Incremented by every kernel<N> copy constructor; main() checks it and resets
// it to 0 after each submission.
size_t copy_count = 0;
// Incremented by every kernel<N> move constructor; main() asserts that it
// stays 0 for all submissions.
size_t move_count = 0;

/// Instrumented kernel function object that records how often it is
/// copy- or move-constructed.
///
/// Each copy construction increments the global `copy_count`; each move
/// construction increments the global `move_count`. The call operators are
/// intentionally empty: only the construction counts matter to the test.
///
/// \tparam N is not used by the class body; main() instantiates a distinct
/// value for each submission it performs.
template <int N> class kernel {
public:
  kernel() {}
  // The source object is never read: only the fact that a copy/move happened
  // is recorded, so the parameters are left unnamed.
  kernel(const kernel &) { ++copy_count; }
  kernel(kernel &&) { ++move_count; }

  // One overload per submission form exercised in main(): an id-based
  // parallel_for, an nd_item-based parallel_for, and a single_task.
  void operator()(sycl::id<1>) const {}
  void operator()(sycl::nd_item<1>) const {}
  void operator()() const {}
};
// kernel<N> declares its own copy and move constructors, so it is not
// trivially copyable; this specialization explicitly marks every kernel<N> as
// device copyable.
// NOTE(review): presumably this is what allows the object to be passed as a
// kernel to parallel_for/single_task -- confirm against the SYCL specification.
template <int N> struct sycl::is_device_copyable<kernel<N>> : std::true_type {};

int main(int argc, char **argv) {
sycl::queue q;

kernel<0> krn0;
q.parallel_for(sycl::range<1>{1}, krn0);
assert(copy_count == 1);
assert(move_count == 0);
copy_count = 0;

kernel<1> krn1;
q.parallel_for(sycl::nd_range<1>{1, 1}, krn1);
assert(copy_count == 1);
assert(move_count == 0);
copy_count = 0;

kernel<2> krn2;
q.single_task(krn2);
assert(copy_count == 1);
assert(move_count == 0);
copy_count = 0;

return 0;
}
Loading