Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sycl/include/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ class HostKernel : public HostKernelBase {
KernelType MKernel;

public:
HostKernel(KernelType Kernel) : MKernel(Kernel) {}
HostKernel(const KernelType &Kernel) : MKernel(Kernel) {}
HostKernel(KernelType &&Kernel) : MKernel(std::move(Kernel)) {}

char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }

Expand Down
47 changes: 26 additions & 21 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,17 +715,17 @@ class __SYCL_EXPORT handler {
/// \param KernelFunc is a SYCL kernel function
/// \param ParamDescs is the vector of kernel parameter descriptors.
template <typename KernelName, typename KernelType, int Dims,
typename LambdaArgType>
void StoreLambda(KernelType KernelFunc) {
typename LambdaArgType, typename KernelTypeUniversalRef>
void StoreLambda(KernelTypeUniversalRef &&KernelFunc) {
constexpr bool IsCallableWithKernelHandler =
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
LambdaArgType>::value;

// Not using `std::make_unique` to avoid unnecessary instantiations of
// `std::unique_ptr<HostKernel<...>>`. Only
// `std::unique_ptr<HostKernelBase>` is necessary.
MHostKernel.reset(
new detail::HostKernel<KernelType, LambdaArgType, Dims>(KernelFunc));
MHostKernel.reset(new detail::HostKernel<KernelType, LambdaArgType, Dims>(
std::forward<KernelTypeUniversalRef>(KernelFunc)));

constexpr bool KernelHasName =
detail::getKernelName<KernelName>() != nullptr &&
Expand All @@ -739,7 +739,7 @@ class __SYCL_EXPORT handler {
#ifdef __INTEL_SYCL_USE_INTEGRATION_HEADERS
static_assert(
!KernelHasName ||
sizeof(KernelFunc) == detail::getKernelSize<KernelName>(),
sizeof(KernelType) == detail::getKernelSize<KernelName>(),
"Unexpected kernel lambda size. This can be caused by an "
"external host compiler producing a lambda with an "
"unexpected layout. This is a limitation of the compiler."
Expand Down Expand Up @@ -1133,7 +1133,7 @@ class __SYCL_EXPORT handler {
typename KernelName, typename KernelType, int Dims,
typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
void parallel_for_lambda_impl(range<Dims> UserRange, PropertiesT Props,
KernelType KernelFunc) {
const KernelType &KernelFunc) {
#ifndef __SYCL_DEVICE_ONLY__
throwIfActionIsCreated();
throwOnKernelParameterMisuse<KernelName, KernelType>();
Expand Down Expand Up @@ -1545,19 +1545,22 @@ class __SYCL_EXPORT handler {
// methods side.

template <typename... TypesToForward, typename... ArgsTy>
static void kernel_single_task_unpack(handler *h, ArgsTy... Args) {
h->kernel_single_task<TypesToForward..., Props...>(Args...);
static void kernel_single_task_unpack(handler *h, ArgsTy &&...Args) {
h->kernel_single_task<TypesToForward..., Props...>(
std::forward<ArgsTy>(Args)...);
}

template <typename... TypesToForward, typename... ArgsTy>
static void kernel_parallel_for_unpack(handler *h, ArgsTy... Args) {
h->kernel_parallel_for<TypesToForward..., Props...>(Args...);
static void kernel_parallel_for_unpack(handler *h, ArgsTy &&...Args) {
h->kernel_parallel_for<TypesToForward..., Props...>(
std::forward<ArgsTy>(Args)...);
}

template <typename... TypesToForward, typename... ArgsTy>
static void kernel_parallel_for_work_group_unpack(handler *h,
ArgsTy... Args) {
h->kernel_parallel_for_work_group<TypesToForward..., Props...>(Args...);
ArgsTy &&...Args) {
h->kernel_parallel_for_work_group<TypesToForward..., Props...>(
std::forward<ArgsTy>(Args)...);
}
};

Expand Down Expand Up @@ -1622,9 +1625,9 @@ class __SYCL_EXPORT handler {
void kernel_single_task_wrapper(const KernelType &KernelFunc) {
unpack<KernelName, KernelType, PropertiesT,
detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>(
KernelFunc, [&](auto Unpacker, auto... args) {
KernelFunc, [&](auto Unpacker, auto &&...args) {
Unpacker.template kernel_single_task_unpack<KernelName, KernelType>(
args...);
std::forward<decltype(args)>(args)...);
});
}

Expand All @@ -1635,9 +1638,10 @@ class __SYCL_EXPORT handler {
unpack<KernelName, KernelType, PropertiesT,
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
ElementType>::value>(
KernelFunc, [&](auto Unpacker, auto... args) {
KernelFunc, [&](auto Unpacker, auto &&...args) {
Unpacker.template kernel_parallel_for_unpack<KernelName, ElementType,
KernelType>(args...);
KernelType>(
std::forward<decltype(args)>(args)...);
});
}

Expand All @@ -1648,9 +1652,10 @@ class __SYCL_EXPORT handler {
unpack<KernelName, KernelType, PropertiesT,
detail::KernelLambdaHasKernelHandlerArgT<KernelType,
ElementType>::value>(
KernelFunc, [&](auto Unpacker, auto... args) {
KernelFunc, [&](auto Unpacker, auto &&...args) {
Unpacker.template kernel_parallel_for_work_group_unpack<
KernelName, ElementType, KernelType>(args...);
KernelName, ElementType, KernelType>(
std::forward<decltype(args)>(args)...);
});
}

Expand Down Expand Up @@ -1900,21 +1905,21 @@ class __SYCL_EXPORT handler {
void parallel_for(range<1> NumWorkItems, const KernelType &KernelFunc) {
parallel_for_lambda_impl<KernelName>(
NumWorkItems, ext::oneapi::experimental::empty_properties_t{},
std::move(KernelFunc));
KernelFunc);
}

template <typename KernelName = detail::auto_name, typename KernelType>
void parallel_for(range<2> NumWorkItems, const KernelType &KernelFunc) {
parallel_for_lambda_impl<KernelName>(
NumWorkItems, ext::oneapi::experimental::empty_properties_t{},
std::move(KernelFunc));
KernelFunc);
}

template <typename KernelName = detail::auto_name, typename KernelType>
void parallel_for(range<3> NumWorkItems, const KernelType &KernelFunc) {
parallel_for_lambda_impl<KernelName>(
NumWorkItems, ext::oneapi::experimental::empty_properties_t{},
std::move(KernelFunc));
KernelFunc);
}

/// Enqueues a command to the SYCL runtime to invoke \p Func once.
Expand Down
43 changes: 43 additions & 0 deletions sycl/test-e2e/Basic/test_num_kernel_copies.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <sycl/detail/core.hpp>

// Global tallies updated by the kernel<N> constructors defined below: each
// copy-construction increments copy_count and each move-construction
// increments move_count. main() asserts on both after every submission.
size_t copy_count = 0;
size_t move_count = 0;

/// Minimal function object used as a SYCL kernel whose only purpose is to
/// record how many times it is copy- or move-constructed.
///
/// \tparam N tag that yields distinct kernel types; main() instantiates a
/// different N for each submission it makes.
template <int N> class kernel {
public:
  kernel() {}
  /// Copy constructor: records one copy in the global copy_count.
  kernel(const kernel &) { ++copy_count; }
  /// Move constructor: records one move in the global move_count.
  kernel(kernel &&) { ++move_count; }

  // One call operator per invocation form used by main() (sycl::id,
  // sycl::nd_item, and no argument). The bodies are intentionally empty:
  // only the constructor calls are being counted.
  void operator()(sycl::id<1>) const {}
  void operator()(sycl::nd_item<1>) const {}
  void operator()() const {}
};
// kernel<N> has user-provided copy and move constructors, so it is not
// trivially copyable; this specialization explicitly marks every kernel<N>
// as device copyable for SYCL.
template <int N> struct sycl::is_device_copyable<kernel<N>> : std::true_type {};

/// Submits a kernel<N> object through three queue entry points
/// (parallel_for over a range, parallel_for over an nd_range, single_task)
/// and asserts that each submission copy-constructs the kernel object exactly
/// once and never move-constructs it.
///
/// \return 0 when every assertion holds.
int main() {
  sycl::queue q;

  // Case 1: parallel_for over a sycl::range.
  kernel<0> krn0;
  q.parallel_for(sycl::range<1>{1}, krn0);
  assert(copy_count == 1);
  assert(move_count == 0);
  copy_count = 0; // start the next case from a clean tally

  // Case 2: parallel_for over a sycl::nd_range.
  kernel<1> krn1;
  q.parallel_for(sycl::nd_range<1>{1, 1}, krn1);
  assert(copy_count == 1);
  assert(move_count == 0);
  copy_count = 0;

  // Case 3: single_task.
  kernel<2> krn2;
  q.single_task(krn2);
  assert(copy_count == 1);
  assert(move_count == 0);
  copy_count = 0;

  // Block until all submitted kernels have completed so the process does not
  // exit with work still in flight. This runs after every assertion, so it
  // cannot influence the counts checked above.
  q.wait();

  return 0;
}
Loading