Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,21 @@ template <typename KernelName = sycl::detail::auto_name, typename KernelType>
void single_task(queue Q, const KernelType &KernelObj,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
submit(
std::move(Q),
[&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); }, CodeLoc);
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
detail::submit_kernel_direct_single_task<KernelName>(
std::move(Q), empty_properties_t{}, KernelObj, CodeLoc);
} else {
submit(
std::move(Q),
[&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); },
CodeLoc);
}
}

template <typename... ArgsT>
Expand Down Expand Up @@ -268,8 +280,8 @@ void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dimensions>>::value)) {
detail::submit_kernel_direct<KernelName>(std::move(Q), empty_properties_t{},
Range, KernelObj);
detail::submit_kernel_direct_parallel_for<KernelName>(
std::move(Q), empty_properties_t{}, Range, KernelObj);
} else {
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Range, KernelObj,
Expand Down
19 changes: 15 additions & 4 deletions sycl/include/sycl/khr/free_function_commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<1>>::value)) {
detail::submit_kernel_direct(
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<1>(r, size), std::forward<KernelType>(k));
} else {
Expand All @@ -185,7 +185,7 @@ void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<2>>::value)) {
detail::submit_kernel_direct(
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<2>(r, size), std::forward<KernelType>(k));
} else {
Expand All @@ -206,7 +206,7 @@ void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<3>>::value)) {
detail::submit_kernel_direct(
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<3>(r, size), std::forward<KernelType>(k));
} else {
Expand Down Expand Up @@ -323,7 +323,18 @@ template <typename KernelType>
void launch_task(const sycl::queue &q, const KernelType &k,
const sycl::detail::code_location &codeLoc =
sycl::detail::code_location::current()) {
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
detail::submit_kernel_direct_single_task(
q, ext::oneapi::experimental::empty_properties_t{}, k, codeLoc);
} else {
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
}
}

template <typename... Args>
Expand Down
93 changes: 85 additions & 8 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ class __SYCL_EXPORT SubmissionInfo {

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct(
auto submit_kernel_direct_parallel_for(
const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
KernelTypeUniversalRef &&KernelFunc,
const detail::code_location &CodeLoc = detail::code_location::current()) {
Expand Down Expand Up @@ -216,6 +216,68 @@ auto submit_kernel_direct(
}
}

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT, typename KernelTypeUniversalRef>
auto submit_kernel_direct_single_task(
const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
const detail::code_location &CodeLoc = detail::code_location::current()) {
// TODO Properties not supported yet
(void)Props;
static_assert(
std::is_same_v<PropertiesT,
ext::oneapi::experimental::empty_properties_t>,
"Setting properties not supported yet for no-CGH kernel submit.");
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);

using KernelType =
std::remove_const_t<std::remove_reference_t<KernelTypeUniversalRef>>;

using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;

detail::KernelWrapper<detail::WrapAs::single_task, NameT, KernelType, void,
PropertiesT>::wrap(KernelFunc);

HostKernelRef<KernelType, KernelTypeUniversalRef, void, 1> HostKernel(
std::forward<KernelTypeUniversalRef>(KernelFunc));

// Instantiating the kernel on the host improves debugging.
// Passing this pointer to another translation unit prevents optimization.
#ifndef NDEBUG
// TODO: call library to prevent dropping call due to optimization
(void)detail::GetInstantiateKernelOnHostPtr<KernelType, void, 1>();
#endif

detail::DeviceKernelInfo *DeviceKernelInfoPtr =
&detail::getDeviceKernelInfo<NameT>();
constexpr auto Info = detail::CompileTimeKernelInfo<NameT>;

assert(Info.Name != std::string_view{} && "Kernel must have a name!");

static_assert(
Info.Name == std::string_view{} || sizeof(KernelType) == Info.KernelSize,
"Unexpected kernel lambda size. This can be caused by an "
"external host compiler producing a lambda with an "
"unexpected layout. This is a limitation of the compiler."
"In many cases the difference is related to capturing constexpr "
"variables. In such cases removing constexpr specifier aligns the "
"captures between the host compiler and the device compiler."
"\n"
"In case of MSVC, passing "
"-fsycl-host-compiler-options='/std:c++latest' "
"might also help.");

if constexpr (EventNeeded) {
return submit_kernel_direct_with_event_impl(
Queue, nd_range<1>{1, 1}, HostKernel, DeviceKernelInfoPtr,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
} else {
submit_kernel_direct_without_event_impl(
Queue, nd_range<1>{1, 1}, HostKernel, DeviceKernelInfoPtr,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
}
}

} // namespace detail

namespace ext ::oneapi ::experimental {
Expand Down Expand Up @@ -2727,12 +2789,27 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
"Use queue.submit() instead");

detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit(
[&](handler &CGH) {
CGH.template single_task<KernelName, KernelType, PropertiesT>(
Properties, KernelFunc);
},
TlsCodeLocCapture.query());

// TODO The handler-less path does not support kernel
// function properties and kernel functions with the kernel_handler
// type argument yet.
if constexpr (
std::is_same_v<PropertiesT,
ext::oneapi::experimental::empty_properties_t> &&
!(ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType, void>::value)) {
return detail::submit_kernel_direct_single_task<KernelName, true>(
*this, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
CGH.template single_task<KernelName, KernelType, PropertiesT>(
Properties, KernelFunc);
},
TlsCodeLocCapture.query());
}
}

/// single_task version with a kernel represented as a lambda.
Expand Down Expand Up @@ -3291,7 +3368,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {
return detail::submit_kernel_direct<KernelName, true>(
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
*this, ext::oneapi::experimental::empty_properties_t{}, Range,
Rest..., TlsCodeLocCapture.query());
} else {
Expand Down
3 changes: 2 additions & 1 deletion sycl/test-e2e/Basic/test_num_kernel_copies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ int main(int argc, char **argv) {

kernel<2> krn2;
q.single_task(krn2);
assert(copy_count == 1);
// The kernel is copied on the scheduler-based path only
assert(copy_count == 0);
assert(move_count == 0);
copy_count = 0;

Expand Down
2 changes: 1 addition & 1 deletion sycl/test/basic_tests/kernel_size_mismatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ int main() {
(void)A;
// expected-no-diagnostics
#else
// expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement '{{.*}}': Unexpected kernel lambda size. This can be caused by an external host compiler producing a lambda with an unexpected layout. This is a limitation of the compiler.}}
// expected-error-re@sycl/queue.hpp:* {{static assertion failed due to requirement '{{.*}}': Unexpected kernel lambda size. This can be caused by an external host compiler producing a lambda with an unexpected layout. This is a limitation of the compiler.}}
#endif
}).wait();
}
3 changes: 3 additions & 0 deletions sycl/test/basic_tests/single_task_error_message.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ int main() {
.single_task([&](sycl::handler &cgh) {
// expected-error-re@sycl/queue.hpp:* {{static assertion failed due to requirement '{{.*}}': sycl::queue.single_task() requires a kernel instead of command group.{{.*}} Use queue.submit() instead}}
// expected-error-re@sycl/detail/cg_types.hpp:* {{no matching function for call to object of type '(lambda at {{.*}}single_task_error_message.cpp:{{.*}})'}}
// TODO Investigate why this function template is not instantiated
// (if this is expected).
// expected-error@sycl/detail/cg_types.hpp:* {{no matching function for call to 'runKernelWithoutArg'}}
})
.wait();
}
Expand Down
Loading