Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,21 @@ template <typename KernelName = sycl::detail::auto_name, typename KernelType>
void single_task(queue Q, const KernelType &KernelObj,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
submit(
std::move(Q),
[&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); }, CodeLoc);
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
detail::submit_kernel_direct_single_task<KernelName>(
std::move(Q), empty_properties_t{}, KernelObj, CodeLoc);
} else {
submit(
std::move(Q),
[&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); },
CodeLoc);
}
}

template <typename... ArgsT>
Expand Down Expand Up @@ -268,8 +280,8 @@ void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dimensions>>::value)) {
detail::submit_kernel_direct<KernelName>(std::move(Q), empty_properties_t{},
Range, KernelObj);
detail::submit_kernel_direct_parallel_for<KernelName>(
std::move(Q), empty_properties_t{}, Range, KernelObj);
} else {
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Range, KernelObj,
Expand Down
25 changes: 19 additions & 6 deletions sycl/include/sycl/khr/free_function_commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<1>>::value)) {
detail::submit_kernel_direct(
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<1>(r, size), std::forward<KernelType>(k));
} else {
Expand All @@ -185,7 +185,7 @@ void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<2>>::value)) {
detail::submit_kernel_direct(
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<2>(r, size), std::forward<KernelType>(k));
} else {
Expand All @@ -206,7 +206,7 @@ void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<3>>::value)) {
detail::submit_kernel_direct(
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<3>(r, size), std::forward<KernelType>(k));
} else {
Expand Down Expand Up @@ -319,11 +319,24 @@ void launch_task(handler &h, const KernelType &k) {
h.single_task(k);
}

template <typename KernelType>
void launch_task(const sycl::queue &q, const KernelType &k,
template <typename KernelType, typename = typename std::enable_if_t<
enable_kernel_function_overload<KernelType>>>
void launch_task(const sycl::queue &q, KernelType &&k,
const sycl::detail::code_location &codeLoc =
sycl::detail::code_location::current()) {
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
detail::submit_kernel_direct_single_task(
q, ext::oneapi::experimental::empty_properties_t{},
std::forward<KernelType>(k), codeLoc);
} else {
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
}
}

template <typename... Args>
Expand Down
114 changes: 88 additions & 26 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,14 @@ class __SYCL_EXPORT SubmissionInfo {

} // namespace v1

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
template <detail::WrapAs WrapAs, typename LambdaArgType,
typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct(
const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
KernelTypeUniversalRef &&KernelFunc,
const queue &Queue, [[maybe_unused]] PropertiesT Props,
const nd_range<Dims> &Range, KernelTypeUniversalRef &&KernelFunc,
const detail::code_location &CodeLoc = detail::code_location::current()) {
// TODO Properties not supported yet
(void)Props;
static_assert(
std::is_same_v<PropertiesT,
ext::oneapi::experimental::empty_properties_t>,
Expand All @@ -176,34 +176,39 @@ auto submit_kernel_direct(

using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
using LambdaArgType =
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
static_assert(
std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
"Kernel argument of a sycl::parallel_for with sycl::nd_range "
"must be either sycl::nd_item or be convertible from sycl::nd_item");
using TransformedArgType = sycl::nd_item<Dims>;

#ifndef __SYCL_DEVICE_ONLY__
detail::checkValueRange<Dims>(Range);
#endif

detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
TransformedArgType, PropertiesT>::wrap(KernelFunc);
detail::KernelWrapper<WrapAs, NameT, KernelType, LambdaArgType,
PropertiesT>::wrap(KernelFunc);

HostKernelRef<KernelType, KernelTypeUniversalRef, TransformedArgType, Dims>
HostKernelRef<KernelType, KernelTypeUniversalRef, LambdaArgType, Dims>
HostKernel(std::forward<KernelTypeUniversalRef>(KernelFunc));

// Instantiating the kernel on the host improves debugging.
// Passing this pointer to another translation unit prevents optimization.
#ifndef NDEBUG
// TODO: call library to prevent dropping call due to optimization
// TODO: call library to prevent dropping call due to optimization.
(void)
detail::GetInstantiateKernelOnHostPtr<KernelType, LambdaArgType, Dims>();
#endif

detail::DeviceKernelInfo *DeviceKernelInfoPtr =
&detail::getDeviceKernelInfo<NameT>();
constexpr auto Info = detail::CompileTimeKernelInfo<NameT>;

assert(Info.Name != std::string_view{} && "Kernel must have a name!");

static_assert(
Info.Name == std::string_view{} || sizeof(KernelType) == Info.KernelSize,
"Unexpected kernel lambda size. This can be caused by an "
"external host compiler producing a lambda with an "
"unexpected layout. This is a limitation of the compiler."
"In many cases the difference is related to capturing constexpr "
"variables. In such cases removing constexpr specifier aligns the "
"captures between the host compiler and the device compiler."
"\n"
"In case of MSVC, passing "
"-fsycl-host-compiler-options='/std:c++latest' "
"might also help.");

if constexpr (EventNeeded) {
return submit_kernel_direct_with_event_impl(
Expand All @@ -216,6 +221,48 @@ auto submit_kernel_direct(
}
}

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct_parallel_for(
const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
KernelTypeUniversalRef &&KernelFunc,
const detail::code_location &CodeLoc = detail::code_location::current()) {

using KernelType =
std::remove_const_t<std::remove_reference_t<KernelTypeUniversalRef>>;

using LambdaArgType =
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
static_assert(
std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
"Kernel argument of a sycl::parallel_for with sycl::nd_range "
"must be either sycl::nd_item or be convertible from sycl::nd_item");
using TransformedArgType = sycl::nd_item<Dims>;

#ifndef __SYCL_DEVICE_ONLY__
detail::checkValueRange<Dims>(Range);
#endif

return submit_kernel_direct<detail::WrapAs::parallel_for, TransformedArgType,
KernelName, EventNeeded, PropertiesT,
KernelTypeUniversalRef, Dims>(
Queue, Props, Range, std::forward<KernelTypeUniversalRef>(KernelFunc),
CodeLoc);
}

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT, typename KernelTypeUniversalRef>
auto submit_kernel_direct_single_task(
const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
const detail::code_location &CodeLoc = detail::code_location::current()) {

return submit_kernel_direct<detail::WrapAs::single_task, void, KernelName,
EventNeeded, PropertiesT, KernelTypeUniversalRef,
1>(
Queue, Props, nd_range<1>{1, 1},
std::forward<KernelTypeUniversalRef>(KernelFunc), CodeLoc);
}

} // namespace detail

namespace ext ::oneapi ::experimental {
Expand Down Expand Up @@ -2727,12 +2774,27 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
"Use queue.submit() instead");

detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit(
[&](handler &CGH) {
CGH.template single_task<KernelName, KernelType, PropertiesT>(
Properties, KernelFunc);
},
TlsCodeLocCapture.query());

// TODO The handler-less path does not support kernel
// function properties and kernel functions with the kernel_handler
// type argument yet.
if constexpr (
std::is_same_v<PropertiesT,
ext::oneapi::experimental::empty_properties_t> &&
!(ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType, void>::value)) {
return detail::submit_kernel_direct_single_task<KernelName, true>(
*this, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
CGH.template single_task<KernelName, KernelType, PropertiesT>(
Properties, KernelFunc);
},
TlsCodeLocCapture.query());
}
}

/// single_task version with a kernel represented as a lambda.
Expand Down Expand Up @@ -3291,7 +3353,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {
return detail::submit_kernel_direct<KernelName, true>(
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
*this, ext::oneapi::experimental::empty_properties_t{}, Range,
Rest..., TlsCodeLocCapture.query());
} else {
Expand Down
3 changes: 2 additions & 1 deletion sycl/test-e2e/Basic/test_num_kernel_copies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ int main(int argc, char **argv) {

kernel<2> krn2;
q.single_task(krn2);
assert(copy_count == 1);
// The kernel is copied on the scheduler-based path only.
assert(copy_count == 0);
assert(move_count == 0);
copy_count = 0;

Expand Down
2 changes: 1 addition & 1 deletion sycl/test/basic_tests/kernel_size_mismatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ int main() {
(void)A;
// expected-no-diagnostics
#else
// expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement '{{.*}}': Unexpected kernel lambda size. This can be caused by an external host compiler producing a lambda with an unexpected layout. This is a limitation of the compiler.}}
// expected-error-re@sycl/queue.hpp:* {{static assertion failed due to requirement '{{.*}}': Unexpected kernel lambda size. This can be caused by an external host compiler producing a lambda with an unexpected layout. This is a limitation of the compiler.}}
#endif
}).wait();
}
3 changes: 3 additions & 0 deletions sycl/test/basic_tests/single_task_error_message.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ int main() {
.single_task([&](sycl::handler &cgh) {
// expected-error-re@sycl/queue.hpp:* {{static assertion failed due to requirement '{{.*}}': sycl::queue.single_task() requires a kernel instead of command group.{{.*}} Use queue.submit() instead}}
// expected-error-re@sycl/detail/cg_types.hpp:* {{no matching function for call to object of type '(lambda at {{.*}}single_task_error_message.cpp:{{.*}})'}}
// TODO Investigate why this function template is not instantiated
// (if this is expected).
// expected-error@sycl/detail/cg_types.hpp:* {{no matching function for call to 'runKernelWithoutArg'}}
})
.wait();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class FreeFunctionCommandsEventsTests : public ::testing::Test {
protected:
void SetUp() override {
counter_urEnqueueKernelLaunch = 0;
counter_urEnqueueKernelLaunchWithEvent = 0;
counter_urUSMEnqueueMemcpy = 0;
counter_urUSMEnqueueFill = 0;
counter_urUSMEnqueuePrefetch = 0;
Expand Down Expand Up @@ -281,6 +282,57 @@ TEST_F(FreeFunctionCommandsEventsTests,
ASSERT_EQ(counter_urEnqueueKernelLaunchWithEvent, size_t{1});
}

TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutMoveKernel) {
mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch",
&redefined_urEnqueueKernelLaunch);

TestMoveFunctor::MoveCtorCalls = 0;
TestMoveFunctor MoveOnly;
std::mutex CvMutex;
std::condition_variable Cv;
bool ready = false;

// This kernel submission uses scheduler-bypass path, so the HostKernel
// shouldn't be constructed.

sycl::khr::launch_task(Queue, std::move(MoveOnly));

ASSERT_EQ(TestMoveFunctor::MoveCtorCalls, 0);
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});

// Another kernel submission is queued behind a host task,
// to force the scheduler-based submission. In this case, the HostKernel
// should be constructed.

// Replace the callback with an event based one, since the scheduler
// needs to create an event internally
mock::getCallbacks().set_replace_callback(
"urEnqueueKernelLaunch", &redefined_urEnqueueKernelLaunchWithEvent);

Queue.submit([&](sycl::handler &CGH) {
CGH.host_task([&] {
std::unique_lock<std::mutex> lk(CvMutex);
Cv.wait(lk, [&ready] { return ready; });
});
});

sycl::khr::launch_task(Queue, std::move(MoveOnly));

{
std::unique_lock<std::mutex> lk(CvMutex);
ready = true;
}
Cv.notify_one();

Queue.wait();

// Move ctor for TestMoveFunctor is called during move construction of
// HostKernel. Copy ctor is called by InstantiateKernelOnHost, can't delete
// it.
ASSERT_EQ(TestMoveFunctor::MoveCtorCalls, 1);
ASSERT_EQ(counter_urEnqueueKernelLaunchWithEvent, size_t{1});
}

TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) {
mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch",
&redefined_urEnqueueKernelLaunch);
Expand Down
Loading