Skip to content

Commit a3542db

Browse files
committed
[SYCL] Handler-less kernel submit path (single_task)
Extend the handler-less kernel submission path to support the single_task functions.
1 parent 8ef08d4 commit a3542db

File tree

6 files changed

+122
-19
lines changed

6 files changed

+122
-19
lines changed

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,21 @@ template <typename KernelName = sycl::detail::auto_name, typename KernelType>
152152
void single_task(queue Q, const KernelType &KernelObj,
153153
const sycl::detail::code_location &CodeLoc =
154154
sycl::detail::code_location::current()) {
155-
submit(
156-
std::move(Q),
157-
[&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); }, CodeLoc);
155+
// TODO The handler-less path does not support kernel function properties
156+
// and kernel functions with the kernel_handler type argument yet.
157+
if constexpr (!(ext::oneapi::experimental::detail::
158+
HasKernelPropertiesGetMethod<
159+
const KernelType &>::value) &&
160+
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
161+
void>::value)) {
162+
detail::submit_kernel_direct_single_task<KernelName>(
163+
std::move(Q), empty_properties_t{}, KernelObj, CodeLoc);
164+
} else {
165+
submit(
166+
std::move(Q),
167+
[&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); },
168+
CodeLoc);
169+
}
158170
}
159171

160172
template <typename... ArgsT>
@@ -268,8 +280,8 @@ void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
268280
const KernelType &>::value) &&
269281
!(detail::KernelLambdaHasKernelHandlerArgT<
270282
KernelType, sycl::nd_item<Dimensions>>::value)) {
271-
detail::submit_kernel_direct<KernelName>(std::move(Q), empty_properties_t{},
272-
Range, KernelObj);
283+
detail::submit_kernel_direct_parallel_for<KernelName>(
284+
std::move(Q), empty_properties_t{}, Range, KernelObj);
273285
} else {
274286
submit(std::move(Q), [&](handler &CGH) {
275287
nd_launch<KernelName>(CGH, Range, KernelObj,

sycl/include/sycl/khr/free_function_commands.hpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
164164
const KernelType &>::value) &&
165165
!(detail::KernelLambdaHasKernelHandlerArgT<
166166
KernelType, sycl::nd_item<1>>::value)) {
167-
detail::submit_kernel_direct(
167+
detail::submit_kernel_direct_parallel_for(
168168
q, ext::oneapi::experimental::empty_properties_t{},
169169
nd_range<1>(r, size), std::forward<KernelType>(k));
170170
} else {
@@ -185,7 +185,7 @@ void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
185185
const KernelType &>::value) &&
186186
!(detail::KernelLambdaHasKernelHandlerArgT<
187187
KernelType, sycl::nd_item<2>>::value)) {
188-
detail::submit_kernel_direct(
188+
detail::submit_kernel_direct_parallel_for(
189189
q, ext::oneapi::experimental::empty_properties_t{},
190190
nd_range<2>(r, size), std::forward<KernelType>(k));
191191
} else {
@@ -206,7 +206,7 @@ void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
206206
const KernelType &>::value) &&
207207
!(detail::KernelLambdaHasKernelHandlerArgT<
208208
KernelType, sycl::nd_item<3>>::value)) {
209-
detail::submit_kernel_direct(
209+
detail::submit_kernel_direct_parallel_for(
210210
q, ext::oneapi::experimental::empty_properties_t{},
211211
nd_range<3>(r, size), std::forward<KernelType>(k));
212212
} else {
@@ -323,7 +323,18 @@ template <typename KernelType>
323323
void launch_task(const sycl::queue &q, const KernelType &k,
324324
const sycl::detail::code_location &codeLoc =
325325
sycl::detail::code_location::current()) {
326-
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
326+
// TODO The handler-less path does not support kernel function properties
327+
// and kernel functions with the kernel_handler type argument yet.
328+
if constexpr (!(ext::oneapi::experimental::detail::
329+
HasKernelPropertiesGetMethod<
330+
const KernelType &>::value) &&
331+
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
332+
void>::value)) {
333+
detail::submit_kernel_direct_single_task(
334+
q, ext::oneapi::experimental::empty_properties_t{}, k, codeLoc);
335+
} else {
336+
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
337+
}
327338
}
328339

329340
template <typename... Args>

sycl/include/sycl/queue.hpp

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ class __SYCL_EXPORT SubmissionInfo {
159159

160160
template <typename KernelName = detail::auto_name, bool EventNeeded = false,
161161
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
162-
auto submit_kernel_direct(
162+
auto submit_kernel_direct_parallel_for(
163163
const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
164164
KernelTypeUniversalRef &&KernelFunc,
165165
const detail::code_location &CodeLoc = detail::code_location::current()) {
@@ -216,6 +216,68 @@ auto submit_kernel_direct(
216216
}
217217
}
218218

219+
template <typename KernelName = detail::auto_name, bool EventNeeded = false,
220+
typename PropertiesT, typename KernelTypeUniversalRef>
221+
auto submit_kernel_direct_single_task(
222+
const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
223+
const detail::code_location &CodeLoc = detail::code_location::current()) {
224+
// TODO Properties not supported yet
225+
(void)Props;
226+
static_assert(
227+
std::is_same_v<PropertiesT,
228+
ext::oneapi::experimental::empty_properties_t>,
229+
"Setting properties not supported yet for no-CGH kernel submit.");
230+
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
231+
232+
using KernelType =
233+
std::remove_const_t<std::remove_reference_t<KernelTypeUniversalRef>>;
234+
235+
using NameT =
236+
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
237+
238+
detail::KernelWrapper<detail::WrapAs::single_task, NameT, KernelType, void,
239+
PropertiesT>::wrap(KernelFunc);
240+
241+
HostKernelRef<KernelType, KernelTypeUniversalRef, void, 1> HostKernel(
242+
std::forward<KernelTypeUniversalRef>(KernelFunc));
243+
244+
// Instantiating the kernel on the host improves debugging.
245+
// Passing this pointer to another translation unit prevents optimization.
246+
#ifndef NDEBUG
247+
// TODO: call library to prevent dropping call due to optimization
248+
(void)detail::GetInstantiateKernelOnHostPtr<KernelType, void, 1>();
249+
#endif
250+
251+
detail::DeviceKernelInfo *DeviceKernelInfoPtr =
252+
&detail::getDeviceKernelInfo<NameT>();
253+
constexpr auto Info = detail::CompileTimeKernelInfo<NameT>;
254+
255+
assert(Info.Name != std::string_view{} && "Kernel must have a name!");
256+
257+
static_assert(
258+
Info.Name == std::string_view{} || sizeof(KernelType) == Info.KernelSize,
259+
"Unexpected kernel lambda size. This can be caused by an "
260+
"external host compiler producing a lambda with an "
261+
"unexpected layout. This is a limitation of the compiler."
262+
"In many cases the difference is related to capturing constexpr "
263+
"variables. In such cases removing constexpr specifier aligns the "
264+
"captures between the host compiler and the device compiler."
265+
"\n"
266+
"In case of MSVC, passing "
267+
"-fsycl-host-compiler-options='/std:c++latest' "
268+
"might also help.");
269+
270+
if constexpr (EventNeeded) {
271+
return submit_kernel_direct_with_event_impl(
272+
Queue, nd_range<1>{1, 1}, HostKernel, DeviceKernelInfoPtr,
273+
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
274+
} else {
275+
submit_kernel_direct_without_event_impl(
276+
Queue, nd_range<1>{1, 1}, HostKernel, DeviceKernelInfoPtr,
277+
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
278+
}
279+
}
280+
219281
} // namespace detail
220282

221283
namespace ext ::oneapi ::experimental {
@@ -2727,12 +2789,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27272789
"Use queue.submit() instead");
27282790

27292791
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2730-
return submit(
2731-
[&](handler &CGH) {
2732-
CGH.template single_task<KernelName, KernelType, PropertiesT>(
2733-
Properties, KernelFunc);
2734-
},
2735-
TlsCodeLocCapture.query());
2792+
2793+
// TODO The handler-less path does not support kernel
2794+
// function properties and kernel functions with the kernel_handler
2795+
// type argument yet.
2796+
if constexpr (!(ext::oneapi::experimental::detail::
2797+
HasKernelPropertiesGetMethod<
2798+
const KernelType &>::value) &&
2799+
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
2800+
void>::value)) {
2801+
return detail::submit_kernel_direct_single_task<KernelName, true>(
2802+
*this, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2803+
TlsCodeLocCapture.query());
2804+
} else {
2805+
return submit(
2806+
[&](handler &CGH) {
2807+
CGH.template single_task<KernelName, KernelType, PropertiesT>(
2808+
Properties, KernelFunc);
2809+
},
2810+
TlsCodeLocCapture.query());
2811+
}
27362812
}
27372813

27382814
/// single_task version with a kernel represented as a lambda.
@@ -3291,7 +3367,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32913367
const KernelType &>::value) &&
32923368
!(detail::KernelLambdaHasKernelHandlerArgT<
32933369
KernelType, sycl::nd_item<Dims>>::value)) {
3294-
return detail::submit_kernel_direct<KernelName, true>(
3370+
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
32953371
*this, ext::oneapi::experimental::empty_properties_t{}, Range,
32963372
Rest..., TlsCodeLocCapture.query());
32973373
} else {

sycl/test-e2e/Basic/test_num_kernel_copies.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ int main(int argc, char **argv) {
3636

3737
kernel<2> krn2;
3838
q.single_task(krn2);
39-
assert(copy_count == 1);
39+
// The kernel is copied on the scheduler-based path only
40+
assert(copy_count == 0);
4041
assert(move_count == 0);
4142
copy_count = 0;
4243

sycl/test/basic_tests/kernel_size_mismatch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ int main() {
1313
(void)A;
1414
// expected-no-diagnostics
1515
#else
16-
// expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement '{{.*}}': Unexpected kernel lambda size. This can be caused by an external host compiler producing a lambda with an unexpected layout. This is a limitation of the compiler.}}
16+
// expected-error-re@sycl/queue.hpp:* {{static assertion failed due to requirement '{{.*}}': Unexpected kernel lambda size. This can be caused by an external host compiler producing a lambda with an unexpected layout. This is a limitation of the compiler.}}
1717
#endif
1818
}).wait();
1919
}

sycl/test/basic_tests/single_task_error_message.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ int main() {
1212
.single_task([&](sycl::handler &cgh) {
1313
// expected-error-re@sycl/queue.hpp:* {{static assertion failed due to requirement '{{.*}}': sycl::queue.single_task() requires a kernel instead of command group.{{.*}} Use queue.submit() instead}}
1414
// expected-error-re@sycl/detail/cg_types.hpp:* {{no matching function for call to object of type '(lambda at {{.*}}single_task_error_message.cpp:{{.*}})'}}
15+
// TODO Investigate why this function template is not instantiated
16+
// (if this is expected).
17+
// expected-error@sycl/detail/cg_types.hpp:* {{no matching function for call to 'runKernelWithoutArg'}}
1518
})
1619
.wait();
1720
}

0 commit comments

Comments
 (0)