Skip to content

Commit 40c39d6

Browse files
committed
Basic no-handler single task support
1 parent 506c1a9 commit 40c39d6

File tree

3 files changed

+70
-9
lines changed

3 files changed

+70
-9
lines changed

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,13 @@ template <typename KernelName = sycl::detail::auto_name, typename KernelType>
152152
void single_task(queue Q, const KernelType &KernelObj,
153153
const sycl::detail::code_location &CodeLoc =
154154
sycl::detail::code_location::current()) {
155+
/*
155156
submit(
156157
std::move(Q),
157158
[&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); }, CodeLoc);
159+
*/
160+
detail::submit_kernel_direct_single_task<KernelName>(std::move(Q), empty_properties_t{},
161+
KernelObj);
158162
}
159163

160164
template <typename... ArgsT>
@@ -262,7 +266,7 @@ void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
262266
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
263267
// TODO The handler-less path does not support reductions yet.
264268
if constexpr (sizeof...(ReductionsT) == 0) {
265-
detail::submit_kernel_direct<KernelName>(std::move(Q), empty_properties_t{},
269+
detail::submit_kernel_direct_parallel_for<KernelName>(std::move(Q), empty_properties_t{},
266270
Range, KernelObj);
267271
} else
268272
#endif
@@ -298,7 +302,7 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
298302
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
299303
Properties>
300304
ConfigAccess(Config);
301-
detail::submit_kernel_direct<KernelName>(
305+
detail::submit_kernel_direct_parallel_for<KernelName>(
302306
std::move(Q), ConfigAccess.getProperties(), ConfigAccess.getRange(),
303307
KernelObj);
304308
} else

sycl/include/sycl/khr/free_function_commands.hpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
158158
const sycl::detail::code_location &codeLoc =
159159
sycl::detail::code_location::current()) {
160160
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
161-
detail::submit_kernel_direct(
161+
detail::submit_kernel_direct_parallel_for(
162162
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<1>(r, size),
163163
std::forward<KernelType>(k));
164164
#else
@@ -173,7 +173,7 @@ void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
173173
const sycl::detail::code_location &codeLoc =
174174
sycl::detail::code_location::current()) {
175175
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
176-
detail::submit_kernel_direct(
176+
detail::submit_kernel_direct_parallel_for(
177177
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<2>(r, size),
178178
std::forward<KernelType>(k));
179179
#else
@@ -188,7 +188,7 @@ void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
188188
const sycl::detail::code_location &codeLoc =
189189
sycl::detail::code_location::current()) {
190190
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
191-
detail::submit_kernel_direct(
191+
detail::submit_kernel_direct_parallel_for(
192192
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<3>(r, size),
193193
std::forward<KernelType>(k));
194194
#else
@@ -305,7 +305,10 @@ template <typename KernelType>
305305
void launch_task(const sycl::queue &q, const KernelType &k,
306306
const sycl::detail::code_location &codeLoc =
307307
sycl::detail::code_location::current()) {
308-
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
308+
//submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
309+
detail::submit_kernel_direct_single_task(q,
310+
ext::oneapi::experimental::empty_properties_t{},
311+
k, codeLoc);
309312
}
310313

311314
template <typename... Args>

sycl/include/sycl/queue.hpp

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ class __SYCL_EXPORT SubmissionInfo {
158158

159159
template <typename KernelName = detail::auto_name, bool EventNeeded = false,
160160
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
161-
auto submit_kernel_direct(
161+
auto submit_kernel_direct_parallel_for(
162162
const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
163163
KernelTypeUniversalRef &&KernelFunc,
164164
const detail::code_location &CodeLoc = detail::code_location::current()) {
@@ -211,6 +211,53 @@ auto submit_kernel_direct(
211211
}
212212
}
213213

214+
template <typename KernelName = detail::auto_name, bool EventNeeded = false,
215+
typename PropertiesT, typename KernelTypeUniversalRef>
216+
auto submit_kernel_direct_single_task(
217+
const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
218+
const detail::code_location &CodeLoc = detail::code_location::current()) {
219+
// TODO Properties not supported yet
220+
(void)Props;
221+
static_assert(
222+
std::is_same_v<PropertiesT,
223+
ext::oneapi::experimental::empty_properties_t>,
224+
"Setting properties not supported yet for no-CGH kernel submit.");
225+
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
226+
227+
using KernelType =
228+
std::remove_const_t<std::remove_reference_t<KernelTypeUniversalRef>>;
229+
230+
using NameT =
231+
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
232+
233+
detail::KernelWrapper<detail::WrapAs::single_task, NameT, KernelType,
234+
void, PropertiesT>::wrap(KernelFunc);
235+
236+
HostKernelRef<KernelType, KernelTypeUniversalRef, void, 1>
237+
HostKernel(std::forward<KernelTypeUniversalRef>(KernelFunc));
238+
239+
// Instantiating the kernel on the host improves debugging.
240+
// Passing this pointer to another translation unit prevents optimization.
241+
#ifndef NDEBUG
242+
// TODO: call library to prevent dropping call due to optimization
243+
(void)
244+
detail::GetInstantiateKernelOnHostPtr<KernelType, void, 1>();
245+
#endif
246+
247+
detail::DeviceKernelInfo *DeviceKernelInfoPtr =
248+
&detail::getDeviceKernelInfo<NameT>();
249+
250+
if constexpr (EventNeeded) {
251+
return submit_kernel_direct_with_event_impl(
252+
Queue, nd_range<1>{1, 1}, HostKernel, DeviceKernelInfoPtr,
253+
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
254+
} else {
255+
submit_kernel_direct_without_event_impl(
256+
Queue, nd_range<1>{1, 1}, HostKernel, DeviceKernelInfoPtr,
257+
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
258+
}
259+
}
260+
214261
} // namespace detail
215262

216263
namespace ext ::oneapi ::experimental {
@@ -2720,14 +2767,21 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27202767
void(kernel_handler)>::value),
27212768
"sycl::queue.single_task() requires a kernel instead of command group. "
27222769
"Use queue.submit() instead");
2723-
2770+
/*
27242771
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
27252772
return submit(
27262773
[&](handler &CGH) {
27272774
CGH.template single_task<KernelName, KernelType, PropertiesT>(
27282775
Properties, KernelFunc);
27292776
},
27302777
TlsCodeLocCapture.query());
2778+
*/
2779+
2780+
(void)Properties;
2781+
return detail::submit_kernel_direct_single_task<KernelName, true>(
2782+
*this, ext::oneapi::experimental::empty_properties_t{},
2783+
KernelFunc, CodeLoc);
2784+
27312785
}
27322786

27332787
/// single_task version with a kernel represented as a lambda.
@@ -3278,7 +3332,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32783332
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
32793333
// TODO The handler-less path does not support reductions yet.
32803334
if constexpr (sizeof...(RestT) == 1) {
3281-
return detail::submit_kernel_direct<KernelName, true>(
3335+
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
32823336
*this, ext::oneapi::experimental::empty_properties_t{}, Range,
32833337
Rest...);
32843338
} else

0 commit comments

Comments
 (0)