@@ -158,7 +158,7 @@ class __SYCL_EXPORT SubmissionInfo {
158158
159159template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
160160 typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
161- auto submit_kernel_direct (
161+ auto submit_kernel_direct_parallel_for (
162162 const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
163163 KernelTypeUniversalRef &&KernelFunc,
164164 const detail::code_location &CodeLoc = detail::code_location::current()) {
@@ -211,6 +211,53 @@ auto submit_kernel_direct(
211211 }
212212}
213213
// Handler-less ("no-CGH") submission of a single_task kernel on a queue.
//
// Template parameters:
//   KernelName             - kernel name tag; defaults to detail::auto_name
//                            and is resolved to NameT together with the
//                            kernel type below.
//   EventNeeded            - compile-time switch: when true the result of
//                            submit_kernel_direct_with_event_impl is
//                            returned; when false
//                            submit_kernel_direct_without_event_impl is
//                            called and nothing is returned.
//   PropertiesT            - must be
//                            ext::oneapi::experimental::empty_properties_t;
//                            any other type fails the static_assert.
//   KernelTypeUniversalRef - deduced type of the forwarding-reference
//                            kernel callable.
//
// Parameters:
//   Queue      - queue passed through to the submit implementation.
//   Props      - currently unused (see the TODO in the body).
//   KernelFunc - kernel callable; handed to the single_task KernelWrapper as
//                an lvalue and then forwarded into the HostKernelRef.
//   CodeLoc    - call-site code location; defaults to
//                detail::code_location::current().
214+ template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
215+ typename PropertiesT, typename KernelTypeUniversalRef>
216+ auto submit_kernel_direct_single_task (
217+ const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
218+ const detail::code_location &CodeLoc = detail::code_location::current()) {
219+ // TODO Properties not supported yet
220+ (void )Props;
// Reject any non-empty property list at compile time rather than silently
// ignoring it.
221+ static_assert (
222+ std::is_same_v<PropertiesT,
223+ ext::oneapi::experimental::empty_properties_t >,
224+ " Setting properties not supported yet for no-CGH kernel submit." );
// Capture the code location; its query() and isToplevel() results are
// passed to the submit call at the end of this function.
225+ detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
226+
// Kernel callable type with the reference and const qualifiers of the
// deduced forwarding-reference type removed.
227+ using KernelType =
228+ std::remove_const_t <std::remove_reference_t <KernelTypeUniversalRef>>;
229+
// Kernel name type derived from the requested KernelName and KernelType.
230+ using NameT =
231+ typename detail::get_kernel_name_t <KernelName, KernelType>::name;
232+
// Run the single_task flavour of the kernel wrapper on the callable.
// NOTE(review): presumably this is what instantiates the device-side entry
// point - confirm against detail::KernelWrapper.
233+ detail::KernelWrapper<detail::WrapAs::single_task, NameT, KernelType,
234+ void , PropertiesT>::wrap (KernelFunc);
235+
// KernelFunc was used as an lvalue above; it is forwarded only here, so its
// value category is preserved for the HostKernelRef constructor.
236+ HostKernelRef<KernelType, KernelTypeUniversalRef, void , 1 >
237+ HostKernel (std::forward<KernelTypeUniversalRef>(KernelFunc));
238+
239+ // Instantiating the kernel on the host improves debugging.
240+ // Passing this pointer to another translation unit prevents optimization.
241+ #ifndef NDEBUG
242+ // TODO: call library to prevent dropping call due to optimization
243+ (void )
244+ detail::GetInstantiateKernelOnHostPtr<KernelType, void , 1 >();
245+ #endif
246+
// Address of the DeviceKernelInfo object associated with NameT; passed to
// the submit implementation below.
247+ detail::DeviceKernelInfo *DeviceKernelInfoPtr =
248+ &detail::getDeviceKernelInfo<NameT>();
249+
// Both branches submit the kernel over a 1-D nd_range built from {1, 1};
// they differ only in whether the implementation's result is returned.
250+ if constexpr (EventNeeded) {
251+ return submit_kernel_direct_with_event_impl (
252+ Queue, nd_range<1 >{1 , 1 }, HostKernel, DeviceKernelInfoPtr,
253+ TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
254+ } else {
255+ submit_kernel_direct_without_event_impl (
256+ Queue, nd_range<1 >{1 , 1 }, HostKernel, DeviceKernelInfoPtr,
257+ TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
258+ }
259+ }
260+
214261} // namespace detail
215262
216263namespace ext ::oneapi ::experimental {
@@ -2720,14 +2767,21 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27202767 void (kernel_handler)>::value),
27212768 " sycl::queue.single_task() requires a kernel instead of command group. "
27222769 " Use queue.submit() instead" );
2723-
2770+ /*
27242771 detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
27252772 return submit(
27262773 [&](handler &CGH) {
27272774 CGH.template single_task<KernelName, KernelType, PropertiesT>(
27282775 Properties, KernelFunc);
27292776 },
27302777 TlsCodeLocCapture.query());
2778+ */
2779+
2780+ (void )Properties;
2781+ return detail::submit_kernel_direct_single_task<KernelName, true >(
2782+ *this , ext::oneapi::experimental::empty_properties_t {},
2783+ KernelFunc, CodeLoc);
2784+
27312785 }
27322786
27332787 // / single_task version with a kernel represented as a lambda.
@@ -3278,7 +3332,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32783332#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
32793333 // TODO The handler-less path does not support reductions yet.
32803334 if constexpr (sizeof ...(RestT) == 1 ) {
3281- return detail::submit_kernel_direct <KernelName, true >(
3335+ return detail::submit_kernel_direct_parallel_for <KernelName, true >(
32823336 *this , ext::oneapi::experimental::empty_properties_t {}, Range,
32833337 Rest...);
32843338 } else
0 commit comments