@@ -158,7 +158,7 @@ class __SYCL_EXPORT SubmissionInfo {
158158
159159template  <typename  KernelName = detail::auto_name, bool  EventNeeded = false ,
160160          typename  PropertiesT, typename  KernelTypeUniversalRef, int  Dims>
161- auto  submit_kernel_direct (
161+ auto  submit_kernel_direct_parallel_for (
162162    const  queue &Queue, PropertiesT Props, const  nd_range<Dims> &Range,
163163    KernelTypeUniversalRef &&KernelFunc,
164164    const  detail::code_location &CodeLoc = detail::code_location::current()) {
@@ -211,6 +211,53 @@ auto submit_kernel_direct(
211211  }
212212}
213213
214+ template  <typename  KernelName = detail::auto_name, bool  EventNeeded = false ,
215+           typename  PropertiesT, typename  KernelTypeUniversalRef>
216+ auto  submit_kernel_direct_single_task (
217+     const  queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
218+     const  detail::code_location &CodeLoc = detail::code_location::current()) {
219+   //  TODO Properties not supported yet
220+   (void )Props;
221+   static_assert (
222+       std::is_same_v<PropertiesT,
223+                      ext::oneapi::experimental::empty_properties_t >,
224+       " Setting properties not supported yet for no-CGH kernel submit."  );
225+   detail::tls_code_loc_t  TlsCodeLocCapture (CodeLoc);
226+ 
227+   using  KernelType =
228+       std::remove_const_t <std::remove_reference_t <KernelTypeUniversalRef>>;
229+ 
230+   using  NameT =
231+       typename  detail::get_kernel_name_t <KernelName, KernelType>::name;
232+ 
233+   detail::KernelWrapper<detail::WrapAs::single_task, NameT, KernelType,
234+                         void , PropertiesT>::wrap (KernelFunc);
235+ 
236+   HostKernelRef<KernelType, KernelTypeUniversalRef, void , 1 >
237+       HostKernel (std::forward<KernelTypeUniversalRef>(KernelFunc));
238+ 
239+   //  Instantiating the kernel on the host improves debugging.
240+   //  Passing this pointer to another translation unit prevents optimization.
241+ #ifndef  NDEBUG
242+   //  TODO: call library to prevent dropping call due to optimization
243+   (void )
244+       detail::GetInstantiateKernelOnHostPtr<KernelType, void , 1 >();
245+ #endif 
246+ 
247+   detail::DeviceKernelInfo *DeviceKernelInfoPtr =
248+       &detail::getDeviceKernelInfo<NameT>();
249+ 
250+   if  constexpr  (EventNeeded) {
251+     return  submit_kernel_direct_with_event_impl (
252+         Queue, nd_range<1 >{1 , 1 }, HostKernel, DeviceKernelInfoPtr,
253+         TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
254+   } else  {
255+     submit_kernel_direct_without_event_impl (
256+         Queue, nd_range<1 >{1 , 1 }, HostKernel, DeviceKernelInfoPtr,
257+         TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
258+   }
259+ }
260+ 
214261} //  namespace detail
215262
216263namespace  ext  ::oneapi ::experimental {
@@ -2720,14 +2767,21 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27202767                                    void (kernel_handler)>::value),
27212768        " sycl::queue.single_task() requires a kernel instead of command group. " 
27222769        " Use queue.submit() instead"  );
2723- 
2770+ /* 
27242771    detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); 
27252772    return submit( 
27262773        [&](handler &CGH) { 
27272774          CGH.template single_task<KernelName, KernelType, PropertiesT>( 
27282775              Properties, KernelFunc); 
27292776        }, 
27302777        TlsCodeLocCapture.query()); 
2778+ */ 
2779+ 
2780+       (void )Properties;
2781+       return  detail::submit_kernel_direct_single_task<KernelName, true >(
2782+           *this , ext::oneapi::experimental::empty_properties_t {},
2783+           KernelFunc, CodeLoc);
2784+ 
27312785  }
27322786
27332787  // / single_task version with a kernel represented as a lambda.
@@ -3275,7 +3329,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32753329  parallel_for (nd_range<Dims> Range, RestT &&...Rest) {
32763330    constexpr  detail::code_location CodeLoc = getCodeLocation<KernelName>();
32773331    detail::tls_code_loc_t  TlsCodeLocCapture (CodeLoc);
3278- 
32793332    using  KernelType = std::tuple_element_t <0 , std::tuple<RestT...>>;
32803333
32813334    //  TODO The handler-less path does not support reductions and kernel
@@ -3284,7 +3337,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32843337                  !(ext::oneapi::experimental::detail::
32853338                        HasKernelPropertiesGetMethod<
32863339                            const  KernelType &>::value)) {
3287-       return  detail::submit_kernel_direct <KernelName, true >(
3340+       return  detail::submit_kernel_direct_parallel_for <KernelName, true >(
32883341          *this , ext::oneapi::experimental::empty_properties_t {}, Range,
32893342          Rest...);
32903343    } else  {
0 commit comments