@@ -142,16 +142,9 @@ class __SYCL_EXPORT SubmissionInfo {
142142
143143} // namespace v1
144144
145- template <typename KernelName = detail::auto_name, typename PropertiesT,
146- typename KernelType, int Dims>
147- event submit_kernel_direct_with_event (
148- const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
149- const KernelType &KernelFunc,
150- const detail::code_location &CodeLoc = detail::code_location::current());
151-
152- template <typename KernelName = detail::auto_name, typename PropertiesT,
153- typename KernelType, int Dims>
154- void submit_kernel_direct_without_event (
145+ template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
146+ typename PropertiesT, typename KernelType, int Dims>
147+ auto submit_kernel_direct (
155148 const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
156149 const KernelType &KernelFunc,
157150 const detail::code_location &CodeLoc = detail::code_location::current());
@@ -3228,7 +3221,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32283221 detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
32293222#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
32303223 if constexpr (sizeof ...(RestT) == 1 ) {
3231- return detail::submit_kernel_direct_with_event <KernelName>(
3224+ return detail::submit_kernel_direct <KernelName, true >(
32323225 *this , ext::oneapi::experimental::empty_properties_t {}, Range,
32333226 Rest...);
32343227 } else {
@@ -3619,15 +3612,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
36193612 const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
36203613 const sycl::detail::code_location &CodeLoc);
36213614
3622- template <typename KernelName, typename PropertiesT, typename KernelType,
3623- int Dims>
3624- friend void sycl::detail::submit_kernel_direct_without_event (
3625- const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
3626- const KernelType &KernelFunc, const detail::code_location &CodeLoc);
3627-
3628- template <typename KernelName, typename PropertiesT, typename KernelType,
3629- int Dims>
3630- friend event sycl::detail::submit_kernel_direct_with_event (
3615+ template <typename KernelName, bool EventNeeded, typename PropertiesT,
3616+ typename KernelType, int Dims>
3617+ friend auto sycl::detail::submit_kernel_direct (
36313618 const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
36323619 const KernelType &KernelFunc, const detail::code_location &CodeLoc);
36333620
@@ -3913,12 +3900,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
39133900
39143901namespace detail {
39153902
3916- template <typename KernelName, typename PropertiesT, typename KernelType,
3917- int Dims>
3918- event submit_kernel_direct_with_event (const queue &Queue, PropertiesT Props,
3919- nd_range<Dims> Range,
3920- const KernelType &KernelFunc,
3921- const detail::code_location &CodeLoc) {
3903+ template <typename KernelName, bool EventNeeded, typename PropertiesT,
3904+ typename KernelType, int Dims>
3905+ auto submit_kernel_direct (const queue &Queue, PropertiesT Props,
3906+ nd_range<Dims> Range, const KernelType &KernelFunc,
3907+ const detail::code_location &CodeLoc) {
39223908 // TODO Properties not supported yet
39233909 (void )Props;
39243910 static_assert (
@@ -3946,48 +3932,17 @@ event submit_kernel_direct_with_event(const queue &Queue, PropertiesT Props,
39463932 detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
39473933 TransformedArgType, PropertiesT>::wrap (KernelFunc);
39483934
3949- return Queue.submit_kernel_direct_with_event_impl (
3950- Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query (),
3951- TlsCodeLocCapture.isToplevel ());
3935+ if constexpr (EventNeeded) {
3936+ return Queue.submit_kernel_direct_with_event_impl (
3937+ Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query (),
3938+ TlsCodeLocCapture.isToplevel ());
3939+ } else {
3940+ Queue.submit_kernel_direct_without_event_impl (
3941+ Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query (),
3942+ TlsCodeLocCapture.isToplevel ());
3943+ }
39523944}
39533945
3954- template <typename KernelName, typename PropertiesT, typename KernelType,
3955- int Dims>
3956- void submit_kernel_direct_without_event (const queue &Queue, PropertiesT Props,
3957- nd_range<Dims> Range,
3958- const KernelType &KernelFunc,
3959- const detail::code_location &CodeLoc) {
3960- // TODO Properties not supported yet
3961- (void )Props;
3962- static_assert (
3963- std::is_same_v<PropertiesT,
3964- ext::oneapi::experimental::empty_properties_t >,
3965- " Setting properties not supported yet for no-CGH kernel submit." );
3966- detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
3967-
3968- using NameT =
3969- typename detail::get_kernel_name_t <KernelName, KernelType>::name;
3970- using LambdaArgType =
3971- sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
3972- static_assert (
3973- std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
3974- " Kernel argument of a sycl::parallel_for with sycl::nd_range "
3975- " must be either sycl::nd_item or be convertible from sycl::nd_item" );
3976- using TransformedArgType = sycl::nd_item<Dims>;
3977-
3978- std::shared_ptr<detail::HostKernelBase> HostKernel = std::make_shared<
3979- detail::HostKernel<KernelType, TransformedArgType, Dims>>(KernelFunc);
3980-
3981- detail::DeviceKernelInfo *DeviceKernelInfoPtr =
3982- &detail::getDeviceKernelInfo<NameT>();
3983-
3984- detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
3985- TransformedArgType, PropertiesT>::wrap (KernelFunc);
3986-
3987- Queue.submit_kernel_direct_without_event_impl (
3988- Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query (),
3989- TlsCodeLocCapture.isToplevel ());
3990- }
39913946} // namespace detail
39923947
39933948} // namespace _V1
0 commit comments