@@ -62,6 +62,20 @@ template <backend BackendName, class SyclObjectT>
6262auto get_native (const SyclObjectT &Obj)
6363 -> backend_return_t<BackendName, SyclObjectT>;
6464
// Exported free-function template used for handler-less ("no-CGH") nd_range
// kernel submission when the caller needs an event back. It is declared ahead
// of detail::submit_kernel_direct, which calls it when its EventNeeded
// template argument is true and returns the resulting event.
//   Queue            - queue the kernel is submitted to (passed explicitly,
//                      since this is a free function rather than a member).
//   Range            - nd_range<Dims> describing the launch.
//   HostKernel       - type-erased host kernel object, taken by non-const
//                      reference.
//   DeviceKernelInfo - pointer to the kernel's DeviceKernelInfo; the visible
//                      caller passes &detail::getDeviceKernelInfo<NameT>().
//   CodeLoc          - code location; the visible caller passes
//                      TlsCodeLocCapture.query().
//   IsTopCodeLoc     - the visible caller passes TlsCodeLocCapture.isToplevel().
// NOTE(review): the definition is not visible here — presumably it lives in
// the runtime library with explicit instantiations per Dims; confirm.
65+ template <int Dims>
66+ event __SYCL_EXPORT submit_kernel_direct_with_event_impl (
67+ const queue &Queue, nd_range<Dims> Range,
68+ std::shared_ptr<detail::HostKernelBase> &HostKernel,
69+ detail::DeviceKernelInfo *DeviceKernelInfo,
70+ const detail::code_location &CodeLoc, bool IsTopCodeLoc);
71+
// Exported free-function template used for handler-less ("no-CGH") nd_range
// kernel submission when no event is needed; it returns void.
// detail::submit_kernel_direct calls it when its EventNeeded template
// argument is false.
//   Queue            - queue the kernel is submitted to.
//   Range            - nd_range<Dims> describing the launch.
//   HostKernel       - type-erased host kernel object, taken by non-const
//                      reference.
//   DeviceKernelInfo - pointer to the kernel's DeviceKernelInfo; the visible
//                      caller passes &detail::getDeviceKernelInfo<NameT>().
//   CodeLoc          - code location; the visible caller passes
//                      TlsCodeLocCapture.query().
//   IsTopCodeLoc     - the visible caller passes TlsCodeLocCapture.isToplevel().
// NOTE(review): the definition is not visible here — presumably it lives in
// the runtime library with explicit instantiations per Dims; confirm.
72+ template <int Dims>
73+ void __SYCL_EXPORT submit_kernel_direct_without_event_impl (
74+ const queue &Queue, nd_range<Dims> Range,
75+ std::shared_ptr<detail::HostKernelBase> &HostKernel,
76+ detail::DeviceKernelInfo *DeviceKernelInfo,
77+ const detail::code_location &CodeLoc, bool IsTopCodeLoc);
78+
6579namespace detail {
6680class queue_impl ;
6781
@@ -147,7 +161,44 @@ template <typename KernelName = detail::auto_name, bool EventNeeded = false,
147161auto submit_kernel_direct (
148162 const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
149163 const KernelType &KernelFunc,
150- const detail::code_location &CodeLoc = detail::code_location::current());
164+ const detail::code_location &CodeLoc = detail::code_location::current()) {
165+ // TODO Properties not supported yet
166+ (void )Props;
167+ static_assert (
168+ std::is_same_v<PropertiesT,
169+ ext::oneapi::experimental::empty_properties_t >,
170+ " Setting properties not supported yet for no-CGH kernel submit." );
171+ detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
172+
173+ using NameT =
174+ typename detail::get_kernel_name_t <KernelName, KernelType>::name;
175+ using LambdaArgType =
176+ sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
177+ static_assert (
178+ std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
179+ " Kernel argument of a sycl::parallel_for with sycl::nd_range "
180+ " must be either sycl::nd_item or be convertible from sycl::nd_item" );
181+ using TransformedArgType = sycl::nd_item<Dims>;
182+
183+ std::shared_ptr<detail::HostKernelBase> HostKernel = std::make_shared<
184+ detail::HostKernel<KernelType, TransformedArgType, Dims>>(KernelFunc);
185+
186+ detail::DeviceKernelInfo *DeviceKernelInfoPtr =
187+ &detail::getDeviceKernelInfo<NameT>();
188+
189+ detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
190+ TransformedArgType, PropertiesT>::wrap (KernelFunc);
191+
192+ if constexpr (EventNeeded) {
193+ return submit_kernel_direct_with_event_impl (
194+ Queue, Range, HostKernel, DeviceKernelInfoPtr,
195+ TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
196+ } else {
197+ submit_kernel_direct_without_event_impl (
198+ Queue, Range, HostKernel, DeviceKernelInfoPtr,
199+ TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
200+ }
201+ }
151202
152203} // namespace detail
153204
@@ -3612,12 +3663,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
36123663 const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
36133664 const sycl::detail::code_location &CodeLoc);
36143665
3615- template <typename KernelName, bool EventNeeded, typename PropertiesT,
3616- typename KernelType, int Dims>
3617- friend auto sycl::detail::submit_kernel_direct (
3618- const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
3619- const KernelType &KernelFunc, const detail::code_location &CodeLoc);
3620-
36213666 template <typename PropertiesT>
36223667 void ProcessSubmitProperties (PropertiesT Props,
36233668 detail::v1::SubmissionInfo &SI) const {
@@ -3702,18 +3747,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
37023747 const detail::code_location &CodeLoc,
37033748 bool IsTopCodeLoc) const ;
37043749
3705- template <int Dims>
3706- event submit_kernel_direct_with_event_impl (
3707- nd_range<Dims> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
3708- detail::DeviceKernelInfo *DeviceKernelInfo,
3709- const detail::code_location &CodeLoc, bool IsTopCodeLoc) const ;
3710-
3711- template <int Dims>
3712- void submit_kernel_direct_without_event_impl (
3713- nd_range<Dims> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
3714- detail::DeviceKernelInfo *DeviceKernelInfo,
3715- const detail::code_location &CodeLoc, bool IsTopCodeLoc) const ;
3716-
37173750 /// A template-free version of submit_without_event as const member function.
37183751 void submit_without_event_impl (const detail::type_erased_cgfo_ty &CGH,
37193752 const detail::v1::SubmissionInfo &SubmitInfo,
@@ -3898,53 +3931,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
38983931 }
38993932};
39003933
3901- namespace detail {
3902-
3903- template <typename KernelName, bool EventNeeded, typename PropertiesT,
3904- typename KernelType, int Dims>
3905- auto submit_kernel_direct (const queue &Queue, PropertiesT Props,
3906- nd_range<Dims> Range, const KernelType &KernelFunc,
3907- const detail::code_location &CodeLoc) {
3908- // TODO Properties not supported yet
3909- (void )Props;
3910- static_assert (
3911- std::is_same_v<PropertiesT,
3912- ext::oneapi::experimental::empty_properties_t >,
3913- " Setting properties not supported yet for no-CGH kernel submit." );
3914- detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
3915-
3916- using NameT =
3917- typename detail::get_kernel_name_t <KernelName, KernelType>::name;
3918- using LambdaArgType =
3919- sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
3920- static_assert (
3921- std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
3922- " Kernel argument of a sycl::parallel_for with sycl::nd_range "
3923- " must be either sycl::nd_item or be convertible from sycl::nd_item" );
3924- using TransformedArgType = sycl::nd_item<Dims>;
3925-
3926- std::shared_ptr<detail::HostKernelBase> HostKernel = std::make_shared<
3927- detail::HostKernel<KernelType, TransformedArgType, Dims>>(KernelFunc);
3928-
3929- detail::DeviceKernelInfo *DeviceKernelInfoPtr =
3930- &detail::getDeviceKernelInfo<NameT>();
3931-
3932- detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
3933- TransformedArgType, PropertiesT>::wrap (KernelFunc);
3934-
3935- if constexpr (EventNeeded) {
3936- return Queue.submit_kernel_direct_with_event_impl (
3937- Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query (),
3938- TlsCodeLocCapture.isToplevel ());
3939- } else {
3940- Queue.submit_kernel_direct_without_event_impl (
3941- Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query (),
3942- TlsCodeLocCapture.isToplevel ());
3943- }
3944- }
3945-
3946- } // namespace detail
3947-
39483934} // namespace _V1
39493935} // namespace sycl
39503936
0 commit comments