Skip to content

Commit eed0591

Browse files
committed
Make free functions from the queue kernel direct submit methods
1 parent 066b421 commit eed0591

File tree

2 files changed: 108 additions (+108) and 114 deletions (-114)

2 files changed: 108 additions (+108) and 114 deletions (-114)

sycl/include/sycl/queue.hpp

Lines changed: 52 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,20 @@ template <backend BackendName, class SyclObjectT>
6262
auto get_native(const SyclObjectT &Obj)
6363
-> backend_return_t<BackendName, SyclObjectT>;
6464

65+
template <int Dims>
66+
event __SYCL_EXPORT submit_kernel_direct_with_event_impl(
67+
const queue &Queue, nd_range<Dims> Range,
68+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
69+
detail::DeviceKernelInfo *DeviceKernelInfo,
70+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
71+
72+
template <int Dims>
73+
void __SYCL_EXPORT submit_kernel_direct_without_event_impl(
74+
const queue &Queue, nd_range<Dims> Range,
75+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
76+
detail::DeviceKernelInfo *DeviceKernelInfo,
77+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
78+
6579
namespace detail {
6680
class queue_impl;
6781

@@ -147,7 +161,44 @@ template <typename KernelName = detail::auto_name, bool EventNeeded = false,
147161
auto submit_kernel_direct(
148162
const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
149163
const KernelType &KernelFunc,
150-
const detail::code_location &CodeLoc = detail::code_location::current());
164+
const detail::code_location &CodeLoc = detail::code_location::current()) {
165+
// TODO Properties not supported yet
166+
(void)Props;
167+
static_assert(
168+
std::is_same_v<PropertiesT,
169+
ext::oneapi::experimental::empty_properties_t>,
170+
"Setting properties not supported yet for no-CGH kernel submit.");
171+
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
172+
173+
using NameT =
174+
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
175+
using LambdaArgType =
176+
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
177+
static_assert(
178+
std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
179+
"Kernel argument of a sycl::parallel_for with sycl::nd_range "
180+
"must be either sycl::nd_item or be convertible from sycl::nd_item");
181+
using TransformedArgType = sycl::nd_item<Dims>;
182+
183+
std::shared_ptr<detail::HostKernelBase> HostKernel = std::make_shared<
184+
detail::HostKernel<KernelType, TransformedArgType, Dims>>(KernelFunc);
185+
186+
detail::DeviceKernelInfo *DeviceKernelInfoPtr =
187+
&detail::getDeviceKernelInfo<NameT>();
188+
189+
detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
190+
TransformedArgType, PropertiesT>::wrap(KernelFunc);
191+
192+
if constexpr (EventNeeded) {
193+
return submit_kernel_direct_with_event_impl(
194+
Queue, Range, HostKernel, DeviceKernelInfoPtr,
195+
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
196+
} else {
197+
submit_kernel_direct_without_event_impl(
198+
Queue, Range, HostKernel, DeviceKernelInfoPtr,
199+
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
200+
}
201+
}
151202

152203
} // namespace detail
153204

@@ -3612,12 +3663,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
36123663
const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
36133664
const sycl::detail::code_location &CodeLoc);
36143665

3615-
template <typename KernelName, bool EventNeeded, typename PropertiesT,
3616-
typename KernelType, int Dims>
3617-
friend auto sycl::detail::submit_kernel_direct(
3618-
const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
3619-
const KernelType &KernelFunc, const detail::code_location &CodeLoc);
3620-
36213666
template <typename PropertiesT>
36223667
void ProcessSubmitProperties(PropertiesT Props,
36233668
detail::v1::SubmissionInfo &SI) const {
@@ -3702,18 +3747,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
37023747
const detail::code_location &CodeLoc,
37033748
bool IsTopCodeLoc) const;
37043749

3705-
template <int Dims>
3706-
event submit_kernel_direct_with_event_impl(
3707-
nd_range<Dims> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
3708-
detail::DeviceKernelInfo *DeviceKernelInfo,
3709-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
3710-
3711-
template <int Dims>
3712-
void submit_kernel_direct_without_event_impl(
3713-
nd_range<Dims> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
3714-
detail::DeviceKernelInfo *DeviceKernelInfo,
3715-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
3716-
37173750
/// A template-free version of submit_without_event as const member function.
37183751
void submit_without_event_impl(const detail::type_erased_cgfo_ty &CGH,
37193752
const detail::v1::SubmissionInfo &SubmitInfo,
@@ -3898,53 +3931,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
38983931
}
38993932
};
39003933

3901-
namespace detail {
3902-
3903-
template <typename KernelName, bool EventNeeded, typename PropertiesT,
3904-
typename KernelType, int Dims>
3905-
auto submit_kernel_direct(const queue &Queue, PropertiesT Props,
3906-
nd_range<Dims> Range, const KernelType &KernelFunc,
3907-
const detail::code_location &CodeLoc) {
3908-
// TODO Properties not supported yet
3909-
(void)Props;
3910-
static_assert(
3911-
std::is_same_v<PropertiesT,
3912-
ext::oneapi::experimental::empty_properties_t>,
3913-
"Setting properties not supported yet for no-CGH kernel submit.");
3914-
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
3915-
3916-
using NameT =
3917-
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
3918-
using LambdaArgType =
3919-
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
3920-
static_assert(
3921-
std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
3922-
"Kernel argument of a sycl::parallel_for with sycl::nd_range "
3923-
"must be either sycl::nd_item or be convertible from sycl::nd_item");
3924-
using TransformedArgType = sycl::nd_item<Dims>;
3925-
3926-
std::shared_ptr<detail::HostKernelBase> HostKernel = std::make_shared<
3927-
detail::HostKernel<KernelType, TransformedArgType, Dims>>(KernelFunc);
3928-
3929-
detail::DeviceKernelInfo *DeviceKernelInfoPtr =
3930-
&detail::getDeviceKernelInfo<NameT>();
3931-
3932-
detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
3933-
TransformedArgType, PropertiesT>::wrap(KernelFunc);
3934-
3935-
if constexpr (EventNeeded) {
3936-
return Queue.submit_kernel_direct_with_event_impl(
3937-
Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query(),
3938-
TlsCodeLocCapture.isToplevel());
3939-
} else {
3940-
Queue.submit_kernel_direct_without_event_impl(
3941-
Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query(),
3942-
TlsCodeLocCapture.isToplevel());
3943-
}
3944-
}
3945-
3946-
} // namespace detail
3947-
39483934
} // namespace _V1
39493935
} // namespace sycl
39503936

sycl/source/queue.cpp

Lines changed: 56 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -319,54 +319,6 @@ event queue::submit_with_event_impl(
319319
return impl->submit_with_event(CGH, SubmitInfo, CodeLoc, IsTopCodeLoc);
320320
}
321321

322-
template <int Dims>
323-
event queue::submit_kernel_direct_with_event_impl(
324-
nd_range<Dims> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
325-
detail::DeviceKernelInfo *DeviceKernelInfo,
326-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const {
327-
return impl->submit_kernel_direct_with_event(
328-
Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc);
329-
}
330-
331-
template event queue::submit_kernel_direct_with_event_impl<1>(
332-
nd_range<1> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
333-
detail::DeviceKernelInfo *DeviceKernelInfo,
334-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
335-
336-
template event queue::submit_kernel_direct_with_event_impl<2>(
337-
nd_range<2> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
338-
detail::DeviceKernelInfo *DeviceKernelInfo,
339-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
340-
341-
template event queue::submit_kernel_direct_with_event_impl<3>(
342-
nd_range<3> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
343-
detail::DeviceKernelInfo *DeviceKernelInfo,
344-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
345-
346-
template <int Dims>
347-
void queue::submit_kernel_direct_without_event_impl(
348-
nd_range<Dims> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
349-
detail::DeviceKernelInfo *DeviceKernelInfo,
350-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const {
351-
impl->submit_kernel_direct_without_event(Range, HostKernel, DeviceKernelInfo,
352-
CodeLoc, IsTopCodeLoc);
353-
}
354-
355-
template void queue::submit_kernel_direct_without_event_impl<1>(
356-
nd_range<1> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
357-
detail::DeviceKernelInfo *DeviceKernelInfo,
358-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
359-
360-
template void queue::submit_kernel_direct_without_event_impl<2>(
361-
nd_range<2> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
362-
detail::DeviceKernelInfo *DeviceKernelInfo,
363-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
364-
365-
template void queue::submit_kernel_direct_without_event_impl<3>(
366-
nd_range<3> Range, std::shared_ptr<detail::HostKernelBase> &HostKernel,
367-
detail::DeviceKernelInfo *DeviceKernelInfo,
368-
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
369-
370322
void queue::submit_without_event_impl(
371323
const detail::type_erased_cgfo_ty &CGH,
372324
const detail::v1::SubmissionInfo &SubmitInfo,
@@ -528,6 +480,62 @@ void queue::ext_oneapi_set_external_event(const event &external_event) {
528480

529481
const property_list &queue::getPropList() const { return impl->getPropList(); }
530482

483+
template <int Dims>
484+
event submit_kernel_direct_with_event_impl(
485+
const queue &Queue, nd_range<Dims> Range,
486+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
487+
detail::DeviceKernelInfo *DeviceKernelInfo,
488+
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
489+
return getSyclObjImpl(Queue)->submit_kernel_direct_with_event(
490+
Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc);
491+
}
492+
493+
template event submit_kernel_direct_with_event_impl<1>(
494+
const queue &Queue, nd_range<1> Range,
495+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
496+
detail::DeviceKernelInfo *DeviceKernelInfo,
497+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
498+
499+
template event submit_kernel_direct_with_event_impl<2>(
500+
const queue &Queue, nd_range<2> Range,
501+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
502+
detail::DeviceKernelInfo *DeviceKernelInfo,
503+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
504+
505+
template event submit_kernel_direct_with_event_impl<3>(
506+
const queue &Queue, nd_range<3> Range,
507+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
508+
detail::DeviceKernelInfo *DeviceKernelInfo,
509+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
510+
511+
template <int Dims>
512+
void submit_kernel_direct_without_event_impl(
513+
const queue &Queue, nd_range<Dims> Range,
514+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
515+
detail::DeviceKernelInfo *DeviceKernelInfo,
516+
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
517+
getSyclObjImpl(Queue)->submit_kernel_direct_without_event(
518+
Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc);
519+
}
520+
521+
template void submit_kernel_direct_without_event_impl<1>(
522+
const queue &Queue, nd_range<1> Range,
523+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
524+
detail::DeviceKernelInfo *DeviceKernelInfo,
525+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
526+
527+
template void submit_kernel_direct_without_event_impl<2>(
528+
const queue &Queue, nd_range<2> Range,
529+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
530+
detail::DeviceKernelInfo *DeviceKernelInfo,
531+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
532+
533+
template void submit_kernel_direct_without_event_impl<3>(
534+
const queue &Queue, nd_range<3> Range,
535+
std::shared_ptr<detail::HostKernelBase> &HostKernel,
536+
detail::DeviceKernelInfo *DeviceKernelInfo,
537+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
538+
531539
} // namespace _V1
532540
} // namespace sycl
533541

0 commit comments

Comments
 (0)