Skip to content

Commit 066b421

Browse files
committed
Consolidate the event-based and event-less functions
1 parent de94db6 commit 066b421

File tree

3 files changed

+33
-78
lines changed

3 files changed

+33
-78
lines changed

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -262,8 +262,8 @@ void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
262262
ReductionsT &&...Reductions) {
263263
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
264264
if constexpr (sizeof...(ReductionsT) == 0) {
265-
detail::submit_kernel_direct_without_event<KernelName>(
266-
std::move(Q), empty_properties_t{}, Range, KernelObj);
265+
detail::submit_kernel_direct<KernelName>(std::move(Q), empty_properties_t{},
266+
Range, KernelObj);
267267
} else {
268268
#endif
269269
submit(std::move(Q), [&](handler &CGH) {
@@ -298,7 +298,7 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
298298
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
299299
Properties>
300300
ConfigAccess(Config);
301-
detail::submit_kernel_direct_without_event<KernelName>(
301+
detail::submit_kernel_direct<KernelName>(
302302
std::move(Q), ConfigAccess.getProperties(), ConfigAccess.getRange(),
303303
KernelObj);
304304
} else {

sycl/include/sycl/khr/free_function_commands.hpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -154,9 +154,9 @@ void launch_grouped(const queue &q, range<1> r, range<1> size,
154154
const sycl::detail::code_location &codeLoc =
155155
sycl::detail::code_location::current()) {
156156
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
157-
detail::submit_kernel_direct_without_event(
158-
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<1>(r, size),
159-
k);
157+
detail::submit_kernel_direct(q,
158+
ext::oneapi::experimental::empty_properties_t{},
159+
nd_range<1>(r, size), k);
160160
#else
161161
submit(
162162
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
@@ -169,9 +169,9 @@ void launch_grouped(const queue &q, range<2> r, range<2> size,
169169
const sycl::detail::code_location &codeLoc =
170170
sycl::detail::code_location::current()) {
171171
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
172-
detail::submit_kernel_direct_without_event(
173-
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<2>(r, size),
174-
k);
172+
detail::submit_kernel_direct(q,
173+
ext::oneapi::experimental::empty_properties_t{},
174+
nd_range<2>(r, size), k);
175175
#else
176176
submit(
177177
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
@@ -184,9 +184,9 @@ void launch_grouped(const queue &q, range<3> r, range<3> size,
184184
const sycl::detail::code_location &codeLoc =
185185
sycl::detail::code_location::current()) {
186186
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
187-
detail::submit_kernel_direct_without_event(
188-
q, ext::oneapi::experimental::empty_properties_t{}, nd_range<3>(r, size),
189-
k);
187+
detail::submit_kernel_direct(q,
188+
ext::oneapi::experimental::empty_properties_t{},
189+
nd_range<3>(r, size), k);
190190
#else
191191
submit(
192192
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },

sycl/include/sycl/queue.hpp

Lines changed: 21 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -142,16 +142,9 @@ class __SYCL_EXPORT SubmissionInfo {
142142

143143
} // namespace v1
144144

145-
template <typename KernelName = detail::auto_name, typename PropertiesT,
146-
typename KernelType, int Dims>
147-
event submit_kernel_direct_with_event(
148-
const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
149-
const KernelType &KernelFunc,
150-
const detail::code_location &CodeLoc = detail::code_location::current());
151-
152-
template <typename KernelName = detail::auto_name, typename PropertiesT,
153-
typename KernelType, int Dims>
154-
void submit_kernel_direct_without_event(
145+
template <typename KernelName = detail::auto_name, bool EventNeeded = false,
146+
typename PropertiesT, typename KernelType, int Dims>
147+
auto submit_kernel_direct(
155148
const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
156149
const KernelType &KernelFunc,
157150
const detail::code_location &CodeLoc = detail::code_location::current());
@@ -3228,7 +3221,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32283221
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
32293222
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
32303223
if constexpr (sizeof...(RestT) == 1) {
3231-
return detail::submit_kernel_direct_with_event<KernelName>(
3224+
return detail::submit_kernel_direct<KernelName, true>(
32323225
*this, ext::oneapi::experimental::empty_properties_t{}, Range,
32333226
Rest...);
32343227
} else {
@@ -3619,15 +3612,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
36193612
const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
36203613
const sycl::detail::code_location &CodeLoc);
36213614

3622-
template <typename KernelName, typename PropertiesT, typename KernelType,
3623-
int Dims>
3624-
friend void sycl::detail::submit_kernel_direct_without_event(
3625-
const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
3626-
const KernelType &KernelFunc, const detail::code_location &CodeLoc);
3627-
3628-
template <typename KernelName, typename PropertiesT, typename KernelType,
3629-
int Dims>
3630-
friend event sycl::detail::submit_kernel_direct_with_event(
3615+
template <typename KernelName, bool EventNeeded, typename PropertiesT,
3616+
typename KernelType, int Dims>
3617+
friend auto sycl::detail::submit_kernel_direct(
36313618
const queue &Queue, PropertiesT Props, nd_range<Dims> Range,
36323619
const KernelType &KernelFunc, const detail::code_location &CodeLoc);
36333620

@@ -3913,12 +3900,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
39133900

39143901
namespace detail {
39153902

3916-
template <typename KernelName, typename PropertiesT, typename KernelType,
3917-
int Dims>
3918-
event submit_kernel_direct_with_event(const queue &Queue, PropertiesT Props,
3919-
nd_range<Dims> Range,
3920-
const KernelType &KernelFunc,
3921-
const detail::code_location &CodeLoc) {
3903+
template <typename KernelName, bool EventNeeded, typename PropertiesT,
3904+
typename KernelType, int Dims>
3905+
auto submit_kernel_direct(const queue &Queue, PropertiesT Props,
3906+
nd_range<Dims> Range, const KernelType &KernelFunc,
3907+
const detail::code_location &CodeLoc) {
39223908
// TODO Properties not supported yet
39233909
(void)Props;
39243910
static_assert(
@@ -3946,48 +3932,17 @@ event submit_kernel_direct_with_event(const queue &Queue, PropertiesT Props,
39463932
detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
39473933
TransformedArgType, PropertiesT>::wrap(KernelFunc);
39483934

3949-
return Queue.submit_kernel_direct_with_event_impl(
3950-
Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query(),
3951-
TlsCodeLocCapture.isToplevel());
3935+
if constexpr (EventNeeded) {
3936+
return Queue.submit_kernel_direct_with_event_impl(
3937+
Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query(),
3938+
TlsCodeLocCapture.isToplevel());
3939+
} else {
3940+
Queue.submit_kernel_direct_without_event_impl(
3941+
Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query(),
3942+
TlsCodeLocCapture.isToplevel());
3943+
}
39523944
}
39533945

3954-
template <typename KernelName, typename PropertiesT, typename KernelType,
3955-
int Dims>
3956-
void submit_kernel_direct_without_event(const queue &Queue, PropertiesT Props,
3957-
nd_range<Dims> Range,
3958-
const KernelType &KernelFunc,
3959-
const detail::code_location &CodeLoc) {
3960-
// TODO Properties not supported yet
3961-
(void)Props;
3962-
static_assert(
3963-
std::is_same_v<PropertiesT,
3964-
ext::oneapi::experimental::empty_properties_t>,
3965-
"Setting properties not supported yet for no-CGH kernel submit.");
3966-
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
3967-
3968-
using NameT =
3969-
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
3970-
using LambdaArgType =
3971-
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
3972-
static_assert(
3973-
std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>,
3974-
"Kernel argument of a sycl::parallel_for with sycl::nd_range "
3975-
"must be either sycl::nd_item or be convertible from sycl::nd_item");
3976-
using TransformedArgType = sycl::nd_item<Dims>;
3977-
3978-
std::shared_ptr<detail::HostKernelBase> HostKernel = std::make_shared<
3979-
detail::HostKernel<KernelType, TransformedArgType, Dims>>(KernelFunc);
3980-
3981-
detail::DeviceKernelInfo *DeviceKernelInfoPtr =
3982-
&detail::getDeviceKernelInfo<NameT>();
3983-
3984-
detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
3985-
TransformedArgType, PropertiesT>::wrap(KernelFunc);
3986-
3987-
Queue.submit_kernel_direct_without_event_impl(
3988-
Range, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query(),
3989-
TlsCodeLocCapture.isToplevel());
3990-
}
39913946
} // namespace detail
39923947

39933948
} // namespace _V1

0 commit comments

Comments
 (0)