Skip to content

Commit 2980531

Browse files
committed
Split the kernel submit code into a command submission lambda and generic
part. Added "kernel" to the direct submission function names.
1 parent 12ef6da commit 2980531

File tree

5 files changed

+107
-105
lines changed

5 files changed

+107
-105
lines changed

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,19 +114,19 @@ event submit_with_event_impl(const queue &Q, PropertiesT Props,
114114

115115
template <typename KernelName, typename PropertiesT, typename KernelType,
116116
int Dims>
117-
void submit_direct_impl(const queue &Q, PropertiesT Props, nd_range<Dims> Range,
117+
void submit_kernel_direct_impl(const queue &Q, PropertiesT Props, nd_range<Dims> Range,
118118
const KernelType &KernelFunc,
119119
const sycl::detail::code_location &CodeLoc) {
120-
Q.submit_direct_without_event<KernelName, PropertiesT, KernelType, Dims>(
120+
Q.submit_kernel_direct_without_event<KernelName, PropertiesT, KernelType, Dims>(
121121
Props, Range, KernelFunc, CodeLoc);
122122
}
123123

124124
template <typename KernelName, typename PropertiesT, typename KernelType,
125125
int Dims>
126-
event submit_direct_with_event_impl(
126+
event submit_kernel_direct_with_event_impl(
127127
const queue &Q, PropertiesT Props, nd_range<Dims> Range,
128128
const KernelType &KernelFunc, const sycl::detail::code_location &CodeLoc) {
129-
return Q.submit_direct_with_event<KernelName, PropertiesT, KernelType, Dims>(
129+
return Q.submit_kernel_direct_with_event<KernelName, PropertiesT, KernelType, Dims>(
130130
Props, Range, KernelFunc, CodeLoc);
131131
}
132132
} // namespace detail
@@ -152,7 +152,7 @@ void submit(const queue &Q, PropertiesT Props, nd_range<Dims> Range,
152152
const KernelType &KernelFunc,
153153
const sycl::detail::code_location &CodeLoc =
154154
sycl::detail::code_location::current()) {
155-
sycl::ext::oneapi::experimental::detail::submit_direct_impl<
155+
sycl::ext::oneapi::experimental::detail::submit_kernel_direct_impl<
156156
KernelName, PropertiesT, KernelType, Dims>(Q, Props, Range, KernelFunc,
157157
CodeLoc);
158158
}
@@ -180,7 +180,7 @@ event submit_with_event(const queue &Q, PropertiesT Props, nd_range<Dims> Range,
180180
const KernelType &KernelFunc,
181181
const sycl::detail::code_location &CodeLoc =
182182
sycl::detail::code_location::current()) {
183-
return sycl::ext::oneapi::experimental::detail::submit_direct_with_event_impl<
183+
return sycl::ext::oneapi::experimental::detail::submit_kernel_direct_with_event_impl<
184184
KernelName, PropertiesT, KernelType, Dims>(Q, Props, Range, KernelFunc,
185185
CodeLoc);
186186
}

sycl/include/sycl/queue.hpp

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -214,13 +214,13 @@ event submit_with_event_impl(const queue &Q, PropertiesT Props,
214214

215215
template <typename KernelName, typename PropertiesT, typename KernelType,
216216
int Dims>
217-
void submit_direct_impl(const queue &Q, PropertiesT Props, nd_range<Dims> Range,
217+
void submit_kernel_direct_impl(const queue &Q, PropertiesT Props, nd_range<Dims> Range,
218218
const KernelType &KernelFunc,
219219
const sycl::detail::code_location &CodeLoc);
220220

221221
template <typename KernelName, typename PropertiesT, typename KernelType,
222222
int Dims>
223-
event submit_direct_with_event_impl(const queue &Q, PropertiesT Props,
223+
event submit_kernel_direct_with_event_impl(const queue &Q, PropertiesT Props,
224224
nd_range<Dims> Range,
225225
const KernelType &KernelFunc,
226226
const sycl::detail::code_location &CodeLoc);
@@ -2718,7 +2718,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27182718
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
27192719

27202720
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
2721-
return submit_direct_with_event<detail::WrapAs::single_task, KernelName>(
2721+
return submit_kernel_direct_with_event<detail::WrapAs::single_task, KernelName>(
27222722
ext::oneapi::experimental::empty_properties_t{}, nd_range<1>{1, 1},
27232723
KernelFunc);
27242724

@@ -3279,7 +3279,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32793279
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
32803280
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
32813281
if constexpr (sizeof...(RestT) == 1) {
3282-
return submit_direct_with_event<detail::WrapAs::parallel_for, KernelName,
3282+
return submit_kernel_direct_with_event<detail::WrapAs::parallel_for, KernelName,
32833283
sycl::nd_item<Dims>>(
32843284
ext::oneapi::experimental::empty_properties_t{}, Range, Rest...);
32853285
} else {
@@ -3674,13 +3674,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
36743674

36753675
template <typename KernelName, typename PropertiesT, typename KernelType,
36763676
int Dims>
3677-
friend void ext::oneapi::experimental::detail::submit_direct_impl(
3677+
friend void ext::oneapi::experimental::detail::submit_kernel_direct_impl(
36783678
const queue &Q, PropertiesT Props, nd_range<Dims> Range,
36793679
const KernelType &KernelFunc, const sycl::detail::code_location &CodeLoc);
36803680

36813681
template <typename KernelName, typename PropertiesT, typename KernelType,
36823682
int Dims>
3683-
friend event ext::oneapi::experimental::detail::submit_direct_with_event_impl(
3683+
friend event ext::oneapi::experimental::detail::submit_kernel_direct_with_event_impl(
36843684
const queue &Q, PropertiesT Props, nd_range<Dims> Range,
36853685
const KernelType &KernelFunc, const sycl::detail::code_location &CodeLoc);
36863686

@@ -3796,33 +3796,33 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
37963796
const detail::code_location &CodeLoc,
37973797
bool IsTopCodeLoc) const;
37983798

3799-
event submit_direct_with_event_impl(
3800-
nd_range<1> Range, const detail::v1::SubmissionInfo &SubmitInfo,
3799+
event submit_kernel_direct_with_event_impl(
3800+
nd_range<1> Range,
38013801
const detail::v1::KernelRuntimeInfo &KRInfo,
38023802
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
38033803

3804-
event submit_direct_with_event_impl(
3805-
nd_range<2> Range, const detail::v1::SubmissionInfo &SubmitInfo,
3804+
event submit_kernel_direct_with_event_impl(
3805+
nd_range<2> Range,
38063806
const detail::v1::KernelRuntimeInfo &KRInfo,
38073807
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
38083808

3809-
event submit_direct_with_event_impl(
3810-
nd_range<3> Range, const detail::v1::SubmissionInfo &SubmitInfo,
3809+
event submit_kernel_direct_with_event_impl(
3810+
nd_range<3> Range,
38113811
const detail::v1::KernelRuntimeInfo &KRInfo,
38123812
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
38133813

3814-
void submit_direct_without_event_impl(
3815-
nd_range<1> Range, const detail::v1::SubmissionInfo &SubmitInfo,
3814+
void submit_kernel_direct_without_event_impl(
3815+
nd_range<1> Range,
38163816
const detail::v1::KernelRuntimeInfo &KRInfo,
38173817
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
38183818

3819-
void submit_direct_without_event_impl(
3820-
nd_range<2> Range, const detail::v1::SubmissionInfo &SubmitInfo,
3819+
void submit_kernel_direct_without_event_impl(
3820+
nd_range<2> Range,
38213821
const detail::v1::KernelRuntimeInfo &KRInfo,
38223822
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
38233823

3824-
void submit_direct_without_event_impl(
3825-
nd_range<3> Range, const detail::v1::SubmissionInfo &SubmitInfo,
3824+
void submit_kernel_direct_without_event_impl(
3825+
nd_range<3> Range,
38263826
const detail::v1::KernelRuntimeInfo &KRInfo,
38273827
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;
38283828

@@ -3871,50 +3871,46 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
38713871
template <detail::WrapAs WrapAsVal, typename KernelName = detail::auto_name,
38723872
typename ElementType = void, typename PropertiesT,
38733873
typename KernelType, int Dims>
3874-
event submit_direct_with_event(PropertiesT Props, nd_range<Dims> Range,
3874+
event submit_kernel_direct_with_event(PropertiesT Props, nd_range<Dims> Range,
38753875
const KernelType &KernelFunc,
38763876
const detail::code_location &CodeLoc =
38773877
detail::code_location::current()) const {
38783878
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
3879-
detail::v1::SubmissionInfo SI{};
38803879
detail::v1::KernelRuntimeInfo KRInfo{};
38813880

38823881
using NameT =
38833882
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
38843883

3885-
ProcessSubmitProperties(Props, SI);
38863884
ProcessKernelRuntimeInfo<NameT, KernelType, Dims, WrapAsVal>(KernelFunc,
38873885
KRInfo);
38883886

38893887
detail::KernelWrapper<WrapAsVal, NameT, KernelType, ElementType,
38903888
PropertiesT>::wrap(KernelFunc);
38913889

3892-
return submit_direct_with_event_impl(Range, SI, KRInfo,
3890+
return submit_kernel_direct_with_event_impl(Range, KRInfo,
38933891
TlsCodeLocCapture.query(),
38943892
TlsCodeLocCapture.isToplevel());
38953893
}
38963894

38973895
template <typename KernelName = detail::auto_name, typename PropertiesT,
38983896
typename KernelType, int Dims>
3899-
void submit_direct_without_event(PropertiesT Props, nd_range<Dims> Range,
3897+
void submit_kernel_direct_without_event(PropertiesT Props, nd_range<Dims> Range,
39003898
const KernelType &KernelFunc,
39013899
const detail::code_location &CodeLoc =
39023900
detail::code_location::current()) const {
39033901
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
3904-
detail::v1::SubmissionInfo SI{};
39053902
detail::v1::KernelRuntimeInfo KRInfo{};
39063903

39073904
using NameT =
39083905
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
39093906

3910-
ProcessSubmitProperties(Props, SI);
39113907
ProcessKernelRuntimeInfo<NameT, KernelType, Dims,
39123908
detail::WrapAs::parallel_for>(KernelFunc, KRInfo);
39133909

39143910
detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType,
39153911
sycl::nd_item<Dims>, PropertiesT>::wrap(KernelFunc);
39163912

3917-
submit_direct_without_event_impl(Range, SI, KRInfo,
3913+
submit_kernel_direct_without_event_impl(Range, KRInfo,
39183914
TlsCodeLocCapture.query(),
39193915
TlsCodeLocCapture.isToplevel());
39203916
}

sycl/source/detail/queue_impl.cpp

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -447,26 +447,54 @@ std::vector<ArgDesc> queue_impl::extractArgsAndReqsFromLambda(
447447
return Args;
448448
}
449449

450-
detail::EventImplPtr queue_impl::submit_direct_impl(
451-
const NDRDescT &NDRDesc, const v1::SubmissionInfo &SubmitInfo,
450+
detail::EventImplPtr queue_impl::submit_kernel_direct_impl(
451+
const NDRDescT &NDRDesc,
452452
const v1::KernelRuntimeInfo &KRInfo, bool CallerNeedsEvent,
453453
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
454-
(void)SubmitInfo;
455454

456-
std::unique_ptr<detail::CG> CommandGroup;
457-
detail::CG::StorageInitHelper CGData;
458-
std::vector<detail::ArgDesc> Args;
459-
std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage;
460-
std::vector<std::shared_ptr<const void>> AuxiliaryResources;
455+
// No special captures supported yet for the no-handler path
456+
assert(!KRInfo.DeviceKernelInfoPtr()->HasSpecialCaptures);
457+
458+
SubmitCommandFuncType SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData) -> EventImplPtr {
459+
std::unique_ptr<detail::CG> CommandGroup;
460+
std::vector<detail::ArgDesc> Args;
461+
std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage;
462+
std::vector<std::shared_ptr<const void>> AuxiliaryResources;
463+
464+
Args = extractArgsAndReqsFromLambda(
465+
KRInfo.GetKernelFuncPtr(), KRInfo.DeviceKernelInfoPtr()->ParamDescGetter,
466+
KRInfo.DeviceKernelInfoPtr()->NumParams);
467+
468+
CommandGroup.reset(new detail::CGExecKernel(
469+
std::move(NDRDesc), KRInfo.HostKernel(),
470+
nullptr, // MKernel
471+
nullptr, // MKernelBundle
472+
std::move(CGData), std::move(Args), toKernelNameStrT(KRInfo.KernelName()),
473+
*KRInfo.DeviceKernelInfoPtr(), std::move(StreamStorage),
474+
std::move(AuxiliaryResources), detail::CGType::Kernel,
475+
UR_KERNEL_CACHE_CONFIG_DEFAULT,
476+
false, // MKernelIsCooperative
477+
false, // MKernelUsesClusterLaunch
478+
0, // MKernelWorkGroupMemorySize
479+
CodeLoc));
480+
CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;
481+
482+
EventImplPtr EventImpl = detail::Scheduler::getInstance().addCG(
483+
std::move(CommandGroup), *this, CallerNeedsEvent);
484+
return EventImpl;
485+
};
461486

487+
return submit_generic_direct(CallerNeedsEvent, SubmitKernelFunc);
488+
}
489+
490+
detail::EventImplPtr queue_impl::submit_generic_direct(
491+
bool CallerNeedsEvent, SubmitCommandFuncType &SubmitCommandFunc) {
492+
detail::CG::StorageInitHelper CGData;
462493
std::unique_lock<std::mutex> Lock(MMutex);
463494

464495
// Graphs are not supported yet for the no-handler path
465496
assert(!hasCommandGraph());
466497

467-
// No special captures supported yet for the no-handler path
468-
assert(!KRInfo.DeviceKernelInfoPtr()->HasSpecialCaptures);
469-
470498
// Set the No Last Event Mode to false, since the no-handler path
471499
// does not support it yet.
472500
MNoLastEventMode.store(false, std::memory_order_relaxed);
@@ -501,27 +529,7 @@ detail::EventImplPtr queue_impl::submit_direct_impl(
501529
}
502530
}
503531

504-
Args = extractArgsAndReqsFromLambda(
505-
KRInfo.GetKernelFuncPtr(), KRInfo.DeviceKernelInfoPtr()->ParamDescGetter,
506-
KRInfo.DeviceKernelInfoPtr()->NumParams);
507-
508-
CommandGroup.reset(new detail::CGExecKernel(
509-
std::move(NDRDesc), KRInfo.HostKernel(),
510-
nullptr, // MKernel
511-
nullptr, // MKernelBundle
512-
std::move(CGData), std::move(Args), toKernelNameStrT(KRInfo.KernelName()),
513-
*KRInfo.DeviceKernelInfoPtr(), std::move(StreamStorage),
514-
std::move(AuxiliaryResources), detail::CGType::Kernel,
515-
UR_KERNEL_CACHE_CONFIG_DEFAULT,
516-
false, // MKernelIsCooperative
517-
false, // MKernelUsesClusterLaunch
518-
0, // MKernelWorkGroupMemorySize
519-
CodeLoc));
520-
521-
CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;
522-
523-
EventImplPtr EventImpl = detail::Scheduler::getInstance().addCG(
524-
std::move(CommandGroup), *this, CallerNeedsEvent);
532+
EventImplPtr EventImpl = SubmitCommandFunc(CGData);
525533

526534
// Sync with the last event for in order queue
527535
if (isInOrder() && !EventImpl->isDiscarded()) {

sycl/source/detail/queue_impl.hpp

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -366,60 +366,54 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
366366
return createSyclObjFromImpl<event>(ResEvent);
367367
}
368368

369-
event submit_direct_with_event(nd_range<1> Range,
370-
const detail::v1::SubmissionInfo &SubmitInfo,
369+
event submit_kernel_direct_with_event(nd_range<1> Range,
371370
const detail::v1::KernelRuntimeInfo &KRInfo,
372371
const detail::code_location &CodeLoc,
373372
bool IsTopCodeLoc) {
374-
detail::EventImplPtr EventImpl = submit_direct_impl(
375-
NDRDescT{Range}, SubmitInfo, KRInfo, true, CodeLoc, IsTopCodeLoc);
373+
detail::EventImplPtr EventImpl = submit_kernel_direct_impl(
374+
NDRDescT{Range}, KRInfo, true, CodeLoc, IsTopCodeLoc);
376375
return createSyclObjFromImpl<event>(EventImpl);
377376
}
378377

379-
event submit_direct_with_event(nd_range<2> Range,
380-
const detail::v1::SubmissionInfo &SubmitInfo,
378+
event submit_kernel_direct_with_event(nd_range<2> Range,
381379
const detail::v1::KernelRuntimeInfo &KRInfo,
382380
const detail::code_location &CodeLoc,
383381
bool IsTopCodeLoc) {
384-
detail::EventImplPtr EventImpl = submit_direct_impl(
385-
NDRDescT{Range}, SubmitInfo, KRInfo, true, CodeLoc, IsTopCodeLoc);
382+
detail::EventImplPtr EventImpl = submit_kernel_direct_impl(
383+
NDRDescT{Range}, KRInfo, true, CodeLoc, IsTopCodeLoc);
386384
return createSyclObjFromImpl<event>(EventImpl);
387385
}
388386

389-
event submit_direct_with_event(nd_range<3> Range,
390-
const detail::v1::SubmissionInfo &SubmitInfo,
387+
event submit_kernel_direct_with_event(nd_range<3> Range,
391388
const detail::v1::KernelRuntimeInfo &KRInfo,
392389
const detail::code_location &CodeLoc,
393390
bool IsTopCodeLoc) {
394-
detail::EventImplPtr EventImpl = submit_direct_impl(
395-
NDRDescT{Range}, SubmitInfo, KRInfo, true, CodeLoc, IsTopCodeLoc);
391+
detail::EventImplPtr EventImpl = submit_kernel_direct_impl(
392+
NDRDescT{Range}, KRInfo, true, CodeLoc, IsTopCodeLoc);
396393
return createSyclObjFromImpl<event>(EventImpl);
397394
}
398395

399-
void submit_direct_without_event(nd_range<1> Range,
400-
const detail::v1::SubmissionInfo &SubmitInfo,
396+
void submit_kernel_direct_without_event(nd_range<1> Range,
401397
const detail::v1::KernelRuntimeInfo &KRInfo,
402398
const detail::code_location &CodeLoc,
403399
bool IsTopCodeLoc) {
404-
submit_direct_impl(NDRDescT{Range}, SubmitInfo, KRInfo, false, CodeLoc,
400+
submit_kernel_direct_impl(NDRDescT{Range}, KRInfo, false, CodeLoc,
405401
IsTopCodeLoc);
406402
}
407403

408-
void submit_direct_without_event(nd_range<2> Range,
409-
const detail::v1::SubmissionInfo &SubmitInfo,
404+
void submit_kernel_direct_without_event(nd_range<2> Range,
410405
const detail::v1::KernelRuntimeInfo &KRInfo,
411406
const detail::code_location &CodeLoc,
412407
bool IsTopCodeLoc) {
413-
submit_direct_impl(NDRDescT{Range}, SubmitInfo, KRInfo, false, CodeLoc,
408+
submit_kernel_direct_impl(NDRDescT{Range}, KRInfo, false, CodeLoc,
414409
IsTopCodeLoc);
415410
}
416411

417-
void submit_direct_without_event(nd_range<3> Range,
418-
const detail::v1::SubmissionInfo &SubmitInfo,
412+
void submit_kernel_direct_without_event(nd_range<3> Range,
419413
const detail::v1::KernelRuntimeInfo &KRInfo,
420414
const detail::code_location &CodeLoc,
421415
bool IsTopCodeLoc) {
422-
submit_direct_impl(NDRDescT{Range}, SubmitInfo, KRInfo, false, CodeLoc,
416+
submit_kernel_direct_impl(NDRDescT{Range}, KRInfo, false, CodeLoc,
423417
IsTopCodeLoc);
424418
}
425419

@@ -965,13 +959,17 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
965959
/// scope or in the top level scope.
966960
///
967961
/// \return a SYCL event representing submitted command group or nullptr.
968-
detail::EventImplPtr submit_direct_impl(const NDRDescT &NDRDesc,
969-
const v1::SubmissionInfo &SubmitInfo,
962+
detail::EventImplPtr submit_kernel_direct_impl(const NDRDescT &NDRDesc,
970963
const v1::KernelRuntimeInfo &KRInfo,
971964
bool CallerNeedsEvent,
972965
const detail::code_location &CodeLoc,
973966
bool IsTopCodeLoc);
974967

968+
using SubmitCommandFuncType = std::function<EventImplPtr(detail::CG::StorageInitHelper &CGData)>;
969+
970+
detail::EventImplPtr submit_generic_direct(bool CallerNeedsEvent,
971+
SubmitCommandFuncType &SubmitCommandFunc);
972+
975973
/// Helper function for submitting a memory operation with a handler.
976974
/// \param DepEvents is a vector of dependencies of the operation.
977975
/// \param HandlerFunc is a function that submits the operation with a

0 commit comments

Comments
 (0)