Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
3223842
[SYCL] Handler-less kernel submit API
slawekptak Jul 3, 2025
fde19ca
Fix formatting
slawekptak Jul 3, 2025
13424de
Fix formatting
slawekptak Jul 4, 2025
fbc789d
Change the ExtendedSubmissionInfo to KernelRuntimeInfo,
slawekptak Jul 7, 2025
591b3ec
Added copy/move constructor and assignment operator
slawekptak Jul 8, 2025
d235b7c
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Jul 8, 2025
6641601
Add a no event submit and no handler compile flag
slawekptak Jul 11, 2025
0f41d5a
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Jul 14, 2025
a6e711e
Added a new configure option to build no handler submit path, changed
slawekptak Jul 14, 2025
9c8040e
Host task dependency test
slawekptak Jul 17, 2025
31cbdb9
Add a check for special captures
slawekptak Jul 18, 2025
c5cd091
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Aug 11, 2025
998d592
Switch to the common kernel wrappers, fix the KRInfo function call
slawekptak Aug 11, 2025
4000c07
Enable no handler in the preview lib build, add no handler unit
slawekptak Aug 12, 2025
f8e9cd6
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Aug 12, 2025
01af8bb
Unused argument fix and IsTopCodeLoc assignment
slawekptak Aug 12, 2025
4469e59
Implemented the barrier and un-enqueued commands synchronization
slawekptak Aug 13, 2025
ac1a5cf
Fix formatting
slawekptak Aug 13, 2025
5865f3a
Fixed #ifdef, added comment to a new function.
slawekptak Aug 13, 2025
072803c
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Aug 19, 2025
27b3110
Address review comments
slawekptak Aug 20, 2025
9041e94
Updated Linux symbols
slawekptak Aug 21, 2025
ac2c5bb
Addressed more review comments
slawekptak Aug 21, 2025
8e155fb
Fix formatting
slawekptak Aug 21, 2025
502f637
Fix formatting, remove unused properties argument
slawekptak Aug 21, 2025
d708c93
Fix ProcessKernelRuntimeInfo call
slawekptak Aug 21, 2025
e9f6e4e
Fix unit test build and ProcessKernelRuntimeInfo calls
slawekptak Aug 21, 2025
057a7a5
Fix formatting
slawekptak Aug 21, 2025
77d92ca
Added single_task shortcut function support for no-handler
slawekptak Aug 22, 2025
85aaa5c
Fix formatting
slawekptak Aug 25, 2025
a54422a
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Aug 29, 2025
967d35e
Update KernelRuntimeInfo - change kernel name based cache pointer
slawekptak Aug 29, 2025
ec1ef89
Switch to DeviceKernelInfo use on the no-handler path
slawekptak Aug 29, 2025
1f95b9b
KernelName fix
slawekptak Aug 29, 2025
12ef6da
Update Windows symbols
slawekptak Aug 29, 2025
2980531
Split the kernel submit code into a command submission lambda and gen…
slawekptak Sep 1, 2025
01e0f9f
Fix formatting
slawekptak Sep 2, 2025
63d1345
Rename submit_generic_direct to submit_direct
slawekptak Sep 2, 2025
4001fea
Fix unused Props argument
slawekptak Sep 4, 2025
6c9525b
Update Linux symbols
slawekptak Sep 4, 2025
f871b10
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Sep 16, 2025
18df56b
Define the SubmitCommandFuncType template type and rebase
slawekptak Sep 16, 2025
3375e77
Use the KernelData structure in the no-handler path
slawekptak Sep 17, 2025
72dc199
Rename KernelRuntimeInfo to KernelDataDesc
slawekptak Sep 17, 2025
9715916
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Sep 17, 2025
177277b
Minor changes in the template variables
slawekptak Sep 17, 2025
eb9a5d6
Remove unused type
slawekptak Sep 17, 2025
1f8ea92
Remove KernelDataDesc and pass the arguments directly,
slawekptak Sep 22, 2025
74438ae
Merge branch 'sycl' into no_handler_lib_entry
slawekptak Sep 22, 2025
0e48eb6
Code cleanup
slawekptak Sep 22, 2025
0d47ac7
Update Linux symbols
slawekptak Sep 22, 2025
ab6812a
Update Windows symbols
slawekptak Sep 22, 2025
a35286d
Address review comments
slawekptak Sep 22, 2025
27a5cf5
Rvalue reference for KernelData
slawekptak Sep 22, 2025
9144f84
Add a static_assert to check if properties are empty.
slawekptak Sep 23, 2025
ca0b632
Remove unused function
slawekptak Sep 23, 2025
42e2b30
Fix unused parameter
slawekptak Sep 23, 2025
943f1f7
Merge two overloads of submit_kernel_direct_impl
slawekptak Sep 23, 2025
76bcaf2
Template instantiations for submit_kernel_direct_with_event_impl
slawekptak Sep 24, 2025
6588fe8
Change kernel direct submit functions in queue_impl to templates
slawekptak Sep 24, 2025
3c0e33c
Update Linux symbols
slawekptak Sep 24, 2025
8a20b8a
Update Windows symbols
slawekptak Sep 24, 2025
2be3d3d
Convert the kernel direct submit functions to free functions
slawekptak Sep 25, 2025
f139c93
Minor fixes
slawekptak Sep 25, 2025
8023ec1
Add missing calls.
slawekptak Sep 25, 2025
de94db6
Remove extern template definitions
slawekptak Sep 26, 2025
066b421
Consolidate the event-based and event-less functions
slawekptak Sep 26, 2025
eed0591
Make free functions from the queue kernel direct submit methods
slawekptak Sep 26, 2025
552f448
Address review comments
slawekptak Sep 26, 2025
f5c0d77
Update Linux symbols
slawekptak Sep 26, 2025
b442d37
ifdef fix
slawekptak Sep 26, 2025
5fa8ccc
Removed unused function declaration
slawekptak Sep 26, 2025
fa6d2f8
Export template instantiations
slawekptak Sep 26, 2025
f04ed3f
Update Windows symbols
slawekptak Sep 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,16 @@ event submit_with_event_impl(const queue &Q, PropertiesT Props,
return Q.submit_with_event<__SYCL_USE_FALLBACK_ASSERT>(
Props, detail::type_erased_cgfo_ty{CGF}, nullptr, CodeLoc);
}

template <typename PropertiesT, typename KernelName,
typename KernelType, int Dims>
event submit_with_event_impl(const queue &Q, PropertiesT Props,
nd_range<Dims> Range,
const KernelType &KernelFunc,
const sycl::detail::code_location &CodeLoc) {
return Q.submit_with_event<__SYCL_USE_FALLBACK_ASSERT, PropertiesT,
KernelName, KernelType, Dims>(Props, Range, KernelFunc, CodeLoc);
}
} // namespace detail

template <typename CommandGroupFunc, typename PropertiesT>
Expand Down Expand Up @@ -144,6 +154,19 @@ event submit_with_event(const queue &Q, CommandGroupFunc &&CGF,
std::forward<CommandGroupFunc>(CGF), CodeLoc);
}

#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
template <typename PropertiesT, typename KernelName = sycl::detail::auto_name,
typename KernelType, int Dims>
event submit_with_event(const queue &Q, PropertiesT Props,
nd_range<Dims> Range,
const KernelType &KernelFunc,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
return sycl::ext::oneapi::experimental::detail::submit_with_event_impl
<PropertiesT, KernelName, KernelType, Dims>(Q, Props, Range, KernelFunc, CodeLoc);
}
#endif

template <typename KernelName = sycl::detail::auto_name, typename KernelType>
void single_task(handler &CGH, const KernelType &KernelObj) {
CGH.single_task<KernelName>(KernelObj);
Expand Down
118 changes: 118 additions & 0 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,36 @@ class __SYCL_EXPORT SubmissionInfo {
ext::oneapi::experimental::event_mode_enum::none;
};

using KernelParamDescGetterFuncPtr = detail::kernel_param_desc_t (*)(int);

// This class is intended to store the kernel runtime information,
// extracted from the compile time kernel structures.
class __SYCL_EXPORT KernelRuntimeInfo {
public:
KernelRuntimeInfo() {}

std::string_view &KernelName() { return MKernelName; }
std::unique_ptr<detail::HostKernelBase> &HostKernel() { return MHostKernel; }
int &KernelNumArgs() { return MKernelNumArgs; }
KernelParamDescGetterFuncPtr &KernelParamDescGetter() {
return MKernelParamDescGetter;
}
bool &KernelIsESIMD() { return MKernelIsESIMD; }
bool &KernelHasSpecialCaptures() { return MKernelHasSpecialCaptures; }
detail::KernelNameBasedCacheT *&KernelNameBasedCachePtr() {
return MKernelNameBasedCachePtr;
}

private:
std::string_view MKernelName;
std::unique_ptr<detail::HostKernelBase> MHostKernel;
int MKernelNumArgs = 0;
KernelParamDescGetterFuncPtr MKernelParamDescGetter = nullptr;
bool MKernelIsESIMD = false;
bool MKernelHasSpecialCaptures = true;
detail::KernelNameBasedCacheT *MKernelNameBasedCachePtr = nullptr;
};

} // namespace v1
} // namespace detail

Expand All @@ -167,6 +197,13 @@ template <typename CommandGroupFunc, typename PropertiesT>
event submit_with_event_impl(const queue &Q, PropertiesT Props,
CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc);

template <typename PropertiesT, typename KernelName,
typename KernelType, int Dims>
event submit_with_event_impl(const queue &Q, PropertiesT Props,
nd_range<Dims> Range,
const KernelType &KernelFunc,
const sycl::detail::code_location &CodeLoc);
} // namespace detail
} // namespace ext::oneapi::experimental

Expand Down Expand Up @@ -3215,11 +3252,17 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
return submit_with_event<false, ext::oneapi::experimental::empty_properties_t,
KernelName>(sycl::ext::oneapi::experimental::empty_properties_t{},
Range, Rest..., CodeLoc);
#else
return submit(
[&](handler &CGH) {
CGH.template parallel_for<KernelName>(Range, Rest...);
},
TlsCodeLocCapture.query());
#endif
}

/// parallel_for version with a kernel represented as a lambda + nd_range that
Expand Down Expand Up @@ -3596,6 +3639,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc);

template <typename PropertiesT, typename KernelName,
typename KernelType, int Dims>
friend event ext::oneapi::experimental::detail::submit_with_event_impl(
const queue &Q, PropertiesT Props, nd_range<Dims> Range,
const KernelType &KernelFunc,
const sycl::detail::code_location &CodeLoc);

template <typename PropertiesT>
void ProcessSubmitProperties(PropertiesT Props,
detail::v1::SubmissionInfo &SI) const {
Expand All @@ -3609,6 +3659,36 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
}
}

template <int Dims, typename LambdaArgType> struct TransformUserItemType {
using type = std::conditional_t<
std::is_convertible_v<nd_item<Dims>, LambdaArgType>, nd_item<Dims>,
std::conditional_t<std::is_convertible_v<item<Dims>, LambdaArgType>,
item<Dims>, LambdaArgType>>;
};

template <typename PropertiesT, typename KernelName, typename KernelType,
int Dims>
void ProcessKernelRuntimeInfo(
PropertiesT Props, const KernelType &KernelFunc,
detail::v1::KernelRuntimeInfo &KRInfo) const {
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
using TransformedArgType = std::conditional_t<
std::is_integral<LambdaArgType>::value && Dims == 1, item<Dims>,
typename TransformUserItemType<Dims, LambdaArgType>::type>;

KRInfo.HostKernel().reset(
new detail::HostKernel<KernelType, TransformedArgType, Dims>(
KernelFunc));
KRInfo.KernelName() = detail::getKernelName<NameT>();
KRInfo.KernelNumArgs() = detail::getKernelNumParams<NameT>();
KRInfo.KernelParamDescGetter() = &(detail::getKernelParamDesc<NameT>);
KRInfo.KernelIsESIMD() = detail::isKernelESIMD<NameT>();
KRInfo.KernelHasSpecialCaptures() = detail::hasSpecialCaptures<NameT>();
KRInfo.KernelNameBasedCachePtr() = detail::getKernelNameBasedCache<NameT>();
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
/// TODO: Unused. Remove these when ABI-break window is open.
/// Not using `type_erased_cgfo_ty` on purpose.
Expand Down Expand Up @@ -3680,6 +3760,24 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
const detail::code_location &CodeLoc,
bool IsTopCodeLoc) const;

event submit_with_event_impl(
nd_range<1> Range,
const detail::v1::SubmissionInfo &ExtSubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;

event submit_with_event_impl(
nd_range<2> Range,
const detail::v1::SubmissionInfo &ExtSubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;

event submit_with_event_impl(
nd_range<3> Range,
const detail::v1::SubmissionInfo &ExtSubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;

/// A template-free version of submit_without_event as const member function.
void submit_without_event_impl(const detail::type_erased_cgfo_ty &CGH,
const detail::v1::SubmissionInfo &SubmitInfo,
Expand Down Expand Up @@ -3763,6 +3861,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
TlsCodeLocCapture.isToplevel());
}

template <bool UseFallbackAssert, typename PropertiesT, typename KernelName,
typename KernelType, int Dims>
event submit_with_event(PropertiesT Props, nd_range<Dims> Range,
const KernelType &KernelFunc,
const detail::code_location &CodeLoc =
detail::code_location::current()) const {
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
detail::v1::SubmissionInfo SI{};
detail::v1::KernelRuntimeInfo KRInfo{};

ProcessSubmitProperties(Props, SI);
ProcessKernelRuntimeInfo<PropertiesT, KernelName,
KernelType, Dims>(Props, KernelFunc, KRInfo);

// TODO UseFallbackAssert

return submit_with_event_impl(Range, SI, KRInfo,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
}

/// Submits a command group function object to the queue, in order to be
/// scheduled for execution on the device.
///
Expand Down
39 changes: 39 additions & 0 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,45 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
return createSyclObjFromImpl<event>(ResEvent);
}

event
submit_with_event(nd_range<1> Range,
const detail::v1::SubmissionInfo &SubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
(void)Range;
(void)SubmitInfo;
(void)KRInfo;
(void)CodeLoc;
(void)IsTopCodeLoc;
return event();
}

event
submit_with_event(nd_range<2> Range,
const detail::v1::SubmissionInfo &SubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
(void)Range;
(void)SubmitInfo;
(void)KRInfo;
(void)CodeLoc;
(void)IsTopCodeLoc;
return event();
}

event
submit_with_event(nd_range<3> Range,
const detail::v1::SubmissionInfo &SubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
(void)Range;
(void)SubmitInfo;
(void)KRInfo;
(void)CodeLoc;
(void)IsTopCodeLoc;
return event();
}

void submit_without_event(const detail::type_erased_cgfo_ty &CGF,
const v1::SubmissionInfo &SubmitInfo,
const detail::code_location &Loc,
Expand Down
27 changes: 27 additions & 0 deletions sycl/source/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,33 @@ event queue::submit_with_event_impl(
return impl->submit_with_event(CGH, SubmitInfo, CodeLoc, IsTopCodeLoc);
}

event queue::submit_with_event_impl(
nd_range<1> Range,
const detail::v1::SubmissionInfo &SubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const {
return impl->submit_with_event(Range, SubmitInfo, KRInfo,
CodeLoc, IsTopCodeLoc);
}

event queue::submit_with_event_impl(
nd_range<2> Range,
const detail::v1::SubmissionInfo &SubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const {
return impl->submit_with_event(Range, SubmitInfo, KRInfo,
CodeLoc, IsTopCodeLoc);
}

event queue::submit_with_event_impl(
nd_range<3> Range,
const detail::v1::SubmissionInfo &SubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const {
return impl->submit_with_event(Range, SubmitInfo, KRInfo,
CodeLoc, IsTopCodeLoc);
}

void queue::submit_without_event_impl(
const detail::type_erased_cgfo_ty &CGH,
const detail::v1::SubmissionInfo &SubmitInfo,
Expand Down
Loading