Skip to content

Commit b45075f

Browse files
committed
[SYCL] Extend no-handler submission path to support kernel properties.
1 parent bab58c1 commit b45075f

File tree

7 files changed

+139
-108
lines changed

7 files changed

+139
-108
lines changed

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,12 @@ template <typename KernelName = sycl::detail::auto_name, typename KernelType>
152152
void single_task(queue Q, const KernelType &KernelObj,
153153
const sycl::detail::code_location &CodeLoc =
154154
sycl::detail::code_location::current()) {
155-
// TODO The handler-less path does not support kernel function properties
156-
// and kernel functions with the kernel_handler type argument yet.
157-
if constexpr (!(ext::oneapi::experimental::detail::
158-
HasKernelPropertiesGetMethod<
159-
const KernelType &>::value) &&
160-
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
155+
// TODO The handler-less path does not support kernel functions with the
156+
// kernel_handler type argument yet.
157+
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
161158
void>::value)) {
162159
detail::submit_kernel_direct_single_task<KernelName>(
163-
std::move(Q), empty_properties_t{}, KernelObj, CodeLoc);
160+
std::move(Q), KernelObj, empty_properties_t{}, CodeLoc);
164161
} else {
165162
submit(
166163
std::move(Q),
@@ -271,17 +268,13 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
271268
typename KernelType, typename... ReductionsT>
272269
void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
273270
ReductionsT &&...Reductions) {
274-
// TODO The handler-less path does not support reductions, kernel
275-
// function properties and kernel functions with the kernel_handler
276-
// type argument yet.
271+
// TODO The handler-less path does not support reductions, and
272+
// kernel functions with the kernel_handler type argument yet.
277273
if constexpr (sizeof...(ReductionsT) == 0 &&
278-
!(ext::oneapi::experimental::detail::
279-
HasKernelPropertiesGetMethod<
280-
const KernelType &>::value) &&
281274
!(detail::KernelLambdaHasKernelHandlerArgT<
282275
KernelType, sycl::nd_item<Dimensions>>::value)) {
283-
detail::submit_kernel_direct_parallel_for<KernelName>(
284-
std::move(Q), empty_properties_t{}, Range, KernelObj);
276+
detail::submit_kernel_direct_parallel_for<KernelName>(std::move(Q), Range,
277+
KernelObj);
285278
} else {
286279
submit(std::move(Q), [&](handler &CGH) {
287280
nd_launch<KernelName>(CGH, Range, KernelObj,
@@ -308,13 +301,25 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
308301
typename Properties, typename KernelType, typename... ReductionsT>
309302
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
310303
const KernelType &KernelObj, ReductionsT &&...Reductions) {
311-
// TODO This overload of the nd_launch function takes the kernel function
312-
// properties, which are not yet supported for the handler-less path,
313-
// so it only supports handler based submission for now
314-
submit(std::move(Q), [&](handler &CGH) {
315-
nd_launch<KernelName>(CGH, Config, KernelObj,
316-
std::forward<ReductionsT>(Reductions)...);
317-
});
304+
// TODO The handler-less path does not support reductions, and
305+
// kernel functions with the kernel_handler type argument yet.
306+
if constexpr (sizeof...(ReductionsT) == 0 &&
307+
!(detail::KernelLambdaHasKernelHandlerArgT<
308+
KernelType, sycl::nd_item<Dimensions>>::value)) {
309+
310+
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
311+
Properties>
312+
LaunchConfigAccess(Config);
313+
314+
detail::submit_kernel_direct_parallel_for<KernelName>(
315+
std::move(Q), LaunchConfigAccess.getRange(), KernelObj,
316+
LaunchConfigAccess.getProperties());
317+
} else {
318+
submit(std::move(Q), [&](handler &CGH) {
319+
nd_launch<KernelName>(CGH, Config, KernelObj,
320+
std::forward<ReductionsT>(Reductions)...);
321+
});
322+
}
318323
}
319324

320325
template <int Dimensions, typename... ArgsT>

sycl/include/sycl/khr/free_function_commands.hpp

Lines changed: 20 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -157,16 +157,12 @@ template <typename KernelType, typename = typename std::enable_if_t<
157157
void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
158158
const sycl::detail::code_location &codeLoc =
159159
sycl::detail::code_location::current()) {
160-
// TODO The handler-less path does not support kernel function properties
161-
// and kernel functions with the kernel_handler type argument yet.
162-
if constexpr (!(ext::oneapi::experimental::detail::
163-
HasKernelPropertiesGetMethod<
164-
const KernelType &>::value) &&
165-
!(detail::KernelLambdaHasKernelHandlerArgT<
160+
// TODO The handler-less path does not support kernel functions with the
161+
// kernel_handler type argument yet.
162+
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<
166163
KernelType, sycl::nd_item<1>>::value)) {
167-
detail::submit_kernel_direct_parallel_for(
168-
q, ext::oneapi::experimental::empty_properties_t{},
169-
nd_range<1>(r, size), std::forward<KernelType>(k));
164+
detail::submit_kernel_direct_parallel_for(q, nd_range<1>(r, size),
165+
std::forward<KernelType>(k));
170166
} else {
171167
submit(
172168
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
@@ -178,16 +174,12 @@ template <typename KernelType, typename = typename std::enable_if_t<
178174
void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
179175
const sycl::detail::code_location &codeLoc =
180176
sycl::detail::code_location::current()) {
181-
// TODO The handler-less path does not support kernel function properties
182-
// and kernel functions with the kernel_handler type argument yet.
183-
if constexpr (!(ext::oneapi::experimental::detail::
184-
HasKernelPropertiesGetMethod<
185-
const KernelType &>::value) &&
186-
!(detail::KernelLambdaHasKernelHandlerArgT<
177+
// TODO The handler-less path does not support kernel functions with the
178+
// kernel_handler type argument yet.
179+
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<
187180
KernelType, sycl::nd_item<2>>::value)) {
188-
detail::submit_kernel_direct_parallel_for(
189-
q, ext::oneapi::experimental::empty_properties_t{},
190-
nd_range<2>(r, size), std::forward<KernelType>(k));
181+
detail::submit_kernel_direct_parallel_for(q, nd_range<2>(r, size),
182+
std::forward<KernelType>(k));
191183
} else {
192184
submit(
193185
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
@@ -199,16 +191,12 @@ template <typename KernelType, typename = typename std::enable_if_t<
199191
void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
200192
const sycl::detail::code_location &codeLoc =
201193
sycl::detail::code_location::current()) {
202-
// TODO The handler-less path does not support kernel function properties
203-
// and kernel functions with the kernel_handler type argument yet.
204-
if constexpr (!(ext::oneapi::experimental::detail::
205-
HasKernelPropertiesGetMethod<
206-
const KernelType &>::value) &&
207-
!(detail::KernelLambdaHasKernelHandlerArgT<
194+
// TODO The handler-less path does not support kernel functions with the
195+
// kernel_handler type argument yet.
196+
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<
208197
KernelType, sycl::nd_item<3>>::value)) {
209-
detail::submit_kernel_direct_parallel_for(
210-
q, ext::oneapi::experimental::empty_properties_t{},
211-
nd_range<3>(r, size), std::forward<KernelType>(k));
198+
detail::submit_kernel_direct_parallel_for(q, nd_range<3>(r, size),
199+
std::forward<KernelType>(k));
212200
} else {
213201
submit(
214202
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
@@ -324,16 +312,13 @@ template <typename KernelType, typename = typename std::enable_if_t<
324312
void launch_task(const sycl::queue &q, KernelType &&k,
325313
const sycl::detail::code_location &codeLoc =
326314
sycl::detail::code_location::current()) {
327-
// TODO The handler-less path does not support kernel function properties
328-
// and kernel functions with the kernel_handler type argument yet.
329-
if constexpr (!(ext::oneapi::experimental::detail::
330-
HasKernelPropertiesGetMethod<
331-
const KernelType &>::value) &&
332-
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
315+
// TODO The handler-less path does not support kernel functions with the
316+
// kernel_handler type argument yet.
317+
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
333318
void>::value)) {
334319
detail::submit_kernel_direct_single_task(
335-
q, ext::oneapi::experimental::empty_properties_t{},
336-
std::forward<KernelType>(k), codeLoc);
320+
q, std::forward<KernelType>(k),
321+
ext::oneapi::experimental::empty_properties_t{}, codeLoc);
337322
} else {
338323
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
339324
}

sycl/include/sycl/queue.hpp

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,15 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl(
6868
const queue &Queue, const nd_range<Dims> &Range,
6969
detail::HostKernelRefBase &HostKernel,
7070
detail::DeviceKernelInfo *DeviceKernelInfo,
71+
const detail::KernelPropertyHolderStructTy &Props,
7172
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
7273

7374
template <int Dims>
7475
void __SYCL_EXPORT submit_kernel_direct_without_event_impl(
7576
const queue &Queue, const nd_range<Dims> &Range,
7677
detail::HostKernelRefBase &HostKernel,
7778
detail::DeviceKernelInfo *DeviceKernelInfo,
79+
const detail::KernelPropertyHolderStructTy &Props,
7880
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
7981

8082
namespace detail {
@@ -159,16 +161,14 @@ class __SYCL_EXPORT SubmissionInfo {
159161

160162
template <detail::WrapAs WrapAs, typename LambdaArgType,
161163
typename KernelName = detail::auto_name, bool EventNeeded = false,
162-
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
164+
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
165+
typename KernelTypeUniversalRef, int Dims>
163166
auto submit_kernel_direct(
164-
const queue &Queue, [[maybe_unused]] PropertiesT Props,
165-
const nd_range<Dims> &Range, KernelTypeUniversalRef &&KernelFunc,
167+
const queue &Queue, const nd_range<Dims> &Range,
168+
KernelTypeUniversalRef &&KernelFunc,
169+
const PropertiesT &ExtraProps =
170+
ext::oneapi::experimental::empty_properties_t{},
166171
const detail::code_location &CodeLoc = detail::code_location::current()) {
167-
// TODO Properties not supported yet
168-
static_assert(
169-
std::is_same_v<PropertiesT,
170-
ext::oneapi::experimental::empty_properties_t>,
171-
"Setting properties not supported yet for no-CGH kernel submit.");
172172
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
173173

174174
using KernelType =
@@ -210,22 +210,42 @@ auto submit_kernel_direct(
210210
"-fsycl-host-compiler-options='/std:c++latest' "
211211
"might also help.");
212212

213+
detail::KernelPropertyHolderStructTy ParsedProperties;
214+
if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
215+
const KernelType &>::value) {
216+
// Merge properties via get() and manually specified properties.
217+
// get() method is used for specifying kernel properties but properties
218+
// passed via launch_config (ExtraProps) should be kernel launch properties.
219+
// They are mutually exclusive, so there should not be any conflict when
220+
// merging properties. merge_properties() throws if there's a conflict.
221+
auto MergedProps =
222+
sycl::ext::oneapi::experimental::detail::merge_properties(
223+
ExtraProps,
224+
KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
225+
226+
ParsedProperties = extractKernelProperties(MergedProps);
227+
} else {
228+
ParsedProperties = extractKernelProperties(ExtraProps);
229+
}
230+
213231
if constexpr (EventNeeded) {
214232
return submit_kernel_direct_with_event_impl(
215-
Queue, Range, HostKernel, DeviceKernelInfoPtr,
233+
Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
216234
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
217235
} else {
218236
submit_kernel_direct_without_event_impl(
219-
Queue, Range, HostKernel, DeviceKernelInfoPtr,
237+
Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
220238
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
221239
}
222240
}
223241

224242
template <typename KernelName = detail::auto_name, bool EventNeeded = false,
225-
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
243+
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
244+
typename KernelTypeUniversalRef, int Dims>
226245
auto submit_kernel_direct_parallel_for(
227-
const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
246+
const queue &Queue, const nd_range<Dims> &Range,
228247
KernelTypeUniversalRef &&KernelFunc,
248+
const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{},
229249
const detail::code_location &CodeLoc = detail::code_location::current()) {
230250

231251
using KernelType =
@@ -246,21 +266,23 @@ auto submit_kernel_direct_parallel_for(
246266
return submit_kernel_direct<detail::WrapAs::parallel_for, TransformedArgType,
247267
KernelName, EventNeeded, PropertiesT,
248268
KernelTypeUniversalRef, Dims>(
249-
Queue, Props, Range, std::forward<KernelTypeUniversalRef>(KernelFunc),
269+
Queue, Range, std::forward<KernelTypeUniversalRef>(KernelFunc), Props,
250270
CodeLoc);
251271
}
252272

253273
template <typename KernelName = detail::auto_name, bool EventNeeded = false,
254-
typename PropertiesT, typename KernelTypeUniversalRef>
274+
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
275+
typename KernelTypeUniversalRef>
255276
auto submit_kernel_direct_single_task(
256-
const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
277+
const queue &Queue, KernelTypeUniversalRef &&KernelFunc,
278+
const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{},
257279
const detail::code_location &CodeLoc = detail::code_location::current()) {
258280

259281
return submit_kernel_direct<detail::WrapAs::single_task, void, KernelName,
260282
EventNeeded, PropertiesT, KernelTypeUniversalRef,
261283
1>(
262-
Queue, Props, nd_range<1>{1, 1},
263-
std::forward<KernelTypeUniversalRef>(KernelFunc), CodeLoc);
284+
Queue, nd_range<1>{1, 1},
285+
std::forward<KernelTypeUniversalRef>(KernelFunc), Props, CodeLoc);
264286
}
265287

266288
} // namespace detail
@@ -2775,18 +2797,12 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27752797

27762798
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
27772799

2778-
// TODO The handler-less path does not support kernel
2779-
// function properties and kernel functions with the kernel_handler
2780-
// type argument yet.
2781-
if constexpr (
2782-
std::is_same_v<PropertiesT,
2783-
ext::oneapi::experimental::empty_properties_t> &&
2784-
!(ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
2785-
const KernelType &>::value) &&
2786-
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType, void>::value)) {
2800+
// TODO The handler-less path does not support kernel functions
2801+
// with the kernel_handler type argument yet.
2802+
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
2803+
void>::value)) {
27872804
return detail::submit_kernel_direct_single_task<KernelName, true>(
2788-
*this, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2789-
TlsCodeLocCapture.query());
2805+
*this, KernelFunc, Properties, TlsCodeLocCapture.query());
27902806
} else {
27912807
return submit(
27922808
[&](handler &CGH) {
@@ -3323,11 +3339,22 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
33233339
RestT &&...Rest) {
33243340
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
33253341
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
3326-
return submit(
3327-
[&](handler &CGH) {
3328-
CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
3329-
},
3330-
TlsCodeLocCapture.query());
3342+
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;
3343+
3344+
// TODO The handler-less path does not support reductions, and
3345+
// kernel functions with the kernel_handler type argument yet.
3346+
if constexpr (sizeof...(RestT) == 1 &&
3347+
!(detail::KernelLambdaHasKernelHandlerArgT<
3348+
KernelType, sycl::nd_item<Dims>>::value)) {
3349+
3350+
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
3351+
*this, Range, Rest..., Properties, TlsCodeLocCapture.query());
3352+
} else
3353+
return submit(
3354+
[&](handler &CGH) {
3355+
CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
3356+
},
3357+
TlsCodeLocCapture.query());
33313358
}
33323359

33333360
/// parallel_for version with a kernel represented as a lambda + nd_range that
@@ -3344,18 +3371,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
33443371
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
33453372
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;
33463373

3347-
// TODO The handler-less path does not support reductions, kernel
3348-
// function properties and kernel functions with the kernel_handler
3349-
// type argument yet.
3374+
// TODO The handler-less path does not support reductions, and
3375+
// kernel functions with the kernel_handler type argument yet.
33503376
if constexpr (sizeof...(RestT) == 1 &&
3351-
!(ext::oneapi::experimental::detail::
3352-
HasKernelPropertiesGetMethod<
3353-
const KernelType &>::value) &&
33543377
!(detail::KernelLambdaHasKernelHandlerArgT<
33553378
KernelType, sycl::nd_item<Dims>>::value)) {
33563379
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
3357-
*this, ext::oneapi::experimental::empty_properties_t{}, Range,
3358-
Rest..., TlsCodeLocCapture.query());
3380+
*this, Range, Rest...,
3381+
ext::oneapi::experimental::empty_properties_t{},
3382+
TlsCodeLocCapture.query());
33593383
} else {
33603384
return submit(
33613385
[&](handler &CGH) {

sycl/source/detail/queue_impl.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,13 +567,19 @@ EventImplPtr queue_impl::submit_command_to_graph(
567567
EventImplPtr queue_impl::submit_kernel_direct_impl(
568568
const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel,
569569
detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent,
570+
const detail::KernelPropertyHolderStructTy &Props,
570571
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
571572

572573
KernelData KData;
573574

574575
KData.setDeviceKernelInfoPtr(DeviceKernelInfo);
575576
KData.setNDRDesc(NDRDesc);
576577

578+
// Validate and set kernel launch properties.
579+
KData.validateAndSetKernelLaunchProperties(
580+
Props, getCommandGraph() != nullptr /*HasGraph?*/,
581+
getDeviceImpl() /*device_impl*/);
582+
577583
auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData,
578584
bool SchedulerBypass) -> EventImplPtr {
579585
if (SchedulerBypass) {

0 commit comments

Comments
 (0)