Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 27 additions & 22 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,15 +152,12 @@ template <typename KernelName = sycl::detail::auto_name, typename KernelType>
void single_task(queue Q, const KernelType &KernelObj,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
// TODO The handler-less path does not support kernel functions with the
// kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
detail::submit_kernel_direct_single_task<KernelName>(
std::move(Q), empty_properties_t{}, KernelObj, CodeLoc);
std::move(Q), KernelObj, empty_properties_t{}, CodeLoc);
} else {
submit(
std::move(Q),
Expand Down Expand Up @@ -271,17 +268,13 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
typename KernelType, typename... ReductionsT>
void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
ReductionsT &&...Reductions) {
// TODO The handler-less path does not support reductions, kernel
// function properties and kernel functions with the kernel_handler
// type argument yet.
// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(ReductionsT) == 0 &&
!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dimensions>>::value)) {
detail::submit_kernel_direct_parallel_for<KernelName>(
std::move(Q), empty_properties_t{}, Range, KernelObj);
detail::submit_kernel_direct_parallel_for<KernelName>(std::move(Q), Range,
KernelObj);
} else {
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Range, KernelObj,
Expand All @@ -308,13 +301,25 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
typename Properties, typename KernelType, typename... ReductionsT>
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
const KernelType &KernelObj, ReductionsT &&...Reductions) {
// TODO This overload of the nd_launch function takes the kernel function
// properties, which are not yet supported for the handler-less path,
// so it only supports handler based submission for now
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Config, KernelObj,
std::forward<ReductionsT>(Reductions)...);
});
// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(ReductionsT) == 0 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dimensions>>::value)) {

ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
Properties>
LaunchConfigAccess(Config);

detail::submit_kernel_direct_parallel_for<KernelName>(
std::move(Q), LaunchConfigAccess.getRange(), KernelObj,
LaunchConfigAccess.getProperties());
} else {
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Config, KernelObj,
std::forward<ReductionsT>(Reductions)...);
});
}
}

template <int Dimensions, typename... ArgsT>
Expand Down
55 changes: 20 additions & 35 deletions sycl/include/sycl/khr/free_function_commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,12 @@ template <typename KernelType, typename = typename std::enable_if_t<
void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
const sycl::detail::code_location &codeLoc =
sycl::detail::code_location::current()) {
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
// TODO The handler-less path does not support kernel functions with the
// kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<1>>::value)) {
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<1>(r, size), std::forward<KernelType>(k));
detail::submit_kernel_direct_parallel_for(q, nd_range<1>(r, size),
std::forward<KernelType>(k));
} else {
submit(
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
Expand All @@ -178,16 +174,12 @@ template <typename KernelType, typename = typename std::enable_if_t<
void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
const sycl::detail::code_location &codeLoc =
sycl::detail::code_location::current()) {
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
// TODO The handler-less path does not support kernel functions with the
// kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<2>>::value)) {
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<2>(r, size), std::forward<KernelType>(k));
detail::submit_kernel_direct_parallel_for(q, nd_range<2>(r, size),
std::forward<KernelType>(k));
} else {
submit(
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
Expand All @@ -199,16 +191,12 @@ template <typename KernelType, typename = typename std::enable_if_t<
void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
const sycl::detail::code_location &codeLoc =
sycl::detail::code_location::current()) {
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
// TODO The handler-less path does not support kernel functions with the
// kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<3>>::value)) {
detail::submit_kernel_direct_parallel_for(
q, ext::oneapi::experimental::empty_properties_t{},
nd_range<3>(r, size), std::forward<KernelType>(k));
detail::submit_kernel_direct_parallel_for(q, nd_range<3>(r, size),
std::forward<KernelType>(k));
} else {
submit(
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
Expand Down Expand Up @@ -324,16 +312,13 @@ template <typename KernelType, typename = typename std::enable_if_t<
void launch_task(const sycl::queue &q, KernelType &&k,
const sycl::detail::code_location &codeLoc =
sycl::detail::code_location::current()) {
// TODO The handler-less path does not support kernel function properties
// and kernel functions with the kernel_handler type argument yet.
if constexpr (!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
// TODO The handler-less path does not support kernel functions with the
// kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
detail::submit_kernel_direct_single_task(
q, ext::oneapi::experimental::empty_properties_t{},
std::forward<KernelType>(k), codeLoc);
q, std::forward<KernelType>(k),
ext::oneapi::experimental::empty_properties_t{}, codeLoc);
} else {
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
}
Expand Down
106 changes: 65 additions & 41 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,15 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl(
const queue &Queue, const nd_range<Dims> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

template <int Dims>
void __SYCL_EXPORT submit_kernel_direct_without_event_impl(
const queue &Queue, const nd_range<Dims> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

namespace detail {
Expand Down Expand Up @@ -159,16 +161,14 @@ class __SYCL_EXPORT SubmissionInfo {

template <detail::WrapAs WrapAs, typename LambdaArgType,
typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct(
const queue &Queue, [[maybe_unused]] PropertiesT Props,
const nd_range<Dims> &Range, KernelTypeUniversalRef &&KernelFunc,
const queue &Queue, const nd_range<Dims> &Range,
KernelTypeUniversalRef &&KernelFunc,
const PropertiesT &ExtraProps =
ext::oneapi::experimental::empty_properties_t{},
const detail::code_location &CodeLoc = detail::code_location::current()) {
// TODO Properties not supported yet
static_assert(
std::is_same_v<PropertiesT,
ext::oneapi::experimental::empty_properties_t>,
"Setting properties not supported yet for no-CGH kernel submit.");
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);

using KernelType =
Expand Down Expand Up @@ -210,22 +210,42 @@ auto submit_kernel_direct(
"-fsycl-host-compiler-options='/std:c++latest' "
"might also help.");

detail::KernelPropertyHolderStructTy ParsedProperties;
if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
const KernelType &>::value) {
// Merge properties via get() and manually specified properties.
// get() method is used for specifying kernel properties but properties
// passed via launch_config (ExtraProps) should be kernel launch properties.
// They are mutually exclusive, so there should not be any conflict when
// merging properties. merge_properties() throws if there's a conflict.
auto MergedProps =
sycl::ext::oneapi::experimental::detail::merge_properties(
ExtraProps,
KernelFunc.get(ext::oneapi::experimental::properties_tag{}));

ParsedProperties = extractKernelProperties(MergedProps);
} else {
ParsedProperties = extractKernelProperties(ExtraProps);
}

if constexpr (EventNeeded) {
return submit_kernel_direct_with_event_impl(
Queue, Range, HostKernel, DeviceKernelInfoPtr,
Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
} else {
submit_kernel_direct_without_event_impl(
Queue, Range, HostKernel, DeviceKernelInfoPtr,
Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
}
}

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct_parallel_for(
const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
const queue &Queue, const nd_range<Dims> &Range,
KernelTypeUniversalRef &&KernelFunc,
const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{},
const detail::code_location &CodeLoc = detail::code_location::current()) {

using KernelType =
Expand All @@ -246,21 +266,23 @@ auto submit_kernel_direct_parallel_for(
return submit_kernel_direct<detail::WrapAs::parallel_for, TransformedArgType,
KernelName, EventNeeded, PropertiesT,
KernelTypeUniversalRef, Dims>(
Queue, Props, Range, std::forward<KernelTypeUniversalRef>(KernelFunc),
Queue, Range, std::forward<KernelTypeUniversalRef>(KernelFunc), Props,
CodeLoc);
}

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT, typename KernelTypeUniversalRef>
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
typename KernelTypeUniversalRef>
auto submit_kernel_direct_single_task(
const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
const queue &Queue, KernelTypeUniversalRef &&KernelFunc,
const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{},
const detail::code_location &CodeLoc = detail::code_location::current()) {

return submit_kernel_direct<detail::WrapAs::single_task, void, KernelName,
EventNeeded, PropertiesT, KernelTypeUniversalRef,
1>(
Queue, Props, nd_range<1>{1, 1},
std::forward<KernelTypeUniversalRef>(KernelFunc), CodeLoc);
Queue, nd_range<1>{1, 1},
std::forward<KernelTypeUniversalRef>(KernelFunc), Props, CodeLoc);
}

} // namespace detail
Expand Down Expand Up @@ -2775,18 +2797,12 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {

detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);

// TODO The handler-less path does not support kernel
// function properties and kernel functions with the kernel_handler
// type argument yet.
if constexpr (
std::is_same_v<PropertiesT,
ext::oneapi::experimental::empty_properties_t> &&
!(ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<KernelType, void>::value)) {
// TODO The handler-less path does not support kernel functions
// with the kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
return detail::submit_kernel_direct_single_task<KernelName, true>(
*this, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
TlsCodeLocCapture.query());
*this, KernelFunc, Properties, TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
Expand Down Expand Up @@ -3323,11 +3339,22 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
RestT &&...Rest) {
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit(
[&](handler &CGH) {
CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
},
TlsCodeLocCapture.query());
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;

// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(RestT) == 1 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {

return detail::submit_kernel_direct_parallel_for<KernelName, true>(
*this, Range, Rest..., Properties, TlsCodeLocCapture.query());
} else
return submit(
[&](handler &CGH) {
CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
},
TlsCodeLocCapture.query());
}

/// parallel_for version with a kernel represented as a lambda + nd_range that
Expand All @@ -3344,18 +3371,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;

// TODO The handler-less path does not support reductions, kernel
// function properties and kernel functions with the kernel_handler
// type argument yet.
// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(RestT) == 1 &&
!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
*this, ext::oneapi::experimental::empty_properties_t{}, Range,
Rest..., TlsCodeLocCapture.query());
*this, Range, Rest...,
ext::oneapi::experimental::empty_properties_t{},
TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,13 +567,19 @@ EventImplPtr queue_impl::submit_command_to_graph(
EventImplPtr queue_impl::submit_kernel_direct_impl(
const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {

KernelData KData;

KData.setDeviceKernelInfoPtr(DeviceKernelInfo);
KData.setNDRDesc(NDRDesc);

// Validate and set kernel launch properties.
KData.validateAndSetKernelLaunchProperties(
Props, getCommandGraph() != nullptr /*HasGraph?*/,
getDeviceImpl() /*device_impl*/);

auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData,
bool SchedulerBypass) -> EventImplPtr {
if (SchedulerBypass) {
Expand Down
Loading
Loading