Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 190 additions & 7 deletions sycl/include/sycl/detail/kernel_launch_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#include <sycl/detail/is_device_copyable.hpp>
#include <sycl/ext/intel/experimental/fp_control_kernel_properties.hpp>
#include <sycl/ext/intel/experimental/kernel_execution_properties.hpp>
#include <sycl/ext/oneapi/experimental/cluster_group_prop.hpp>
#include <sycl/ext/oneapi/experimental/graph.hpp>
#include <sycl/ext/oneapi/experimental/use_root_sync_prop.hpp>
#include <sycl/ext/oneapi/experimental/virtual_functions.hpp>
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
#include <sycl/ext/oneapi/work_group_scratch_memory.hpp>
Expand Down Expand Up @@ -253,21 +256,201 @@ struct KernelWrapper<
}
}; // KernelWrapper struct

struct KernelLaunchPropertyWrapper {
template <typename KernelName, typename PropertyProcessor,
typename KernelType>
static void parseProperties([[maybe_unused]] PropertyProcessor h,
[[maybe_unused]] const KernelType &KernelFunc) {
// This class encapsulates everything related to parsing kernel launch
// properties.
class KernelLaunchPropertyWrapper {
public:
// This struct is used to store kernel launch properties.
// std::optional is used to indicate that the property is not set.
// In some code paths, kernel launch properties are set multiple times
// for the same kernel, that is why using std::optional to avoid overriding
// previously set properties.
// This struct is used to pass kernel launch properties across the ABI
// boundary.
struct KernelLaunchPropertiesT {
struct ScopeForwardProgressProperty {
std::optional<sycl::ext::oneapi::experimental::forward_progress_guarantee>
Guarantee;
std::optional<sycl::ext::oneapi::experimental::execution_scope> ExecScope;
std::optional<sycl::ext::oneapi::experimental::execution_scope>
CoordinationScope;
};

std::optional<ur_kernel_cache_config_t> MCacheConfig = std::nullopt;
std::optional<bool> MIsCooperative = std::nullopt;
std::optional<uint32_t> MWorkGroupMemorySize = std::nullopt;
std::optional<bool> MUsesClusterLaunch = std::nullopt;
size_t MClusterDims = 0;
std::array<size_t, 3> MClusterSize = {0, 0, 0};

// Forward progress guarantee properties for work_item, sub_group and
// work_group scopes. We need to store them for validation later.
std::array<ScopeForwardProgressProperty, 3> MForwardProgressProperties;
};

/// Process runtime kernel properties.
template <typename PropertiesT>
static KernelLaunchPropertiesT
processKernelLaunchProperties(PropertiesT Props) {
using namespace sycl::ext::oneapi::experimental;
using namespace sycl::ext::oneapi::experimental::detail;
KernelLaunchPropertiesT retval;

// Process Kernel cache configuration property.
{
if constexpr (PropertiesT::template has_property<
sycl::ext::intel::experimental::cache_config_key>()) {
auto Config = Props.template get_property<
sycl::ext::intel::experimental::cache_config_key>();
if (Config == sycl::ext::intel::experimental::large_slm) {
retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM;
} else if (Config == sycl::ext::intel::experimental::large_data) {
retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA;
}
} else {
std::ignore = Props;
}
}

// Process Kernel cooperative property.
{
if constexpr (PropertiesT::template has_property<use_root_sync_key>())
retval.MIsCooperative = true;
}

// Process device progress properties.
{
using forward_progress =
sycl::ext::oneapi::experimental::forward_progress_guarantee;
if constexpr (PropertiesT::template has_property<
work_group_progress_key>()) {
auto prop = Props.template get_property<work_group_progress_key>();
retval.MForwardProgressProperties[0].Guarantee = prop.guarantee;
retval.MForwardProgressProperties[0].ExecScope =
execution_scope::work_group;
retval.MForwardProgressProperties[0].CoordinationScope =
prop.coordinationScope;

// If we are here, the device supports the guarantee required but there
// is a caveat in that if the guarantee required is a concurrent
// guarantee, then we most likely also need to enable cooperative launch
// of the kernel. That is, although the device supports the required
// guarantee, some setup work is needed to truly make the device provide
// that guarantee at runtime. Otherwise, we will get the default
// guarantee which is weaker than concurrent. Same reasoning applies for
// sub_group but not for work_item.
// TODO: Further design work is probably needed to reflect this behavior
// in Unified Runtime.
if constexpr (prop.guarantee == forward_progress::concurrent)
retval.MIsCooperative = true;
}
if constexpr (PropertiesT::template has_property<
sub_group_progress_key>()) {
auto prop = Props.template get_property<sub_group_progress_key>();
retval.MForwardProgressProperties[1].Guarantee = prop.guarantee;
retval.MForwardProgressProperties[1].ExecScope =
execution_scope::sub_group;
retval.MForwardProgressProperties[1].CoordinationScope =
prop.coordinationScope;

// Same reasoning as above for work_group applies here.
if constexpr (prop.guarantee == forward_progress::concurrent)
retval.MIsCooperative = true;
}
if constexpr (PropertiesT::template has_property<
work_item_progress_key>()) {
auto prop = Props.template get_property<work_item_progress_key>();
retval.MForwardProgressProperties[2].Guarantee = prop.guarantee;
retval.MForwardProgressProperties[2].ExecScope =
execution_scope::work_item;
retval.MForwardProgressProperties[2].CoordinationScope =
prop.coordinationScope;
}
}

// Process work group scratch memory property.
{
if constexpr (PropertiesT::template has_property<
work_group_scratch_size>()) {
auto WorkGroupMemSize =
Props.template get_property<work_group_scratch_size>();
retval.MWorkGroupMemorySize = WorkGroupMemSize.size;
}
}

// Parse cluster properties.
{
constexpr std::size_t ClusterDim = getClusterDim<PropertiesT>();
if constexpr (ClusterDim > 0) {

auto ClusterSize =
Props.template get_property<cuda::cluster_size_key<ClusterDim>>()
.get_cluster_size();
retval.MUsesClusterLaunch = true;
retval.MClusterDims = ClusterDim;
if (ClusterDim == 1) {
retval.MClusterSize[0] = ClusterSize[0];
} else if (ClusterDim == 2) {
retval.MClusterSize[0] = ClusterSize[0];
retval.MClusterSize[1] = ClusterSize[1];
} else if (ClusterDim == 3) {
retval.MClusterSize[0] = ClusterSize[0];
retval.MClusterSize[1] = ClusterSize[1];
retval.MClusterSize[2] = ClusterSize[2];
} else {
assert(ClusterDim <= 3 &&
"Only 1D, 2D, and 3D cluster launch is supported.");
}
}
}

return retval;
}

/// Process kernel properties.
/// Note: it is important that this function *does not* depend on kernel
/// name or kernel type, because then it will be instantiated for every
/// kernel, even though body of those instantiated functions could be almost
/// the same, thus unnecessary increasing compilation time.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, shouldn't it be

template <...>
inline constexpr KernelLaunchPropertiesT WhateverName = <...>;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's copy-paste code. We anyway don't depend on KernelName in this function, this comment is just a warning about not changing this function to take KernelName as template param.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think your changes are extensive enough in the exact same area that I'd like to see this changed as part of your patch [series].

template <
bool IsESIMDKernel,
typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
static KernelLaunchPropertiesT processKernelProperties(PropertiesT Props) {
static_assert(
ext::oneapi::experimental::is_property_list<PropertiesT>::value,
"Template type is not a property list.");
static_assert(
!PropertiesT::template has_property<
sycl::ext::intel::experimental::fp_control_key>() ||
(PropertiesT::template has_property<
sycl::ext::intel::experimental::fp_control_key>() &&
IsESIMDKernel),
"Floating point control property is supported for ESIMD kernels only.");
static_assert(
!PropertiesT::template has_property<
sycl::ext::oneapi::experimental::indirectly_callable_key>(),
"indirectly_callable property cannot be applied to SYCL kernels");

return processKernelLaunchProperties(Props);
}

// Returns KernelLaunchPropertiesT or std::nullopt based on whether the
// kernel functor has a get method that returns properties.
template <typename KernelName, bool isESIMD, typename KernelType>
static std::optional<KernelLaunchPropertiesT>
parseProperties([[maybe_unused]] const KernelType &KernelFunc) {
#ifndef __SYCL_DEVICE_ONLY__
// If there are properties provided by get method then process them.
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<const KernelType &>::value) {

h->template processProperties<
detail::CompileTimeKernelInfo<KernelName>.IsESIMD>(
return processKernelProperties<isESIMD>(
KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
}
#endif
// If there are no properties provided by get method then return empty
// optional.
return std::nullopt;
}
}; // KernelLaunchPropertyWrapper struct

Expand Down
37 changes: 22 additions & 15 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,17 +259,12 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
typename KernelType, typename... ReductionsT>
void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
ReductionsT &&...Reductions) {
// TODO The handler-less path does not support reductions, kernel
// function properties and kernel functions with the kernel_handler
// type argument yet.
// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(ReductionsT) == 0 &&
!(ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
const KernelType &>::value) &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dimensions>>::value)) {
detail::submit_kernel_direct<KernelName>(std::move(Q), empty_properties_t{},
Range, KernelObj);
detail::submit_kernel_direct<KernelName>(std::move(Q), Range, KernelObj);
} else {
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Range, KernelObj,
Expand All @@ -296,13 +291,25 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
typename Properties, typename KernelType, typename... ReductionsT>
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
const KernelType &KernelObj, ReductionsT &&...Reductions) {
// TODO This overload of the nd_launch function takes the kernel function
// properties, which are not yet supported for the handler-less path,
// so it only supports handler based submission for now
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Config, KernelObj,
std::forward<ReductionsT>(Reductions)...);
});
// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(ReductionsT) == 0 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dimensions>>::value)) {

ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
Properties>
ConfigAccess(Config);

detail::submit_kernel_direct<KernelName>(std::move(Q),
ConfigAccess.getRange(), KernelObj,
Config.getProperties());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is note in https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc#launch-configuration which says, that only kernel launch properties can be used here. I wonder if this affect the properties parsing logic in some way?

} else {
submit(std::move(Q), [&](handler &CGH) {
nd_launch<KernelName>(CGH, Config, KernelObj,
std::forward<ReductionsT>(Reductions)...);
});
}
}

template <int Dimensions, typename... ArgsT>
Expand Down
Loading
Loading