intel · uditagarwal97 · Oct 13, 2025 · Oct 13, 2025 · Oct 13, 2025 · Oct 14, 2025
@@ -14,6 +14,9 @@
 #include <sycl/detail/is_device_copyable.hpp>
 #include <sycl/ext/intel/experimental/fp_control_kernel_properties.hpp>
 #include <sycl/ext/intel/experimental/kernel_execution_properties.hpp>
+#include <sycl/ext/oneapi/experimental/cluster_group_prop.hpp>
+#include <sycl/ext/oneapi/experimental/graph.hpp>
+#include <sycl/ext/oneapi/experimental/use_root_sync_prop.hpp>
 #include <sycl/ext/oneapi/experimental/virtual_functions.hpp>
 #include <sycl/ext/oneapi/kernel_properties/properties.hpp>
 #include <sycl/ext/oneapi/work_group_scratch_memory.hpp>
@@ -253,21 +256,201 @@ struct KernelWrapper<
   }
 }; // KernelWrapper struct
 
-struct KernelLaunchPropertyWrapper {
-  template <typename KernelName, typename PropertyProcessor,
-            typename KernelType>
-  static void parseProperties([[maybe_unused]] PropertyProcessor h,
-                              [[maybe_unused]] const KernelType &KernelFunc) {
+// This class encapsulates everything related to parsing kernel launch
+// properties.
+class KernelLaunchPropertyWrapper {
+public:
+  // This struct is used to store kernel launch properties.
+  // std::optional is used to indicate that the property is not set.
+  // In some code paths, kernel launch properties are set multiple times
+  // for the same kernel, that is why using std::optional to avoid overriding
+  // previously set properties.
+  // This struct is used to pass kernel launch properties across the ABI
+  // boundary.
+  struct KernelLaunchPropertiesT {
+    struct ScopeForwardProgressProperty {
+      std::optional<sycl::ext::oneapi::experimental::forward_progress_guarantee>
+          Guarantee;
+      std::optional<sycl::ext::oneapi::experimental::execution_scope> ExecScope;
+      std::optional<sycl::ext::oneapi::experimental::execution_scope>
+          CoordinationScope;
+    };
+
+    std::optional<ur_kernel_cache_config_t> MCacheConfig = std::nullopt;
+    std::optional<bool> MIsCooperative = std::nullopt;
+    std::optional<uint32_t> MWorkGroupMemorySize = std::nullopt;
+    std::optional<bool> MUsesClusterLaunch = std::nullopt;
+    size_t MClusterDims = 0;
+    std::array<size_t, 3> MClusterSize = {0, 0, 0};
+
+    // Forward progress guarantee properties for work_item, sub_group and
+    // work_group scopes. We need to store them for validation later.
+    std::array<ScopeForwardProgressProperty, 3> MForwardProgressProperties;
+  };
+
+  /// Process runtime kernel properties.
+  template <typename PropertiesT>
+  static KernelLaunchPropertiesT
+  processKernelLaunchProperties(PropertiesT Props) {
+    using namespace sycl::ext::oneapi::experimental;
+    using namespace sycl::ext::oneapi::experimental::detail;
+    KernelLaunchPropertiesT retval;
+
+    // Process Kernel cache configuration property.
+    {
+      if constexpr (PropertiesT::template has_property<
+                        sycl::ext::intel::experimental::cache_config_key>()) {
+        auto Config = Props.template get_property<
+            sycl::ext::intel::experimental::cache_config_key>();
+        if (Config == sycl::ext::intel::experimental::large_slm) {
+          retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM;
+        } else if (Config == sycl::ext::intel::experimental::large_data) {
+          retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA;
+        }
+      } else {
+        std::ignore = Props;
+      }
+    }
+
+    // Process Kernel cooperative property.
+    {
+      if constexpr (PropertiesT::template has_property<use_root_sync_key>())
+        retval.MIsCooperative = true;
+    }
+
+    // Process device progress properties.
+    {
+      using forward_progress =
+          sycl::ext::oneapi::experimental::forward_progress_guarantee;
+      if constexpr (PropertiesT::template has_property<
+                        work_group_progress_key>()) {
+        auto prop = Props.template get_property<work_group_progress_key>();
+        retval.MForwardProgressProperties[0].Guarantee = prop.guarantee;
+        retval.MForwardProgressProperties[0].ExecScope =
+            execution_scope::work_group;
+        retval.MForwardProgressProperties[0].CoordinationScope =
+            prop.coordinationScope;
+
+        // If we are here, the device supports the guarantee required but there
+        // is a caveat in that if the guarantee required is a concurrent
+        // guarantee, then we most likely also need to enable cooperative launch
+        // of the kernel. That is, although the device supports the required
+        // guarantee, some setup work is needed to truly make the device provide
+        // that guarantee at runtime. Otherwise, we will get the default
+        // guarantee which is weaker than concurrent. Same reasoning applies for
+        // sub_group but not for work_item.
+        // TODO: Further design work is probably needed to reflect this behavior
+        // in Unified Runtime.
+        if constexpr (prop.guarantee == forward_progress::concurrent)
+          retval.MIsCooperative = true;
+      }
+      if constexpr (PropertiesT::template has_property<
+                        sub_group_progress_key>()) {
+        auto prop = Props.template get_property<sub_group_progress_key>();
+        retval.MForwardProgressProperties[1].Guarantee = prop.guarantee;
+        retval.MForwardProgressProperties[1].ExecScope =
+            execution_scope::sub_group;
+        retval.MForwardProgressProperties[1].CoordinationScope =
+            prop.coordinationScope;
+
+        // Same reasoning as above for work_group applies here.
+        if constexpr (prop.guarantee == forward_progress::concurrent)
+          retval.MIsCooperative = true;
+      }
+      if constexpr (PropertiesT::template has_property<
+                        work_item_progress_key>()) {
+        auto prop = Props.template get_property<work_item_progress_key>();
+        retval.MForwardProgressProperties[2].Guarantee = prop.guarantee;
+        retval.MForwardProgressProperties[2].ExecScope =
+            execution_scope::work_item;
+        retval.MForwardProgressProperties[2].CoordinationScope =
+            prop.coordinationScope;
+      }
+    }
+
+    // Process work group scratch memory property.
+    {
+      if constexpr (PropertiesT::template has_property<
+                        work_group_scratch_size>()) {
+        auto WorkGroupMemSize =
+            Props.template get_property<work_group_scratch_size>();
+        retval.MWorkGroupMemorySize = WorkGroupMemSize.size;
+      }
+    }
+
+    // Parse cluster properties.
+    {
+      constexpr std::size_t ClusterDim = getClusterDim<PropertiesT>();
+      if constexpr (ClusterDim > 0) {
+
+        auto ClusterSize =
+            Props.template get_property<cuda::cluster_size_key<ClusterDim>>()
+                .get_cluster_size();
+        retval.MUsesClusterLaunch = true;
+        retval.MClusterDims = ClusterDim;
+        if (ClusterDim == 1) {
+          retval.MClusterSize[0] = ClusterSize[0];
+        } else if (ClusterDim == 2) {
+          retval.MClusterSize[0] = ClusterSize[0];
+          retval.MClusterSize[1] = ClusterSize[1];
+        } else if (ClusterDim == 3) {
+          retval.MClusterSize[0] = ClusterSize[0];
+          retval.MClusterSize[1] = ClusterSize[1];
+          retval.MClusterSize[2] = ClusterSize[2];
+        } else {
+          assert(ClusterDim <= 3 &&
+                 "Only 1D, 2D, and 3D cluster launch is supported.");
+        }
+      }
+    }
+
+    return retval;
+  }
+
+  /// Process kernel properties.
+  /// Note: it is important that this function *does not* depend on kernel
+  /// name or kernel type, because then it will be instantiated for every
+  /// kernel, even though body of those instantiated functions could be almost
+  /// the same, thus unnecessary increasing compilation time.
+  template <
+      bool IsESIMDKernel,
+      typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
+  static KernelLaunchPropertiesT processKernelProperties(PropertiesT Props) {
+    static_assert(
+        ext::oneapi::experimental::is_property_list<PropertiesT>::value,
+        "Template type is not a property list.");
+    static_assert(
+        !PropertiesT::template has_property<
+            sycl::ext::intel::experimental::fp_control_key>() ||
+            (PropertiesT::template has_property<
+                 sycl::ext::intel::experimental::fp_control_key>() &&
+             IsESIMDKernel),
+        "Floating point control property is supported for ESIMD kernels only.");
+    static_assert(
+        !PropertiesT::template has_property<
+            sycl::ext::oneapi::experimental::indirectly_callable_key>(),
+        "indirectly_callable property cannot be applied to SYCL kernels");
+
+    return processKernelLaunchProperties(Props);
+  }
+
+  // Returns KernelLaunchPropertiesT or std::nullopt based on whether the
+  // kernel functor has a get method that returns properties.
+  template <typename KernelName, bool isESIMD, typename KernelType>
+  static std::optional<KernelLaunchPropertiesT>
+  parseProperties([[maybe_unused]] const KernelType &KernelFunc) {
 #ifndef __SYCL_DEVICE_ONLY__
     // If there are properties provided by get method then process them.
     if constexpr (ext::oneapi::experimental::detail::
                       HasKernelPropertiesGetMethod<const KernelType &>::value) {
 
-      h->template processProperties<
-          detail::CompileTimeKernelInfo<KernelName>.IsESIMD>(
+      return processKernelProperties<isESIMD>(
           KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
     }
 #endif
+    // If there are no properties provided by get method then return empty
+    // optional.
+    return std::nullopt;
   }
 }; // KernelLaunchPropertyWrapper struct
 

@@ -259,17 +259,12 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
           typename KernelType, typename... ReductionsT>
 void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
                ReductionsT &&...Reductions) {
-  // TODO The handler-less path does not support reductions, kernel
-  // function properties and kernel functions with the kernel_handler
-  // type argument yet.
+  // TODO The handler-less path does not support reductions, and
+  // kernel functions with the kernel_handler type argument yet.
   if constexpr (sizeof...(ReductionsT) == 0 &&
-                !(ext::oneapi::experimental::detail::
-                      HasKernelPropertiesGetMethod<
-                          const KernelType &>::value) &&
                 !(detail::KernelLambdaHasKernelHandlerArgT<
                     KernelType, sycl::nd_item<Dimensions>>::value)) {
-    detail::submit_kernel_direct<KernelName>(std::move(Q), empty_properties_t{},
-                                             Range, KernelObj);
+    detail::submit_kernel_direct<KernelName>(std::move(Q), Range, KernelObj);
   } else {
     submit(std::move(Q), [&](handler &CGH) {
       nd_launch<KernelName>(CGH, Range, KernelObj,
@@ -296,13 +291,25 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
           typename Properties, typename KernelType, typename... ReductionsT>
 void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
                const KernelType &KernelObj, ReductionsT &&...Reductions) {
-  // TODO This overload of the nd_launch function takes the kernel function
-  // properties, which are not yet supported for the handler-less path,
-  // so it only supports handler based submission for now
-  submit(std::move(Q), [&](handler &CGH) {
-    nd_launch<KernelName>(CGH, Config, KernelObj,
-                          std::forward<ReductionsT>(Reductions)...);
-  });
+  // TODO The handler-less path does not support reductions, and
+  // kernel functions with the kernel_handler type argument yet.
+  if constexpr (sizeof...(ReductionsT) == 0 &&
+                !(detail::KernelLambdaHasKernelHandlerArgT<
+                    KernelType, sycl::nd_item<Dimensions>>::value)) {
+
+    ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
+                                                          Properties>
+        ConfigAccess(Config);
+
+    detail::submit_kernel_direct<KernelName>(std::move(Q),
+                                             ConfigAccess.getRange(), KernelObj,
+                                             Config.getProperties());
+  } else {
+    submit(std::move(Q), [&](handler &CGH) {
+      nd_launch<KernelName>(CGH, Config, KernelObj,
+                            std::forward<ReductionsT>(Reductions)...);
+    });
+  }
 }
 
 template <int Dimensions, typename... ArgsT>