From e07b316d3efb431f87f8967589a511bd32a40b41 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 9 Jan 2024 15:24:59 +0000 Subject: [PATCH 1/5] [SYCL] Add sycl_ext_named_sub_group_sizes kernel properties --- .../experimental/named_sub_group_sizes.hpp | 45 +++++++++++++++++++ sycl/include/sycl/sycl.hpp | 1 + ...properties_kernel_named_sub_group_size.cpp | 24 ++++++++++ .../properties/properties_kernel_negative.cpp | 5 --- 4 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp create mode 100644 sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp b/sycl/include/sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp new file mode 100644 index 0000000000000..e76d09de9c798 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp @@ -0,0 +1,45 @@ +//== named_sub_group_sizes.hpp --- SYCL extension for named sub-group sizes ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +struct named_sub_group_size { + static constexpr uint32_t primary = 0; + static constexpr uint32_t automatic = -1; +}; + +inline constexpr sub_group_size_key::value_t + sub_group_size_primary; + +inline constexpr sub_group_size_key::value_t + sub_group_size_automatic; + +namespace detail { +template <> +struct PropertyMetaInfo< + sub_group_size_key::value_t> { + // sub_group_size_automatic means that the kernel can be compiled with + // any sub-group size. That is, if the kernel has the sub_group_size_automatic + // property, then no sycl-sub-group-size IR attribute needs to be attached. + // Specializing PropertyMetaInfo for sub_group_size_automatic and setting + // name to an empty string will result in no sycl-sub-group-size IR being + // attached. + static constexpr const char *name = ""; + static constexpr const char *value = 0; +}; +} // namespace detail + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 56d8fceb34dc2..cfbb3de75d360 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -127,6 +127,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include +#include #include #include #include diff --git a/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp b/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp new file mode 100644 index 0000000000000..3d02ac4d1f259 --- /dev/null +++ b/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp @@ -0,0 +1,24 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics +#include + +int main() { + sycl::queue q; + sycl::nd_range<1> ndr{6, 2}; + + // CHECK: spir_kernel void @{{.*}}Kernel1() + // CHECK-SAME: !intel_reqd_sub_group_size ![[SGSizeAttr:[0-9]+]] + sycl::ext::oneapi::experimental::properties P1{ + sycl::ext::oneapi::experimental::sub_group_size_primary}; + q.parallel_for(ndr, P1, [=](auto id) {}); + + // CHECK: spir_kernel void @{{.*}}Kernel2() + // CHECK-NOT: intel_reqd_sub_group_size + // CHECK-SAME: { + sycl::ext::oneapi::experimental::properties P2{ + sycl::ext::oneapi::experimental::sub_group_size_automatic}; + q.parallel_for(ndr, P2, [=](auto id) {}); +} + +// CHECK: ![[SGSizeAttr]] = !{i32 0} \ No newline at end of file diff --git a/sycl/test/extensions/properties/properties_kernel_negative.cpp b/sycl/test/extensions/properties/properties_kernel_negative.cpp index e3c7314182208..37bb9b076b77b 100644 --- a/sycl/test/extensions/properties/properties_kernel_negative.cpp +++ b/sycl/test/extensions/properties/properties_kernel_negative.cpp @@ -301,11 +301,6 @@ void check_sub_group_size() { // expected-error@+1 {{too few template arguments for variable template 'sub_group_size'}} auto WGSize0 = sycl::ext::oneapi::experimental::sub_group_size<>; - // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: sub_group_size_key property must contain a non-zero value.}} - // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'sub_group_size<0>' must be initialized by a constant expression}} - // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::sub_group_size<0>' requested here}} - auto WGSize1 = sycl::ext::oneapi::experimental::sub_group_size<0>; - sycl::queue Q; // expected-error-re@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} From 091b728a96ce7aac658a0b5cc03de20aa84093d8 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 26 Feb 2024 07:54:43 -0800 Subject: [PATCH 2/5] Don't use 0 for named_sub_group_size value --- .../sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp | 4 ++-- .../properties/properties_kernel_named_sub_group_size.cpp | 2 +- .../extensions/properties/properties_kernel_negative.cpp | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp b/sycl/include/sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp index e76d09de9c798..0d895ddec468d 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/named_sub_group_sizes.hpp @@ -15,8 +15,8 @@ inline namespace _V1 { namespace ext::oneapi::experimental { struct named_sub_group_size { - static constexpr uint32_t primary = 0; - static constexpr uint32_t automatic = -1; + static constexpr uint32_t primary = -1; + static constexpr uint32_t automatic = -2; }; inline constexpr sub_group_size_key::value_t diff --git a/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp b/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp index 3d02ac4d1f259..a5b74f9ccb183 100644 --- a/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp +++ b/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp @@ -21,4 +21,4 @@ int main() { q.parallel_for(ndr, P2, [=](auto id) {}); } -// CHECK: ![[SGSizeAttr]] = !{i32 0} \ No newline at end of file +// CHECK: ![[SGSizeAttr]] = !{i32 -1} diff --git a/sycl/test/extensions/properties/properties_kernel_negative.cpp b/sycl/test/extensions/properties/properties_kernel_negative.cpp index 37bb9b076b77b..e3c7314182208 100644 --- a/sycl/test/extensions/properties/properties_kernel_negative.cpp +++ b/sycl/test/extensions/properties/properties_kernel_negative.cpp @@ -301,6 +301,11 @@ void check_sub_group_size() { // expected-error@+1 {{too few template arguments for variable template 'sub_group_size'}} auto WGSize0 = sycl::ext::oneapi::experimental::sub_group_size<>; + // expected-error-re@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: sub_group_size_key property must contain a non-zero value.}} + // expected-error@sycl/ext/oneapi/kernel_properties/properties.hpp:* {{constexpr variable 'sub_group_size<0>' must be initialized by a constant expression}} + // expected-note@+1 {{in instantiation of variable template specialization 'sycl::ext::oneapi::experimental::sub_group_size<0>' requested here}} + auto WGSize1 = sycl::ext::oneapi::experimental::sub_group_size<0>; + sycl::queue Q; // expected-error-re@sycl/ext/oneapi/properties/properties.hpp:* {{static assertion failed due to requirement {{.+}}: Failed to merge property lists due to conflicting properties.}} From d8cfdb431caeff5c7cec947289225dcb1f3e005d Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 13 Aug 2025 22:21:13 -0700 Subject: [PATCH 3/5] Fix test, add feature macro and move extension to experimental Signed-off-by: Larsen, Steffen --- ..._ext_oneapi_named_sub_group_sizes.asciidoc | 11 +++--- sycl/source/feature_test.hpp.in | 1 + ...properties_kernel_named_sub_group_size.cpp | 38 ++++++++++++++----- 3 files changed, 35 insertions(+), 15 deletions(-) rename sycl/doc/extensions/{proposed => experimental}/sycl_ext_oneapi_named_sub_group_sizes.asciidoc (96%) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_named_sub_group_sizes.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_named_sub_group_sizes.asciidoc similarity index 96% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_named_sub_group_sizes.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_named_sub_group_sizes.asciidoc index c5dd94b83ca4e..1dbaf97708209 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_named_sub_group_sizes.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_named_sub_group_sizes.asciidoc @@ -51,11 +51,12 @@ This extension also depends on the following other SYCL extensions: == Status -This is a proposed extension specification, intended to gather community -feedback. Interfaces defined in this specification may not be implemented yet -or may be in a preliminary state. The specification itself may also change in -incompatible ways before it is finalized. Shipping software products should not -rely on APIs defined in this specification. +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* == Overview diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index 1e26cf0b8a23e..3121e55a39495 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -124,6 +124,7 @@ inline namespace _V1 { #define SYCL_KHR_DEFAULT_CONTEXT 1 #define SYCL_EXT_INTEL_EVENT_MODE 1 #define SYCL_EXT_ONEAPI_TANGLE 1 +#define SYCL_EXT_ONEAPI_NAMED_SUB_GROUP_SIZES 1 // Unfinished KHR extensions. These extensions are only available if the // __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS macro is defined. diff --git a/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp b/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp index a5b74f9ccb183..fa05a234e2e27 100644 --- a/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp +++ b/sycl/test/extensions/properties/properties_kernel_named_sub_group_size.cpp @@ -3,22 +3,40 @@ // expected-no-diagnostics #include +struct SGSizePrimaryKernelFunctor { + SGSizePrimaryKernelFunctor() {} + + void operator()(sycl::nd_item<1>) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) const { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size_primary}; + } +}; + +struct SGSizeAutoKernelFunctor { + SGSizeAutoKernelFunctor() {} + + void operator()(sycl::nd_item<1>) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) const { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size_automatic}; + } +}; + int main() { - sycl::queue q; - sycl::nd_range<1> ndr{6, 2}; + sycl::queue Q; + sycl::nd_range<1> NDRange{6, 2}; - // CHECK: spir_kernel void @{{.*}}Kernel1() + // CHECK: spir_kernel void @{{.*}}SGSizePrimaryKernelFunctor() // CHECK-SAME: !intel_reqd_sub_group_size ![[SGSizeAttr:[0-9]+]] - sycl::ext::oneapi::experimental::properties P1{ - sycl::ext::oneapi::experimental::sub_group_size_primary}; - q.parallel_for(ndr, P1, [=](auto id) {}); + Q.parallel_for(NDRange, SGSizePrimaryKernelFunctor{}); - // CHECK: spir_kernel void @{{.*}}Kernel2() + // CHECK: spir_kernel void @{{.*}}SGSizeAutoKernelFunctor() // CHECK-NOT: intel_reqd_sub_group_size // CHECK-SAME: { - sycl::ext::oneapi::experimental::properties P2{ - sycl::ext::oneapi::experimental::sub_group_size_automatic}; - q.parallel_for(ndr, P2, [=](auto id) {}); + Q.parallel_for(NDRange, SGSizeAutoKernelFunctor{}); } // CHECK: ![[SGSizeAttr]] = !{i32 -1} From b5edd9910c62d36e28a7791a8035ec9380c498f7 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 14 Aug 2025 03:21:05 -0700 Subject: [PATCH 4/5] Add primary sub-group size device query Signed-off-by: Larsen, Steffen --- sycl/include/sycl/info/device_traits.def | 2 + .../detail/ur_device_info_ret_types.inc | 1 + .../SubGroup/primary_sub_group_size.cpp | 31 ++++++++++ sycl/test/abi/sycl_symbols_linux.dump | 1 + unified-runtime/include/ur_api.h | 3 + unified-runtime/include/ur_print.hpp | 16 +++++ .../core/EXP-SUB-GROUP-PRIMARY-SIZE.rst | 60 +++++++++++++++++++ .../core/exp-sub-group-primary-size.yml | 24 ++++++++ .../source/adapters/cuda/device.cpp | 7 +++ .../source/adapters/hip/device.cpp | 6 ++ .../source/adapters/level_zero/device.cpp | 3 + .../source/adapters/native_cpu/device.cpp | 2 + .../source/adapters/opencl/device.cpp | 4 ++ .../conformance/device/urDeviceGetInfo.cpp | 15 +++++ unified-runtime/tools/urinfo/urinfo.hpp | 2 + 15 files changed, 177 insertions(+) create mode 100644 sycl/test-e2e/SubGroup/primary_sub_group_size.cpp create mode 100644 unified-runtime/scripts/core/EXP-SUB-GROUP-PRIMARY-SIZE.rst create mode 100644 unified-runtime/scripts/core/exp-sub-group-primary-size.yml diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index 7ca82b196bb82..d5471d92cb374 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -208,6 +208,8 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_srgb, bool, UR_DEVICE_INFO_IMAGE_SRGB) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool, UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT) +__SYCL_PARAM_TRAITS_SPEC(device, primary_sub_group_size, uint32_t, + UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP) //Deprecated oneapi/intel extension //TODO:Remove when possible diff --git a/sycl/source/detail/ur_device_info_ret_types.inc b/sycl/source/detail/ur_device_info_ret_types.inc index e1e724262b85f..ff332e0bdb953 100644 --- a/sycl/source/detail/ur_device_info_ret_types.inc +++ b/sycl/source/detail/ur_device_info_ret_types.inc @@ -162,6 +162,7 @@ MAP(UR_DEVICE_INFO_NODE_MASK, uint32_t) // These aren't present in the specification, extracted from ur_api.h // instead. MAP(UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP, ur_exp_device_2d_block_array_capability_flags_t) +MAP(UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP, uint32_t) MAP(UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP, ur_bool_t) diff --git a/sycl/test-e2e/SubGroup/primary_sub_group_size.cpp b/sycl/test-e2e/SubGroup/primary_sub_group_size.cpp new file mode 100644 index 0000000000000..39a1e774f9948 --- /dev/null +++ b/sycl/test-e2e/SubGroup/primary_sub_group_size.cpp @@ -0,0 +1,31 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +struct SGSizePrimaryKernelFunctor { + SGSizePrimaryKernelFunctor(uint32_t *OutPtr) : Out{OutPtr} {} + + void operator()(sycl::nd_item<1> Item) const { + *Out = Item.get_sub_group().get_max_local_range()[0]; + } + + auto get(sycl::ext::oneapi::experimental::properties_tag) const { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::sub_group_size_primary}; + } + + uint32_t *Out; +}; + +int main() { + sycl::queue Q; + + uint32_t *OutPtr = sycl::malloc(1, Q); + Q.parallel_for(sycl::nd_range<1>{1, 1}, SGSizePrimaryKernelFunctor{OutPtr}) + .wait(); + + assert(*OutPtr == + Q.get_device().get_info()); + return 0; +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 71269e088a88a..cd06538e42d87 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3970,6 +3970,7 @@ _ZNK4sycl3_V16device13get_info_implINS0_4info6device21image_max_buffer_sizeEEENS _ZNK4sycl3_V16device13get_info_implINS0_4info6device21is_compiler_availableEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device22execution_capabilitiesEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device22ext_intel_gpu_eu_countEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_4info6device22primary_sub_group_sizeEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device22usm_device_allocationsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device22usm_shared_allocationsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device22usm_system_allocationsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 1bba8a950e75f..a805569435756 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -2424,6 +2424,9 @@ typedef enum ur_device_info_t { /// [::ur_exp_device_2d_block_array_capability_flags_t] return a bit-field /// of Intel GPU 2D block array capabilities UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP = 0x2022, + /// [uint32_t][optional-query] return the primary sub-group size of the + /// device. + UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP = 0x2023, /// [::ur_bool_t] returns true if the device supports enqueueing of /// allocations and frees. UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP = 0x2050, diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 91c9973a3a6e9..eb7b655d14dd2 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -3113,6 +3113,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: os << "UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP"; break; + case UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP: + os << "UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP"; + break; case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP: os << "UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP"; break; @@ -5241,6 +5244,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, os << ")"; } break; + case UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP: { + const uint32_t *tptr = (const uint32_t *)ptr; + if (sizeof(uint32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { diff --git a/unified-runtime/scripts/core/EXP-SUB-GROUP-PRIMARY-SIZE.rst b/unified-runtime/scripts/core/EXP-SUB-GROUP-PRIMARY-SIZE.rst new file mode 100644 index 0000000000000..91416c7c93eee --- /dev/null +++ b/unified-runtime/scripts/core/EXP-SUB-GROUP-PRIMARY-SIZE.rst @@ -0,0 +1,60 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-sub-group-primary-size: + +================================================================================ +Sub-group primary size +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- +Some devices expose a "primary" sub-group size, which is a device-specific named +size that is independent of the kernels run on it. Usually, this sub-group size +can be specified by name in kernel code, but in order for the host code to know +this size, the corresponding device info query is introduced. + +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_device_info_t + * ${X}_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP + +Changelog +-------------------------------------------------------------------------------- + ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return ${X}_RESULT_SUCCESS from +the ${x}DeviceGetInfo call with the new ${X}_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP +device descriptor. + + +Contributors +-------------------------------------------------------------------------------- + +* Steffen Larsen `steffen.larsen@intel.com `_ diff --git a/unified-runtime/scripts/core/exp-sub-group-primary-size.yml b/unified-runtime/scripts/core/exp-sub-group-primary-size.yml new file mode 100644 index 0000000000000..dcd05812ed3d7 --- /dev/null +++ b/unified-runtime/scripts/core/exp-sub-group-primary-size.yml @@ -0,0 +1,24 @@ +# +# Copyright (C) 2025 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental device descriptor for querying the primary sub-group size" +ordinal: "99" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enum to $x_device_info_t to query primary sub-group size." +name: $x_device_info_t +etors: + - name: SUB_GROUP_PRIMARY_SIZE_EXP + value: "0x2023" + desc: "[uint32_t][optional-query] return a 32-bit unsigned integer representing the primary sub-group size of the device." + diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp index 03d9a13999f84..6ebc90e8cd2f1 100644 --- a/unified-runtime/source/adapters/cuda/device.cpp +++ b/unified-runtime/source/adapters/cuda/device.cpp @@ -1146,6 +1146,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( static_cast(0)); + case UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP: { + // NVIDIA devices only support one sub-group size (the warp size) + int WarpSize = 0; + UR_CHECK_ERROR(cuDeviceGetAttribute( + &WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get())); + return ReturnValue(static_cast(WarpSize)); + } case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: return ReturnValue(true); diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index c48033ec88826..cde6f4dde58eb 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -999,6 +999,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( static_cast(0)); + case UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP: { + int WarpSize = 0; + UR_CHECK_ERROR(hipDeviceGetAttribute(&WarpSize, hipDeviceAttributeWarpSize, + hDevice->get())); + return ReturnValue(static_cast(WarpSize)); + } case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { int RuntimeVersion = 0; UR_CHECK_ERROR(hipRuntimeGetVersion(&RuntimeVersion)); diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp index 5410125ede43c..6645cb3a6e962 100644 --- a/unified-runtime/source/adapters/level_zero/device.cpp +++ b/unified-runtime/source/adapters/level_zero/device.cpp @@ -1271,6 +1271,9 @@ ur_result_t urDeviceGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; #endif } + case UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP: + // Currently not supported. + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_ASYNC_BARRIER: return ReturnValue(false); case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORT: diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp index 369b4cd7ed013..eb41a932f9f7c 100644 --- a/unified-runtime/source/adapters/native_cpu/device.cpp +++ b/unified-runtime/source/adapters/native_cpu/device.cpp @@ -376,6 +376,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( static_cast(0)); + case UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP: + return ReturnValue(static_cast(1)); case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { // Currently for Native CPU fences are implemented using OCK // builtins, so we have different capabilities than atomic operations diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp index 9c9c82ea47bf4..0dddcf1b70b26 100644 --- a/unified-runtime/source/adapters/opencl/device.cpp +++ b/unified-runtime/source/adapters/opencl/device.cpp @@ -1359,6 +1359,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD | UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE); } + case UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP: + // Currently not supported. Depends on + // https://github.com/intel/llvm/pull/11301. + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE: { bool Supported = false; UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( diff --git a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp index 9980f8f7e1a2e..d000cc90c80ac 100644 --- a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp +++ b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp @@ -2527,6 +2527,21 @@ TEST_P(urDeviceGetInfoTest, Success2DBlockArrayCapabilities) { 0); } +TEST_P(urDeviceGetInfoTest, SuccessSubGroupPrimarySize) { + size_t property_size = 0; + const ur_device_info_t property_name = SUB_GROUP_PRIMARY_SIZE_EXP; + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size)); + ASSERT_EQ(property_size, sizeof(uint32_t)); + + uint32_t property_value = 0; + ASSERT_QUERY_RETURNS_VALUE(urDeviceGetInfo(device, property_name, + property_size, &property_value, + nullptr), + property_value); +} + TEST_P(urDeviceGetInfoTest, SuccessUseNativeAssert) { size_t property_size = 0; const ur_device_info_t property_name = UR_DEVICE_INFO_USE_NATIVE_ASSERT; diff --git a/unified-runtime/tools/urinfo/urinfo.hpp b/unified-runtime/tools/urinfo/urinfo.hpp index b08661787cccf..cb416202d3a0c 100644 --- a/unified-runtime/tools/urinfo/urinfo.hpp +++ b/unified-runtime/tools/urinfo/urinfo.hpp @@ -445,6 +445,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, printDeviceInfo( hDevice, UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP); std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_PRIMARY_SIZE_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP); std::cout << prefix; From ee6dc2c0b48fdccbcb5c701bfb09397b62b712dd Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 14 Aug 2025 06:03:32 -0700 Subject: [PATCH 5/5] Fix test Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SubGroup/primary_sub_group_size.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/SubGroup/primary_sub_group_size.cpp b/sycl/test-e2e/SubGroup/primary_sub_group_size.cpp index 39a1e774f9948..f928199ad1970 100644 --- a/sycl/test-e2e/SubGroup/primary_sub_group_size.cpp +++ b/sycl/test-e2e/SubGroup/primary_sub_group_size.cpp @@ -1,7 +1,11 @@ +// REQUIRES: aspect-usm_shared_allocations // RUN: %{build} -o %t.out // RUN: %{run} %t.out #include +#include +#include +#include struct SGSizePrimaryKernelFunctor { SGSizePrimaryKernelFunctor(uint32_t *OutPtr) : Out{OutPtr} {} @@ -21,7 +25,7 @@ struct SGSizePrimaryKernelFunctor { int main() { sycl::queue Q; - uint32_t *OutPtr = sycl::malloc(1, Q); + uint32_t *OutPtr = sycl::malloc_shared(1, Q); Q.parallel_for(sycl::nd_range<1>{1, 1}, SGSizePrimaryKernelFunctor{OutPtr}) .wait();