diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 8fb38aa01edfe..150206625f586 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -86,6 +86,7 @@ def AspectExt_oneapi_cuda_cluster_group : Aspect<"ext_oneapi_cuda_cluster_group" def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">; def AspectExt_oneapi_atomic16 : Aspect<"ext_oneapi_atomic16">; def AspectExt_oneapi_virtual_functions : Aspect<"ext_oneapi_virtual_functions">; +def AspectExt_intel_spill_memory_size : Aspect<"ext_intel_spill_memory_size">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">; @@ -150,7 +151,8 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem, AspectExt_oneapi_cuda_cluster_group, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_atomic16, - AspectExt_oneapi_virtual_functions], + AspectExt_oneapi_virtual_functions, + AspectExt_intel_spill_memory_size], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_kernel_queries.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_kernel_queries.asciidoc new file mode 100644 index 0000000000000..b030436af5322 --- /dev/null +++ b/sycl/doc/extensions/supported/sycl_ext_intel_kernel_queries.asciidoc @@ -0,0 +1,139 @@ += sycl_ext_intel_kernel_queries + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2025 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 9 specification. +All references below to the "core SYCL specification" or to section numbers in +the SYCL specification refer to that revision. + + +== Status + +This extension is implemented and fully supported by {dpcpp}. + + +== Overview + +This extension contains a collection of queries that provide low-level +information about kernels. +These queries generally forward directly to the backend and expose concepts that +are specific to a particular implementation. +As a result, these queries may not be supported for all devices. +Each query has an associate device aspect, which tells whether the query is +supported on that device. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. +An implementation supporting this extension must predefine the macro +`SYCL_EXT_INTEL_KERNEL_QUERIES` to one of the values defined in the table below. +Applications can test for the existence of this macro to determine if the +implementation supports this feature, or applications can test the macro's value +to determine which of the extension's features the implementation supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|Initial version of this extension. +|=== + +=== Spill memory size + +This query returns the kernel's spill memory size that is allocated by the +compiler, as reported by Level Zero. + +==== New device aspect + +This extension adds the following new device aspect. + +[source,c++] +---- +namespace sycl { + +enum class aspect { + ext_intel_spill_memory_size + + // ... +}; + +} +---- + +''' + +`*ext_intel_spill_memory_size*` + +Indicates that the `spill_memory_size` kernel information descriptor may be used +to query kernels for this device. + +''' + +==== New device specific kernel information descriptor + +This extension adds the following information descriptor that can be used with +`kernel::get_info(const device&)`. + +''' + +`*ext::intel::info::kernel_device_specific::spill_memory_size*` + +[source,c++] +---- +namespace sycl::ext::intel::info::kernel_device_specific { +struct spill_memory_size { + using return_type = size_t; +}; +} // namespace sycl::ext::intel::info::kernel_device_specific +---- + +_Remarks:_ Template parameter to `kernel::get_info(const device&)`. + +_Returns:_ The spill memory size that is allocated by the compiler for this +kernel for the given device. + +_Throws:_ An `exception` with the `errc::feature_not_supported` error code if +the device does not have `aspect::ext_intel_spill_memory_size`. + +''' diff --git a/sycl/include/sycl/detail/info_desc_helpers.hpp b/sycl/include/sycl/detail/info_desc_helpers.hpp index 9177ffd0cb988..0ee64ab4dd568 100644 --- a/sycl/include/sycl/detail/info_desc_helpers.hpp +++ b/sycl/include/sycl/detail/info_desc_helpers.hpp @@ -88,6 +88,15 @@ struct IsKernelInfo #include #undef __SYCL_PARAM_TRAITS_SPEC +#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, UrCode) \ + template <> \ + struct is_##DescType##_info_desc \ + : std::true_type { \ + using return_type = Namespace::info::DescType::Desc::return_type; \ + }; +#include +#undef __SYCL_PARAM_TRAITS_SPEC + #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, UrCode) \ template <> \ struct is_##DescType##_info_desc : std::true_type { \ diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 8a931dde35a71..d039e4bc2fee5 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -72,3 +72,4 @@ __SYCL_ASPECT(ext_oneapi_bindless_images_sample_1d_usm, 78) __SYCL_ASPECT(ext_oneapi_bindless_images_sample_2d_usm, 79) __SYCL_ASPECT(ext_oneapi_atomic16, 80) __SYCL_ASPECT(ext_oneapi_virtual_functions, 81) +__SYCL_ASPECT(ext_intel_spill_memory_size, 82) diff --git a/sycl/include/sycl/info/ext_intel_kernel_info_traits.def b/sycl/include/sycl/info/ext_intel_kernel_info_traits.def new file mode 100644 index 0000000000000..4a0d7d27d43f5 --- /dev/null +++ b/sycl/include/sycl/info/ext_intel_kernel_info_traits.def @@ -0,0 +1 @@ +__SYCL_PARAM_TRAITS_SPEC(ext::intel, kernel_device_specific, spill_memory_size, size_t, UR_KERNEL_INFO_SPILL_MEM_SIZE) diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp index a5db6699ea647..aea8a8e40e675 100644 --- a/sycl/include/sycl/info/info_desc.hpp +++ b/sycl/include/sycl/info/info_desc.hpp @@ -261,6 +261,7 @@ struct work_item_progress_capabilities; } // namespace ext::oneapi::experimental::info::device #include #include +#include #include #include diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 5cb7fa1e29585..9d630d20dbfeb 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -786,6 +786,11 @@ bool device_impl::has(aspect Aspect) const { BE == sycl::backend::opencl; return (is_cpu() || is_gpu()) && isCompatibleBE; } + case aspect::ext_intel_spill_memory_size: { + backend BE = getBackend(); + bool isCompatibleBE = BE == sycl::backend::ext_oneapi_level_zero; + return is_gpu() && isCompatibleBE; + } } return false; // This device aspect has not been implemented yet. diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index e6085e4f0c05a..570d912cdabb4 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -416,6 +416,23 @@ inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: DynamicLocalMemorySize); } +template <> +inline typename ext::intel::info::kernel_device_specific::spill_memory_size:: + return_type + kernel_impl::get_info< + ext::intel::info::kernel_device_specific::spill_memory_size>( + const device &Device) const { + if (!Device.has(aspect::ext_intel_spill_memory_size)) + throw exception( + make_error_code(errc::feature_not_supported), + "This device does not have the ext_intel_spill_memory_size aspect"); + + return get_kernel_device_specific_info< + ext::intel::info::kernel_device_specific::spill_memory_size>( + this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), + getAdapter()); +} + template <> inline typename syclex::info::kernel_queue_specific::max_work_group_size:: return_type diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index 4ec11d32fd4ac..b3bbe9584200d 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -146,6 +146,58 @@ uint32_t get_kernel_device_specific_info_with_input(ur_kernel_handle_t Kernel, return Result; } + +template <> +inline ext::intel::info::kernel_device_specific::spill_memory_size::return_type +get_kernel_device_specific_info< + ext::intel::info::kernel_device_specific::spill_memory_size>( + ur_kernel_handle_t Kernel, ur_device_handle_t Device, + const AdapterPtr &Adapter) { + size_t ResultSize = 0; + + // First call to get the number of device images + Adapter->call( + Kernel, UR_KERNEL_INFO_SPILL_MEM_SIZE, 0, nullptr, &ResultSize); + + size_t DeviceCount = ResultSize / sizeof(uint32_t); + + // Second call to retrieve the data + std::vector Device2SpillMap(DeviceCount); + Adapter->call( + Kernel, UR_KERNEL_INFO_SPILL_MEM_SIZE, ResultSize, Device2SpillMap.data(), + nullptr); + + ur_program_handle_t Program; + Adapter->call(Kernel, UR_KERNEL_INFO_PROGRAM, + sizeof(ur_program_handle_t), + &Program, nullptr); + // Retrieve the associated device list + size_t URDevicesSize = 0; + Adapter->call(Program, UR_PROGRAM_INFO_DEVICES, + 0, nullptr, &URDevicesSize); + + std::vector URDevices(URDevicesSize / + sizeof(ur_device_handle_t)); + Adapter->call(Program, UR_PROGRAM_INFO_DEVICES, + URDevicesSize, URDevices.data(), + nullptr); + assert(Device2SpillMap.size() == URDevices.size()); + + // Map the result back to the program devices. UR provides the following + // guarantee: + // The order of the devices is guaranteed (i.e., the same as queried by + // urDeviceGet) by the UR within a single application even if the runtime is + // reinitialized. + for (size_t idx = 0; idx < URDevices.size(); ++idx) { + if (URDevices[idx] == Device) + return size_t{Device2SpillMap[idx]}; + } + throw exception( + make_error_code(errc::runtime), + "ext::intel::info::kernel::spill_memory_size failed to retrieve " + "the requested value"); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/ur_info_code.hpp b/sycl/source/detail/ur_info_code.hpp index 863515ec469da..9d32c60660d6b 100644 --- a/sycl/source/detail/ur_info_code.hpp +++ b/sycl/source/detail/ur_info_code.hpp @@ -72,6 +72,15 @@ template struct UrInfoCode; #include #undef __SYCL_PARAM_TRAITS_SPEC +#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, UrCode) \ + template <> struct UrInfoCode { \ + static constexpr ur_kernel_info_t value = \ + static_cast(UrCode); \ + }; + +#include +#undef __SYCL_PARAM_TRAITS_SPEC + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index 2be4fc6273bdd..d8d53f01f7d2f 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -101,6 +101,14 @@ kernel::get_info(const device &Device, const range<3> &WGSize) const { #undef __SYCL_PARAM_TRAITS_SPEC +#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, UrCode) \ + template __SYCL_EXPORT ReturnT \ + kernel::get_info(const device &) const; + +#include + +#undef __SYCL_PARAM_TRAITS_SPEC + template __SYCL_EXPORT uint32_t kernel::get_info( const device &, const sycl::range<3> &) const; diff --git a/sycl/test-e2e/Basic/aspects.cpp b/sycl/test-e2e/Basic/aspects.cpp index ea1bbec27762d..eaa33c0073397 100644 --- a/sycl/test-e2e/Basic/aspects.cpp +++ b/sycl/test-e2e/Basic/aspects.cpp @@ -90,6 +90,9 @@ int main() { if (plt.has(aspect::ext_oneapi_virtual_functions)) { std::cout << " ext_oneapi_virtual_functions" << std::endl; } + if (plt.has(aspect::ext_intel_spill_memory_size)) { + std::cout << " ext_intel_spill_memory_size" << std::endl; + } } std::cout << "Passed." << std::endl; return 0; diff --git a/sycl/test-e2e/Basic/kernel_info.cpp b/sycl/test-e2e/Basic/kernel_info.cpp index f0fdad910c658..b3814cdeb6ea3 100644 --- a/sycl/test-e2e/Basic/kernel_info.cpp +++ b/sycl/test-e2e/Basic/kernel_info.cpp @@ -101,6 +101,12 @@ int main() { krn.get_info(dev); assert(compileNumSg <= maxNumSg); + size_t spillMemSz = 0; + if (dev.has(aspect::ext_intel_spill_memory_size)) { + spillMemSz = krn.get_info< + ext::intel::info::kernel_device_specific::spill_memory_size>(dev); + } + // Use ext_oneapi_get_kernel_info extension and check that answers match. const size_t wgSizeExt = syclex::get_kernel_info< SingleTask, info::kernel_device_specific::work_group_size>(ctx, dev); @@ -125,6 +131,13 @@ int main() { dev); assert(compileNumSgExt == compileNumSg); + if (dev.has(aspect::ext_intel_spill_memory_size)) { + const size_t spillMemSizeExt = syclex::get_kernel_info< + SingleTask, + ext::intel::info::kernel_device_specific::spill_memory_size>(ctx, dev); + assert(spillMemSizeExt == spillMemSz); + } + // Use ext_oneapi_get_kernel_info extension with queue parameter and check the // result. const size_t wgSizeExtQ = diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2190e45096ff0..cbe08c9c12d4f 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3976,6 +3976,7 @@ _ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21ke _ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific14num_sub_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi2EEE _ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific18max_sub_group_sizeEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEE _ZNK4sycl3_V16kernel3getEv +_ZNK4sycl3_V16kernel8get_infoINS0_3ext5intel4info22kernel_device_specific17spill_memory_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific15work_group_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16global_work_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16private_mem_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 1bdf96ed2f233..06907b875aac5 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -101,6 +101,7 @@ ??$get_info@Ureference_count@context@info@_V1@sycl@@@context@_V1@sycl@@QEBAIXZ ??$get_info@Ureference_count@event@info@_V1@sycl@@@event@_V1@sycl@@QEBAIXZ ??$get_info@Ureference_count@queue@info@_V1@sycl@@@queue@_V1@sycl@@QEBAIXZ +??$get_info@Uspill_memory_size@kernel_device_specific@info@intel@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KAEBVdevice@12@@Z ??$get_info@Usupports_fusion@device@info@experimental@codeplay@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_NXZ ??$get_info@Uuuid@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$array@E$0BA@@std@@XZ ??$get_info@Uwork_group_size@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KAEBVdevice@12@@Z diff --git a/sycl/test/include_deps/sycl_accessor.hpp.cpp b/sycl/test/include_deps/sycl_accessor.hpp.cpp index 3c8ef05401445..3105135fe0540 100644 --- a/sycl/test/include_deps/sycl_accessor.hpp.cpp +++ b/sycl/test/include_deps/sycl_accessor.hpp.cpp @@ -78,6 +78,7 @@ // CHECK-NEXT: info/event_profiling_traits.def // CHECK-NEXT: info/ext_codeplay_device_traits.def // CHECK-NEXT: info/ext_intel_device_traits.def +// CHECK-NEXT: info/ext_intel_kernel_info_traits.def // CHECK-NEXT: info/ext_oneapi_device_traits.def // CHECK-NEXT: info/ext_oneapi_kernel_queue_specific_traits.def // CHECK-NEXT: info/sycl_backend_traits.def diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index ac0f6c202262a..e1aaefbf594d7 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -79,6 +79,7 @@ // CHECK-NEXT: info/event_profiling_traits.def // CHECK-NEXT: info/ext_codeplay_device_traits.def // CHECK-NEXT: info/ext_intel_device_traits.def +// CHECK-NEXT: info/ext_intel_kernel_info_traits.def // CHECK-NEXT: info/ext_oneapi_device_traits.def // CHECK-NEXT: info/ext_oneapi_kernel_queue_specific_traits.def // CHECK-NEXT: info/sycl_backend_traits.def diff --git a/unified-runtime/source/adapters/level_zero/kernel.cpp b/unified-runtime/source/adapters/level_zero/kernel.cpp index 230a317ec20df..5ccab131f3cc7 100644 --- a/unified-runtime/source/adapters/level_zero/kernel.cpp +++ b/unified-runtime/source/adapters/level_zero/kernel.cpp @@ -752,10 +752,24 @@ ur_result_t urKernelGetInfo( case UR_KERNEL_INFO_NUM_ARGS: return ReturnValue(uint32_t{Kernel->ZeKernelProperties->numKernelArgs}); case UR_KERNEL_INFO_SPILL_MEM_SIZE: { - std::vector spills = { - uint32_t{Kernel->ZeKernelProperties->spillMemSize}}; - return ReturnValue(static_cast(spills.data()), - spills.size()); + try { + std::vector Spills; + Spills.reserve(Kernel->ZeKernels.size()); + for (auto &ZeKernel : Kernel->ZeKernels) { + ze_kernel_properties_t props; + props.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES; + props.pNext = nullptr; + ZE2UR_CALL(zeKernelGetProperties, (ZeKernel, &props)); + uint32_t spillMemSize = props.spillMemSize; + Spills.push_back(spillMemSize); + } + return ReturnValue(static_cast(Spills.data()), + Spills.size()); + } catch (const std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } case UR_KERNEL_INFO_REFERENCE_COUNT: return ReturnValue(uint32_t{Kernel->RefCount.load()});