From 90e726e4afc0b41e69ddcc3f929eed5ef549bd92 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Sun, 31 Aug 2025 16:58:39 -0500 Subject: [PATCH] vulkan: initialize vulkan-hpp to allow using extension function pointers Use this to query register count for shader compiles on NVIDIA. Currently this is only for performance debug, but it could eventually be used in some heuristics like split_k. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 43 ++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index cd1c66ba7b476..7158bc7ceff8d 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5,8 +5,14 @@ #include "ggml-cpu.h" #endif +// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 + #include +// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- +VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE + #include #include #include @@ -121,6 +127,8 @@ struct vk_pipeline_struct { bool needed {}; // set to true when the shader has been compiled bool compiled {}; + // number of registers used, extracted from pipeline executable properties + uint32_t register_count {}; }; typedef std::shared_ptr vk_pipeline; @@ -429,6 +437,8 @@ struct vk_device_struct { bool coopmat2; + bool pipeline_executable_properties_support {}; + size_t idx; bool mul_mat_l[GGML_TYPE_COUNT]; @@ -1519,6 +1529,20 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin vk_instance.pfn_vkSetDebugUtilsObjectNameEXT(device->device, &static_cast(duoni)); } + if (device->pipeline_executable_properties_support) { + vk::PipelineExecutableInfoKHR executableInfo; + executableInfo.pipeline = pipeline->pipeline; + + auto statistics = device->device.getPipelineExecutableStatisticsKHR(executableInfo); + for (auto & s : statistics) { + // "Register Count" is reported by NVIDIA drivers. + if (strcmp(s.name, "Register Count") == 0) { + VK_LOG_DEBUG(pipeline->name << " " << s.name << ": " << s.value.u64 << " registers"); + pipeline->register_count = (uint32_t)s.value.u64; + } + } + } + { std::lock_guard guard(device->mutex); device->all_pipelines.push_back(pipeline); @@ -3509,6 +3533,7 @@ static vk_device ggml_vk_get_device(size_t idx) { bool amd_shader_core_properties2 = false; bool pipeline_robustness = false; bool coopmat2_support = false; + bool pipeline_executable_properties_support = false; device->coopmat_support = false; device->integer_dot_product = false; bool bfloat16_support = false; @@ -3551,6 +3576,8 @@ static vk_device ggml_vk_get_device(size_t idx) { !getenv("GGML_VK_DISABLE_BFLOAT16")) { bfloat16_support = true; #endif + } else if (strcmp("VK_KHR_pipeline_executable_properties", properties.extensionName) == 0) { + pipeline_executable_properties_support = true; } } @@ -3771,8 +3798,18 @@ static vk_device ggml_vk_get_device(size_t idx) { device_extensions.push_back("VK_KHR_shader_integer_dot_product"); } + VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pep_features {}; + pep_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR; + if (pipeline_executable_properties_support) { + last_struct->pNext = (VkBaseOutStructure *)&pep_features; + last_struct = (VkBaseOutStructure *)&pep_features; + device_extensions.push_back("VK_KHR_pipeline_executable_properties"); + } + vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2); + device->pipeline_executable_properties_support = pipeline_executable_properties_support; + device->fp16 = device->fp16 && vk12_features.shaderFloat16; #if defined(VK_KHR_shader_bfloat16) @@ -4288,6 +4325,9 @@ static void ggml_vk_instance_init() { } VK_LOG_DEBUG("ggml_vk_instance_init()"); + // See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- + VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); + uint32_t api_version = vk::enumerateInstanceVersion(); if (api_version < VK_API_VERSION_1_2) { @@ -4355,6 +4395,9 @@ static void ggml_vk_instance_init() { vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr; + // See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- + VULKAN_HPP_DEFAULT_DISPATCHER.init(vk_instance.instance); + std::vector devices = vk_instance.instance.enumeratePhysicalDevices(); // Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan