Skip to content

Commit 90e726e

Browse files
committed
vulkan: initialize vulkan-hpp to allow using extension function pointers
Use this to query the register count of compiled shaders on NVIDIA. Currently this is only used for performance debugging, but it could eventually be used in heuristics such as split_k.
1 parent c1c354e commit 90e726e

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,14 @@
55
#include "ggml-cpu.h"
66
#endif
77

8+
// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers-
9+
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
10+
811
#include <vulkan/vulkan.hpp>
912

13+
// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers-
14+
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
15+
1016
#include <algorithm>
1117
#include <cmath>
1218
#include <iomanip>
@@ -121,6 +127,8 @@ struct vk_pipeline_struct {
121127
bool needed {};
122128
// set to true when the shader has been compiled
123129
bool compiled {};
130+
// number of registers used, extracted from pipeline executable properties
131+
uint32_t register_count {};
124132
};
125133

126134
typedef std::shared_ptr<vk_pipeline_struct> vk_pipeline;
@@ -429,6 +437,8 @@ struct vk_device_struct {
429437

430438
bool coopmat2;
431439

440+
bool pipeline_executable_properties_support {};
441+
432442
size_t idx;
433443

434444
bool mul_mat_l[GGML_TYPE_COUNT];
@@ -1519,6 +1529,20 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
15191529
vk_instance.pfn_vkSetDebugUtilsObjectNameEXT(device->device, &static_cast<VkDebugUtilsObjectNameInfoEXT &>(duoni));
15201530
}
15211531

1532+
if (device->pipeline_executable_properties_support) {
1533+
vk::PipelineExecutableInfoKHR executableInfo;
1534+
executableInfo.pipeline = pipeline->pipeline;
1535+
1536+
auto statistics = device->device.getPipelineExecutableStatisticsKHR(executableInfo);
1537+
for (auto & s : statistics) {
1538+
// "Register Count" is reported by NVIDIA drivers.
1539+
if (strcmp(s.name, "Register Count") == 0) {
1540+
VK_LOG_DEBUG(pipeline->name << " " << s.name << ": " << s.value.u64 << " registers");
1541+
pipeline->register_count = (uint32_t)s.value.u64;
1542+
}
1543+
}
1544+
}
1545+
15221546
{
15231547
std::lock_guard<std::recursive_mutex> guard(device->mutex);
15241548
device->all_pipelines.push_back(pipeline);
@@ -3509,6 +3533,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
35093533
bool amd_shader_core_properties2 = false;
35103534
bool pipeline_robustness = false;
35113535
bool coopmat2_support = false;
3536+
bool pipeline_executable_properties_support = false;
35123537
device->coopmat_support = false;
35133538
device->integer_dot_product = false;
35143539
bool bfloat16_support = false;
@@ -3551,6 +3576,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
35513576
!getenv("GGML_VK_DISABLE_BFLOAT16")) {
35523577
bfloat16_support = true;
35533578
#endif
3579+
} else if (strcmp("VK_KHR_pipeline_executable_properties", properties.extensionName) == 0) {
3580+
pipeline_executable_properties_support = true;
35543581
}
35553582
}
35563583

@@ -3771,8 +3798,18 @@ static vk_device ggml_vk_get_device(size_t idx) {
37713798
device_extensions.push_back("VK_KHR_shader_integer_dot_product");
37723799
}
37733800

3801+
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pep_features {};
3802+
pep_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR;
3803+
if (pipeline_executable_properties_support) {
3804+
last_struct->pNext = (VkBaseOutStructure *)&pep_features;
3805+
last_struct = (VkBaseOutStructure *)&pep_features;
3806+
device_extensions.push_back("VK_KHR_pipeline_executable_properties");
3807+
}
3808+
37743809
vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2);
37753810

3811+
device->pipeline_executable_properties_support = pipeline_executable_properties_support;
3812+
37763813
device->fp16 = device->fp16 && vk12_features.shaderFloat16;
37773814

37783815
#if defined(VK_KHR_shader_bfloat16)
@@ -4288,6 +4325,9 @@ static void ggml_vk_instance_init() {
42884325
}
42894326
VK_LOG_DEBUG("ggml_vk_instance_init()");
42904327

4328+
// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers-
4329+
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
4330+
42914331
uint32_t api_version = vk::enumerateInstanceVersion();
42924332

42934333
if (api_version < VK_API_VERSION_1_2) {
@@ -4355,6 +4395,9 @@ static void ggml_vk_instance_init() {
43554395

43564396
vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr;
43574397

4398+
// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers-
4399+
VULKAN_HPP_DEFAULT_DISPATCHER.init(vk_instance.instance);
4400+
43584401
std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
43594402

43604403
// Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan

0 commit comments

Comments
 (0)