|
5 | 5 | #include "ggml-cpu.h" |
6 | 6 | #endif |
7 | 7 |
|
| 8 | +// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- |
| 9 | +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 |
| 10 | + |
8 | 11 | #include <vulkan/vulkan.hpp> |
9 | 12 |
|
| 13 | +// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- |
| 14 | +VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE |
| 15 | + |
10 | 16 | #include <algorithm> |
11 | 17 | #include <cmath> |
12 | 18 | #include <iomanip> |
@@ -121,6 +127,8 @@ struct vk_pipeline_struct { |
121 | 127 | bool needed {}; |
122 | 128 | // set to true when the shader has been compiled |
123 | 129 | bool compiled {}; |
| 130 | + // number of registers used, extracted from pipeline executable properties |
| 131 | + uint32_t register_count {}; |
124 | 132 | }; |
125 | 133 |
|
126 | 134 | typedef std::shared_ptr<vk_pipeline_struct> vk_pipeline; |
@@ -429,6 +437,8 @@ struct vk_device_struct { |
429 | 437 |
|
430 | 438 | bool coopmat2; |
431 | 439 |
|
| 440 | + bool pipeline_executable_properties_support {}; |
| 441 | + |
432 | 442 | size_t idx; |
433 | 443 |
|
434 | 444 | bool mul_mat_l[GGML_TYPE_COUNT]; |
@@ -1519,6 +1529,20 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin |
1519 | 1529 | vk_instance.pfn_vkSetDebugUtilsObjectNameEXT(device->device, &static_cast<VkDebugUtilsObjectNameInfoEXT &>(duoni)); |
1520 | 1530 | } |
1521 | 1531 |
|
| 1532 | + if (device->pipeline_executable_properties_support) { |
| 1533 | + vk::PipelineExecutableInfoKHR executableInfo; |
| 1534 | + executableInfo.pipeline = pipeline->pipeline; |
| 1535 | + |
| 1536 | + auto statistics = device->device.getPipelineExecutableStatisticsKHR(executableInfo); |
| 1537 | + for (auto & s : statistics) { |
| 1538 | + // "Register Count" is reported by NVIDIA drivers. |
| 1539 | + if (strcmp(s.name, "Register Count") == 0) { |
| 1540 | + VK_LOG_DEBUG(pipeline->name << " " << s.name << ": " << s.value.u64 << " registers"); |
| 1541 | + pipeline->register_count = (uint32_t)s.value.u64; |
| 1542 | + } |
| 1543 | + } |
| 1544 | + } |
| 1545 | + |
1522 | 1546 | { |
1523 | 1547 | std::lock_guard<std::recursive_mutex> guard(device->mutex); |
1524 | 1548 | device->all_pipelines.push_back(pipeline); |
@@ -3509,6 +3533,7 @@ static vk_device ggml_vk_get_device(size_t idx) { |
3509 | 3533 | bool amd_shader_core_properties2 = false; |
3510 | 3534 | bool pipeline_robustness = false; |
3511 | 3535 | bool coopmat2_support = false; |
| 3536 | + bool pipeline_executable_properties_support = false; |
3512 | 3537 | device->coopmat_support = false; |
3513 | 3538 | device->integer_dot_product = false; |
3514 | 3539 | bool bfloat16_support = false; |
@@ -3551,6 +3576,8 @@ static vk_device ggml_vk_get_device(size_t idx) { |
3551 | 3576 | !getenv("GGML_VK_DISABLE_BFLOAT16")) { |
3552 | 3577 | bfloat16_support = true; |
3553 | 3578 | #endif |
| 3579 | + } else if (strcmp("VK_KHR_pipeline_executable_properties", properties.extensionName) == 0) { |
| 3580 | + pipeline_executable_properties_support = true; |
3554 | 3581 | } |
3555 | 3582 | } |
3556 | 3583 |
|
@@ -3771,8 +3798,18 @@ static vk_device ggml_vk_get_device(size_t idx) { |
3771 | 3798 | device_extensions.push_back("VK_KHR_shader_integer_dot_product"); |
3772 | 3799 | } |
3773 | 3800 |
|
| 3801 | + VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pep_features {}; |
| 3802 | + pep_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR; |
| 3803 | + if (pipeline_executable_properties_support) { |
| 3804 | + last_struct->pNext = (VkBaseOutStructure *)&pep_features; |
| 3805 | + last_struct = (VkBaseOutStructure *)&pep_features; |
| 3806 | + device_extensions.push_back("VK_KHR_pipeline_executable_properties"); |
| 3807 | + } |
| 3808 | + |
3774 | 3809 | vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2); |
3775 | 3810 |
|
| 3811 | + device->pipeline_executable_properties_support = pipeline_executable_properties_support; |
| 3812 | + |
3776 | 3813 | device->fp16 = device->fp16 && vk12_features.shaderFloat16; |
3777 | 3814 |
|
3778 | 3815 | #if defined(VK_KHR_shader_bfloat16) |
@@ -4288,6 +4325,9 @@ static void ggml_vk_instance_init() { |
4288 | 4325 | } |
4289 | 4326 | VK_LOG_DEBUG("ggml_vk_instance_init()"); |
4290 | 4327 |
|
| 4328 | + // See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- |
| 4329 | + VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); |
| 4330 | + |
4291 | 4331 | uint32_t api_version = vk::enumerateInstanceVersion(); |
4292 | 4332 |
|
4293 | 4333 | if (api_version < VK_API_VERSION_1_2) { |
@@ -4355,6 +4395,9 @@ static void ggml_vk_instance_init() { |
4355 | 4395 |
|
4356 | 4396 | vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr; |
4357 | 4397 |
|
| 4398 | + // See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- |
| 4399 | + VULKAN_HPP_DEFAULT_DISPATCHER.init(vk_instance.instance); |
| 4400 | + |
4358 | 4401 | std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices(); |
4359 | 4402 |
|
4360 | 4403 | // Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan |
|
0 commit comments