|
5 | 5 | #include "ggml-cpu.h"
|
6 | 6 | #endif
|
7 | 7 |
|
| 8 | +// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- |
| 9 | +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 |
| 10 | + |
8 | 11 | #include <vulkan/vulkan.hpp>
|
9 | 12 |
|
| 13 | +// See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- |
| 14 | +VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE |
| 15 | + |
10 | 16 | #include <algorithm>
|
11 | 17 | #include <cmath>
|
12 | 18 | #include <iomanip>
|
@@ -121,6 +127,8 @@ struct vk_pipeline_struct {
|
121 | 127 | bool needed {};
|
122 | 128 | // set to true when the shader has been compiled
|
123 | 129 | bool compiled {};
|
| 130 | + // number of registers used, extracted from the pipeline executable statistics (VK_KHR_pipeline_executable_properties) |
| 131 | + uint32_t register_count {}; |
124 | 132 | };
|
125 | 133 |
|
126 | 134 | typedef std::shared_ptr<vk_pipeline_struct> vk_pipeline;
|
@@ -429,6 +437,8 @@ struct vk_device_struct {
|
429 | 437 |
|
430 | 438 | bool coopmat2;
|
431 | 439 |
|
| 440 | + bool pipeline_executable_properties_support {}; |
| 441 | + |
432 | 442 | size_t idx;
|
433 | 443 |
|
434 | 444 | bool mul_mat_l[GGML_TYPE_COUNT];
|
@@ -1603,6 +1613,20 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
|
1603 | 1613 | vk_instance.pfn_vkSetDebugUtilsObjectNameEXT(device->device, &static_cast<VkDebugUtilsObjectNameInfoEXT &>(duoni));
|
1604 | 1614 | }
|
1605 | 1615 |
|
| 1616 | + if (device->pipeline_executable_properties_support) { |
| 1617 | + vk::PipelineExecutableInfoKHR executableInfo; |
| 1618 | + executableInfo.pipeline = pipeline->pipeline; |
| 1619 | + |
| 1620 | + auto statistics = device->device.getPipelineExecutableStatisticsKHR(executableInfo); |
| 1621 | + for (auto & s : statistics) { |
| 1622 | + // "Register Count" is the statistic name reported by NVIDIA drivers; if no statistic matches, register_count keeps its default of 0. |
| 1623 | + if (strcmp(s.name, "Register Count") == 0) { |
| 1624 | + VK_LOG_DEBUG(pipeline->name << " " << s.name << ": " << s.value.u64 << " registers"); |
| 1625 | + pipeline->register_count = (uint32_t)s.value.u64; |
| 1626 | + } |
| 1627 | + } |
| 1628 | + } |
| 1629 | + |
1606 | 1630 | {
|
1607 | 1631 | std::lock_guard<std::recursive_mutex> guard(device->mutex);
|
1608 | 1632 | device->all_pipelines.push_back(pipeline);
|
@@ -3610,6 +3634,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
3610 | 3634 | bool amd_shader_core_properties2 = false;
|
3611 | 3635 | bool pipeline_robustness = false;
|
3612 | 3636 | bool coopmat2_support = false;
|
| 3637 | + bool pipeline_executable_properties_support = false; |
3613 | 3638 | device->coopmat_support = false;
|
3614 | 3639 | device->integer_dot_product = false;
|
3615 | 3640 | bool bfloat16_support = false;
|
@@ -3652,6 +3677,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
3652 | 3677 | !getenv("GGML_VK_DISABLE_BFLOAT16")) {
|
3653 | 3678 | bfloat16_support = true;
|
3654 | 3679 | #endif
|
| 3680 | + } else if (strcmp("VK_KHR_pipeline_executable_properties", properties.extensionName) == 0) { |
| 3681 | + pipeline_executable_properties_support = true; |
3655 | 3682 | }
|
3656 | 3683 | }
|
3657 | 3684 |
|
@@ -3878,8 +3905,18 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
3878 | 3905 | device_extensions.push_back("VK_KHR_shader_integer_dot_product");
|
3879 | 3906 | }
|
3880 | 3907 |
|
| 3908 | + VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pep_features {}; |
| 3909 | + pep_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR; |
| 3910 | + if (pipeline_executable_properties_support) { |
| 3911 | + last_struct->pNext = (VkBaseOutStructure *)&pep_features; |
| 3912 | + last_struct = (VkBaseOutStructure *)&pep_features; |
| 3913 | + device_extensions.push_back("VK_KHR_pipeline_executable_properties"); |
| 3914 | + } |
| 3915 | + |
3881 | 3916 | vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2);
|
3882 | 3917 |
|
| 3918 | + device->pipeline_executable_properties_support = pipeline_executable_properties_support; |
| 3919 | + |
3883 | 3920 | device->fp16 = device->fp16 && vk12_features.shaderFloat16;
|
3884 | 3921 |
|
3885 | 3922 | #if defined(VK_KHR_shader_bfloat16)
|
@@ -4395,6 +4432,9 @@ static void ggml_vk_instance_init() {
|
4395 | 4432 | }
|
4396 | 4433 | VK_LOG_DEBUG("ggml_vk_instance_init()");
|
4397 | 4434 |
|
| 4435 | + // See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- |
| 4436 | + VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); |
| 4437 | + |
4398 | 4438 | uint32_t api_version = vk::enumerateInstanceVersion();
|
4399 | 4439 |
|
4400 | 4440 | if (api_version < VK_API_VERSION_1_2) {
|
@@ -4462,6 +4502,9 @@ static void ggml_vk_instance_init() {
|
4462 | 4502 |
|
4463 | 4503 | vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr;
|
4464 | 4504 |
|
| 4505 | + // See https://github.com/KhronosGroup/Vulkan-Hpp?tab=readme-ov-file#extensions--per-device-function-pointers- |
| 4506 | + VULKAN_HPP_DEFAULT_DISPATCHER.init(vk_instance.instance); |
| 4507 | + |
4465 | 4508 | std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
|
4466 | 4509 |
|
4467 | 4510 | // Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan
|
|
0 commit comments