@@ -1370,6 +1370,7 @@ struct vk_instance_t {
13701370 PFN_vkCmdInsertDebugUtilsLabelEXT pfn_vkCmdInsertDebugUtilsLabelEXT = {};
13711371
13721372 std::vector<size_t> device_indices;
1373+ std::vector<bool> device_supports_membudget;
13731374 vk_device devices[GGML_VK_MAX_DEVICES];
13741375};
13751376
@@ -4340,15 +4341,16 @@ static void ggml_vk_instance_init() {
43404341 vk_instance.pfn_vkCmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdBeginDebugUtilsLabelEXT");
43414342 vk_instance.pfn_vkCmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdEndDebugUtilsLabelEXT");
43424343 vk_instance.pfn_vkCmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdInsertDebugUtilsLabelEXT");
4343-
43444344 }
43454345
43464346 vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr;
43474347
4348+ std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
4349+
43484350 // Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan
43494351 char * devices_env = getenv("GGML_VK_VISIBLE_DEVICES");
43504352 if (devices_env != nullptr) {
4351- size_t num_available_devices = vk_instance.instance.enumeratePhysicalDevices() .size();
4353+ size_t num_available_devices = devices .size();
43524354
43534355 std::string devices(devices_env);
43544356 std::replace(devices.begin(), devices.end(), ',', ' ');
@@ -4363,8 +4365,6 @@ static void ggml_vk_instance_init() {
43634365 vk_instance.device_indices.push_back(tmp);
43644366 }
43654367 } else {
4366- std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
4367-
43684368 // If no vulkan devices are found, return early
43694369 if (devices.empty()) {
43704370 GGML_LOG_INFO("ggml_vulkan: No devices found.\n");
@@ -4469,6 +4469,19 @@ static void ggml_vk_instance_init() {
44694469 GGML_LOG_DEBUG("ggml_vulkan: Found %zu Vulkan devices:\n", vk_instance.device_indices.size());
44704470
44714471 for (size_t i = 0; i < vk_instance.device_indices.size(); i++) {
4472+ vk::PhysicalDevice vkdev = devices[vk_instance.device_indices[i]];
4473+ std::vector<vk::ExtensionProperties> extensionprops = vkdev.enumerateDeviceExtensionProperties();
4474+
4475+ bool membudget_supported = false;
4476+ for (const auto & ext : extensionprops) {
4477+ if (strcmp(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, ext.extensionName) == 0) {
4478+ membudget_supported = true;
4479+ break;
4480+ }
4481+ }
4482+
4483+ vk_instance.device_supports_membudget.push_back(membudget_supported);
4484+
44724485 ggml_vk_print_gpu_info(i);
44734486 }
44744487}
@@ -11654,15 +11667,29 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
1165411667
1165511668void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
1165611669 GGML_ASSERT(device < (int) vk_instance.device_indices.size());
11670+ GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
1165711671
1165811672 vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
11673+ vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
11674+ vk::PhysicalDeviceMemoryProperties2 memprops = {};
11675+ bool membudget_supported = vk_instance.device_supports_membudget[device];
11676+
11677+ if (membudget_supported) {
11678+ memprops.pNext = &budgetprops;
11679+ }
11680+ vkdev.getMemoryProperties2(&memprops);
1165911681
11660- vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
11682+ for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
11683+ const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
1166111684
11662- for (const vk::MemoryHeap& heap : memprops.memoryHeaps) {
1166311685 if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
1166411686 *total = heap.size;
11665- *free = heap.size;
11687+
11688+ if (membudget_supported && i < budgetprops.heapUsage.size()) {
11689+ *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
11690+ } else {
11691+ *free = heap.size;
11692+ }
1166611693 break;
1166711694 }
1166811695 }
0 commit comments