@@ -1370,6 +1370,7 @@ struct vk_instance_t {
1370
1370
PFN_vkCmdInsertDebugUtilsLabelEXT pfn_vkCmdInsertDebugUtilsLabelEXT = {};
1371
1371
1372
1372
std::vector<size_t> device_indices;
1373
+ std::vector<bool> device_supports_membudget;
1373
1374
vk_device devices[GGML_VK_MAX_DEVICES];
1374
1375
};
1375
1376
@@ -4340,15 +4341,16 @@ static void ggml_vk_instance_init() {
4340
4341
vk_instance.pfn_vkCmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdBeginDebugUtilsLabelEXT");
4341
4342
vk_instance.pfn_vkCmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdEndDebugUtilsLabelEXT");
4342
4343
vk_instance.pfn_vkCmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdInsertDebugUtilsLabelEXT");
4343
-
4344
4344
}
4345
4345
4346
4346
vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr;
4347
4347
4348
+ std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
4349
+
4348
4350
// Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan
4349
4351
char * devices_env = getenv("GGML_VK_VISIBLE_DEVICES");
4350
4352
if (devices_env != nullptr) {
4351
- size_t num_available_devices = vk_instance.instance.enumeratePhysicalDevices() .size();
4353
+ size_t num_available_devices = devices .size();
4352
4354
4353
4355
std::string devices(devices_env);
4354
4356
std::replace(devices.begin(), devices.end(), ',', ' ');
@@ -4363,8 +4365,6 @@ static void ggml_vk_instance_init() {
4363
4365
vk_instance.device_indices.push_back(tmp);
4364
4366
}
4365
4367
} else {
4366
- std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
4367
-
4368
4368
// If no vulkan devices are found, return early
4369
4369
if (devices.empty()) {
4370
4370
GGML_LOG_INFO("ggml_vulkan: No devices found.\n");
@@ -4469,6 +4469,19 @@ static void ggml_vk_instance_init() {
4469
4469
GGML_LOG_DEBUG("ggml_vulkan: Found %zu Vulkan devices:\n", vk_instance.device_indices.size());
4470
4470
4471
4471
for (size_t i = 0; i < vk_instance.device_indices.size(); i++) {
4472
+ vk::PhysicalDevice vkdev = devices[vk_instance.device_indices[i]];
4473
+ std::vector<vk::ExtensionProperties> extensionprops = vkdev.enumerateDeviceExtensionProperties();
4474
+
4475
+ bool membudget_supported = false;
4476
+ for (const auto & ext : extensionprops) {
4477
+ if (strcmp(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, ext.extensionName) == 0) {
4478
+ membudget_supported = true;
4479
+ break;
4480
+ }
4481
+ }
4482
+
4483
+ vk_instance.device_supports_membudget.push_back(membudget_supported);
4484
+
4472
4485
ggml_vk_print_gpu_info(i);
4473
4486
}
4474
4487
}
@@ -11654,15 +11667,29 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
11654
11667
11655
11668
void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
11656
11669
GGML_ASSERT(device < (int) vk_instance.device_indices.size());
11670
+ GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
11657
11671
11658
11672
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
11673
+ vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
11674
+ vk::PhysicalDeviceMemoryProperties2 memprops = {};
11675
+ bool membudget_supported = vk_instance.device_supports_membudget[device];
11676
+
11677
+ if (membudget_supported) {
11678
+ memprops.pNext = &budgetprops;
11679
+ }
11680
+ vkdev.getMemoryProperties2(&memprops);
11659
11681
11660
- vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
11682
+ for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
11683
+ const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
11661
11684
11662
- for (const vk::MemoryHeap& heap : memprops.memoryHeaps) {
11663
11685
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
11664
11686
*total = heap.size;
11665
- *free = heap.size;
11687
+
11688
+ if (membudget_supported && i < budgetprops.heapUsage.size()) {
11689
+ *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
11690
+ } else {
11691
+ *free = heap.size;
11692
+ }
11666
11693
break;
11667
11694
}
11668
11695
}
0 commit comments