@@ -1079,7 +1079,8 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
10791079 // Fall back to host memory type
10801080 buf = ggml_vk_create_buffer (device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
10811081 } else {
1082- buf = ggml_vk_create_buffer (device, size, vk::MemoryPropertyFlagBits::eDeviceLocal);
1082+ // use rebar if available, otherwise fall back to device-only visible memory
1083+ buf = ggml_vk_create_buffer (device, size, vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eDeviceLocal);
10831084 }
10841085 } catch (const vk::SystemError& e) {
10851086 std::cerr << " ggml_vulkan: Device memory allocation of size " << size << " failed." << std::endl;
@@ -2806,7 +2807,11 @@ static void ggml_vk_buffer_read_async(vk_context subctx, vk_buffer& src, size_t
28062807
28072808static void ggml_vk_buffer_read (vk_buffer& src, size_t offset, void * dst, size_t size) {
28082809 VK_LOG_DEBUG (" ggml_vk_buffer_read(" << src->buffer << " , " << offset << " , " << size << " )" );
2809- if (src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
2810+
2811+ // If the device is not a UMA device, the memory is host-accessible through rebar. While writing
2812+ // through PCIe is sufficiently fast, reading data back over PCIe is slower than going through
2813+ // the HW device-to-host copy path.
2814+ if (src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible && src->device ->uma ) {
28102815 GGML_ASSERT (src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostCoherent);
28112816
28122817 memcpy (dst, (uint8_t *) src->ptr + offset, size);
0 commit comments