@@ -566,6 +566,7 @@ struct vk_device_struct {
566
566
567
567
bool disable_fusion;
568
568
bool disable_host_visible_vidmem;
569
+ bool allow_sysmem_fallback;
569
570
570
571
#ifdef GGML_VULKAN_MEMORY_DEBUG
571
572
std::unique_ptr<vk_memory_logger> memory_logger;
@@ -1808,8 +1809,8 @@ static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_pr
1808
1809
return UINT32_MAX;
1809
1810
}
1810
1811
1811
- static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0) ) {
1812
- VK_LOG_DEBUG("ggml_vk_create_buffer(" << device->name << ", " << size << ", " << to_string(req_flags) << ", " << to_string(fallback_flags ) << ")");
1812
+ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std::initializer_list< vk::MemoryPropertyFlags> & req_flags_list ) {
1813
+ VK_LOG_DEBUG("ggml_vk_create_buffer(" << device->name << ", " << size << ", " << to_string(req_flags_list.begin()[0]) << ", " << to_string(req_flags_list.begin()[req_flags_list.size()-1] ) << ")");
1813
1814
if (size > device->max_memory_allocation_size) {
1814
1815
throw vk::OutOfDeviceMemoryError("Requested buffer size exceeds device memory allocation limit");
1815
1816
}
@@ -1836,42 +1837,27 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
1836
1837
1837
1838
vk::PhysicalDeviceMemoryProperties mem_props = device->physical_device.getMemoryProperties();
1838
1839
1839
- uint32_t memory_type_index = UINT32_MAX;
1840
+ for (auto &req_flags : req_flags_list) {
1841
+ uint32_t memory_type_index = find_properties(&mem_props, &mem_req, req_flags);
1840
1842
1841
- memory_type_index = find_properties(&mem_props, &mem_req, req_flags);
1842
- buf->memory_property_flags = req_flags;
1843
+ if (memory_type_index == UINT32_MAX) {
1844
+ continue;
1845
+ }
1846
+ buf->memory_property_flags = req_flags;
1843
1847
1844
- if (memory_type_index == UINT32_MAX && fallback_flags) {
1845
- memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags);
1846
- buf->memory_property_flags = fallback_flags;
1848
+ try {
1849
+ buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
1850
+ break;
1851
+ } catch (const vk::SystemError& e) {
1852
+ // loop and retry
1853
+ }
1847
1854
}
1848
1855
1849
- if (memory_type_index == UINT32_MAX ) {
1856
+ if (buf->device_memory == VK_NULL_HANDLE ) {
1850
1857
device->device.destroyBuffer(buf->buffer);
1851
1858
throw vk::OutOfDeviceMemoryError("No suitable memory type found");
1852
1859
}
1853
1860
1854
- try {
1855
- buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
1856
- } catch (const vk::SystemError& e) {
1857
- if (buf->memory_property_flags != fallback_flags) {
1858
- // Try again with fallback flags
1859
- memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags);
1860
- buf->memory_property_flags = fallback_flags;
1861
-
1862
- try {
1863
- buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
1864
- }
1865
- catch (const vk::SystemError& e) {
1866
- device->device.destroyBuffer(buf->buffer);
1867
- throw e;
1868
- }
1869
- } else {
1870
- // Out of Host/Device memory, clean up buffer
1871
- device->device.destroyBuffer(buf->buffer);
1872
- throw e;
1873
- }
1874
- }
1875
1861
buf->ptr = nullptr;
1876
1862
1877
1863
if (buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
@@ -1892,7 +1878,7 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
1892
1878
1893
1879
static vk_buffer ggml_vk_create_buffer_check(vk_device& device, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0)) {
1894
1880
try {
1895
- return ggml_vk_create_buffer(device, size, req_flags, fallback_flags);
1881
+ return ggml_vk_create_buffer(device, size, { req_flags, fallback_flags} );
1896
1882
} catch (const vk::SystemError& e) {
1897
1883
std::cerr << "ggml_vulkan: Memory allocation of size " << size << " failed." << std::endl;
1898
1884
std::cerr << "ggml_vulkan: " << e.what() << std::endl;
@@ -1904,15 +1890,29 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
1904
1890
vk_buffer buf;
1905
1891
try {
1906
1892
if (device->prefer_host_memory) {
1907
- buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eDeviceLocal);
1893
+ buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
1894
+ vk::MemoryPropertyFlagBits::eDeviceLocal});
1908
1895
} else if (device->uma) {
1909
1896
// Fall back to host memory type
1910
- buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
1897
+ buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
1898
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
1911
1899
} else if (device->disable_host_visible_vidmem) {
1912
- buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eDeviceLocal);
1900
+ if (device->allow_sysmem_fallback) {
1901
+ buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
1902
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
1903
+ } else {
1904
+ buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal});
1905
+ }
1913
1906
} else {
1914
1907
// use rebar if available, otherwise fallback to device only visible memory
1915
- buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eDeviceLocal);
1908
+ if (device->allow_sysmem_fallback) {
1909
+ buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
1910
+ vk::MemoryPropertyFlagBits::eDeviceLocal,
1911
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
1912
+ } else {
1913
+ buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
1914
+ vk::MemoryPropertyFlagBits::eDeviceLocal});
1915
+ }
1916
1916
}
1917
1917
} catch (const vk::SystemError& e) {
1918
1918
std::cerr << "ggml_vulkan: Device memory allocation of size " << size << " failed." << std::endl;
@@ -3437,6 +3437,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
3437
3437
const char* GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM = getenv("GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM");
3438
3438
device->disable_host_visible_vidmem = GGML_VK_DISABLE_HOST_VISIBLE_VIDMEM != nullptr;
3439
3439
3440
+ const char* GGML_VK_ALLOW_SYSMEM_FALLBACK = getenv("GGML_VK_ALLOW_SYSMEM_FALLBACK");
3441
+ device->allow_sysmem_fallback = GGML_VK_ALLOW_SYSMEM_FALLBACK != nullptr;
3442
+
3440
3443
bool fp16_storage = false;
3441
3444
bool fp16_compute = false;
3442
3445
bool maintenance4_support = false;
@@ -4774,8 +4777,8 @@ static vk_buffer ggml_vk_create_buffer_temp(ggml_backend_vk_context * ctx, size_
4774
4777
static void * ggml_vk_host_malloc(vk_device& device, size_t size) {
4775
4778
VK_LOG_MEMORY("ggml_vk_host_malloc(" << size << ")");
4776
4779
vk_buffer buf = ggml_vk_create_buffer(device, size,
4777
- vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
4778
- vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
4780
+ { vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
4781
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent} );
4779
4782
4780
4783
if(!(buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible)) {
4781
4784
fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory\n",
@@ -9182,7 +9185,7 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
9182
9185
if (ctx->prealloc_split_k != nullptr) {
9183
9186
ggml_vk_destroy_buffer(ctx->prealloc_split_k);
9184
9187
}
9185
- ctx->prealloc_split_k = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne * split_k, vk::MemoryPropertyFlagBits::eDeviceLocal);
9188
+ ctx->prealloc_split_k = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne * split_k, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9186
9189
}
9187
9190
}
9188
9191
@@ -9192,9 +9195,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
9192
9195
9193
9196
ggml_pipeline_allocate_descriptor_sets(ctx);
9194
9197
9195
- vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
9196
- vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
9197
- vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
9198
+ vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9199
+ vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9200
+ vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9198
9201
9199
9202
X_TYPE* x = (X_TYPE *) malloc(sizeof(X_TYPE) * x_ne);
9200
9203
Y_TYPE* y = (Y_TYPE *) malloc(sizeof(Y_TYPE) * y_ne);
@@ -9420,8 +9423,8 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
9420
9423
const size_t qx_sz = ne * ggml_type_size(quant)/ggml_blck_size(quant);
9421
9424
float * x = (float *) malloc(x_sz);
9422
9425
void * qx = malloc(qx_sz);
9423
- vk_buffer qx_buf = ggml_vk_create_buffer_check(ctx->device, qx_sz, vk::MemoryPropertyFlagBits::eDeviceLocal);
9424
- vk_buffer x_buf = ggml_vk_create_buffer_check(ctx->device, x_sz_f16, vk::MemoryPropertyFlagBits::eDeviceLocal);
9426
+ vk_buffer qx_buf = ggml_vk_create_buffer_check(ctx->device, qx_sz, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9427
+ vk_buffer x_buf = ggml_vk_create_buffer_check(ctx->device, x_sz_f16, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9425
9428
float * x_ref = (float *) malloc(x_sz);
9426
9429
ggml_fp16_t * x_chk = (ggml_fp16_t *) malloc(x_sz_f16);
9427
9430
@@ -9526,8 +9529,8 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
9526
9529
// float * x = (float *) malloc(x_sz);
9527
9530
// block_q8_1 * qx = (block_q8_1 *)malloc(qx_sz);
9528
9531
// block_q8_1 * qx_res = (block_q8_1 *)malloc(qx_sz);
9529
- // vk_buffer x_buf = ggml_vk_create_buffer_check(ctx->device, x_sz, vk::MemoryPropertyFlagBits::eDeviceLocal);
9530
- // vk_buffer qx_buf = ggml_vk_create_buffer_check(ctx->device, qx_sz, vk::MemoryPropertyFlagBits::eDeviceLocal);
9532
+ // vk_buffer x_buf = ggml_vk_create_buffer_check(ctx->device, x_sz, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9533
+ // vk_buffer qx_buf = ggml_vk_create_buffer_check(ctx->device, qx_sz, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9531
9534
//
9532
9535
// for (size_t i = 0; i < ne; i++) {
9533
9536
// x[i] = rand() / (float)RAND_MAX;
@@ -9674,10 +9677,10 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
9674
9677
float * x = (float *) malloc(x_sz);
9675
9678
float * y = (float *) malloc(y_sz);
9676
9679
void * qx = malloc(qx_sz);
9677
- vk_buffer qx_buf = ggml_vk_create_buffer_check(ctx->device, qx_sz, vk::MemoryPropertyFlagBits::eDeviceLocal);
9678
- vk_buffer y_buf = ggml_vk_create_buffer_check(ctx->device, y_sz, vk::MemoryPropertyFlagBits::eDeviceLocal);
9679
- vk_buffer qy_buf = ggml_vk_create_buffer_check(ctx->device, qy_sz, vk::MemoryPropertyFlagBits::eDeviceLocal);
9680
- vk_buffer d_buf = ggml_vk_create_buffer_check(ctx->device, d_sz, vk::MemoryPropertyFlagBits::eDeviceLocal);
9680
+ vk_buffer qx_buf = ggml_vk_create_buffer_check(ctx->device, qx_sz, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9681
+ vk_buffer y_buf = ggml_vk_create_buffer_check(ctx->device, y_sz, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9682
+ vk_buffer qy_buf = ggml_vk_create_buffer_check(ctx->device, qy_sz, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9683
+ vk_buffer d_buf = ggml_vk_create_buffer_check(ctx->device, d_sz, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9681
9684
float * d = (float *) malloc(d_sz);
9682
9685
float * d_chk = (float *) malloc(d_sz);
9683
9686
@@ -9704,7 +9707,7 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
9704
9707
if (ctx->prealloc_split_k != nullptr) {
9705
9708
ggml_vk_destroy_buffer(ctx->prealloc_split_k);
9706
9709
}
9707
- ctx->prealloc_split_k = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne * split_k, vk::MemoryPropertyFlagBits::eDeviceLocal);
9710
+ ctx->prealloc_split_k = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne * split_k, { vk::MemoryPropertyFlagBits::eDeviceLocal} );
9708
9711
}
9709
9712
}
9710
9713
if (mmq) {
0 commit comments