@@ -96,8 +96,6 @@ static bool is_pow2(uint32_t x) { return x > 1 && (x & (x-1)) == 0; }
9696
9797#define GGML_VK_MAX_NODES 8192
9898
99- #define MAX_VK_BUFFERS 256
100- 
10199#define VK_CHECK(err, msg)                                          \
102100    do {                                                            \
103101        vk::Result err_ = (err);                                    \
@@ -1311,7 +1309,6 @@ struct ggml_vk_garbage_collector {
13111309    std::vector<vk_semaphore> tl_semaphores;
13121310    std::vector<vk_semaphore> semaphores;
13131311    std::vector<vk::Event> events;
1314-     std::vector<vk_buffer> temp_buffers;
13151312    std::vector<vk_context> contexts;
13161313};
13171314
@@ -1482,8 +1479,6 @@ struct ggml_backend_vk_context {
14821479    // and set to true after the buffer contents are consumed.
14831480    bool prealloc_x_need_sync, prealloc_y_need_sync, prealloc_split_k_need_sync;
14841481
1485-     vk_buffer buffer_pool[MAX_VK_BUFFERS];
1486- 
14871482    vk_context_ref compute_ctx;
14881483    vk_context_ref transfer_ctx;
14891484
@@ -5149,71 +5144,6 @@ static vk_pipeline ggml_vk_get_dequantize_mul_mat_vec_id(ggml_backend_vk_context
51495144    return ctx->device->pipeline_dequant_mul_mat_vec_id_f32[a_type];
51505145}
51515146
5152- static vk_buffer ggml_vk_pool_malloc(ggml_backend_vk_context * ctx, size_t size) { // Returns a buffer for `size`: the tightest-fitting buffer in ctx->buffer_pool if one is large enough, otherwise a newly created one.
5153-     VK_LOG_DEBUG("ggml_vk_pool_malloc(" << size << ")");
5154-     VK_LOG_MEMORY("ggml_vk_pool_malloc");
5155- 
5156-     int best_i = -1; // pool index of the best fit; stays -1 if no pooled buffer is large enough
5157-     size_t best_size = std::numeric_limits<size_t>::max(); // size of the smallest pooled buffer seen so far that can hold `size`
5158-     int worst_i = -1; // pool index of the largest pooled buffer; stays -1 if the pool is empty
5159-     size_t worst_size = 0; // size of the largest pooled buffer seen so far
5160-     for (int i = 0; i < MAX_VK_BUFFERS; ++i) {
5161-         vk_buffer &b = ctx->buffer_pool[i];
5162-         if (b != nullptr && b->size >= size && b->size < best_size) { // occupied slot that fits and is tighter than the current best
5163-             best_i = i;
5164-             best_size = b->size;
5165-         }
5166-         if (b != nullptr && b->size > worst_size) { // occupied slot larger than any seen so far
5167-             worst_i = i;
5168-             worst_size = b->size;
5169-         }
5170-     }
5171-     if(best_i != -1) {
5172-         // A pooled buffer fits: remove the tightest-fitting one from the pool and return it
5173-         vk_buffer b = ctx->buffer_pool[best_i];
5174-         ctx->buffer_pool[best_i].reset(); // reset the slot; the local copy `b` is what gets returned
5175-         return b;
5176-     }
5177-     if(worst_i != -1) {
5178-         // No pooled buffer fits: destroy the largest pooled buffer before creating a new one (nothing is resized in place). NOTE(review): presumably ggml_vk_destroy_buffer also clears the slot - it is not shown here, confirm.
5179-         vk_buffer& b = ctx->buffer_pool[worst_i];
5180-         ggml_vk_destroy_buffer(b);
5181-     }
5182- 
5183-     return ggml_vk_create_buffer_device(ctx->device, size); // nothing reusable: create a new buffer for the requested size
5184- }
5185- 
5186- static void ggml_vk_pool_free(ggml_backend_vk_context * ctx, vk_buffer& buffer) { // Hands `buffer` back to ctx->buffer_pool: stores it in the first empty slot, or destroys it if every slot is occupied.
5187-     VK_LOG_DEBUG("ggml_vk_pool_free(" << buffer->size << ")"); // NOTE(review): buffer->size is read with no null check; presumably callers never pass a null buffer - confirm
5188-     for (int i = 0; i < MAX_VK_BUFFERS; ++i) {
5189-         vk_buffer& b = ctx->buffer_pool[i];
5190-         if (b == nullptr) { // first empty slot
5191-             b = buffer; // the slot now refers to the returned buffer
5192-             return;
5193-         }
5194-     }
5195-     std::cerr << "ggml_vulkan: WARNING: vk buffer pool full, increase MAX_VK_BUFFERS" << std::endl; // reached only when all MAX_VK_BUFFERS slots are occupied
5196-     ggml_vk_destroy_buffer(buffer); // the pool cannot keep it, so the buffer is destroyed instead
5197- }
5198- 
5199- // Returns a temporary buffer whose size is at least `size`. The buffer is tracked in ctx->gc.temp_buffers and the same buffer can be returned again by later calls, so callers may only use it temporarily.
5200- static vk_buffer ggml_vk_create_buffer_temp(ggml_backend_vk_context * ctx, size_t size) {
5201-     // Reuse the first already-tracked temp buffer that is large enough
5202-     for (auto& buffer : ctx->gc.temp_buffers) {
5203-         if (buffer->size >= size) {
5204-             return buffer;
5205-         }
5206-     }
5207- 
5208-     VK_LOG_MEMORY("ggml_vk_create_buffer_temp(" << size << ")");
5209- 
5210-     // Otherwise obtain a buffer via ggml_vk_pool_malloc and track it in ctx->gc.temp_buffers
5211-     vk_buffer buf = ggml_vk_pool_malloc(ctx, size);
5212-     ctx->gc.temp_buffers.push_back(buf);
5213- 
5214-     return buf;
5215- }
5216- 
52175147static void * ggml_vk_host_malloc(vk_device& device, size_t size) {
52185148    VK_LOG_MEMORY("ggml_vk_host_malloc(" << size << ")");
52195149    vk_buffer buf = ggml_vk_create_buffer(device, size,
@@ -11794,10 +11724,6 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
1179411724// Clean up after graph processing is done
1179511725static void ggml_vk_graph_cleanup(ggml_backend_vk_context * ctx) {
1179611726    VK_LOG_DEBUG("ggml_vk_graph_cleanup()");
11797-     for (auto& buffer : ctx->gc.temp_buffers) {
11798-         ggml_vk_pool_free(ctx, buffer);
11799-     }
11800-     ctx->gc.temp_buffers.clear();
1180111727    ctx->prealloc_y_last_pipeline_used = {};
1180211728
1180311729    ctx->unsynced_nodes_written.clear();
@@ -11840,10 +11766,6 @@ static void ggml_vk_cleanup(ggml_backend_vk_context * ctx) {
1184011766    ggml_vk_destroy_buffer(ctx->prealloc_split_k);
1184111767    ctx->prealloc_y_last_pipeline_used = nullptr;
1184211768
11843-     for (auto& buffer : ctx->buffer_pool) {
11844-         ggml_vk_destroy_buffer(buffer);
11845-     }
11846- 
1184711769    ctx->prealloc_size_x = 0;
1184811770    ctx->prealloc_size_y = 0;
1184911771    ctx->prealloc_size_split_k = 0;
0 commit comments