@@ -1194,8 +1194,8 @@ struct ggml_backend_vk_context {
     bool almost_ready_fence_pending {};

     // Cache most recent tensor that was converted into prealloc_y, and what pipeline it used to convert.
-    vk_pipeline prealloc_y_last_pipeline_used;
-    const ggml_tensor *prealloc_y_last_tensor_used;
+    vk_pipeline_struct * prealloc_y_last_pipeline_used {};
+    const ggml_tensor * prealloc_y_last_tensor_used {};

     vk_buffer buffer_pool[MAX_VK_BUFFERS];
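The member change above swaps the owning `vk_pipeline` handle for a plain `vk_pipeline_struct *`: as the call sites below show, the cache is only ever compared against `.get()` as an identity tag for "which pipeline last filled `prealloc_y`, and for which tensor", so a non-owning raw pointer suffices and the cache no longer extends any pipeline's lifetime. A minimal sketch of the pattern, assuming `vk_pipeline` is a `std::shared_ptr<vk_pipeline_struct>`; all other names are illustrative, not the actual ggml-vulkan API:

```cpp
// Sketch of the prealloc_y caching pattern, with hypothetical names.
#include <memory>

struct pipeline_struct { /* compiled shader state */ };
using pipeline_ref = std::shared_ptr<pipeline_struct>;

struct backend_ctx {
    // Non-owning identity tag: which pipeline last filled the scratch buffer,
    // and for which tensor. A raw pointer, so the cache never keeps a pipeline alive.
    pipeline_struct * last_pipeline_used {};
    const void      * last_tensor_used  {};
};

// Re-run the conversion only when the (pipeline, tensor) pair has changed.
static void convert_if_needed(backend_ctx & ctx, const pipeline_ref & pipe, const void * tensor) {
    if (ctx.last_pipeline_used != pipe.get() || ctx.last_tensor_used != tensor) {
        // ... dispatch the conversion shader here ...
        ctx.last_pipeline_used = pipe.get();
        ctx.last_tensor_used   = tensor;
    }
}
```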
@@ -5655,18 +5655,18 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
         ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0, { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
     }
     if (y_non_contig) {
-        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1 ||
+        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
-            ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1;
+            ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
         }
     }
     if (quantize_y) {
-        if (ctx->prealloc_y_last_pipeline_used != to_q8_1 ||
+        if (ctx->prealloc_y_last_pipeline_used != to_q8_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
             ggml_vk_quantize_q8_1(ctx, subctx, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE }, y_ne * ne12 * ne13);
-            ctx->prealloc_y_last_pipeline_used = to_q8_1;
+            ctx->prealloc_y_last_pipeline_used = to_q8_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
         }
     }
@@ -5843,10 +5843,10 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
     }
     if (y_non_contig) {
         GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne);
-        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1 ||
+        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
-            ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1;
+            ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
         }
     }
@@ -6278,10 +6278,10 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
             { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
     }
     if (y_non_contig) {
-        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1 ||
+        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
-            ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1;
+            ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
         }
     }
@@ -6471,10 +6471,10 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
     }
     if (y_non_contig) {
         GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne);
-        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1 ||
+        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
-            ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1;
+            ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
         }
     }
@@ -10383,7 +10383,7 @@ static void ggml_vk_cleanup(ggml_backend_vk_context * ctx) {
     ggml_vk_destroy_buffer(ctx->prealloc_x);
     ggml_vk_destroy_buffer(ctx->prealloc_y);
     ggml_vk_destroy_buffer(ctx->prealloc_split_k);
-    ctx->prealloc_y_last_pipeline_used = {};
+    ctx->prealloc_y_last_pipeline_used = nullptr;

     for (auto& buffer : ctx->buffer_pool) {
         ggml_vk_destroy_buffer(buffer);
@@ -10932,7 +10932,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
         compute_ctx->s->buffer.writeTimestamp(vk::PipelineStageFlagBits::eAllCommands, ctx->device->query_pool, 0);
     }

-    ctx->prealloc_y_last_pipeline_used = {};
+    ctx->prealloc_y_last_pipeline_used = nullptr;
     ctx->prealloc_y_last_tensor_used = nullptr;

     // Submit after enough work has accumulated, to overlap CPU cmdbuffer generation with GPU execution.
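The last two hunks adjust the cache invalidation for the new type: instead of clearing a shared pointer with `{}`, the raw pointer is set to `nullptr` when `prealloc_y` is destroyed in `ggml_vk_cleanup` and again in `ggml_backend_vk_graph_compute`, so a pointer cached from an earlier graph cannot spuriously match and skip a needed conversion.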