Skip to content

Commit 5b34711

Browse files
committed
don't use shared pointer for prealloc_y_last_pipeline_used
1 parent e83d57f commit 5b34711

File tree

1 file changed

+14
-14
lines changed

1 file changed

+14
-14
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1194,8 +1194,8 @@ struct ggml_backend_vk_context {
11941194
bool almost_ready_fence_pending {};
11951195

11961196
// Cache most recent tensor that was converted into prealloc_y, and what pipeline it used to convert.
1197-
vk_pipeline prealloc_y_last_pipeline_used;
1198-
const ggml_tensor *prealloc_y_last_tensor_used;
1197+
vk_pipeline_struct * prealloc_y_last_pipeline_used {};
1198+
const ggml_tensor * prealloc_y_last_tensor_used {};
11991199

12001200
vk_buffer buffer_pool[MAX_VK_BUFFERS];
12011201

@@ -5655,18 +5655,18 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
56555655
ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0, { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
56565656
}
56575657
if (y_non_contig) {
5658-
if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1 ||
5658+
if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
56595659
ctx->prealloc_y_last_tensor_used != src1) {
56605660
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
5661-
ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1;
5661+
ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
56625662
ctx->prealloc_y_last_tensor_used = src1;
56635663
}
56645664
}
56655665
if (quantize_y) {
5666-
if (ctx->prealloc_y_last_pipeline_used != to_q8_1 ||
5666+
if (ctx->prealloc_y_last_pipeline_used != to_q8_1.get() ||
56675667
ctx->prealloc_y_last_tensor_used != src1) {
56685668
ggml_vk_quantize_q8_1(ctx, subctx, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE }, y_ne * ne12 * ne13);
5669-
ctx->prealloc_y_last_pipeline_used = to_q8_1;
5669+
ctx->prealloc_y_last_pipeline_used = to_q8_1.get();
56705670
ctx->prealloc_y_last_tensor_used = src1;
56715671
}
56725672
}
@@ -5843,10 +5843,10 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
58435843
}
58445844
if (y_non_contig) {
58455845
GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne);
5846-
if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1 ||
5846+
if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
58475847
ctx->prealloc_y_last_tensor_used != src1) {
58485848
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
5849-
ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1;
5849+
ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
58505850
ctx->prealloc_y_last_tensor_used = src1;
58515851
}
58525852
}
@@ -6278,10 +6278,10 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
62786278
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
62796279
}
62806280
if (y_non_contig) {
6281-
if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1 ||
6281+
if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
62826282
ctx->prealloc_y_last_tensor_used != src1) {
62836283
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
6284-
ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1;
6284+
ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
62856285
ctx->prealloc_y_last_tensor_used = src1;
62866286
}
62876287
}
@@ -6471,10 +6471,10 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
64716471
}
64726472
if (y_non_contig) {
64736473
GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne);
6474-
if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1 ||
6474+
if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
64756475
ctx->prealloc_y_last_tensor_used != src1) {
64766476
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
6477-
ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1;
6477+
ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
64786478
ctx->prealloc_y_last_tensor_used = src1;
64796479
}
64806480
}
@@ -10383,7 +10383,7 @@ static void ggml_vk_cleanup(ggml_backend_vk_context * ctx) {
1038310383
ggml_vk_destroy_buffer(ctx->prealloc_x);
1038410384
ggml_vk_destroy_buffer(ctx->prealloc_y);
1038510385
ggml_vk_destroy_buffer(ctx->prealloc_split_k);
10386-
ctx->prealloc_y_last_pipeline_used = {};
10386+
ctx->prealloc_y_last_pipeline_used = nullptr;
1038710387

1038810388
for (auto& buffer : ctx->buffer_pool) {
1038910389
ggml_vk_destroy_buffer(buffer);
@@ -10932,7 +10932,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
1093210932
compute_ctx->s->buffer.writeTimestamp(vk::PipelineStageFlagBits::eAllCommands, ctx->device->query_pool, 0);
1093310933
}
1093410934

10935-
ctx->prealloc_y_last_pipeline_used = {};
10935+
ctx->prealloc_y_last_pipeline_used = nullptr;
1093610936
ctx->prealloc_y_last_tensor_used = nullptr;
1093710937

1093810938
// Submit after enough work has accumulated, to overlap CPU cmdbuffer generation with GPU execution.

0 commit comments

Comments
 (0)