
Commit 696fccf (parent ef47691)

vulkan: Skip syncing for prealloc_y when it is reused (ggml-org#15544)
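Before this change, each of the four mat-mul paths below recorded an unconditional barrier on the preallocated Y staging buffer (prealloc_y) whenever src1 would be converted or quantized, even when the buffer already held the result for the same src1 and pipeline from the previous dispatch. The change moves the prealloc_y_need_sync check inside the reuse check on prealloc_y_last_pipeline_used / prealloc_y_last_tensor_used, so a barrier is recorded only on the path that actually rewrites prealloc_y; a cache hit skips both the conversion and the barrier.

The following is a minimal, self-contained sketch of the pattern, assuming simplified stand-ins (Context, sync_buffers, convert_to_contiguous, dispatch_mul_mat) rather than the real ggml-vulkan types and functions:

// Minimal sketch of the caching pattern in this commit, not the real
// ggml-vulkan API: Context, sync_buffers(), convert_to_contiguous() and
// dispatch_mul_mat() are simplified stand-ins for ggml_backend_vk_context,
// ggml_vk_sync_buffers(), ggml_vk_cpy_to_contiguous() and the mat-mul dispatch.
#include <cstdio>

struct Pipeline { int id; };
struct Tensor   { int id; };

struct Context {
    // Mirrors the prealloc_y_* bookkeeping in the diff.
    const Pipeline * prealloc_y_last_pipeline_used = nullptr;
    const Tensor *   prealloc_y_last_tensor_used   = nullptr;
    bool             prealloc_y_need_sync          = false;
};

// Records a barrier and clears the pending-sync flag.
static void sync_buffers(Context & ctx) {
    std::printf("  barrier\n");
    ctx.prealloc_y_need_sync = false;
}

// Writes the scratch buffer (stand-in for the fp16 copy / q8_1 quantize).
static void convert_to_contiguous(Context & ctx, const Tensor & src) {
    std::printf("  convert tensor %d into prealloc_y\n", src.id);
}

// The mat-mul reads prealloc_y, so the *next* write to it needs a barrier.
static void dispatch_mul_mat(Context & ctx) {
    std::printf("  mul_mat reads prealloc_y\n");
    ctx.prealloc_y_need_sync = true;
}

// After the change: the barrier is emitted only on the path that actually
// rewrites prealloc_y; a cache hit skips both the conversion and the barrier.
static void prepare_y(Context & ctx, const Pipeline & pipe, const Tensor & src1) {
    if (ctx.prealloc_y_last_pipeline_used != &pipe ||
        ctx.prealloc_y_last_tensor_used   != &src1) {
        if (ctx.prealloc_y_need_sync) {
            sync_buffers(ctx);
        }
        convert_to_contiguous(ctx, src1);
        ctx.prealloc_y_last_pipeline_used = &pipe;
        ctx.prealloc_y_last_tensor_used   = &src1;
    } else {
        std::printf("  reuse cached prealloc_y (no conversion, no barrier)\n");
    }
}

int main() {
    Context  ctx;
    Pipeline to_fp16{0};
    Tensor   src1{42};

    std::printf("first mat-mul:\n");
    prepare_y(ctx, to_fp16, src1);   // converts; nothing pending, no barrier
    dispatch_mul_mat(ctx);

    std::printf("second mat-mul, same src1:\n");
    prepare_y(ctx, to_fp16, src1);   // cache hit: before this commit, a sync was still recorded here
    dispatch_mul_mat(ctx);
}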

1 file changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 15 additions & 20 deletions
@@ -5800,11 +5800,6 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
             ggml_vk_sync_buffers(ctx, subctx);
         }
     }
-    if (y_non_contig || quantize_y) {
-        if (ctx->prealloc_y_need_sync) {
-            ggml_vk_sync_buffers(ctx, subctx);
-        }
-    }
 
     if (x_non_contig) {
         ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_0, src0, { d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { d_X, 0, VK_WHOLE_SIZE });
@@ -5816,6 +5811,9 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
     if (y_non_contig) {
         if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
             ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
@@ -5824,6 +5822,9 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
     if (quantize_y) {
         if (ctx->prealloc_y_last_pipeline_used != to_q8_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_quantize_q8_1(ctx, subctx, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE }, y_ne * ne12 * ne13);
             ctx->prealloc_y_last_pipeline_used = to_q8_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
@@ -6008,11 +6009,6 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
             ggml_vk_sync_buffers(ctx, subctx);
         }
     }
-    if (y_non_contig) {
-        if (ctx->prealloc_y_need_sync) {
-            ggml_vk_sync_buffers(ctx, subctx);
-        }
-    }
 
     if (x_non_contig) {
         GGML_ASSERT(x_sz == ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, ctx->device->properties.limits.minStorageBufferOffsetAlignment));
@@ -6022,6 +6018,9 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
         GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne);
         if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
             ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
@@ -6454,11 +6453,6 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
             ggml_vk_sync_buffers(ctx, subctx);
         }
     }
-    if (y_non_contig) {
-        if (ctx->prealloc_y_need_sync) {
-            ggml_vk_sync_buffers(ctx, subctx);
-        }
-    }
 
     if (x_non_contig) {
         ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_0, src0, { d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { d_X, 0, VK_WHOLE_SIZE });
@@ -6471,6 +6465,9 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
     if (y_non_contig) {
         if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
             ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
@@ -6668,11 +6665,6 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
             ggml_vk_sync_buffers(ctx, subctx);
         }
     }
-    if (y_non_contig) {
-        if (ctx->prealloc_y_need_sync) {
-            ggml_vk_sync_buffers(ctx, subctx);
-        }
-    }
 
     if (x_non_contig) {
         GGML_ASSERT(x_sz == ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, ctx->device->properties.limits.minStorageBufferOffsetAlignment));
@@ -6682,6 +6674,9 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
         GGML_ASSERT(y_sz == ggml_type_size(src1->type) * y_ne);
        if (ctx->prealloc_y_last_pipeline_used != to_fp16_vk_1.get() ||
             ctx->prealloc_y_last_tensor_used != src1) {
+            if (ctx->prealloc_y_need_sync) {
+                ggml_vk_sync_buffers(ctx, subctx);
+            }
             ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
             ctx->prealloc_y_last_pipeline_used = to_fp16_vk_1.get();
             ctx->prealloc_y_last_tensor_used = src1;
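Net effect: when consecutive dispatches feed the same src1 through the same conversion pipeline, the reuse path now records no barrier at all, while the rewrite path still waits for any outstanding reads of prealloc_y before overwriting it. The same three-line guard is applied uniformly across all four functions (ggml_vk_mul_mat_q_f16, ggml_vk_mul_mat_vec_q_f16, ggml_vk_mul_mat_id_q_f16, ggml_vk_mul_mat_vec_id_q_f16).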
