Skip to content

Commit ab3ab2a

Browse files
committed
vulkan: adapt mmv quantize_y path to conditional sync logic
1 parent b637c76 commit ab3ab2a

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6158,9 +6158,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
6158 6158
if (ctx->prealloc_x_need_sync) {
6159 6159
ggml_vk_sync_buffers(ctx, subctx);
6160 6160
}
6161-
}
6162 6161

6163-
if (x_non_contig) {
6164 6162
GGML_ASSERT(x_sz == ggml_vk_align_size(ggml_type_size(src0->type) * x_ne, ctx->device->properties.limits.minStorageBufferOffsetAlignment));
6165 6163
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_0, src0, { d_Qx, qx_buf_offset, VK_WHOLE_SIZE }, { d_X, 0, VK_WHOLE_SIZE });
6166 6164
}
@@ -6179,6 +6177,9 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
6179 6177
if (quantize_y) {
6180 6178
if (ctx->prealloc_y_last_pipeline_used != to_q8_1.get() ||
6181 6179
ctx->prealloc_y_last_tensor_used != src1) {
6180+
if (ctx->prealloc_y_need_sync) {
6181+
ggml_vk_sync_buffers(ctx, subctx);
6182+
}
6182 6183
ggml_vk_quantize_q8_1(ctx, subctx, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE }, y_ne * ne12 * ne13, true);
6183 6184
ctx->prealloc_y_last_pipeline_used = to_q8_1.get();
6184 6185
ctx->prealloc_y_last_tensor_used = src1;

0 commit comments

Comments
 (0)