Skip to content

Commit b36a186

Browse files
committed
vulkan: automatically deduce size of push constants
1 parent e562eec commit b36a186

File tree

1 file changed: +54 additions, −28 deletions

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4061,7 +4061,33 @@ static vk_submission ggml_vk_begin_submission(vk_device& device, vk_queue& q, bo
40614061
return s;
40624062
}
40634063

4064-
static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const& descriptor_buffer_infos, size_t push_constant_size, const void* push_constants, std::array<uint32_t, 3> elements) {
4064+
// Byte size of a struct-typed push constant block, as passed to vkCmdPushConstants.
// The parameter is unnamed because only its type matters here.
template <typename T> size_t push_constant_size(const T &) {
    static_assert(std::is_class<T>::value, "T must be a struct/class");
    // Push constant data is copied bitwise into the command buffer, so the
    // type must not rely on copy constructors or destructors.
    static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
    return sizeof(T);
}
4069+
// Byte size of a std::vector-backed push constant block: element size times
// element count. (No GGML_UNUSED needed: `t` is used via t.size().)
template <typename T> size_t push_constant_size(const std::vector<T> &t) {
    return sizeof(T) * t.size();
}
4073+
// Byte size of a std::array-backed push constant block.
// The extent parameter must be size_t to match std::array's own declaration:
// with a narrower type (e.g. uint32_t) template argument deduction fails on
// LP64 targets ([temp.deduct.type]) and calls silently fall through to the
// generic struct overload instead of this one.
template <typename T, size_t N> size_t push_constant_size(const std::array<T, N> &) {
    return sizeof(T) * N;
}
4077+
4078+
// Address of a struct-typed push constant block, for vkCmdPushConstants.
template <typename T> const T *push_constant_data(const T &pc) {
    static_assert(std::is_class<T>::value, "T must be a struct/class");
    return &pc;
}
4082+
// Address of the first element of a std::vector-backed push constant block.
template <typename T> const T *push_constant_data(const std::vector<T> &pc) {
    return pc.data();
}
4085+
// Address of the first element of a std::array-backed push constant block.
// The extent parameter must be size_t (std::array's own extent type): with
// uint32_t, deduction fails on LP64 targets and the generic struct overload
// is selected instead, which only works by accident of std::array's layout.
template <typename T, size_t N> const T *push_constant_data(const std::array<T, N> &pc) {
    return pc.data();
}
4088+
4089+
template <typename T>
4090+
static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const& descriptor_buffer_infos, const T &push_constants, std::array<uint32_t, 3> elements) {
40654091
const uint32_t wg0 = CEIL_DIV(elements[0], pipeline->wg_denoms[0]);
40664092
const uint32_t wg1 = CEIL_DIV(elements[1], pipeline->wg_denoms[1]);
40674093
const uint32_t wg2 = CEIL_DIV(elements[2], pipeline->wg_denoms[2]);
@@ -4077,7 +4103,7 @@ static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context&
40774103
vk::WriteDescriptorSet write_descriptor_set{ descriptor_set, 0, 0, pipeline->parameter_count, vk::DescriptorType::eStorageBuffer, nullptr, descriptor_buffer_infos.begin() };
40784104
ctx->device->device.updateDescriptorSets({ write_descriptor_set }, {});
40794105

4080-
subctx->s->buffer.pushConstants(pipeline->layout, vk::ShaderStageFlagBits::eCompute, 0, push_constant_size, push_constants);
4106+
subctx->s->buffer.pushConstants(pipeline->layout, vk::ShaderStageFlagBits::eCompute, 0, push_constant_size(push_constants), push_constant_data(push_constants));
40814107
subctx->s->buffer.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->pipeline);
40824108
subctx->s->buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
40834109
pipeline->layout,
@@ -4540,18 +4566,18 @@ static void ggml_vk_matmul(
45404566
ggml_vk_sync_buffers(subctx);
45414567
if (split_k == 1) {
45424568
const vk_mat_mat_push_constants pc = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d, k, ne02, ne12, broadcast2, broadcast3, padded_n };
4543-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d }, sizeof(vk_mat_mat_push_constants), &pc, { m, n, batch });
4569+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d }, pc, { m, n, batch });
45444570
return;
45454571
}
45464572

45474573
GGML_ASSERT(batch_stride_d == m * n);
45484574

45494575
const vk_mat_mat_push_constants pc1 = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d, CEIL_DIV(k, split_k), ne02, ne12, broadcast2, broadcast3, padded_n };
45504576
// Make sure enough workgroups get assigned for split k to work
4551-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, split_k_buffer }, sizeof(vk_mat_mat_push_constants), &pc1, { (CEIL_DIV(m, pipeline->wg_denoms[0]) * pipeline->wg_denoms[0]) * split_k, n, batch });
4577+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, split_k_buffer }, pc1, { (CEIL_DIV(m, pipeline->wg_denoms[0]) * pipeline->wg_denoms[0]) * split_k, n, batch });
45524578
ggml_vk_sync_buffers(subctx);
45534579
const std::array<uint32_t, 2> pc2 = { (uint32_t)(m * n * batch), split_k };
4554-
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2.size() * sizeof(uint32_t), pc2.data(), { m * n * batch, 1, 1 });
4580+
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_matmul_split_k_reduce, { split_k_buffer, d }, pc2, { m * n * batch, 1, 1 });
45554581
}
45564582

45574583
static vk_pipeline ggml_vk_guess_matmul_id_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, uint32_t m, uint32_t n, bool aligned, ggml_type src0_type) {
@@ -4599,7 +4625,7 @@ static void ggml_vk_matmul_id(
45994625
ggml_vk_sync_buffers(subctx);
46004626
const vk_mat_mat_id_push_constants pc = { m, n, k, stride_a, stride_b, stride_d, batch_stride_a, batch_stride_b, batch_stride_d,
46014627
nei0, nei1, nbi1, ne11, padded_n };
4602-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d, ids }, sizeof(vk_mat_mat_id_push_constants), &pc, { m, nei1, n_as });
4628+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { a, b, d, ids }, pc, { m, nei1, n_as });
46034629
}
46044630

46054631
static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) {
@@ -4720,7 +4746,7 @@ static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context&
47204746
};
47214747
init_pushconst_fastdiv(pc);
47224748
ggml_vk_sync_buffers(subctx);
4723-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, sizeof(vk_op_unary_push_constants), &pc, elements);
4749+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, pc, elements);
47244750
}
47254751

47264752
static vk_pipeline ggml_vk_get_quantize_pipeline(ggml_backend_vk_context * ctx, ggml_type type) {
@@ -4739,7 +4765,7 @@ static void ggml_vk_quantize_q8_1(ggml_backend_vk_context * ctx, vk_context& sub
47394765
vk_pipeline pipeline = ggml_vk_get_quantize_pipeline(ctx, GGML_TYPE_Q8_1);
47404766

47414767
ggml_vk_sync_buffers(subctx);
4742-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, sizeof(uint32_t), &ne, { ne, 1, 1 });
4768+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { in, out }, std::array<uint32_t, 1>{ne}, { ne, 1, 1 });
47434769
}
47444770

47454771
static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -4939,7 +4965,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
49394965
} else if (qx_needs_dequant) {
49404966
const std::vector<uint32_t> pc = { (uint32_t)ne01, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)(ggml_nelements(src0)) };
49414967
ggml_vk_sync_buffers(subctx);
4942-
ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0, { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc.size() * sizeof(uint32_t), pc.data(), { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
4968+
ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0, { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
49434969
}
49444970
if (y_non_contig) {
49454971
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
@@ -5155,7 +5181,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
51555181
ggml_vk_sync_buffers(subctx);
51565182
ggml_vk_dispatch_pipeline(ctx, subctx, dmmv,
51575183
{ vk_subbuffer{ d_X, x_buf_offset, x_sz * ne02 * ne03 }, vk_subbuffer{ d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer{ d_D, d_buf_offset, d_sz * ne22 * ne23} },
5158-
sizeof(vk_mat_vec_push_constants), &pc, { groups_x, (uint32_t)(ne12 * ne13), groups_z });
5184+
pc, { groups_x, (uint32_t)(ne12 * ne13), groups_z });
51595185
}
51605186

51615187
static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5243,7 +5269,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
52435269
}
52445270

52455271
ggml_vk_sync_buffers(subctx);
5246-
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_p021_f16_f32[gqa_ratio - 1], { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, workgroups_z });
5272+
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_p021_f16_f32[gqa_ratio - 1], { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, pc, { 1, (uint32_t)ne01, workgroups_z });
52475273
}
52485274

52495275
static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5326,7 +5352,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
53265352
const std::array<uint32_t, 9> pc = { (uint32_t)ne00, (uint32_t)ne01, row_stride_x, channel_stride_x, channel_stride_y, (uint32_t)(ne12 / ne02), (uint32_t)ne12, (uint32_t)(qy_shader_offset / ggml_type_size(src1->type)), (uint32_t)(d_shader_offset / ggml_type_size(dst->type)) };
53275353
ggml_vk_sync_buffers(subctx);
53285354
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_mul_mat_vec_nc_f16_f32,
5329-
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
5355+
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
53305356
}
53315357

53325358
static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -5542,7 +5568,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
55425568
const std::vector<uint32_t> pc = { (uint32_t)ne01, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)ne10, (uint32_t)(ggml_nelements(src0)) };
55435569
ggml_vk_sync_buffers(subctx);
55445570
ggml_vk_dispatch_pipeline(ctx, subctx, to_fp16_vk_0,
5545-
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc.size() * sizeof(uint32_t), pc.data(), { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
5571+
{ vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0, x_sz * ne02 * ne03 } }, pc, { (uint32_t)(x_ne * ne02 * ne03), 1, 1});
55465572
}
55475573
if (y_non_contig) {
55485574
ggml_vk_cpy_to_contiguous(ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0, VK_WHOLE_SIZE });
@@ -5762,7 +5788,7 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
57625788
ggml_vk_dispatch_pipeline(ctx, subctx, dmmv,
57635789
{ vk_subbuffer{ d_X, x_buf_offset, x_sz * ne02 * ne03 },
57645790
vk_subbuffer{ d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer{ d_D, d_buf_offset, d_sz * ne22 * ne23}, vk_subbuffer{ d_ids, ids_buf_offset, ids_sz } },
5765-
sizeof(vk_mat_vec_id_push_constants), &pc, { groups_x, (uint32_t)nei0, groups_z });
5791+
pc, { groups_x, (uint32_t)nei0, groups_z });
57665792
}
57675793

57685794
static void ggml_vk_mul_mat_id(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * src2, ggml_tensor * dst, bool dryrun = false) {
@@ -6112,7 +6138,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61126138
// there's no more than one tile of rows (i.e. workgroups_x would have been
61136139
// one). We reuse workgroups_x to mean the number of splits, so we need to
61146140
// cancel out the divide by wg_denoms[0].
6115-
sizeof(vk_flash_attn_push_constants), &pc, { workgroups_x * pipeline->wg_denoms[0], workgroups_y, workgroups_z });
6141+
pc, { workgroups_x * pipeline->wg_denoms[0], workgroups_y, workgroups_z });
61166142

61176143
ggml_vk_sync_buffers(subctx);
61186144
const std::array<uint32_t, 3> pc2 = { D, (uint32_t)ne1, split_k };
@@ -6121,7 +6147,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61216147
vk_subbuffer{ctx->prealloc_split_k, 0, VK_WHOLE_SIZE},
61226148
vk_subbuffer{d_D, d_buf_offset, VK_WHOLE_SIZE},
61236149
},
6124-
pc2.size() * uint32_t{sizeof(uint32_t)}, pc2.data(), { (uint32_t)ne1, 1, 1 });
6150+
pc2, { (uint32_t)ne1, 1, 1 });
61256151
} else {
61266152
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline,
61276153
{
@@ -6131,7 +6157,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
61316157
vk_subbuffer{d_M, m_buf_offset, VK_WHOLE_SIZE},
61326158
vk_subbuffer{d_D, d_buf_offset, VK_WHOLE_SIZE},
61336159
},
6134-
sizeof(vk_flash_attn_push_constants), &pc, { workgroups_x, workgroups_y, workgroups_z });
6160+
pc, { workgroups_x, workgroups_y, workgroups_z });
61356161
}
61366162
}
61376163

@@ -6800,7 +6826,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
68006826
}
68016827

68026828
ggml_vk_sync_buffers(subctx);
6803-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, subbuf_y, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6829+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, subbuf_y, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68046830
} else if (op == GGML_OP_ROPE || op == GGML_OP_ROPE_BACK) {
68056831
// Empty src2 is possible in rope, but the shader needs a buffer
68066832
vk_subbuffer subbuf_z;
@@ -6811,26 +6837,26 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
68116837
}
68126838

68136839
ggml_vk_sync_buffers(subctx);
6814-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, subbuf_z, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6840+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, subbuf_z, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68156841
} else if (op == GGML_OP_IM2COL) {
68166842
// im2col uses only src1 and dst buffers
68176843
ggml_vk_sync_buffers(subctx);
6818-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6844+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68196845
} else if (op == GGML_OP_COUNT_EQUAL) {
68206846
ggml_vk_sync_buffers(subctx);
68216847
// count_equal assumes that destination buffer is initialized with zeroes
68226848
ggml_vk_buffer_memset_async(subctx, d_D, d_buf_offset, 0, d_sz);
68236849
ggml_vk_sync_buffers(subctx);
6824-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6850+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68256851
} else if (use_src2) {
68266852
ggml_vk_sync_buffers(subctx);
6827-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_Z, z_buf_offset, z_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6853+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_Z, z_buf_offset, z_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68286854
} else if (use_src1) {
68296855
ggml_vk_sync_buffers(subctx);
6830-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6856+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_Y, y_buf_offset, y_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68316857
} else {
68326858
ggml_vk_sync_buffers(subctx);
6833-
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
6859+
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{ d_X, x_buf_offset, x_sz }, vk_subbuffer{ d_D, d_buf_offset, d_sz } }, pc, elements);
68346860
}
68356861
}
68366862

@@ -6999,7 +7025,7 @@ static void ggml_vk_op_f32_wkv(ggml_backend_vk_context * ctx, vk_context& subctx
69997025
vk_subbuffer{ d_srcs[4], src_offsets[4], src_sizes[4] },
70007026
vk_subbuffer{ d_srcs[5], src_offsets[5], src_sizes[5] },
70017027
vk_subbuffer{ d_D, dst_offset, dst_size }
7002-
}, sizeof(vk_op_rwkv_wkv6_push_constants), &pc, elements);
7028+
}, pc, elements);
70037029
} else if (version == 7) {
70047030
ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, {
70057031
vk_subbuffer{ d_srcs[0], src_offsets[0], src_sizes[0] },
@@ -7010,7 +7036,7 @@ static void ggml_vk_op_f32_wkv(ggml_backend_vk_context * ctx, vk_context& subctx
70107036
vk_subbuffer{ d_srcs[5], src_offsets[5], src_sizes[5] },
70117037
vk_subbuffer{ d_srcs[6], src_offsets[6], src_sizes[6] },
70127038
vk_subbuffer{ d_D, dst_offset, dst_size }
7013-
}, sizeof(vk_op_rwkv_wkv7_push_constants), &pc, elements);
7039+
}, pc, elements);
70147040
} else {
70157041
// shouldn't happen
70167042
GGML_ASSERT(false);
@@ -7147,7 +7173,7 @@ static void ggml_vk_op_f32_opt_step_adamw(ggml_backend_vk_context * ctx, vk_cont
71477173
vk_subbuffer{ d_GM, gm_offset, gm_size },
71487174
vk_subbuffer{ d_GV, gv_offset, gv_size },
71497175
vk_subbuffer{ d_P, p_offset, p_size },
7150-
}, sizeof(vk_op_push_constants), &pc, elements);
7176+
}, pc, elements);
71517177
}
71527178

71537179
static void ggml_vk_opt_step_adamw(ggml_backend_vk_context * ctx, vk_context& subctx, ggml_tensor * dst, bool dryrun = false) {
@@ -8005,7 +8031,7 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
80058031
vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
80068032
ggml_vk_ctx_begin(ctx->device, subctx);
80078033
const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
8008-
ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
8034+
ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc, { (uint32_t)ne, 1, 1});
80098035
ggml_vk_ctx_end(subctx);
80108036

80118037
auto begin = std::chrono::high_resolution_clock::now();

0 commit comments

Comments
 (0)