@@ -268,6 +268,10 @@ struct vk_subbuffer {
268268 vk_buffer buffer;
269269 uint64_t offset;
270270 uint64_t size;
271+
272+ operator vk::DescriptorBufferInfo () const {
273+ return { buffer->buffer , offset, size };
274+ }
271275};
272276
273277struct vk_semaphore {
@@ -1063,13 +1067,14 @@ static vk_subbuffer ggml_vk_subbuffer(vk_buffer& buf) {
10631067
10641068static void ggml_vk_sync_buffers (vk_context& ctx) {
10651069 VK_LOG_DEBUG (" ggml_vk_sync_buffers()" );
1066- const std::vector<vk::MemoryBarrier> mem_barriers{ { { vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite }, { vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite } } };
1067-
10681070 ctx->s ->buffer .pipelineBarrier (
10691071 ctx->q ->stage_flags ,
10701072 ctx->q ->stage_flags ,
10711073 {},
1072- mem_barriers,
1074+ { {
1075+ {vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite},
1076+ {vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite}
1077+ } },
10731078 {},
10741079 {}
10751080 );
@@ -2420,28 +2425,23 @@ static vk_submission ggml_vk_begin_submission(vk_device& device, vk_queue& q, bo
24202425 return s;
24212426}
24222427
2423- static void ggml_vk_dispatch_pipeline (ggml_backend_vk_context * ctx, vk_context& subctx, vk_pipeline& pipeline, std::vector<vk_subbuffer>&& buffers, size_t push_constant_size, const void * push_constants, std::array<uint32_t , 3 > elements) {
2428+
2429+
2430+ static void ggml_vk_dispatch_pipeline (ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const & descriptor_buffer_infos, size_t push_constant_size, const void * push_constants, std::array<uint32_t , 3 > elements) {
24242431 const uint32_t wg0 = CEIL_DIV (elements[0 ], pipeline->wg_denoms [0 ]);
24252432 const uint32_t wg1 = CEIL_DIV (elements[1 ], pipeline->wg_denoms [1 ]);
24262433 const uint32_t wg2 = CEIL_DIV (elements[2 ], pipeline->wg_denoms [2 ]);
24272434 VK_LOG_DEBUG (" ggml_vk_dispatch_pipeline(" << pipeline->name << " , {" ;
2428- for (auto & buffer : buffers ) {
2429- std::cerr << " (" << buffer. buffer << " , " << buffer.offset << " , " << buffer.size << " ), " ;
2435+ for (auto & buffer : descriptor_buffer_infos ) {
2436+ std::cerr << " (" << buffer << " , " << buffer.offset << " , " << buffer.size << " ), " ;
24302437 }
24312438 std::cerr << " }, (" << wg0 << " ," << wg1 << " ," << wg2 << " ))" );
2432- std::vector<vk::DescriptorBufferInfo> descriptor_buffer_infos;
2433- std::vector<vk::WriteDescriptorSet> write_descriptor_sets;
24342439 GGML_ASSERT (pipeline->descriptor_set_idx < pipeline->descriptor_sets .size ());
2435- GGML_ASSERT (buffers.size () == pipeline->parameter_count );
2436- vk::DescriptorSet& descriptor_set = pipeline->descriptor_sets [pipeline->descriptor_set_idx ++];
2437- for (uint32_t i = 0 ; i < pipeline->parameter_count ; i++) {
2438- descriptor_buffer_infos.push_back ({buffers[i].buffer ->buffer , buffers[i].offset , buffers[i].size });
2439- }
2440- for (uint32_t i = 0 ; i < pipeline->parameter_count ; i++) {
2441- write_descriptor_sets.push_back ({descriptor_set, i, 0 , 1 , vk::DescriptorType::eStorageBuffer, nullptr , &descriptor_buffer_infos[i]});
2442- }
2440+ GGML_ASSERT (descriptor_buffer_infos.size () == pipeline->parameter_count );
24432441
2444- ctx->device ->device .updateDescriptorSets (write_descriptor_sets, {});
2442+ vk::DescriptorSet& descriptor_set = pipeline->descriptor_sets [pipeline->descriptor_set_idx ++];
2443+ vk::WriteDescriptorSet write_descriptor_set{ descriptor_set, 0 , 0 , pipeline->parameter_count , vk::DescriptorType::eStorageBuffer, nullptr , descriptor_buffer_infos.begin () };
2444+ ctx->device ->device .updateDescriptorSets ({ write_descriptor_set }, {});
24452445
24462446 subctx->s ->buffer .pushConstants (pipeline->layout , vk::ShaderStageFlagBits::eCompute, 0 , push_constant_size, push_constants);
24472447 subctx->s ->buffer .bindPipeline (vk::PipelineBindPoint::eCompute, pipeline->pipeline );
@@ -3123,7 +3123,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
31233123 } else if (qx_needs_dequant) {
31243124 const std::vector<uint32_t > pc = { (uint32_t )ne01, (uint32_t )ne10, (uint32_t )ne10, (uint32_t )ne10, (uint32_t )(ggml_nelements (src0)) };
31253125 ggml_vk_sync_buffers (subctx);
3126- ggml_vk_dispatch_pipeline (ctx, subctx, to_fp16_vk_0, { { d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, { d_X, 0 , x_sz * ne02 * ne03 } }, pc.size () * sizeof (uint32_t ), pc.data (), { (uint32_t )(x_ne * ne02 * ne03), 1 , 1 });
3126+ ggml_vk_dispatch_pipeline (ctx, subctx, to_fp16_vk_0, { vk_subbuffer { d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer { d_X, 0 , x_sz * ne02 * ne03 } }, pc.size () * sizeof (uint32_t ), pc.data (), { (uint32_t )(x_ne * ne02 * ne03), 1 , 1 });
31273127 }
31283128 if (y_non_contig) {
31293129 ggml_vk_cpy_to_contiguous (ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0 , VK_WHOLE_SIZE });
@@ -3312,7 +3312,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
33123312 };
33133313 ggml_vk_sync_buffers (subctx);
33143314 ggml_vk_dispatch_pipeline (ctx, subctx, dmmv,
3315- { { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne22 * ne23} },
3315+ { vk_subbuffer { d_X, x_buf_offset, x_sz * ne02 * ne03 }, vk_subbuffer { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer { d_D, d_buf_offset, d_sz * ne22 * ne23} },
33163316 sizeof (vk_mat_vec_push_constants), &pc, { groups_x, (uint32_t )(ne12 * ne13), groups_z });
33173317}
33183318
@@ -3384,7 +3384,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
33843384 // compute
33853385 const std::array<uint32_t , 6 > pc = { (uint32_t )ne00, (uint32_t )ne01, (uint32_t )ne02, (uint32_t )ne12, (uint32_t )(qy_shader_offset / ggml_type_size (src1->type )), (uint32_t )(d_shader_offset / ggml_type_size (dst->type )) };
33863386 ggml_vk_sync_buffers (subctx);
3387- ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_mul_mat_vec_p021_f16_f32 , { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof (uint32_t ), &pc, { 1 , (uint32_t )ne01, (uint32_t )ne12 });
3387+ ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_mul_mat_vec_p021_f16_f32 , { vk_subbuffer { d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof (uint32_t ), &pc, { 1 , (uint32_t )ne01, (uint32_t )ne12 });
33883388}
33893389
33903390static void ggml_vk_mul_mat_vec_nc_f16_f32 (ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@@ -3459,7 +3459,8 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
34593459 // compute
34603460 const std::array<uint32_t , 7 > pc = { (uint32_t )ne00, (uint32_t )ne01, row_stride_x, channel_stride_x, (uint32_t )(ne12 / ne02), (uint32_t )(qy_shader_offset / ggml_type_size (src1->type )), (uint32_t )(d_shader_offset / ggml_type_size (dst->type )) };
34613461 ggml_vk_sync_buffers (subctx);
3462- ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_mul_mat_vec_nc_f16_f32 , { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof (uint32_t ), &pc, { 1 , (uint32_t )ne01, (uint32_t )ne12 });
3462+ ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_mul_mat_vec_nc_f16_f32 ,
3463+ { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof (uint32_t ), &pc, { 1 , (uint32_t )ne01, (uint32_t )ne12 });
34633464}
34643465
34653466static void ggml_vk_mul_mat (ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@@ -3634,7 +3635,8 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
36343635 } else if (qx_needs_dequant) {
36353636 const std::vector<uint32_t > pc = { (uint32_t )ne01, (uint32_t )ne10, (uint32_t )ne10, (uint32_t )ne10, (uint32_t )(ggml_nelements (src0)) };
36363637 ggml_vk_sync_buffers (subctx);
3637- ggml_vk_dispatch_pipeline (ctx, subctx, to_fp16_vk_0, { { d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, { d_X, 0 , x_sz * ne02 * ne03 } }, pc.size () * sizeof (uint32_t ), pc.data (), { (uint32_t )(x_ne * ne02 * ne03), 1 , 1 });
3638+ ggml_vk_dispatch_pipeline (ctx, subctx, to_fp16_vk_0,
3639+ { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0 , x_sz * ne02 * ne03 } }, pc.size () * sizeof (uint32_t ), pc.data (), { (uint32_t )(x_ne * ne02 * ne03), 1 , 1 });
36383640 }
36393641 if (y_non_contig) {
36403642 ggml_vk_cpy_to_contiguous (ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0 , VK_WHOLE_SIZE });
@@ -3834,7 +3836,8 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
38343836 };
38353837 ggml_vk_sync_buffers (subctx);
38363838 ggml_vk_dispatch_pipeline (ctx, subctx, dmmv,
3837- { { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne22 * ne23}, { d_ids, ids_buf_offset, ids_sz } },
3839+ { vk_subbuffer{ d_X, x_buf_offset, x_sz * ne02 * ne03 },
3840+ vk_subbuffer{ d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer{ d_D, d_buf_offset, d_sz * ne22 * ne23}, vk_subbuffer{ d_ids, ids_buf_offset, ids_sz } },
38383841 sizeof (vk_mat_vec_id_push_constants), &pc, { groups_x, (uint32_t )nei0, groups_z });
38393842}
38403843
@@ -4381,7 +4384,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
43814384 }
43824385
43834386 ggml_vk_sync_buffers (subctx);
4384- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, subbuf_y, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4387+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, subbuf_y, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
43854388 } else if (op == GGML_OP_ROPE) {
43864389 // Empty src2 is possible in rope, but the shader needs a buffer
43874390 vk_subbuffer subbuf_z;
@@ -4392,20 +4395,20 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
43924395 }
43934396
43944397 ggml_vk_sync_buffers (subctx);
4395- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, { d_Y, y_buf_offset, y_sz }, subbuf_z, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4398+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, vk_subbuffer { d_Y, y_buf_offset, y_sz }, subbuf_z, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
43964399 } else if (op == GGML_OP_IM2COL) {
43974400 // im2col uses only src1 and dst buffers
43984401 ggml_vk_sync_buffers (subctx);
4399- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_Y, y_buf_offset, y_sz }, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4402+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_Y, y_buf_offset, y_sz }, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
44004403 } else if (use_src2) {
44014404 ggml_vk_sync_buffers (subctx);
4402- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, { d_Y, y_buf_offset, y_sz }, { d_Z, z_buf_offset, z_sz }, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4405+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, vk_subbuffer { d_Y, y_buf_offset, y_sz }, vk_subbuffer { d_Z, z_buf_offset, z_sz }, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
44034406 } else if (use_src1) {
44044407 ggml_vk_sync_buffers (subctx);
4405- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, { d_Y, y_buf_offset, y_sz }, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4408+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, vk_subbuffer { d_Y, y_buf_offset, y_sz }, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
44064409 } else {
44074410 ggml_vk_sync_buffers (subctx);
4408- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4411+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
44094412 }
44104413 } else {
44114414 GGML_ASSERT (op != GGML_OP_SOFT_MAX);
@@ -4442,10 +4445,10 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
44424445
44434446 if (use_src1) {
44444447 ggml_vk_sync_buffers (subctx);
4445- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_Y, y_buf_offset + y_offset, y_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof (PC), &pc, elements);
4448+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset + x_offset, x_sz }, vk_subbuffer { d_Y, y_buf_offset + y_offset, y_sz }, vk_subbuffer { d_D, d_buf_offset + d_offset, d_sz } }, sizeof (PC), &pc, elements);
44464449 } else {
44474450 ggml_vk_sync_buffers (subctx);
4448- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof (PC), &pc, elements);
4451+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset + x_offset, x_sz }, vk_subbuffer { d_D, d_buf_offset + d_offset, d_sz } }, sizeof (PC), &pc, elements);
44494452 }
44504453 }
44514454 }
0 commit comments