@@ -93,6 +93,10 @@ struct vk_pipeline_struct {
9393 uint32_t parameter_count;
9494 std::array<uint32_t , 3 > wg_denoms;
9595 uint32_t align;
96+ // set to true to request the pipeline is compiled after the dryrun
97+ bool needed {};
98+ // set to true when the shader has been compiled
99+ bool compiled {};
96100};
97101
// Shared-ownership handle (std::shared_ptr) to a vk_pipeline_struct; a default-constructed
// handle is null, which the pipeline-creation lambda tests with `if (!pipeline)`.
98102typedef std::shared_ptr<vk_pipeline_struct> vk_pipeline;
@@ -186,16 +190,19 @@ struct vk_device_struct {
186190 bool mul_mat_id_m;
187191 bool mul_mat_id_s;
188192
189- vk_matmul_pipeline pipeline_matmul_f32;
190- vk_matmul_pipeline pipeline_matmul_f32_f16;
193+ // set to true to indicate that some shaders need to be compiled after the dryrun
194+ bool need_compiles {};
195+
196+ vk_matmul_pipeline pipeline_matmul_f32 {};
197+ vk_matmul_pipeline pipeline_matmul_f32_f16 {};
191198 vk_matmul_pipeline2 pipeline_matmul_f16;
192199 vk_matmul_pipeline2 pipeline_matmul_f16_f32;
193200 vk_pipeline pipeline_matmul_split_k_reduce;
194201
195202 vk_matmul_pipeline2 pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_COUNT];
196203 vk_matmul_pipeline2 pipeline_dequant_mul_mat_mat[GGML_TYPE_COUNT];
197204
198- vk_matmul_pipeline pipeline_matmul_id_f32;
205+ vk_matmul_pipeline pipeline_matmul_id_f32 {} ;
199206 vk_matmul_pipeline2 pipeline_matmul_id_f16;
200207 vk_matmul_pipeline2 pipeline_matmul_id_f16_f32;
201208
@@ -758,13 +765,6 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
758765 GGML_ASSERT (parameter_count > 0 );
759766 GGML_ASSERT (wg_denoms[0 ] > 0 && wg_denoms[1 ] > 0 && wg_denoms[2 ] > 0 ); // NOLINT
760767
761- pipeline = std::make_shared<vk_pipeline_struct>();
762- pipeline->name = name;
763- pipeline->parameter_count = parameter_count;
764- pipeline->push_constant_size = push_constant_size;
765- pipeline->wg_denoms = wg_denoms;
766- pipeline->align = align;
767-
768768 vk::ShaderModuleCreateInfo shader_module_create_info ({}, spv_size, reinterpret_cast <const uint32_t *>(spv_data));
769769 pipeline->shader_module = device->device .createShaderModule (shader_module_create_info);
770770
@@ -833,6 +833,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
833833 }
834834
835835 pipeline->pipeline = device->device .createComputePipeline (VK_NULL_HANDLE, compute_pipeline_create_info).value ;
836+ pipeline->compiled = true ;
836837
837838 {
838839 std::lock_guard<std::mutex> guard (device->mutex );
@@ -844,11 +845,6 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
844845 assert (compile_count > 0 );
845846 compile_count--;
846847
847- // "Progress bar" for shader compiles
848- static uint32_t total_compile_count = 0 ;
849- if ((total_compile_count++ % 10 ) == 0 ) {
850- std::cerr << " ." ;
851- }
852848 }
853849 compile_count_cond.notify_all ();
854850}
@@ -874,6 +870,10 @@ static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline)
// Records that `n` more descriptor sets are required for `pipeline` on `device`, and
// flags the pipeline for compilation if its shader has not been compiled yet.
//
// device:   device whose per-pipeline descriptor set requirement table is updated.
// pipeline: pipeline being requested; dereferenced unconditionally, so it must be non-null.
// n:        number of additional descriptor sets, added to the running total.
874870static void ggml_pipeline_request_descriptor_sets (vk_device& device, vk_pipeline& pipeline, uint32_t n) {
875871 VK_LOG_DEBUG (" ggml_pipeline_request_descriptor_sets(" << pipeline->name << " , " << n << " )" );
// Requirements are accumulated in a table keyed by the pipeline's name.
876872 device->pipeline_descriptor_set_requirements [pipeline->name ] += n;
// A request for a not-yet-compiled pipeline marks it as needed and raises the
// device-wide flag; ggml_backend_vk_graph_compute checks that flag after its
// ggml_vk_build_graph loop and calls ggml_vk_load_shaders when it is set.
873+ if (!pipeline->compiled ) {
874+ pipeline->needed = true ;
875+ device->need_compiles = true ;
876+ }
877877}
878878
879879static void ggml_pipeline_allocate_descriptor_sets (vk_device& device) {
@@ -1356,8 +1356,6 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
13561356static void ggml_vk_load_shaders (vk_device& device) {
13571357 VK_LOG_DEBUG (" ggml_vk_load_shaders(" << device->name << " )" );
13581358
1359- std::cerr << " ggml_vulkan: Compiling shaders" ;
1360-
13611359 // some shaders require the subgroup size to be 16 or larger
13621360 const uint32_t subgroup_size_16 = std::max (device->subgroup_size , 16u );
13631361
@@ -1494,13 +1492,30 @@ static void ggml_vk_load_shaders(vk_device& device) {
14941492 }
14951493 }
14961494
1497- device->pipeline_matmul_f32 = std::make_shared<vk_matmul_pipeline_struct>();
1498- device->pipeline_matmul_f32_f16 = std::make_shared<vk_matmul_pipeline_struct>();
1499-
1500- device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
1501-
1495+ if (!device->pipeline_matmul_f32 ) {
1496+ device->pipeline_matmul_f32 = std::make_shared<vk_matmul_pipeline_struct>();
1497+ }
1498+ if (!device->pipeline_matmul_f32_f16 ) {
1499+ device->pipeline_matmul_f32_f16 = std::make_shared<vk_matmul_pipeline_struct>();
1500+ }
1501+ if (!device->pipeline_matmul_id_f32 ) {
1502+ device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
1503+ }
15021504 std::vector<std::future<void >> compiles;
15031505 auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void * spv_data, const std::string &entrypoint, uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t , 3 > wg_denoms, const std::vector<uint32_t >& specialization_constants, uint32_t align, bool disable_robustness = false ) {
1506+ if (!pipeline) {
1507+ pipeline = std::make_shared<vk_pipeline_struct>();
1508+ pipeline->name = name;
1509+ pipeline->parameter_count = parameter_count;
1510+ pipeline->push_constant_size = push_constant_size;
1511+ pipeline->wg_denoms = wg_denoms;
1512+ pipeline->align = align;
1513+ }
1514+
1515+ if (!pipeline->needed || pipeline->compiled ) {
1516+ return ;
1517+ }
1518+
15041519 {
15051520 // wait until fewer than N compiles are in progress
15061521 uint32_t N = std::max (1u , std::thread::hardware_concurrency ());
@@ -1940,7 +1955,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
19401955 for (auto &c : compiles) {
19411956 c.wait ();
19421957 }
1943- std::cerr << " Done! " << std::endl ;
1958+ device-> need_compiles = false ;
19441959}
19451960
19461961static vk_device ggml_vk_get_device (size_t idx) {
@@ -7225,6 +7240,9 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
72257240 for (int i = 0 ; i < cgraph->n_nodes ; i++) {
72267241 ggml_vk_build_graph (ctx, cgraph->nodes [i], i, nullptr , 0 , true , false , false );
72277242 }
7243+ if (ctx->device ->need_compiles ) {
7244+ ggml_vk_load_shaders (ctx->device );
7245+ }
72287246 ggml_vk_preallocate_buffers (ctx);
72297247 ggml_pipeline_allocate_descriptor_sets (ctx->device );
72307248
0 commit comments