@@ -1404,10 +1404,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
14041404 // spec constants and tile sizes for non-quant matmul/matmul_id
14051405 l_warptile = { 256 , 128 , 256 , 64 };
14061406 m_warptile = { 256 , 128 , 128 , 64 };
1407- s_warptile = { 128 , 32 , 16 , 64 };
1407+ s_warptile = { 128 , 64 , 64 , 64 };
14081408 l_wg_denoms = {128 , 256 , 1 };
14091409 m_wg_denoms = {128 , 128 , 1 };
1410- s_wg_denoms = { 32 , 16 , 1 };
1410+ s_wg_denoms = { 64 , 64 , 1 };
14111411
14121412 // spec constants and tile sizes for quant matmul (non-Qi_K)
14131413 l_warptile_mmq = { 256 , 128 , 256 , 64 };
@@ -2017,11 +2017,11 @@ static void ggml_vk_load_shaders(vk_device& device) {
20172017
20182018 ggml_vk_create_pipeline (device, device->pipeline_sum_rows_f32 , " sum_rows_f32" , sum_rows_f32_len, sum_rows_f32_data, " main" , 2 , sizeof (vk_op_push_constants), {1 , 1 , 1 }, { device->subgroup_size }, 1 );
20192019
2020- ggml_vk_create_pipeline (device, device->pipeline_im2col_f32 , " im2col_f32" , im2col_f32_len, im2col_f32_data, " main" , 2 , sizeof (vk_op_im2col_push_constants), {256 , 1 , 1 }, {}, 1 );
2020+ ggml_vk_create_pipeline (device, device->pipeline_im2col_f32 , " im2col_f32" , im2col_f32_len, im2col_f32_data, " main" , 2 , sizeof (vk_op_im2col_push_constants), {512 , 1 , 1 }, { device-> subgroup_size }, 1 , true );
20212021 if (device->float_controls_rte_fp16 ) {
2022- ggml_vk_create_pipeline (device, device->pipeline_im2col_f32_f16 , " im2col_f32_f16" , im2col_f32_f16_rte_len, im2col_f32_f16_rte_data, " main" , 2 , sizeof (vk_op_im2col_push_constants), {256 , 1 , 1 }, {}, 1 );
2022+ ggml_vk_create_pipeline (device, device->pipeline_im2col_f32_f16 , " im2col_f32_f16" , im2col_f32_f16_rte_len, im2col_f32_f16_rte_data, " main" , 2 , sizeof (vk_op_im2col_push_constants), {512 , 1 , 1 }, { device-> subgroup_size }, 1 , true );
20232023 } else {
2024- ggml_vk_create_pipeline (device, device->pipeline_im2col_f32_f16 , " im2col_f32_f16" , im2col_f32_f16_len, im2col_f32_f16_data, " main" , 2 , sizeof (vk_op_im2col_push_constants), {256 , 1 , 1 }, {}, 1 );
2024+ ggml_vk_create_pipeline (device, device->pipeline_im2col_f32_f16 , " im2col_f32_f16" , im2col_f32_f16_len, im2col_f32_f16_data, " main" , 2 , sizeof (vk_op_im2col_push_constants), {512 , 1 , 1 }, { device-> subgroup_size }, 1 , true );
20252025 }
20262026
20272027 ggml_vk_create_pipeline (device, device->pipeline_timestep_embedding_f32 , " timestep_embedding_f32" , timestep_embedding_f32_len, timestep_embedding_f32_data, " main" , 2 , sizeof (vk_op_timestep_embedding_push_constants), {256 , 1 , 1 }, {}, 1 );
0 commit comments