@@ -1845,48 +1845,54 @@ static void ggml_vk_load_shaders(vk_device& device) {
18451845 }
18461846
18471847 // mul mat vec
1848- // computing two rows per workgroup is a benefit for Q4_0 -> Q5_1, but not for Q8_0.
1848+
1849+ // AMD GCN (detected as AMD with subgroup size fixed at 64) and Intel graphics cards perform best when the number of rows per workgroup is doubled; rm is that row multiplier
1850+ uint32_t rm = 1 ;
1851+ if ((device->vendor_id == VK_VENDOR_ID_AMD && device->subgroup_min_size == 64 && device->subgroup_max_size == 64 ) || device->vendor_id == VK_VENDOR_ID_INTEL)
1852+ rm = 2 ;
1853+
1854+ // computing two rows per workgroup (before the rm multiplier) is a benefit for Q4_0 -> Q5_1 and IQ4_NL, but not for Q8_0, which starts from one row.
18491855 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_F32 ], " mul_mat_vec_f32_f32_f32" , mul_mat_vec_f32_f32_f32_len, mul_mat_vec_f32_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
18501856 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_F16 ], " mul_mat_vec_f16_f32_f32" , mul_mat_vec_f16_f32_f32_len, mul_mat_vec_f16_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
1851- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_q4_0_f32_f32" , mul_mat_vec_q4_0_f32_f32_len, mul_mat_vec_q4_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1852- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_q4_1_f32_f32" , mul_mat_vec_q4_1_f32_f32_len, mul_mat_vec_q4_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1853- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f32_f32" , mul_mat_vec_q5_0_f32_f32_len, mul_mat_vec_q5_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1854- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f32_f32" , mul_mat_vec_q5_1_f32_f32_len, mul_mat_vec_q5_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1855- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f32_f32" , mul_mat_vec_q8_0_f32_f32_len, mul_mat_vec_q8_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size , 1 }, 1 , true );
1857+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_q4_0_f32_f32" , mul_mat_vec_q4_0_f32_f32_len, mul_mat_vec_q4_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1858+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_q4_1_f32_f32" , mul_mat_vec_q4_1_f32_f32_len, mul_mat_vec_q4_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1859+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f32_f32" , mul_mat_vec_q5_0_f32_f32_len, mul_mat_vec_q5_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1860+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f32_f32" , mul_mat_vec_q5_1_f32_f32_len, mul_mat_vec_q5_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1861+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f32_f32" , mul_mat_vec_q8_0_f32_f32_len, mul_mat_vec_q8_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 *rm , 1 , 1 }, {device->subgroup_size , 1 *rm }, 1 , true );
18561862 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_q2_k_f32_f32" , mul_mat_vec_q2_k_f32_f32_len, mul_mat_vec_q2_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18571863 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_q3_k_f32_f32" , mul_mat_vec_q3_k_f32_f32_len, mul_mat_vec_q3_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18581864 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_q4_k_f32_f32" , mul_mat_vec_q4_k_f32_f32_len, mul_mat_vec_q4_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18591865 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_q5_k_f32_f32" , mul_mat_vec_q5_k_f32_f32_len, mul_mat_vec_q5_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18601866 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_q6_k_f32_f32" , mul_mat_vec_q6_k_f32_f32_len, mul_mat_vec_q6_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
1861- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f32_f32" , mul_mat_vec_iq4_nl_f32_f32_len, mul_mat_vec_iq4_nl_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1867+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f32_f32" , mul_mat_vec_iq4_nl_f32_f32_len, mul_mat_vec_iq4_nl_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
18621868
18631869 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_F32 ], " mul_mat_vec_f32_f16_f32" , mul_mat_vec_f32_f16_f32_len, mul_mat_vec_f32_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
18641870 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_F16 ], " mul_mat_vec_f16_f16_f32" , mul_mat_vec_f16_f16_f32_len, mul_mat_vec_f16_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
1865- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_q4_0_f16_f32" , mul_mat_vec_q4_0_f16_f32_len, mul_mat_vec_q4_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1866- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_q4_1_f16_f32" , mul_mat_vec_q4_1_f16_f32_len, mul_mat_vec_q4_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1867- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f16_f32" , mul_mat_vec_q5_0_f16_f32_len, mul_mat_vec_q5_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1868- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f16_f32" , mul_mat_vec_q5_1_f16_f32_len, mul_mat_vec_q5_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1869- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f16_f32" , mul_mat_vec_q8_0_f16_f32_len, mul_mat_vec_q8_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size , 1 }, 1 , true );
1871+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_q4_0_f16_f32" , mul_mat_vec_q4_0_f16_f32_len, mul_mat_vec_q4_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1872+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_q4_1_f16_f32" , mul_mat_vec_q4_1_f16_f32_len, mul_mat_vec_q4_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1873+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f16_f32" , mul_mat_vec_q5_0_f16_f32_len, mul_mat_vec_q5_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1874+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f16_f32" , mul_mat_vec_q5_1_f16_f32_len, mul_mat_vec_q5_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1875+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f16_f32" , mul_mat_vec_q8_0_f16_f32_len, mul_mat_vec_q8_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 *rm , 1 , 1 }, {device->subgroup_size , 1 *rm }, 1 , true );
18701876 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_q2_k_f16_f32" , mul_mat_vec_q2_k_f16_f32_len, mul_mat_vec_q2_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18711877 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_q3_k_f16_f32" , mul_mat_vec_q3_k_f16_f32_len, mul_mat_vec_q3_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18721878 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_q4_k_f16_f32" , mul_mat_vec_q4_k_f16_f32_len, mul_mat_vec_q4_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18731879 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_q5_k_f16_f32" , mul_mat_vec_q5_k_f16_f32_len, mul_mat_vec_q5_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18741880 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_q6_k_f16_f32" , mul_mat_vec_q6_k_f16_f32_len, mul_mat_vec_q6_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
1875- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f16_f32" , mul_mat_vec_iq4_nl_f16_f32_len, mul_mat_vec_iq4_nl_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size }, 1 , true );
1881+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f16_f32" , mul_mat_vec_iq4_nl_f16_f32_len, mul_mat_vec_iq4_nl_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
18761882
18771883 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_F32 ], " mul_mat_vec_id_f32_f32" , mul_mat_vec_id_f32_f32_len, mul_mat_vec_id_f32_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
18781884 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_F16 ], " mul_mat_vec_id_f16_f32" , mul_mat_vec_id_f16_f32_len, mul_mat_vec_id_f16_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
1879- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_id_q4_0_f32" , mul_mat_vec_id_q4_0_f32_len, mul_mat_vec_id_q4_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1880- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_id_q4_1_f32" , mul_mat_vec_id_q4_1_f32_len, mul_mat_vec_id_q4_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1881- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_id_q5_0_f32" , mul_mat_vec_id_q5_0_f32_len, mul_mat_vec_id_q5_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1882- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_id_q5_1_f32" , mul_mat_vec_id_q5_1_f32_len, mul_mat_vec_id_q5_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1883- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_id_q8_0_f32" , mul_mat_vec_id_q8_0_f32_len, mul_mat_vec_id_q8_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device->subgroup_size , 1 }, 1 , true );
1885+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_id_q4_0_f32" , mul_mat_vec_id_q4_0_f32_len, mul_mat_vec_id_q4_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1886+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_id_q4_1_f32" , mul_mat_vec_id_q4_1_f32_len, mul_mat_vec_id_q4_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1887+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_id_q5_0_f32" , mul_mat_vec_id_q5_0_f32_len, mul_mat_vec_id_q5_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1888+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_id_q5_1_f32" , mul_mat_vec_id_q5_1_f32_len, mul_mat_vec_id_q5_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1889+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_id_q8_0_f32" , mul_mat_vec_id_q8_0_f32_len, mul_mat_vec_id_q8_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 *rm , 1 , 1 }, {device->subgroup_size , 1 *rm }, 1 , true );
18841890 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_id_q2_k_f32" , mul_mat_vec_id_q2_k_f32_len, mul_mat_vec_id_q2_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18851891 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_id_q3_k_f32" , mul_mat_vec_id_q3_k_f32_len, mul_mat_vec_id_q3_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18861892 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_id_q4_k_f32" , mul_mat_vec_id_q4_k_f32_len, mul_mat_vec_id_q4_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18871893 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_id_q5_k_f32" , mul_mat_vec_id_q5_k_f32_len, mul_mat_vec_id_q5_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18881894 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_id_q6_k_f32" , mul_mat_vec_id_q6_k_f32_len, mul_mat_vec_id_q6_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
1889- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_id_iq4_nl_f32" , mul_mat_vec_id_iq4_nl_f32_len, mul_mat_vec_id_iq4_nl_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1895+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_id_iq4_nl_f32" , mul_mat_vec_id_iq4_nl_f32_len, mul_mat_vec_id_iq4_nl_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
18901896
18911897 // dequant shaders
18921898 ggml_vk_create_pipeline (device, device->pipeline_dequant [GGML_TYPE_F32 ], " f32_to_f16" , dequant_f32_len, dequant_f32_data, " main" , 2 , 5 * sizeof (uint32_t ), {256 * 16 , 1 , 1 }, {}, 1 );
@@ -2243,13 +2249,16 @@ static vk_device ggml_vk_get_device(size_t idx) {
22432249
22442250 device->pipeline_robustness = pl_robustness_features.pipelineRobustness ;
22452251
2252+ if (device->subgroup_size_control ) {
2253+ device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize ;
2254+ device->subgroup_max_size = subgroup_size_control_props.maxSubgroupSize ;
2255+ }
2256+
22462257 device->subgroup_size_control = device->subgroup_size_control &&
22472258 (subgroup_size_control_props.requiredSubgroupSizeStages & vk::ShaderStageFlagBits::eCompute) &&
22482259 subgroup_size_control_features.subgroupSizeControl ;
22492260
22502261 if (device->subgroup_size_control ) {
2251- device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize ;
2252- device->subgroup_max_size = subgroup_size_control_props.maxSubgroupSize ;
22532262 device->subgroup_require_full_support = subgroup_size_control_features.computeFullSubgroups ;
22542263 device_extensions.push_back (" VK_EXT_subgroup_size_control" );
22552264 }
0 commit comments