@@ -1783,48 +1783,54 @@ static void ggml_vk_load_shaders(vk_device& device) {
17831783 }
17841784
17851785 // mul mat vec
1786- // computing four rows per workgroup is a benefit for Q4_0 -> Q5_1, but not for Q8_0
1786+
1787+ // AMD GCN graphics cards (identified below as an AMD vendor id with a subgroup size of 64) perform best when the number of rows per shader is doubled
1788+ uint32_t rm = 1 ;
1789+ if ((device->vendor_id == VK_VENDOR_ID_AMD) && (device->subgroup_size == 64 ))
1790+ rm = 2 ;
1791+
1792+ // computing additional rows per workgroup is a benefit for Q4_0 -> Q5_1, but not for Q8_0.
17871793 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_F32 ], " mul_mat_vec_f32_f32_f32" , mul_mat_vec_f32_f32_f32_len, mul_mat_vec_f32_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
17881794 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_F16 ], " mul_mat_vec_f16_f32_f32" , mul_mat_vec_f16_f32_f32_len, mul_mat_vec_f16_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
1789- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_q4_0_f32_f32" , mul_mat_vec_q4_0_f32_f32_len, mul_mat_vec_q4_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1790- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_q4_1_f32_f32" , mul_mat_vec_q4_1_f32_f32_len, mul_mat_vec_q4_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1791- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f32_f32" , mul_mat_vec_q5_0_f32_f32_len, mul_mat_vec_q5_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1792- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f32_f32" , mul_mat_vec_q5_1_f32_f32_len, mul_mat_vec_q5_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1793- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f32_f32" , mul_mat_vec_q8_0_f32_f32_len, mul_mat_vec_q8_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1795+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_q4_0_f32_f32" , mul_mat_vec_q4_0_f32_f32_len, mul_mat_vec_q4_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1796+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_q4_1_f32_f32" , mul_mat_vec_q4_1_f32_f32_len, mul_mat_vec_q4_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1797+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f32_f32" , mul_mat_vec_q5_0_f32_f32_len, mul_mat_vec_q5_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1798+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f32_f32" , mul_mat_vec_q5_1_f32_f32_len, mul_mat_vec_q5_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1799+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f32_f32" , mul_mat_vec_q8_0_f32_f32_len, mul_mat_vec_q8_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 *rm , 1 , 1 }, {device->subgroup_size , 1 *rm }, 1 , true );
17941800 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_q2_k_f32_f32" , mul_mat_vec_q2_k_f32_f32_len, mul_mat_vec_q2_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
17951801 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_q3_k_f32_f32" , mul_mat_vec_q3_k_f32_f32_len, mul_mat_vec_q3_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
17961802 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_q4_k_f32_f32" , mul_mat_vec_q4_k_f32_f32_len, mul_mat_vec_q4_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
17971803 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_q5_k_f32_f32" , mul_mat_vec_q5_k_f32_f32_len, mul_mat_vec_q5_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
17981804 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_q6_k_f32_f32" , mul_mat_vec_q6_k_f32_f32_len, mul_mat_vec_q6_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
1799- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f32_f32" , mul_mat_vec_iq4_nl_f32_f32_len, mul_mat_vec_iq4_nl_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1805+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f32_f32" , mul_mat_vec_iq4_nl_f32_f32_len, mul_mat_vec_iq4_nl_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
18001806
18011807 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_F32 ], " mul_mat_vec_f32_f16_f32" , mul_mat_vec_f32_f16_f32_len, mul_mat_vec_f32_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
18021808 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_F16 ], " mul_mat_vec_f16_f16_f32" , mul_mat_vec_f16_f16_f32_len, mul_mat_vec_f16_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
1803- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_q4_0_f16_f32" , mul_mat_vec_q4_0_f16_f32_len, mul_mat_vec_q4_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1804- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_q4_1_f16_f32" , mul_mat_vec_q4_1_f16_f32_len, mul_mat_vec_q4_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1805- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f16_f32" , mul_mat_vec_q5_0_f16_f32_len, mul_mat_vec_q5_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1806- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f16_f32" , mul_mat_vec_q5_1_f16_f32_len, mul_mat_vec_q5_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1807- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f16_f32" , mul_mat_vec_q8_0_f16_f32_len, mul_mat_vec_q8_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1809+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_q4_0_f16_f32" , mul_mat_vec_q4_0_f16_f32_len, mul_mat_vec_q4_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1810+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_q4_1_f16_f32" , mul_mat_vec_q4_1_f16_f32_len, mul_mat_vec_q4_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1811+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f16_f32" , mul_mat_vec_q5_0_f16_f32_len, mul_mat_vec_q5_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1812+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f16_f32" , mul_mat_vec_q5_1_f16_f32_len, mul_mat_vec_q5_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1813+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f16_f32" , mul_mat_vec_q8_0_f16_f32_len, mul_mat_vec_q8_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 *rm , 1 , 1 }, {device->subgroup_size , 1 *rm }, 1 , true );
18081814 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_q2_k_f16_f32" , mul_mat_vec_q2_k_f16_f32_len, mul_mat_vec_q2_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18091815 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_q3_k_f16_f32" , mul_mat_vec_q3_k_f16_f32_len, mul_mat_vec_q3_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18101816 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_q4_k_f16_f32" , mul_mat_vec_q4_k_f16_f32_len, mul_mat_vec_q4_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18111817 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_q5_k_f16_f32" , mul_mat_vec_q5_k_f16_f32_len, mul_mat_vec_q5_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18121818 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_q6_k_f16_f32" , mul_mat_vec_q6_k_f16_f32_len, mul_mat_vec_q6_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
1813- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f16_f32" , mul_mat_vec_iq4_nl_f16_f32_len, mul_mat_vec_iq4_nl_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {4 , 1 , 1 }, {device->subgroup_size }, 1 , true );
1819+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f16_f32" , mul_mat_vec_iq4_nl_f16_f32_len, mul_mat_vec_iq4_nl_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true ); // second list now carries 2*rm alongside subgroup_size, matching the f32_f32 and id_f32 IQ4_NL pipelines; it was previously omitted here
18141820
18151821 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_F32 ], " mul_mat_vec_id_f32_f32" , mul_mat_vec_id_f32_f32_len, mul_mat_vec_id_f32_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
18161822 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_F16 ], " mul_mat_vec_id_f16_f32" , mul_mat_vec_id_f16_f32_len, mul_mat_vec_id_f16_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 );
1817- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_id_q4_0_f32" , mul_mat_vec_id_q4_0_f32_len, mul_mat_vec_id_q4_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1818- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_id_q4_1_f32" , mul_mat_vec_id_q4_1_f32_len, mul_mat_vec_id_q4_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1819- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_id_q5_0_f32" , mul_mat_vec_id_q5_0_f32_len, mul_mat_vec_id_q5_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1820- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_id_q5_1_f32" , mul_mat_vec_id_q5_1_f32_len, mul_mat_vec_id_q5_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1821- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_id_q8_0_f32" , mul_mat_vec_id_q8_0_f32_len, mul_mat_vec_id_q8_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
1823+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_0], " mul_mat_vec_id_q4_0_f32" , mul_mat_vec_id_q4_0_f32_len, mul_mat_vec_id_q4_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1824+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_1], " mul_mat_vec_id_q4_1_f32" , mul_mat_vec_id_q4_1_f32_len, mul_mat_vec_id_q4_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1825+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_id_q5_0_f32" , mul_mat_vec_id_q5_0_f32_len, mul_mat_vec_id_q5_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1826+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_id_q5_1_f32" , mul_mat_vec_id_q5_1_f32_len, mul_mat_vec_id_q5_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
1827+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_id_q8_0_f32" , mul_mat_vec_id_q8_0_f32_len, mul_mat_vec_id_q8_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 *rm , 1 , 1 }, {device->subgroup_size , 1 *rm }, 1 , true );
18221828 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_id_q2_k_f32" , mul_mat_vec_id_q2_k_f32_len, mul_mat_vec_id_q2_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18231829 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_id_q3_k_f32" , mul_mat_vec_id_q3_k_f32_len, mul_mat_vec_id_q3_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18241830 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_id_q4_k_f32" , mul_mat_vec_id_q4_k_f32_len, mul_mat_vec_id_q4_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18251831 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_id_q5_k_f32" , mul_mat_vec_id_q5_k_f32_len, mul_mat_vec_id_q5_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18261832 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_id_q6_k_f32" , mul_mat_vec_id_q6_k_f32_len, mul_mat_vec_id_q6_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
1827- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_id_iq4_nl_f32" , mul_mat_vec_id_iq4_nl_f32_len, mul_mat_vec_id_iq4_nl_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {4 , 1 , 1 }, {device->subgroup_size , 4 }, 1 , true );
1833+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_id_iq4_nl_f32" , mul_mat_vec_id_iq4_nl_f32_len, mul_mat_vec_id_iq4_nl_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 *rm , 1 , 1 }, {device->subgroup_size , 2 *rm }, 1 , true );
18281834
18291835 // dequant shaders
18301836 ggml_vk_create_pipeline (device, device->pipeline_dequant [GGML_TYPE_F32 ], " f32_to_f16" , dequant_f32_len, dequant_f32_data, " main" , 2 , 5 * sizeof (uint32_t ), {256 * 16 , 1 , 1 }, {}, 1 );
0 commit comments