@@ -1673,31 +1673,31 @@ static void ggml_vk_load_shaders(vk_device& device) {
16731673 CREATE_MM2 (pipeline_matmul_f16_f32, matmul_f16_f32, wg_denoms, warptile, vk_mat_mat_push_constants, 3 , );
16741674
16751675 if (device->coopmat_acc_f16_support ) {
1676- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc , matmul_q4_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1677- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f16acc , matmul_q4_1_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1678- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f16acc , matmul_q5_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1679- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f16acc , matmul_q5_1_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1680- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f16acc , matmul_q8_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1681-
1682- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f16acc , matmul_q2_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1683- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f16acc , matmul_q3_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1684- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f16acc , matmul_q4_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1685- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f16acc , matmul_q5_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1686- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f16acc , matmul_q6_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1687- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc , matmul_iq4_nl_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1676+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc , matmul_q4_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1677+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f16acc , matmul_q4_1_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1678+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f16acc , matmul_q5_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1679+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f16acc , matmul_q5_1_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1680+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f16acc , matmul_q8_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1681+
1682+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f16acc , matmul_q2_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1683+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f16acc , matmul_q3_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1684+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f16acc , matmul_q4_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1685+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f16acc , matmul_q5_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1686+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f16acc , matmul_q6_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1687+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc , matmul_iq4_nl_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
16881688 } else {
1689- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc , matmul_q4_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1690- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f16acc , matmul_q4_1_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1691- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f16acc , matmul_q5_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1692- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f16acc , matmul_q5_1_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1693- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f16acc , matmul_q8_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1689+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc , matmul_q4_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1690+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_1].f16acc , matmul_q4_1_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1691+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_0].f16acc , matmul_q5_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1692+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_1].f16acc , matmul_q5_1_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1693+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q8_0].f16acc , matmul_q8_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
16941694
1695- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f16acc , matmul_q2_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1696- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f16acc , matmul_q3_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1697- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f16acc , matmul_q4_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1698- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f16acc , matmul_q5_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1699- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f16acc , matmul_q6_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1700- CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc , matmul_iq4_nl_f32, , wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1695+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q2_K].f16acc , matmul_q2_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1696+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q3_K].f16acc , matmul_q3_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1697+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_K].f16acc , matmul_q4_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1698+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q5_K].f16acc , matmul_q5_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1699+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_Q6_K].f16acc , matmul_q6_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
1700+ CREATE_MM (pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc , matmul_iq4_nl_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_push_constants, 3 , );
17011701 }
17021702
17031703 // If there's not enough shared memory for row_ids and the result tile, don't create these pipelines.
@@ -1707,31 +1707,31 @@ static void ggml_vk_load_shaders(vk_device& device) {
17071707 CREATE_MM2 (pipeline_matmul_id_f16_f32, matmul_id_f16_f32, wg_denoms, warptile, vk_mat_mat_push_constants, 4 , _id);
17081708
17091709 if (device->coopmat_acc_f16_support ) {
1710- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc , matmul_id_q4_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1711- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1].f16acc , matmul_id_q4_1_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1712- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0].f16acc , matmul_id_q5_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1713- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1].f16acc , matmul_id_q5_1_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1714- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0].f16acc , matmul_id_q8_0_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1715-
1716- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K].f16acc , matmul_id_q2_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1717- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K].f16acc , matmul_id_q3_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1718- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K].f16acc , matmul_id_q4_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1719- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K].f16acc , matmul_id_q5_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1720- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K].f16acc , matmul_id_q6_k_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1721- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc , matmul_id_iq4_nl_f32, _f16acc, wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1710+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc , matmul_id_q4_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1711+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1].f16acc , matmul_id_q4_1_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1712+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0].f16acc , matmul_id_q5_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1713+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1].f16acc , matmul_id_q5_1_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1714+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0].f16acc , matmul_id_q8_0_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1715+
1716+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K].f16acc , matmul_id_q2_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1717+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K].f16acc , matmul_id_q3_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1718+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K].f16acc , matmul_id_q4_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1719+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K].f16acc , matmul_id_q5_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1720+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K].f16acc , matmul_id_q6_k_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1721+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc , matmul_id_iq4_nl_f32, _f16acc, mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
17221722 } else {
1723- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc , matmul_id_q4_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1724- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1].f16acc , matmul_id_q4_1_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1725- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0].f16acc , matmul_id_q5_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1726- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1].f16acc , matmul_id_q5_1_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1727- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0].f16acc , matmul_id_q8_0_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1728-
1729- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K].f16acc , matmul_id_q2_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1730- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K].f16acc , matmul_id_q3_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1731- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K].f16acc , matmul_id_q4_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1732- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K].f16acc , matmul_id_q5_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1733- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K].f16acc , matmul_id_q6_k_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1734- CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc , matmul_id_iq4_nl_f32, , wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1723+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc , matmul_id_q4_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1724+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_1].f16acc , matmul_id_q4_1_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1725+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_0].f16acc , matmul_id_q5_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1726+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_1].f16acc , matmul_id_q5_1_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1727+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q8_0].f16acc , matmul_id_q8_0_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1728+
1729+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q2_K].f16acc , matmul_id_q2_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1730+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q3_K].f16acc , matmul_id_q3_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1731+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_K].f16acc , matmul_id_q4_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1732+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q5_K].f16acc , matmul_id_q5_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1733+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q6_K].f16acc , matmul_id_q6_k_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
1734+ CREATE_MM (pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc , matmul_id_iq4_nl_f32, , mmq_wg_denoms , warptile_mmq, vk_mat_mat_id_push_constants, 4 , _id);
17351735 }
17361736 }
17371737#undef CREATE_MM2
0 commit comments