@@ -3315,13 +3315,18 @@ static void ggml_vk_load_shaders(vk_device& device) {
33153315 // the number of rows computed per shader depends on GPU model and quant
33163316 uint32_t rm_stdq = 1;
33173317 uint32_t rm_kq = 2;
3318+ uint32_t rm_stdq_int = 1;
3319+ uint32_t rm_kq_int = 1;
33183320 if (device->vendor_id == VK_VENDOR_ID_AMD) {
33193321 if (device->architecture == AMD_GCN) {
33203322 rm_stdq = 2;
33213323 rm_kq = 4;
3324+ rm_stdq_int = 4;
33223325 }
3323- } else if (device->vendor_id == VK_VENDOR_ID_INTEL)
3326+ } else if (device->vendor_id == VK_VENDOR_ID_INTEL) {
33243327 rm_stdq = 2;
3328+ rm_stdq_int = 2;
3329+ }
33253330 uint32_t rm_iq = 2 * rm_kq;
33263331
33273332 const bool use_subgroups = device->subgroup_arithmetic && device->architecture != vk_device_architecture::AMD_GCN;
@@ -3400,19 +3405,19 @@ static void ggml_vk_load_shaders(vk_device& device) {
34003405 const uint32_t subgroup_size_int = (device->vendor_id == VK_VENDOR_ID_INTEL && device->subgroup_size_control) ? device->subgroup_min_size : device->subgroup_size;
34013406 const uint32_t wg_size_subgroup_int = (w == DMMV_WG_SIZE_SUBGROUP) ? subgroup_size_int : (subgroup_size_int * 4);
34023407
3403- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_0][i], "mul_mat_vec_q4_0_q8_1_f32", arr_dmmv_q4_0_q8_1_f32_len[reduc], arr_dmmv_q4_0_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3404- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_1][i], "mul_mat_vec_q4_1_q8_1_f32", arr_dmmv_q4_1_q8_1_f32_len[reduc], arr_dmmv_q4_1_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3405- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_0][i], "mul_mat_vec_q5_0_q8_1_f32", arr_dmmv_q5_0_q8_1_f32_len[reduc], arr_dmmv_q5_0_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3406- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_1][i], "mul_mat_vec_q5_1_q8_1_f32", arr_dmmv_q5_1_q8_1_f32_len[reduc], arr_dmmv_q5_1_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3407- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q8_0][i], "mul_mat_vec_q8_0_q8_1_f32", arr_dmmv_q8_0_q8_1_f32_len[reduc], arr_dmmv_q8_0_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_stdq , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3408+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_0][i], "mul_mat_vec_q4_0_q8_1_f32", arr_dmmv_q4_0_q8_1_f32_len[reduc], arr_dmmv_q4_0_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3409+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_1][i], "mul_mat_vec_q4_1_q8_1_f32", arr_dmmv_q4_1_q8_1_f32_len[reduc], arr_dmmv_q4_1_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3410+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_0][i], "mul_mat_vec_q5_0_q8_1_f32", arr_dmmv_q5_0_q8_1_f32_len[reduc], arr_dmmv_q5_0_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3411+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_1][i], "mul_mat_vec_q5_1_q8_1_f32", arr_dmmv_q5_1_q8_1_f32_len[reduc], arr_dmmv_q5_1_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3412+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q8_0][i], "mul_mat_vec_q8_0_q8_1_f32", arr_dmmv_q8_0_q8_1_f32_len[reduc], arr_dmmv_q8_0_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
34083413
3409- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_MXFP4][i], "mul_mat_vec_mxfp4_q8_1_f32", arr_dmmv_mxfp4_q8_1_f32_len[reduc], arr_dmmv_mxfp4_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_stdq , 1, 1}, {wg_size_subgroup_int, 1*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3414+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_MXFP4][i], "mul_mat_vec_mxfp4_q8_1_f32", arr_dmmv_mxfp4_q8_1_f32_len[reduc], arr_dmmv_mxfp4_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq_int , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
34103415
3411- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q2_K][i], "mul_mat_vec_q2_k_q8_1_f32", arr_dmmv_q2_k_q8_1_f32_len[reduc], arr_dmmv_q2_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3412- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q3_K][i], "mul_mat_vec_q3_k_q8_1_f32", arr_dmmv_q3_k_q8_1_f32_len[reduc], arr_dmmv_q3_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3413- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_K][i], "mul_mat_vec_q4_k_q8_1_f32", arr_dmmv_q4_k_q8_1_f32_len[reduc], arr_dmmv_q4_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3414- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_K][i], "mul_mat_vec_q5_k_q8_1_f32", arr_dmmv_q5_k_q8_1_f32_len[reduc], arr_dmmv_q5_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3415- ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q6_K][i], "mul_mat_vec_q6_k_q8_1_f32", arr_dmmv_q6_k_q8_1_f32_len[reduc], arr_dmmv_q6_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq , 1, 1}, {wg_size_subgroup_int, 2*rm_stdq , i+1}, 1, true, use_subgroups, subgroup_size_int);
3416+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q2_K][i], "mul_mat_vec_q2_k_q8_1_f32", arr_dmmv_q2_k_q8_1_f32_len[reduc], arr_dmmv_q2_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 2*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3417+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q3_K][i], "mul_mat_vec_q3_k_q8_1_f32", arr_dmmv_q3_k_q8_1_f32_len[reduc], arr_dmmv_q3_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3418+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q4_K][i], "mul_mat_vec_q4_k_q8_1_f32", arr_dmmv_q4_k_q8_1_f32_len[reduc], arr_dmmv_q4_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3419+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q5_K][i], "mul_mat_vec_q5_k_q8_1_f32", arr_dmmv_q5_k_q8_1_f32_len[reduc], arr_dmmv_q5_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
3420+ ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_q8_1_f32[w][GGML_TYPE_Q6_K][i], "mul_mat_vec_q6_k_q8_1_f32", arr_dmmv_q6_k_q8_1_f32_len[reduc], arr_dmmv_q6_k_q8_1_f32_data[reduc], "main", 3, sizeof(vk_mat_vec_push_constants), {1*rm_kq_int , 1, 1}, {wg_size_subgroup_int, 1*rm_kq_int , i+1}, 1, true, use_subgroups, subgroup_size_int);
34163421 }
34173422#endif // GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT
34183423 }
0 commit comments