4444
4545#define MAX_VK_BUFFERS 256
4646
47- #ifndef K_QUANTS_PER_ITERATION
48- #define K_QUANTS_PER_ITERATION 1
49- #else
50- static_assert (K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2 , " K_QUANTS_PER_ITERATION must be 1 or 2" );
51- #endif
52-
5347#define VK_CHECK (err, msg ) \
5448 do { \
5549 vk::Result err_ = (err); \
@@ -1792,10 +1786,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
17921786 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f32_f32" , mul_mat_vec_q5_0_f32_f32_len, mul_mat_vec_q5_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
17931787 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f32_f32" , mul_mat_vec_q5_1_f32_f32_len, mul_mat_vec_q5_1_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
17941788 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f32_f32" , mul_mat_vec_q8_0_f32_f32_len, mul_mat_vec_q8_0_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size , 1 }, 1 , true );
1795- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_q2_k_f32_f32" , mul_mat_vec_q2_k_f32_f32_len, mul_mat_vec_q2_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1796- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_q3_k_f32_f32" , mul_mat_vec_q3_k_f32_f32_len, mul_mat_vec_q3_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1797- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_q4_k_f32_f32" , mul_mat_vec_q4_k_f32_f32_len, mul_mat_vec_q4_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1798- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_q5_k_f32_f32" , mul_mat_vec_q5_k_f32_f32_len, mul_mat_vec_q5_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1789+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_q2_k_f32_f32" , mul_mat_vec_q2_k_f32_f32_len, mul_mat_vec_q2_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1790+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_q3_k_f32_f32" , mul_mat_vec_q3_k_f32_f32_len, mul_mat_vec_q3_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1791+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_q4_k_f32_f32" , mul_mat_vec_q4_k_f32_f32_len, mul_mat_vec_q4_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1792+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_q5_k_f32_f32" , mul_mat_vec_q5_k_f32_f32_len, mul_mat_vec_q5_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
17991793 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_q6_k_f32_f32" , mul_mat_vec_q6_k_f32_f32_len, mul_mat_vec_q6_k_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18001794 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f32_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f32_f32" , mul_mat_vec_iq4_nl_f32_f32_len, mul_mat_vec_iq4_nl_f32_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
18011795
@@ -1806,10 +1800,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
18061800 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_q5_0_f16_f32" , mul_mat_vec_q5_0_f16_f32_len, mul_mat_vec_q5_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
18071801 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_q5_1_f16_f32" , mul_mat_vec_q5_1_f16_f32_len, mul_mat_vec_q5_1_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
18081802 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_q8_0_f16_f32" , mul_mat_vec_q8_0_f16_f32_len, mul_mat_vec_q8_0_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device->subgroup_size , 1 }, 1 , true );
1809- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_q2_k_f16_f32" , mul_mat_vec_q2_k_f16_f32_len, mul_mat_vec_q2_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1810- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_q3_k_f16_f32" , mul_mat_vec_q3_k_f16_f32_len, mul_mat_vec_q3_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1811- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_q4_k_f16_f32" , mul_mat_vec_q4_k_f16_f32_len, mul_mat_vec_q4_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1812- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_q5_k_f16_f32" , mul_mat_vec_q5_k_f16_f32_len, mul_mat_vec_q5_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1803+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_q2_k_f16_f32" , mul_mat_vec_q2_k_f16_f32_len, mul_mat_vec_q2_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1804+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_q3_k_f16_f32" , mul_mat_vec_q3_k_f16_f32_len, mul_mat_vec_q3_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1805+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_q4_k_f16_f32" , mul_mat_vec_q4_k_f16_f32_len, mul_mat_vec_q4_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1806+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_q5_k_f16_f32" , mul_mat_vec_q5_k_f16_f32_len, mul_mat_vec_q5_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
18131807 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_q6_k_f16_f32" , mul_mat_vec_q6_k_f16_f32_len, mul_mat_vec_q6_k_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18141808 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_f16_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_iq4_nl_f16_f32" , mul_mat_vec_iq4_nl_f16_f32_len, mul_mat_vec_iq4_nl_f16_f32_data, " main" , 3 , sizeof (vk_mat_vec_push_constants), {2 , 1 , 1 }, {device->subgroup_size }, 1 , true );
18151809
@@ -1820,10 +1814,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
18201814 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_0], " mul_mat_vec_id_q5_0_f32" , mul_mat_vec_id_q5_0_f32_len, mul_mat_vec_id_q5_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
18211815 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_1], " mul_mat_vec_id_q5_1_f32" , mul_mat_vec_id_q5_1_f32_len, mul_mat_vec_id_q5_1_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
18221816 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q8_0], " mul_mat_vec_id_q8_0_f32" , mul_mat_vec_id_q8_0_f32_len, mul_mat_vec_id_q8_0_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device->subgroup_size , 1 }, 1 , true );
1823- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_id_q2_k_f32" , mul_mat_vec_id_q2_k_f32_len, mul_mat_vec_id_q2_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1824- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_id_q3_k_f32" , mul_mat_vec_id_q3_k_f32_len, mul_mat_vec_id_q3_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1825- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_id_q4_k_f32" , mul_mat_vec_id_q4_k_f32_len, mul_mat_vec_id_q4_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1826- ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_id_q5_k_f32" , mul_mat_vec_id_q5_k_f32_len, mul_mat_vec_id_q5_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {device-> subgroup_size }, 1 , true );
1817+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q2_K], " mul_mat_vec_id_q2_k_f32" , mul_mat_vec_id_q2_k_f32_len, mul_mat_vec_id_q2_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1818+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q3_K], " mul_mat_vec_id_q3_k_f32" , mul_mat_vec_id_q3_k_f32_len, mul_mat_vec_id_q3_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1819+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q4_K], " mul_mat_vec_id_q4_k_f32" , mul_mat_vec_id_q4_k_f32_len, mul_mat_vec_id_q4_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
1820+ ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q5_K], " mul_mat_vec_id_q5_k_f32" , mul_mat_vec_id_q5_k_f32_len, mul_mat_vec_id_q5_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16 }, 1 , true );
18271821 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_Q6_K], " mul_mat_vec_id_q6_k_f32" , mul_mat_vec_id_q6_k_f32_len, mul_mat_vec_id_q6_k_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {1 , 1 , 1 }, {subgroup_size_16}, 1 , true );
18281822 ggml_vk_create_pipeline (device, device->pipeline_dequant_mul_mat_vec_id_f32 [GGML_TYPE_IQ4_NL], " mul_mat_vec_id_iq4_nl_f32" , mul_mat_vec_id_iq4_nl_f32_len, mul_mat_vec_id_iq4_nl_f32_data, " main" , 4 , sizeof (vk_mat_vec_id_push_constants), {2 , 1 , 1 }, {device->subgroup_size , 2 }, 1 , true );
18291823
0 commit comments