1 file changed: +23 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -61,14 +61,15 @@ enum mmvq_parameter_table_id {
6161 MMVQ_PARAMETERS_GENERIC = 0 ,
6262 MMVQ_PARAMETERS_GCN,
6363 MMVQ_PARAMETERS_RDNA2,
64+ MMVQ_PARAMETERS_RDNA3,
6465 MMVQ_PARAMETERS_RDNA4
6566};
6667
6768static constexpr __device__ mmvq_parameter_table_id get_device_table_id () {
6869#if defined(RDNA4)
6970 return MMVQ_PARAMETERS_RDNA4;
7071#elif defined(RDNA3)
71- return MMVQ_PARAMETERS_RDNA4 ;
72+ return MMVQ_PARAMETERS_RDNA3 ;
7273#elif defined(RDNA2)
7374 return MMVQ_PARAMETERS_RDNA2;
7475#elif defined(GCN) || defined(CDNA)
@@ -83,7 +84,7 @@ static __host__ mmvq_parameter_table_id get_device_table_id(int cc) {
8384 return MMVQ_PARAMETERS_RDNA4;
8485 }
8586 if (GGML_CUDA_CC_IS_RDNA3 (cc)) {
86- return MMVQ_PARAMETERS_RDNA4 ;
87+ return MMVQ_PARAMETERS_RDNA3 ;
8788 }
8889 if (GGML_CUDA_CC_IS_RDNA2 (cc)) {
8990 return MMVQ_PARAMETERS_RDNA2;
@@ -149,6 +150,26 @@ static constexpr __host__ __device__ int calc_nwarps(ggml_type type, int ncols_d
149150 }
150151 return 1 ;
151152 }
153+ if (table_id == MMVQ_PARAMETERS_RDNA3) {
154+ // RDNA3 (W7900): stricter whitelist than RDNA4.
155+ // Q2_K / Q5_K / IQ4_XS regress in full quant sweeps.
156+ if (ncols_dst == 1 ) {
157+ switch (type) {
158+ case GGML_TYPE_Q4_0:
159+ case GGML_TYPE_Q4_1:
160+ case GGML_TYPE_Q5_0:
161+ case GGML_TYPE_Q5_1:
162+ case GGML_TYPE_Q8_0:
163+ case GGML_TYPE_Q4_K:
164+ case GGML_TYPE_Q6_K:
165+ case GGML_TYPE_IQ4_NL:
166+ return 8 ;
167+ default :
168+ return 1 ;
169+ }
170+ }
171+ return 1 ;
172+ }
152173 return 1 ;
153174}
154175
You can’t perform that action at this time.
0 commit comments