Skip to content

Commit 2df360d

Browse files
committed
mmvq: add dedicated RDNA3 parameter table
1 parent 45b6f6a commit 2df360d

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

ggml/src/ggml-cuda/mmvq.cu

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,14 +61,15 @@ enum mmvq_parameter_table_id {
6161
MMVQ_PARAMETERS_GENERIC = 0,
6262
MMVQ_PARAMETERS_GCN,
6363
MMVQ_PARAMETERS_RDNA2,
64+
MMVQ_PARAMETERS_RDNA3,
6465
MMVQ_PARAMETERS_RDNA4
6566
};
6667

6768
static constexpr __device__ mmvq_parameter_table_id get_device_table_id() {
6869
#if defined(RDNA4)
6970
return MMVQ_PARAMETERS_RDNA4;
7071
#elif defined(RDNA3)
71-
return MMVQ_PARAMETERS_RDNA4;
72+
return MMVQ_PARAMETERS_RDNA3;
7273
#elif defined(RDNA2)
7374
return MMVQ_PARAMETERS_RDNA2;
7475
#elif defined(GCN) || defined(CDNA)
@@ -83,7 +84,7 @@ static __host__ mmvq_parameter_table_id get_device_table_id(int cc) {
8384
return MMVQ_PARAMETERS_RDNA4;
8485
}
8586
if (GGML_CUDA_CC_IS_RDNA3(cc)) {
86-
return MMVQ_PARAMETERS_RDNA4;
87+
return MMVQ_PARAMETERS_RDNA3;
8788
}
8889
if (GGML_CUDA_CC_IS_RDNA2(cc)) {
8990
return MMVQ_PARAMETERS_RDNA2;
@@ -149,6 +150,26 @@ static constexpr __host__ __device__ int calc_nwarps(ggml_type type, int ncols_d
149150
}
150151
return 1;
151152
}
153+
if (table_id == MMVQ_PARAMETERS_RDNA3) {
154+
// RDNA3 (W7900): stricter whitelist than RDNA4.
155+
// Q2_K / Q5_K / IQ4_XS regress in full quant sweeps.
156+
if (ncols_dst == 1) {
157+
switch (type) {
158+
case GGML_TYPE_Q4_0:
159+
case GGML_TYPE_Q4_1:
160+
case GGML_TYPE_Q5_0:
161+
case GGML_TYPE_Q5_1:
162+
case GGML_TYPE_Q8_0:
163+
case GGML_TYPE_Q4_K:
164+
case GGML_TYPE_Q6_K:
165+
case GGML_TYPE_IQ4_NL:
166+
return 8;
167+
default:
168+
return 1;
169+
}
170+
}
171+
return 1;
172+
}
152173
return 1;
153174
}
154175

0 commit comments

Comments
 (0)