Skip to content

Commit 6ea1e55

Browse files
author
horasal
committed
add MXFP6-E2M3
1 parent d940de5 commit 6ea1e55

File tree

17 files changed

+552
-10
lines changed

17 files changed

+552
-10
lines changed

ggml/include/ggml.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,8 @@ extern "C" {
418418
// GGML_TYPE_IQ4_NL_8_8 = 38,
419419
GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
420420
GGML_TYPE_MXFP6_E3M2 = 40,
421-
GGML_TYPE_COUNT = 41,
421+
GGML_TYPE_MXFP6_E2M3 = 41,
422+
GGML_TYPE_COUNT = 42,
422423
};
423424

424425
// precision
@@ -455,6 +456,7 @@ extern "C" {
455456
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
456457
GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
457458
GGML_FTYPE_MOSTLY_MXFP6_E3M2 = 26, // except 1d tensors
459+
GGML_FTYPE_MOSTLY_MXFP6_E2M3 = 27, // except 1d tensors
458460
};
459461

460462
// available tensor operations:

ggml/src/ggml-common.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ typedef sycl::half2 ggml_half2;
106106
// FIXME: QR(Value Per Byte) does not match this
107107
#define QR_MXFP6_E3M2 2
108108

109+
// Derived from this format's own block size (QK_MXFP6_E2M3), not the E3M2 sibling's.
// With QK = 32 this evaluates to 6: the 24 packed bytes (QK * 3 / 4) counted in 4-byte units.
#define QI_MXFP6_E2M3 (QK_MXFP6_E2M3 * 3 / (4 * 4))
// FIXME: QR(Value Per Byte) does not match this
#define QR_MXFP6_E2M3 2
112+
109113
#define QI5_0 (QK5_0 / (4 * QR5_0))
110114
#define QR5_0 2
111115

@@ -205,6 +209,12 @@ typedef struct {
205209
} block_mxfp6_e3m2;
206210
static_assert(sizeof(block_mxfp6_e3m2) == sizeof(uint8_t) + QK_MXFP6_E3M2 * 3 / 4, "wrong mxfp6_e3m2 block size/padding");
207211

212+
// Storage layout of one MXFP6-E2M3 block: QK_MXFP6_E2M3 elements that share a single scale byte.
// The static_assert below pins the struct to 1 + QK * 3 / 4 bytes, i.e. no padding.
// NOTE(review): "E2M3" presumably means 1 sign + 2 exponent + 3 mantissa bits per element — confirm against the MX spec.
#define QK_MXFP6_E2M3 32
213+
typedef struct {
214+
uint8_t e; // shared block scale, E8M0
215+
uint8_t qs[QK_MXFP6_E2M3 * 3 / 4]; // 6-bit codes packed back to back: 4 codes per 3 bytes, first code in the low bits of byte 0
216+
} block_mxfp6_e2m3;
217+
static_assert(sizeof(block_mxfp6_e2m3) == sizeof(uint8_t) + QK_MXFP6_E2M3 * 3 / 4, "wrong mxfp6_e2m3 block size/padding");
208218

209219
#define QK5_0 32
210220
typedef struct {
@@ -1118,6 +1128,20 @@ GGML_TABLE_BEGIN(int16_t, kvalues_mxfp6_e3m2, 64)
11181128
-256, -320, -384, -448,
11191129
GGML_TABLE_END()
11201130

1131+
// 8^(-1)
// NOTE(review): presumably the factor that undoes the integer scaling of the table below;
// it is not referenced by the vec_dot kernels added in this change — confirm where it is applied.
1132+
#define MXFP6_SCALER 0.125f
1133+
// Lookup table from a 6-bit MXFP6-E2M3 code (0..63) to a signed 16-bit integer value.
// Entries 32..63 repeat entries 0..31 with the sign flipped, so bit 5 of the code acts as the sign.
// Within each half the step between neighbours is 1, 1, 2, 4 for successive groups of 8 codes.
// NOTE(review): the magnitudes look like the E2M3 values multiplied by 8 — confirm against the quantizer.
GGML_TABLE_BEGIN(int16_t, kvalues_mxfp6_e2m3, 64)
1134+
0, 1, 2, 3, 4, 5, 6, 7, // codes 0..7
1135+
8, 9, 10, 11, 12, 13, 14, 15, // codes 8..15
1136+
16, 18, 20, 22, 24, 26, 28, 30, // codes 16..23
1137+
32, 36, 40, 44, 48, 52, 56, 60, // codes 24..31
1138+
0, -1, -2, -3, -4, -5, -6, -7, // codes 32..39
1139+
-8, -9, -10, -11, -12, -13, -14, -15, // codes 40..47
1140+
-16, -18, -20, -22, -24, -26, -28, -30, // codes 48..55
1141+
-32, -36, -40, -44, -48, -52, -56, -60 // codes 56..63
1142+
GGML_TABLE_END()
1143+
1144+
11211145
#define NGRID_IQ1S 2048
11221146
#define IQ1S_DELTA 0.125f
11231147
#define IQ1M_DELTA 0.125f

ggml/src/ggml-cpu/arch/x86/quants.c

Lines changed: 129 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, con
860860
int ib = 0;
861861
float sumf = 0;
862862

863-
#if 0 //defined __AVX2__
863+
#if defined __AVX2__
864864
__m256 accum_ps = _mm256_setzero_ps();
865865

866866
for (; ib + 1 < nb; ib += 2) {
@@ -969,6 +969,134 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, con
969969
*s = sumf;
970970
}
971971

972+
#if defined __AVX2__
// Integer dot product of one MXFP6-E2M3 block with one Q8_0 block.
//
// The 32 six-bit codes in xb->qs are expanded through kvalues_mxfp6_e2m3 into
// int16 values, multiplied with the sign-extended int8 activations in yb->qs
// and pairwise added. Returns 8 x int32 partial sums; the caller applies the
// block scales and performs the horizontal reduction.
static inline __m256i mxfp6_e2m3_q8_0_dot_i32_avx2(const block_mxfp6_e2m3 * GGML_RESTRICT xb, const block_q8_0 * GGML_RESTRICT yb) {
    int16_t k_vals[QK_MXFP6_E2M3];

    // 3 packed bytes -> 4 codes, 8 groups per block
    const uint8_t * q3 = xb->qs;
    for (int j = 0; j < QK_MXFP6_E2M3 / 4; ++j) {
        const uint8_t b0 = q3[0];
        const uint8_t b1 = q3[1];
        const uint8_t b2 = q3[2];

        k_vals[4*j + 0] = kvalues_mxfp6_e2m3[b0 & 0x3F];
        k_vals[4*j + 1] = kvalues_mxfp6_e2m3[(b0 >> 6) | ((b1 & 0x0F) << 2)];
        k_vals[4*j + 2] = kvalues_mxfp6_e2m3[(b1 >> 4) | ((b2 & 0x03) << 4)];
        k_vals[4*j + 3] = kvalues_mxfp6_e2m3[b2 >> 2];

        q3 += 3;
    }

    // k_vals is a plain stack array with no 32-byte alignment guarantee, so the
    // aligned _mm256_load_si256 must not be used on it.
    const __m256i k_lo = _mm256_loadu_si256((const __m256i *)(k_vals +  0)); // k-vals 0-15
    const __m256i k_hi = _mm256_loadu_si256((const __m256i *)(k_vals + 16)); // k-vals 16-31

    const __m256i q8_all = _mm256_loadu_si256((const __m256i *)yb->qs);
    const __m256i q8_lo  = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(q8_all, 0)); // q-vals 0-15
    const __m256i q8_hi  = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(q8_all, 1)); // q-vals 16-31

    // madd: multiply int16 lanes and add adjacent pairs -> 8 x s32 per half
    return _mm256_add_epi32(_mm256_madd_epi16(k_lo, q8_lo), _mm256_madd_epi16(k_hi, q8_hi));
}
#endif

// Dot product of a row of MXFP6-E2M3 blocks (vx) with a row of Q8_0 blocks (vy).
//
//   n    number of elements; must be a multiple of QK_MXFP6_E2M3
//   s    receives the single float result
//   vx   block_mxfp6_e2m3 array with n / QK_MXFP6_E2M3 entries
//   vy   block_q8_0 array with the same number of entries
//   nrc  must be 1
//   bs, bx, by are unused
//
// With AVX2, blocks are consumed two at a time; any remaining block (and every
// block when AVX2 is unavailable) goes through the scalar loop at the end.
void ggml_vec_dot_mxfp6_e2m3_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
    assert(nrc == 1);
    UNUSED(nrc);
    UNUSED(bx);
    UNUSED(by);
    UNUSED(bs);
    assert(n % QK_MXFP6_E2M3 == 0);
    static_assert(QK_MXFP6_E2M3 == QK8_0, "QK_MXFP6_E2M3 and QK8_0 must be the same");
    // the unpacking and the 2 x 16-lane vector layout are written for 32-element blocks
    static_assert(QK_MXFP6_E2M3 == 32, "QK_MXFP6_E2M3 must be 32");

    const block_mxfp6_e2m3 * GGML_RESTRICT x = vx;
    const block_q8_0 * GGML_RESTRICT y = vy;

    const int nb = n / QK_MXFP6_E2M3;

    int ib = 0;
    float sumf = 0;

#if defined __AVX2__
    __m256 accum_ps = _mm256_setzero_ps();

    for (; ib + 1 < nb; ib += 2) {
        const block_mxfp6_e2m3 * x1 = &x[ib + 0];
        const block_q8_0       * y1 = &y[ib + 0];

        const block_mxfp6_e2m3 * x2 = &x[ib + 1];
        const block_q8_0       * y2 = &y[ib + 1];

        const __m256 p_1_ps = _mm256_cvtepi32_ps(mxfp6_e2m3_q8_0_dot_i32_avx2(x1, y1));
        const __m256 p_2_ps = _mm256_cvtepi32_ps(mxfp6_e2m3_q8_0_dot_i32_avx2(x2, y2));

        // (d = d_y * d_x)
        const float d1 = GGML_CPU_FP16_TO_FP32(y1->d) * GGML_E8M0_TO_FP32_HALF(x1->e);
        const float d2 = GGML_CPU_FP16_TO_FP32(y2->d) * GGML_E8M0_TO_FP32_HALF(x2->e);

        const __m256 d_1_ps = _mm256_set1_ps(d1);
        const __m256 d_2_ps = _mm256_set1_ps(d2);

        // Fused Multiply-Add (FMA): accum = (d * p) + accum
        // NOTE(review): _mm256_fmadd_ps needs the FMA extension, which __AVX2__ alone
        // does not guarantee — confirm the build always enables both together.
        accum_ps = _mm256_fmadd_ps(d_1_ps, p_1_ps, accum_ps);
        accum_ps = _mm256_fmadd_ps(d_2_ps, p_2_ps, accum_ps);
    }

    sumf = hsum_float_8(accum_ps);
#endif

    // scalar path: leftover block after the paired loop, or all blocks without AVX2
    for (; ib < nb; ++ib) {
        const float d = GGML_CPU_FP16_TO_FP32(y[ib].d) * GGML_E8M0_TO_FP32_HALF(x[ib].e);

        int sumi = 0;

        for (int j = 0; j < QK_MXFP6_E2M3 / 4; ++j) {
            const uint8_t * q3 = x[ib].qs + 3 * j;
            const int8_t  * q8 = y[ib].qs + 4 * j;

            const uint8_t b0 = q3[0];
            const uint8_t b1 = q3[1];
            const uint8_t b2 = q3[2];

            const uint8_t v0_idx = b0 & 0x3F;
            const uint8_t v1_idx = (b0 >> 6) | ((b1 & 0x0F) << 2);
            const uint8_t v2_idx = (b1 >> 4) | ((b2 & 0x03) << 4);
            const uint8_t v3_idx = b2 >> 2;

            sumi += q8[0] * kvalues_mxfp6_e2m3[v0_idx];
            sumi += q8[1] * kvalues_mxfp6_e2m3[v1_idx];
            sumi += q8[2] * kvalues_mxfp6_e2m3[v2_idx];
            sumi += q8[3] * kvalues_mxfp6_e2m3[v3_idx];
        }
        sumf += d * sumi;
    }

    *s = sumf;
}
1098+
1099+
9721100
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
9731101
const int qk = QK8_0;
9741102
const int nb = n / qk;

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,12 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
265265
.vec_dot_type = GGML_TYPE_Q8_0,
266266
.nrows = 1,
267267
},
268+
[GGML_TYPE_MXFP6_E2M3] = {
269+
.from_float = quantize_row_mxfp6_e2m3,
270+
.vec_dot = ggml_vec_dot_mxfp6_e2m3_q8_0,
271+
.vec_dot_type = GGML_TYPE_Q8_0,
272+
.nrows = 1,
273+
},
268274
[GGML_TYPE_Q2_K] = {
269275
.from_float = quantize_row_q2_K,
270276
.vec_dot = ggml_vec_dot_q2_K_q8_K,

ggml/src/ggml-cpu/ops.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ void ggml_compute_forward_add(
669669
case GGML_TYPE_Q8_0:
670670
case GGML_TYPE_MXFP4:
671671
case GGML_TYPE_MXFP6_E3M2:
672+
case GGML_TYPE_MXFP6_E2M3:
672673
case GGML_TYPE_Q2_K:
673674
case GGML_TYPE_Q3_K:
674675
case GGML_TYPE_Q4_K:
@@ -1119,6 +1120,7 @@ void ggml_compute_forward_add1(
11191120
case GGML_TYPE_Q8_1:
11201121
case GGML_TYPE_MXFP4:
11211122
case GGML_TYPE_MXFP6_E3M2:
1123+
case GGML_TYPE_MXFP6_E2M3:
11221124
case GGML_TYPE_Q2_K:
11231125
case GGML_TYPE_Q3_K:
11241126
case GGML_TYPE_Q4_K:
@@ -1247,6 +1249,7 @@ void ggml_compute_forward_acc(
12471249
case GGML_TYPE_Q8_0:
12481250
case GGML_TYPE_Q8_1:
12491251
case GGML_TYPE_MXFP6_E3M2:
1252+
case GGML_TYPE_MXFP6_E2M3:
12501253
case GGML_TYPE_Q2_K:
12511254
case GGML_TYPE_Q3_K:
12521255
case GGML_TYPE_Q4_K:
@@ -4143,6 +4146,7 @@ void ggml_compute_forward_out_prod(
41434146
case GGML_TYPE_Q5_1:
41444147
case GGML_TYPE_Q8_0:
41454148
case GGML_TYPE_MXFP6_E3M2:
4149+
case GGML_TYPE_MXFP6_E2M3:
41464150
case GGML_TYPE_Q2_K:
41474151
case GGML_TYPE_Q3_K:
41484152
case GGML_TYPE_Q4_K:
@@ -4418,6 +4422,7 @@ void ggml_compute_forward_set(
44184422
case GGML_TYPE_Q8_0:
44194423
case GGML_TYPE_Q8_1:
44204424
case GGML_TYPE_MXFP6_E3M2:
4425+
case GGML_TYPE_MXFP6_E2M3:
44214426
case GGML_TYPE_Q2_K:
44224427
case GGML_TYPE_Q3_K:
44234428
case GGML_TYPE_Q4_K:
@@ -4680,6 +4685,7 @@ void ggml_compute_forward_get_rows(
46804685
case GGML_TYPE_Q8_0:
46814686
case GGML_TYPE_Q8_1:
46824687
case GGML_TYPE_MXFP6_E3M2:
4688+
case GGML_TYPE_MXFP6_E2M3:
46834689
case GGML_TYPE_Q2_K:
46844690
case GGML_TYPE_Q3_K:
46854691
case GGML_TYPE_Q4_K:
@@ -5404,6 +5410,7 @@ void ggml_compute_forward_clamp(
54045410
case GGML_TYPE_Q8_0:
54055411
case GGML_TYPE_Q8_1:
54065412
case GGML_TYPE_MXFP6_E3M2:
5413+
case GGML_TYPE_MXFP6_E2M3:
54075414
case GGML_TYPE_Q2_K:
54085415
case GGML_TYPE_Q3_K:
54095416
case GGML_TYPE_Q4_K:

ggml/src/ggml-cpu/quants.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ void quantize_row_mxfp6_e3m2(const float * GGML_RESTRICT x, void * GGML_RESTRICT
5454
quantize_row_mxfp6_e3m2_ref(x, y, k);
5555
}
5656

57+
// CPU-backend entry point for quantizing a row to MXFP6-E2M3.
// Forwards x, y and k unchanged to quantize_row_mxfp6_e2m3_ref(); nothing else is done here.
// NOTE(review): presumably k is the number of float elements in x and y points to
// block_mxfp6_e2m3 storage — confirm against the _ref implementation.
void quantize_row_mxfp6_e2m3(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
58+
quantize_row_mxfp6_e2m3_ref(x, y, k);
59+
}
60+
5761
//
5862
// 2-6 bit quantization in super-blocks
5963
//
@@ -271,6 +275,58 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0_generic(int n, float * GGML_RESTRICT s, size_t
271275
*s = sumf;
272276
}
273277

278+
// Portable reference dot product: MXFP6-E2M3 blocks (vx) times Q8_0 blocks (vy).
//
// n must be a multiple of QK_MXFP6_E2M3 and nrc must be 1; bs, bx and by are
// ignored. For every block the integer products of the table-decoded 6-bit
// codes and the int8 activations are summed, scaled by the product of both
// block scales and accumulated into the float written to *s.
void ggml_vec_dot_mxfp6_e2m3_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc)
{
    assert(nrc == 1);
    UNUSED(nrc);
    UNUSED(bx);
    UNUSED(by);
    UNUSED(bs);
    assert(n % QK_MXFP6_E2M3 == 0);
    static_assert(QK_MXFP6_E2M3 == QK8_0, "QK_MXFP6_E2M3 and QK8_0 must be the same");

    const block_mxfp6_e2m3 * GGML_RESTRICT x = vx;
    const block_q8_0 * GGML_RESTRICT y = vy;

    const int block_count = n / QK_MXFP6_E2M3;

    float total = 0;

    for (int blk = 0; blk < block_count; ++blk) {
        const uint8_t * packed = x[blk].qs; // 3 bytes per group of 4 codes
        const int8_t  * act    = y[blk].qs; // 4 int8 values per group

        int block_sum = 0;

        for (int grp = 0; grp < QK_MXFP6_E2M3/4; ++grp) {
            // gather the group's 3 bytes into one 24-bit little-endian word,
            // then peel the four 6-bit codes off from the low end
            const uint32_t word = (uint32_t) packed[3*grp + 0]
                                | ((uint32_t) packed[3*grp + 1] << 8)
                                | ((uint32_t) packed[3*grp + 2] << 16);

            for (int lane = 0; lane < 4; ++lane) {
                const uint8_t code = (word >> (6 * lane)) & 0x3F;
                block_sum += act[4*grp + lane] * kvalues_mxfp6_e2m3[code];
            }
        }

        // combined scale of the Q8_0 block and the MXFP6 block
        const float d = GGML_CPU_FP16_TO_FP32(y[blk].d)*GGML_E8M0_TO_FP32_HALF(x[blk].e);
        total += d * block_sum;
    }

    *s = total;
}
328+
329+
274330
void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
275331
const int qk = QK8_0;
276332
const int nb = n / qk;

ggml/src/ggml-cpu/quants.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
2121

2222
void quantize_row_mxfp4(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2323
void quantize_row_mxfp6_e3m2(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
24+
void quantize_row_mxfp6_e2m3(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2425

2526
void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2627
void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
@@ -44,6 +45,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
4445

4546
void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4647
void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
48+
void ggml_vec_dot_mxfp6_e2m3_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4749

4850
void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
4951
void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
@@ -75,6 +77,8 @@ void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c
7577
void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
7678

7779
void ggml_vec_dot_mxfp4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
80+
void ggml_vec_dot_mxfp6_e2m3_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
81+
void ggml_vec_dot_mxfp6_e3m2_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
7882

7983
void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
8084
void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);

0 commit comments

Comments
 (0)