Skip to content

Commit f44f793

Browse files
authored
ggml-quants : fix make_qp_quants NANs and IQ1 assertion errors (#15379)
* ggml-quants : fix make_qp_quants NANs and IQ1 assertion errors
* ggml-quants : avoid division by zero in make_q3_quants
1 parent ae532ea commit f44f793

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

ggml/src/ggml-quants.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8
566566
for (int i = 0; i < n; ++i) {
567567
L[i] += nmax;
568568
}
569-
return sumlx / suml2;
569+
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
570570
}
571571
for (int i = 0; i < n; ++i) {
572572
int l = nearest_int(iscale * x[i]);
@@ -901,7 +901,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
901901
for (int i = 0; i < n; ++i) {
902902
max = MAX(max, x[i]);
903903
}
904-
if (!max) { // all zero
904+
if (max < GROUP_MAX_EPS) { // all zero
905905
for (int i = 0; i < n; ++i) { L[i] = 0; }
906906
return 0.f;
907907
}
@@ -966,7 +966,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
966966
break;
967967
}
968968
}
969-
return sumlx/suml2;
969+
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
970970
}
971971

972972
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
@@ -4266,7 +4266,7 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
42664266
sumw[j+1] = sumw[j] + weight[i];
42674267
}
42684268
}
4269-
float best_score = -FLT_MIN, scale = max;
4269+
float best_score = -FLT_MAX, scale = max;
42704270
int besti1 = -1, besti2 = -1, best_shift = 0;
42714271
for (int i1 = 0; i1 <= block_size; ++i1) {
42724272
for (int i2 = i1; i2 <= block_size; ++i2) {
@@ -4442,7 +4442,7 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
44424442
idx[2*j] = j;
44434443
}
44444444
qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
4445-
float best_score = -FLT_MIN, scale = max;
4445+
float best_score = -FLT_MAX, scale = max;
44464446
int besti1 = -1, besti2 = -1, best_k = -1;
44474447
// 0: +, +
44484448
// 1: +, -

0 commit comments

Comments
 (0)