@@ -566,7 +566,7 @@ static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8
566
566
for (int i = 0 ; i < n ; ++ i ) {
567
567
L [i ] += nmax ;
568
568
}
569
- return sumlx / suml2 ;
569
+ return suml2 > 0.0f ? sumlx / suml2 : 0.0f ;
570
570
}
571
571
for (int i = 0 ; i < n ; ++ i ) {
572
572
int l = nearest_int (iscale * x [i ]);
@@ -901,7 +901,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
901
901
for (int i = 0 ; i < n ; ++ i ) {
902
902
max = MAX (max , x [i ]);
903
903
}
904
- if (! max ) { // all zero
904
+ if (max < GROUP_MAX_EPS ) { // max below GROUP_MAX_EPS (e.g. all zero)
905
905
for (int i = 0 ; i < n ; ++ i ) { L [i ] = 0 ; }
906
906
return 0.f ;
907
907
}
@@ -966,7 +966,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
966
966
break ;
967
967
}
968
968
}
969
- return sumlx / suml2 ;
969
+ return suml2 > 0.0f ? sumlx / suml2 : 0.0f ;
970
970
}
971
971
972
972
static void quantize_row_q2_K_impl (const float * GGML_RESTRICT x , block_q2_K * GGML_RESTRICT y , int k , const float * GGML_RESTRICT quant_weights ) {
@@ -4266,7 +4266,7 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
4266
4266
sumw [j + 1 ] = sumw [j ] + weight [i ];
4267
4267
}
4268
4268
}
4269
- float best_score = - FLT_MIN , scale = max ;
4269
+ float best_score = - FLT_MAX , scale = max ;
4270
4270
int besti1 = -1 , besti2 = -1 , best_shift = 0 ;
4271
4271
for (int i1 = 0 ; i1 <= block_size ; ++ i1 ) {
4272
4272
for (int i2 = i1 ; i2 <= block_size ; ++ i2 ) {
@@ -4442,7 +4442,7 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
4442
4442
idx [2 * j ] = j ;
4443
4443
}
4444
4444
qsort (pairs , block_size , 2 * sizeof (float ), iq1_sort_helper );
4445
- float best_score = - FLT_MIN , scale = max ;
4445
+ float best_score = - FLT_MAX , scale = max ;
4446
4446
int besti1 = -1 , besti2 = -1 , best_k = -1 ;
4447
4447
// 0: +, +
4448
4448
// 1: +, -
0 commit comments