@@ -8227,18 +8227,38 @@ class QuantizerIQKT {
// Classifies x into one of three bins (0, 1 or 2) for dimension idim by comparing it against the two thresholds m_mid[2*idim] and m_mid[2*idim+1]: below the first -> 0, below the second -> 1, otherwise 2.
82278227 inline int bin3 (int idim, float x) const { return x < m_mid[2 *idim+0 ] ? 0 : x < m_mid[2 *idim+1 ] ? 1 : 2 ; }
82288228
82298229 static inline void set_weights (float sigma2_scale, int nblock, const float * x, const float * imatrix, float * row_weights) {
8230+ constexpr float kEps2 = 1e-14f ;
8231+ constexpr float kWeight = 1e-4f ;
82308232 for (int ibl = 0 ; ibl < nblock; ++ibl) {
82318233
82328234 const float * xbl = x + ibl*kSuperBlockSize ;
82338235 float * wbl = row_weights + ibl*kSuperBlockSize ;
82348236
82358237 float sumx2 = 0 ;
82368238 for (int j = 0 ; j < kSuperBlockSize ; ++j) sumx2 += xbl[j]*xbl[j];
8239+ if (sumx2 < kEps2 *kSuperBlockSize ) {
8240+ // all x in the super block are (almost) zero
8241+ for (int j = 0 ; j < kSuperBlockSize ; ++j) wbl[j] = kWeight ;
8242+ continue ;
8243+ }
82378244 const float sigma2 = sigma2_scale*sumx2/kSuperBlockSize ;
82388245
82398246 if (imatrix) {
8240- const float * qw = imatrix + ibl*kSuperBlockSize ;
8241- for (int j = 0 ; j < kSuperBlockSize ; ++j) wbl[j] = qw[j] * sqrtf (sigma2 + xbl[j]*xbl[j]);
8247+ for (int ib = 0 ; ib < kSuperBlockSize /kBlockSize ; ++ib) {
8248+ const float * qw = imatrix + ibl*kSuperBlockSize + ib*kBlockSize ;
8249+ const float * xb = xbl + ib*kBlockSize ;
8250+ float * wb = wbl + ib*kBlockSize ;
8251+ float sumwx = 0 , sumw2 = 0 , sumx2 = 0 ;
8252+ for (int j = 0 ; j < kBlockSize ; ++j) {
8253+ wb[j] = qw[j] * sqrtf (sigma2 + xb[j]*xb[j]);
8254+ sumwx += wb[j]*std::abs (xb[j]);
8255+ sumw2 += wb[j]*wb[j];
8256+ sumx2 += xb[j]*xb[j];
8257+ }
8258+ if (sumx2 < kEps2 || sumw2 < kEps2 || sumwx < kEps2 ) {
8259+ for (int j = 0 ; j < kBlockSize ; ++j) wb[j] = kWeight ;
8260+ }
8261+ }
82428262 } else {
82438263 for (int j = 0 ; j < kSuperBlockSize ; ++j) wbl[j] = 0 .25f *sigma2 + xbl[j]*xbl[j];
82448264 }
@@ -9390,15 +9410,25 @@ void quantize_row_iq3_kt_impl(const float * x, void * vy, int n_per_row, const f
93909410 float scale_0 = std::max (84 .f , 123 .f *amax/amax_row);
93919411 // float scale_0 = std::max(64.f, 123.f*amax/amax_row);
93929412 float best = 0 ;
9413+ bool found_solution = false ;
93939414 for (int itry = -3 ; itry <= 3 ; ++itry) {
93949415 quantizer.find_best_match (amax/(scale_0 + kStep *itry), xaux, weight, best_idx);
93959416 auto [d, score] = quantizer.find_best_scale (xaux, weight, best_idx);
93969417 if (score > best) {
93979418 best = score;
9419+ found_solution = true ;
93989420 scales[ib] = d;
93999421 std::memcpy (best_idx+Q::kNg , best_idx, Q::kNg *sizeof (int ));
94009422 }
94019423 }
9424+ if (!found_solution) {
9425+ fprintf (stderr, " ======================= %s: failed to find solution for a block\n " , __func__);
9426+ fprintf (stderr, " Model weights and importances:\n " );
9427+ for (int j = 0 ; j < Q::kBlockSize ; ++j) {
9428+ fprintf (stderr, " %2d %g %g\n " , j, xaux[j], weight[j]);
9429+ }
9430+ GGML_ASSERT (false );
9431+ }
94029432
94039433 auto xt = qtmp + ibl*Q::kSuperBlockSize + ib*Q::kBlockSize ;
94049434 for (int ig = 0 ; ig < Q::kNg ; ++ig) {
0 commit comments