@@ -6116,23 +6116,48 @@ size_t quantize_iq1_s_r4(const float * src, void * dst, int64_t nrows, int64_t n
61166116 auto y = (block_iq1_s_r4 *)(dptr + 4 );
61176117 for (int k = 0 ; k < 4 ; ++k) max[k] = 0 ;
61186118 for (int ibl = 0 ; ibl < nblock; ++ibl) {
6119- if (imatrix) {
6120- for (int j = 0 ; j < kBlockSize ; ++j) weight[j] = imatrix[kBlockSize *ibl + j];
6121- }
61226119 for (int k = 0 ; k < 4 ; ++k) {
61236120 auto xb = src + k*n_per_row + kBlockSize *ibl;
61246121 float sumx2 = 0 ;
61256122 for (int j = 0 ; j < kBlockSize ; ++j) sumx2 += xb[j]*xb[j];
6123+ if (!sumx2) {
6124+ printf (" Found block with all zeros\n " );
6125+ // all zero
6126+ int ind = 1029 ; // this is the grid entry with all zeros
6127+ scales[4 *ibl+k] = 0 ;
6128+ uint16_t h = 0 ;
6129+ for (int i = 0 ; i < 4 ; ++i) {
6130+ y[ibl].qs [4 *i + k] = ind & 255 ;
6131+ h |= (ind >> 8 ) << 3 *i;
6132+ }
6133+ y[ibl].qh [k] = h;
6134+ continue ;
6135+ }
61266136 float sigma2 = 1 .5f *sumx2/kBlockSize ;
6137+ bool have_imatrix = false ;
61276138 if (imatrix) {
6128- for (int j = 0 ; j < kBlockSize ; ++j) weight[j] = imatrix[kBlockSize *ibl + j]*sqrt (sigma2 + xb[j]*xb[j]);
6129- } else {
6139+ have_imatrix = true ;
6140+ float sumwx = 0 ;
6141+ for (int j = 0 ; j < kBlockSize ; ++j) {
6142+ weight[j] = imatrix[kBlockSize *ibl + j]*sqrt (sigma2 + xb[j]*xb[j]);
6143+ sumwx += weight[j]*std::abs (xb[j]);
6144+ }
6145+ if (!sumwx) {
6146+ printf (" Found block with mismatching importance/model weights\n " );
6147+ // Either all weights are zero, or xb is zero where weight is not zero.
6148+ // In both of these cases it is better to simply ignore the imatrix
6149+ have_imatrix = false ;
6150+ }
6151+ }
6152+ if (!have_imatrix) {
61306153 for (int j = 0 ; j < kBlockSize ; ++j) weight[j] = sqrt (sigma2 + xb[j]*xb[j]);
61316154 }
61326155 iq1s_process_1block (kBlockSize , xb, weight, L, scales.data () + 4 *ibl + k, index, &shift, pairs, sumx, sumw);
6156+ GGML_ASSERT (scales[4 *ibl+k] >= 0 );
61336157 max[k] = std::max (max[k], scales[4 *ibl+k]);
61346158 uint16_t h = 0 ;
61356159 for (int i = 0 ; i < 4 ; ++i) {
6160+ GGML_ASSERT (index[i] >= 0 && index[i] < 2048 );
61366161 y[ibl].qs [4 *i + k] = index[i] & 255 ;
61376162 h |= (index[i] >> 8 ) << 3 *i;
61386163 }
0 commit comments