Skip to content

Commit b08a2e9

Browse files
ikawrakow and Iwan Kawrakow authored
Add additional checks for iq1_s_r4 quantization (#191)
Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent a08501e commit b08a2e9

File tree

1 file changed

+30
-5
lines changed

1 file changed

+30
-5
lines changed

ggml/src/iqk/iqk_quantize.cpp

Lines changed: 30 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6116,23 +6116,48 @@ size_t quantize_iq1_s_r4(const float * src, void * dst, int64_t nrows, int64_t n
61166116
auto y = (block_iq1_s_r4 *)(dptr + 4);
61176117
for (int k = 0; k < 4; ++k) max[k] = 0;
61186118
for (int ibl = 0; ibl < nblock; ++ibl) {
6119-
if (imatrix) {
6120-
for (int j = 0; j < kBlockSize; ++j) weight[j] = imatrix[kBlockSize*ibl + j];
6121-
}
61226119
for (int k = 0; k < 4; ++k) {
61236120
auto xb = src + k*n_per_row + kBlockSize*ibl;
61246121
float sumx2 = 0;
61256122
for (int j = 0; j < kBlockSize; ++j) sumx2 += xb[j]*xb[j];
6123+
if (!sumx2) {
6124+
printf("Found block with all zeros\n");
6125+
// all zero
6126+
int ind = 1029; // this is the grid entry with all zeros
6127+
scales[4*ibl+k] = 0;
6128+
uint16_t h = 0;
6129+
for (int i = 0; i < 4; ++i) {
6130+
y[ibl].qs[4*i + k] = ind & 255;
6131+
h |= (ind >> 8) << 3*i;
6132+
}
6133+
y[ibl].qh[k] = h;
6134+
continue;
6135+
}
61266136
float sigma2 = 1.5f*sumx2/kBlockSize;
6137+
bool have_imatrix = false;
61276138
if (imatrix) {
6128-
for (int j = 0; j < kBlockSize; ++j) weight[j] = imatrix[kBlockSize*ibl + j]*sqrt(sigma2 + xb[j]*xb[j]);
6129-
} else {
6139+
have_imatrix = true;
6140+
float sumwx = 0;
6141+
for (int j = 0; j < kBlockSize; ++j) {
6142+
weight[j] = imatrix[kBlockSize*ibl + j]*sqrt(sigma2 + xb[j]*xb[j]);
6143+
sumwx += weight[j]*std::abs(xb[j]);
6144+
}
6145+
if (!sumwx) {
6146+
printf("Found block with mismatching importance/model weights\n");
6147+
// Either all weights are zero, or xb is zero where weight is not zero.
6148+
// In both of these cases it is better to simply ignore the imatrix
6149+
have_imatrix = false;
6150+
}
6151+
}
6152+
if (!have_imatrix) {
61306153
for (int j = 0; j < kBlockSize; ++j) weight[j] = sqrt(sigma2 + xb[j]*xb[j]);
61316154
}
61326155
iq1s_process_1block(kBlockSize, xb, weight, L, scales.data() + 4*ibl + k, index, &shift, pairs, sumx, sumw);
6156+
GGML_ASSERT(scales[4*ibl+k] >= 0);
61336157
max[k] = std::max(max[k], scales[4*ibl+k]);
61346158
uint16_t h = 0;
61356159
for (int i = 0; i < 4; ++i) {
6160+
GGML_ASSERT(index[i] >= 0 && index[i] < 2048);
61366161
y[ibl].qs[4*i + k] = index[i] & 255;
61376162
h |= (index[i] >> 8) << 3*i;
61386163
}

0 commit comments

Comments
 (0)