Skip to content

Commit d44aba7

Browse files
author
Iwan Kawrakow
committed
Bug fix in activation quantization
In the last PR I changed how activations are quantized. The change appeared to work and slightly improved performance, but I have now hit an edge case where the output is gibberish, and the gibberish goes away if I remove the change. I cannot see what is going wrong, so for now I am leaving the new code in place but commented out.
1 parent 0551e76 commit d44aba7

File tree

1 file changed

+23
-23
lines changed

1 file changed

+23
-23
lines changed

ggml/src/ggml.c

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14115,28 +14115,28 @@ UseGgmlGemm1:;
1411514115
assert(params->wsize >= ne13*nbw3);
1411614116
GGML_ASSERT(src1->type == GGML_TYPE_F32);
1411714117

14118-
#ifdef GGML_USE_IQK_MULMAT
14119-
int ts = type_traits[vec_dot_type].type_size;
14120-
int bs = type_traits[vec_dot_type].blck_size;
14121-
int64_t blocks_per_row = ne10/bs;
14122-
int64_t num_blocks = ne11*ne12*ne13*blocks_per_row;
14123-
int gcd = simple_gcd(128, ts); // 128 is to cover cache line sizes for common architectures without getting involved
14124-
// with trying to get it from ggml
14125-
int64_t num_blocks_gcd = (num_blocks + gcd - 1)/gcd;
14126-
int64_t block_per_thread = ((num_blocks_gcd + nth - 1)/nth)*gcd;
14127-
int64_t first_block = ith*block_per_thread;
14128-
int64_t last_block = MIN(num_blocks, first_block + block_per_thread);
14129-
while (first_block < last_block) {
14130-
int64_t i13 = first_block/(ne11*ne12*blocks_per_row);
14131-
int64_t i12 = (first_block - i13*ne11*ne12*blocks_per_row)/(ne11*blocks_per_row);
14132-
int64_t i11 = (first_block - (i13*ne12 + i12)*ne11*blocks_per_row)/blocks_per_row;
14133-
int64_t i10 = first_block % blocks_per_row;
14134-
int64_t blocks_to_do = MIN(blocks_per_row - i10, last_block - first_block);
14135-
from_float((float *)((char *)src1->data + i13*nb13 + i12*nb12 + i11*nb11) + i10*bs,
14136-
(void *)(wdata + i13*nbw3 + i12*nbw2 + i11*nbw1 + i10*ts), blocks_to_do*bs);
14137-
first_block += blocks_to_do;
14138-
}
14139-
#else
14118+
//#ifdef GGML_USE_IQK_MULMAT
14119+
// int ts = type_traits[vec_dot_type].type_size;
14120+
// int bs = type_traits[vec_dot_type].blck_size;
14121+
// int64_t blocks_per_row = ne10/bs;
14122+
// int64_t num_blocks = ne11*ne12*ne13*blocks_per_row;
14123+
// int gcd = simple_gcd(128, ts); // 128 is to cover cache line sizes for common architectures without getting involved
14124+
// // with trying to get it from ggml
14125+
// int64_t num_blocks_gcd = (num_blocks + gcd - 1)/gcd;
14126+
// int64_t block_per_thread = ((num_blocks_gcd + nth - 1)/nth)*gcd;
14127+
// int64_t first_block = ith*block_per_thread;
14128+
// int64_t last_block = MIN(num_blocks, first_block + block_per_thread);
14129+
// while (first_block < last_block) {
14130+
// int64_t i13 = first_block/(ne11*ne12*blocks_per_row);
14131+
// int64_t i12 = (first_block - i13*ne11*ne12*blocks_per_row)/(ne11*blocks_per_row);
14132+
// int64_t i11 = (first_block - (i13*ne12 + i12)*ne11*blocks_per_row)/blocks_per_row;
14133+
// int64_t i10 = first_block % blocks_per_row;
14134+
// int64_t blocks_to_do = MIN(blocks_per_row - i10, last_block - first_block);
14135+
// from_float((float *)((char *)src1->data + i13*nb13 + i12*nb12 + i11*nb11) + i10*bs,
14136+
// (void *)(wdata + i13*nbw3 + i12*nbw2 + i11*nbw1 + i10*ts), blocks_to_do*bs);
14137+
// first_block += blocks_to_do;
14138+
// }
14139+
//#else
1414014140

1414114141
for (int64_t i13 = 0; i13 < ne13; ++i13) {
1414214142
for (int64_t i12 = 0; i12 < ne12; ++i12) {
@@ -14158,7 +14158,7 @@ UseGgmlGemm1:;
1415814158
}
1415914159
}
1416014160
}
14161-
#endif
14161+
//#endif
1416214162

1416314163
ggml_barrier(params->shared);
1416414164

0 commit comments

Comments
 (0)