Skip to content

Commit 047ba89

Browse files
ikawrakow and Iwan Kawrakow
authored
Repack also experts (#210)
Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent d44aba7 commit 047ba89

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

ggml/src/iqk/iqk_quantize.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6507,7 +6507,7 @@ void iqk_repack_tensor(struct ggml_tensor * tensor) {
65076507
if (!tensor) return;
65086508
if (!ggml_is_contiguous(tensor)) return;
65096509
if (strncmp(tensor->name, "token_embd.weight", GGML_MAX_NAME) == 0) return;
6510-
if (tensor->ne[1] % 4 || tensor->ne[2]*tensor->ne[3] > 1) return;
6510+
if (tensor->ne[1] % 4) return;
65116511
static const std::unordered_map<ggml_type, Repack> k_map = {
65126512
{ GGML_TYPE_IQ2_K, { GGML_TYPE_IQ2_K_R4, 4, (Repack::repack_func)repack_iq2_k} },
65136513
{ GGML_TYPE_IQ3_K, { GGML_TYPE_IQ3_K_R4, 4, (Repack::repack_func)repack_iq3_k} },
@@ -6544,16 +6544,18 @@ void iqk_repack_tensor(struct ggml_tensor * tensor) {
65446544

65456545
auto& r = it->second;
65466546

6547+
auto nrows = ggml_nrows(tensor);
6548+
65476549
int max_thread = std::max(1, int(std::thread::hardware_concurrency()/2));
6548-
int num_chunks = (tensor->ne[1] + kChunk*r.num_rows - 1)/(kChunk*r.num_rows);
6550+
int num_chunks = (nrows + kChunk*r.num_rows - 1)/(kChunk*r.num_rows);
65496551
int nthread = std::min(num_chunks, max_thread);
65506552

65516553
//printf("%s(%s): %s -> %s. %d rows, %d chunks, %d threads\n", __func__, tensor->name, ggml_type_name(tensor->type), ggml_type_name(r.new_type),
65526554
// int(tensor->ne[1]), num_chunks, nthread);
65536555

65546556
std::atomic<int> counter(0);;
65556557
auto compute = [&counter, &r, tensor, num_chunks, chunkSize = kChunk] () {
6556-
int nrows = tensor->ne[1];
6558+
int nrows = ggml_nrows(tensor);
65576559
int n_per_row = tensor->ne[0];
65586560
auto row_size = ggml_row_size(tensor->type, n_per_row);
65596561
std::vector<char> qtmp(r.num_rows*row_size);

0 commit comments

Comments
 (0)