@@ -6507,7 +6507,7 @@ void iqk_repack_tensor(struct ggml_tensor * tensor) {
65076507 if (!tensor) return ;
65086508 if (!ggml_is_contiguous (tensor)) return ;
65096509 if (strncmp (tensor->name , " token_embd.weight" , GGML_MAX_NAME) == 0 ) return ;
6510- if (tensor->ne [1 ] % 4 || tensor-> ne [ 2 ]*tensor-> ne [ 3 ] > 1 ) return ;
6510+ if (tensor->ne [1 ] % 4 ) return ;
65116511 static const std::unordered_map<ggml_type, Repack> k_map = {
65126512 { GGML_TYPE_IQ2_K, { GGML_TYPE_IQ2_K_R4, 4 , (Repack::repack_func)repack_iq2_k} },
65136513 { GGML_TYPE_IQ3_K, { GGML_TYPE_IQ3_K_R4, 4 , (Repack::repack_func)repack_iq3_k} },
@@ -6544,16 +6544,18 @@ void iqk_repack_tensor(struct ggml_tensor * tensor) {
65446544
65456545 auto & r = it->second ;
65466546
6547+ auto nrows = ggml_nrows (tensor);
6548+
65476549 int max_thread = std::max (1 , int (std::thread::hardware_concurrency ()/2 ));
6548- int num_chunks = (tensor-> ne [ 1 ] + kChunk *r.num_rows - 1 )/(kChunk *r.num_rows );
6550+ int num_chunks = (nrows + kChunk *r.num_rows - 1 )/(kChunk *r.num_rows );
65496551 int nthread = std::min (num_chunks, max_thread);
65506552
65516553 // printf("%s(%s): %s -> %s. %d rows, %d chunks, %d threads\n", __func__, tensor->name, ggml_type_name(tensor->type), ggml_type_name(r.new_type),
65526554 // int(tensor->ne[1]), num_chunks, nthread);
65536555
65546556 std::atomic<int > counter (0 );;
65556557 auto compute = [&counter, &r, tensor, num_chunks, chunkSize = kChunk ] () {
6556- int nrows = tensor-> ne [ 1 ] ;
6558+ int nrows = ggml_nrows ( tensor) ;
65576559 int n_per_row = tensor->ne [0 ];
65586560 auto row_size = ggml_row_size (tensor->type , n_per_row);
65596561 std::vector<char > qtmp (r.num_rows *row_size);
0 commit comments