@@ -655,8 +655,11 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
655655 GGML_TYPE_IQ1_S,
656656 GGML_TYPE_IQ1_M,
657657 GGML_TYPE_IQ2_XXS,
658+ GGML_TYPE_IQ2_XS,
659+ GGML_TYPE_IQ2_S,
658660 GGML_TYPE_Q2_K,
659661 GGML_TYPE_IQ3_XXS,
662+ GGML_TYPE_IQ3_S,
660663 GGML_TYPE_Q3_K,
661664 GGML_TYPE_IQ4_XS,
662665 GGML_TYPE_IQ4_NL,
@@ -1155,7 +1158,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
11551158 }
11561159 {
11571160 std::lock_guard<std::mutex> lock (log_mutex);
1158- LLAMA_LOG_INFO (" \t target_bpw_type : - processing tensor %45s \t (%12" PRId64 " elements)\n " , name.c_str (), ggml_nelements (tensor));
1161+ LLAMA_LOG_INFO (" \t %s : - processing tensor %45s \t (%12" PRId64 " elements)\n " , func , name.c_str (), ggml_nelements (tensor));
11591162 }
11601163
11611164 if (!ml.use_mmap ) {
@@ -1457,19 +1460,19 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
14571460 std::vector<tensor_info> all; // this vector will be populated by the parallel workers
14581461 {
14591462 std::atomic<size_t > tensor_idx{0 }; // shared work queue index for all threads
1460- const size_t num_tensors_to_process = tensors.size ();
1463+ const size_t tensors_to_process = tensors.size ();
14611464 std::mutex loader_mutex;
14621465 std::mutex log_mutex;
14631466 std::mutex results_mutex;
14641467 std::vector<std::thread> workers;
1465- int num_threads_to_spawn = std::max (1 , std::min<int >(nthread, (int )num_tensors_to_process ));
1468+ int threads_to_spawn = std::max (1 , std::min<int >(nthread, (int )tensors_to_process ));
14661469
1467- for (int i = 0 ; i < num_threads_to_spawn ; ++i) {
1470+ for (int i = 0 ; i < threads_to_spawn ; ++i) {
14681471 workers.emplace_back ([&]() {
14691472 std::vector<no_init<uint8_t >> thread_local_buffer;
14701473 while (true ) {
14711474 const size_t current_idx = tensor_idx.fetch_add (1 );
1472- if (current_idx >= num_tensors_to_process ) { break ; }
1475+ if (current_idx >= tensors_to_process ) { break ; }
14731476 const auto * tw = tensors[current_idx];
14741477 if (!can_quantize (tw->tensor )) { continue ; }
14751478 // Execute the main processing logic for this tensor
0 commit comments