Commit b6094a9: Add quant types
1 parent 12e0524 commit b6094a9

File tree: 1 file changed, +8 -5 lines

src/llama-quant.cpp: 8 additions & 5 deletions
```diff
@@ -655,8 +655,11 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         GGML_TYPE_IQ1_S,
         GGML_TYPE_IQ1_M,
         GGML_TYPE_IQ2_XXS,
+        GGML_TYPE_IQ2_XS,
+        GGML_TYPE_IQ2_S,
         GGML_TYPE_Q2_K,
         GGML_TYPE_IQ3_XXS,
+        GGML_TYPE_IQ3_S,
         GGML_TYPE_Q3_K,
         GGML_TYPE_IQ4_XS,
         GGML_TYPE_IQ4_NL,
```
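The hunk above widens the candidate list that `target_bpw_type` searches over, adding IQ2_XS, IQ2_S, and IQ3_S so the bits-per-weight search has intermediate steps between IQ2_XXS and Q3_K. A minimal sketch of how such a candidate list can be ranked by bits-per-weight using ggml's public helpers; `bpw_of` is a hypothetical name, and the ranking here is illustrative rather than the commit's selection logic:

```cpp
// Sketch (not the commit's code): rank candidate ggml types by bits-per-weight.
// Assumes ggml.h is on the include path; the candidate list mirrors this commit.
#include <cstdio>
#include <vector>
#include "ggml.h"

static double bpw_of(ggml_type t) {
    // bytes per block * 8 bits, divided by elements per block
    return 8.0 * ggml_type_size(t) / ggml_blck_size(t);
}

int main() {
    const std::vector<ggml_type> candidates = {
        GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M, GGML_TYPE_IQ2_XXS,
        GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S, // added by this commit
        GGML_TYPE_Q2_K, GGML_TYPE_IQ3_XXS,
        GGML_TYPE_IQ3_S,                   // added by this commit
        GGML_TYPE_Q3_K, GGML_TYPE_IQ4_XS, GGML_TYPE_IQ4_NL,
    };
    for (ggml_type t : candidates) {
        printf("%-12s %.4f bpw\n", ggml_type_name(t), bpw_of(t));
    }
}
```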
```diff
@@ -1155,7 +1158,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         }
         {
             std::lock_guard<std::mutex> lock(log_mutex);
-            LLAMA_LOG_INFO("\ttarget_bpw_type: - processing tensor %45s \t(%12" PRId64 " elements)\n", name.c_str(), ggml_nelements(tensor));
+            LLAMA_LOG_INFO("\t%s: - processing tensor %45s \t(%12" PRId64 " elements)\n", func, name.c_str(), ggml_nelements(tensor));
         }

         if (!ml.use_mmap) {
```
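This change replaces the hardcoded `target_bpw_type:` log prefix with a `%s` format and a `func` argument, which presumably holds `__func__` so the prefix tracks the enclosing function's name if it is ever renamed. A small sketch of that pattern, assuming `func` is bound to `__func__` as the diff suggests; the surrounding function here is hypothetical:

```cpp
// Sketch of the log-prefix pattern: derive the prefix from __func__
// instead of hardcoding the function name into the format string.
#include <cstdio>
#include <mutex>

static std::mutex log_mutex;

static void process_tensor(const char * name, long long nelements) {
    const char * func = __func__; // stays correct if the function is renamed
    std::lock_guard<std::mutex> lock(log_mutex);
    printf("\t%s: - processing tensor %45s \t(%12lld elements)\n", func, name, nelements);
}

int main() {
    process_tensor("blk.0.attn_q.weight", 16777216LL);
}
```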
```diff
@@ -1457,19 +1460,19 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
     std::vector<tensor_info> all; // this vector will be populated by the parallel workers
     {
         std::atomic<size_t> tensor_idx{0}; // shared work queue index for all threads
-        const size_t num_tensors_to_process = tensors.size();
+        const size_t tensors_to_process = tensors.size();
         std::mutex loader_mutex;
         std::mutex log_mutex;
         std::mutex results_mutex;
         std::vector<std::thread> workers;
-        int num_threads_to_spawn = std::max(1, std::min<int>(nthread, (int)num_tensors_to_process));
+        int threads_to_spawn = std::max(1, std::min<int>(nthread, (int)tensors_to_process));

-        for (int i = 0; i < num_threads_to_spawn; ++i) {
+        for (int i = 0; i < threads_to_spawn; ++i) {
             workers.emplace_back([&]() {
                 std::vector<no_init<uint8_t>> thread_local_buffer;
                 while (true) {
                     const size_t current_idx = tensor_idx.fetch_add(1);
-                    if (current_idx >= num_tensors_to_process) { break; }
+                    if (current_idx >= tensors_to_process) { break; }
                     const auto * tw = tensors[current_idx];
                     if (!can_quantize(tw->tensor)) { continue; }
                     // Execute the main processing logic for this tensor
```
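The renames above (`num_tensors_to_process` to `tensors_to_process`, `num_threads_to_spawn` to `threads_to_spawn`) do not change behavior: the loop is an atomic work queue in which each worker claims the next tensor index with `fetch_add` and exits once the index passes the end. A self-contained sketch of the pattern with dummy work items in place of the tensors:

```cpp
// Self-contained sketch of the atomic work-queue pattern used above
// (variable names follow the renamed versions; work items are dummies).
#include <algorithm>
#include <atomic>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

int main() {
    const std::vector<int> tensors(17, 0);           // placeholder work items
    std::atomic<size_t> tensor_idx{0};               // shared work queue index
    const size_t tensors_to_process = tensors.size();
    std::mutex results_mutex;
    std::vector<size_t> all;                         // populated by the workers

    const int nthread = 4;
    int threads_to_spawn = std::max(1, std::min<int>(nthread, (int)tensors_to_process));

    std::vector<std::thread> workers;
    for (int i = 0; i < threads_to_spawn; ++i) {
        workers.emplace_back([&]() {
            while (true) {
                const size_t current_idx = tensor_idx.fetch_add(1);
                if (current_idx >= tensors_to_process) { break; }
                // ... per-tensor processing would go here ...
                std::lock_guard<std::mutex> lock(results_mutex);
                all.push_back(current_idx);
            }
        });
    }
    for (auto & w : workers) { w.join(); }
    printf("processed %zu tensors\n", all.size());
}
```

Clamping the thread count to the number of tensors avoids spawning workers that would immediately find the queue exhausted and exit.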
