@@ -14,7 +14,7 @@
 #include <unordered_map>

 //static std::vector prune_map = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29};
-static std::vector<int> prune_map = {7};
+static std::vector<int> prune_map = {3};

 static void zeros(std::ofstream & file, size_t n) {
     char zero = 0;
@@ -52,6 +52,28 @@ static std::string remap_layer(const std::string & orig_name, const std::vector<
     return orig_name;
 }

+static std::string remap_imatrix (const std::string & orig_name, const std::map<int, std::string>& mapped) {
+    if (mapped.empty()) {
+        return orig_name;
+    }
+
+    static const std::regex pattern(R"(blk\.(\d+)\.)");
+    if (std::smatch match; std::regex_search(orig_name, match, pattern)) {
+        const std::string blk(match[1]);
+        std::string new_name = orig_name;
+
+        for (const auto & p : mapped) {
+            if (p.second == blk) {
+                //LLAMA_LOG_DEBUG("(imatrix -> %d) ", p.first);
+                return new_name.replace(match.position(1), match.length(1), std::to_string(p.first));
+            }
+        }
+        GGML_ABORT("\n%s: imatrix mapping error for %s\n", __func__, orig_name.c_str());
+    }
+
+    return orig_name;
+}
+
 struct quantize_state_impl {
     const llama_model & model;
     const llama_model_quantize_params * params;
@@ -882,7 +904,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::

         const float * imatrix = nullptr;
         if (imatrix_data) {
-            auto it = imatrix_data->find(tensor->name);
+            auto it = imatrix_data->find(remap_imatrix(tensor->name, mapped));
             if (it == imatrix_data->end()) {
                 LLAMA_LOG_INFO("\n====== %s: did not find weights for %s\n", __func__, tensor->name);
             } else {