Skip to content

Commit c59bb6d

Browse files
committed
Add Euclidean-Cosine score to identify important tensors
1 parent 6e32244 commit c59bb6d

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

src/llama-quant.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,12 +1571,25 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
15711571

15721572
// Certain tensors have a higher impact on model quality, so we apply a lower penalty to them
15731573
auto is_important = [&](const std::string & tensor_name) -> bool {
1574-
const auto important = tensor_name == "output.weight" ||
1575-
tensor_name.find(".ffn_down.weight") != std::string::npos ||
1576-
tensor_name.find(".ffn_down_exps.weight") != std::string::npos ||
1577-
tensor_name.find(".attn_output.weight") != std::string::npos ||
1578-
tensor_name.find(".time_mix_output.weight") != std::string::npos ||
1579-
tensor_name.find(".attn_o.weight") != std::string::npos;
1574+
bool important = false;
1575+
1576+
if (statistics_data) {
1577+
float ecs = 0.0f; // Euclidean-Cosine score
1578+
const std::string key = remap_imatrix(tensor_name, mapped);
1579+
const auto tstats = statistics_data->find(key);
1580+
if (tstats != statistics_data->end() && !tstats->second.empty()) {
1581+
ecs = tstats->second.front();
1582+
important = ecs == 100.0f; // mark as important if ecs is 100%
1583+
}
1584+
} else {
1585+
important = tensor_name == "output.weight" ||
1586+
tensor_name.find(".ffn_down.weight") != std::string::npos ||
1587+
tensor_name.find(".ffn_down_exps.weight") != std::string::npos ||
1588+
tensor_name.find(".attn_output.weight") != std::string::npos ||
1589+
tensor_name.find(".time_mix_output.weight") != std::string::npos ||
1590+
tensor_name.find(".attn_o.weight") != std::string::npos;
1591+
}
1592+
15801593
return important;
15811594
};
15821595

0 commit comments

Comments
 (0)