
Commit b32af93

enable expert group selection for all models that have it
1 parent 84bf3c6 commit b32af93

2 files changed: +8 −4 lines

convert_hf_to_gguf.py

Lines changed: 6 additions & 2 deletions
@@ -591,6 +591,12 @@ def set_gguf_parameters(self):
         if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
             self.gguf_writer.add_expert_used_count(n_experts_used)
             logger.info(f"gguf: experts used count = {n_experts_used}")
+        if (n_expert_groups := self.hparams.get("n_group")) is not None:
+            self.gguf_writer.add_expert_group_count(n_expert_groups)
+            logger.info(f"gguf: expert groups count = {n_expert_groups}")
+        if (n_group_used := self.hparams.get("topk_group")) is not None:
+            self.gguf_writer.add_expert_group_used_count(n_group_used)
+            logger.info(f"gguf: expert groups used count = {n_group_used}")
 
         if (head_dim := self.hparams.get("head_dim")) is not None:
             self.gguf_writer.add_key_length(head_dim)
@@ -8089,8 +8095,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
         self.gguf_writer.add_expert_count(hparams["num_experts"])
         self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
-        self.gguf_writer.add_expert_group_count(hparams["n_group"])
-        self.gguf_writer.add_expert_group_used_count(hparams["topk_group"])
         self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
 
         if hparams["score_function"] == "sigmoid":
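
The hunks above move the expert-group metadata out of the DeepSeek-specific converter and into the generic set_gguf_parameters path, so any model whose config exposes n_group / topk_group gets the keys written. One way to spot-check a converted file is a minimal sketch like the one below, assuming the gguf-py package is installed; "model.gguf" is a hypothetical output path, and matching on the "expert_group" key-name substring is an assumption based on the writer method names, not something confirmed by this commit.

import gguf

reader = gguf.GGUFReader("model.gguf")  # hypothetical conversion output
for name, field in reader.fields.items():
    if "expert_group" in name:
        # scalar metadata: the value lives in the part indexed by field.data[0]
        print(name, field.parts[field.data[0]][0])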

src/llama-model.cpp

Lines changed: 2 additions & 2 deletions
@@ -6356,6 +6356,8 @@ void llama_model::print_info() const {
     LLAMA_LOG_INFO("%s: n_ff = %s\n", __func__, print_f([&](uint32_t il) { return hparams.n_ff(il); }, hparams.n_layer).c_str());
     LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert);
     LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used);
+    LLAMA_LOG_INFO("%s: n_expert_groups = %d\n", __func__, hparams.n_expert_groups);
+    LLAMA_LOG_INFO("%s: n_group_used = %d\n", __func__, hparams.n_group_used);
     LLAMA_LOG_INFO("%s: causal attn = %d\n", __func__, hparams.causal_attn);
     LLAMA_LOG_INFO("%s: pooling type = %d\n", __func__, hparams.pooling_type);
     LLAMA_LOG_INFO("%s: rope type = %d\n", __func__, hparams.rope_type);
@@ -6456,8 +6458,6 @@ void llama_model::print_info() const {
     LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp);
     LLAMA_LOG_INFO("%s: n_ff_shexp = %d\n", __func__, hparams.n_ff_shexp);
     LLAMA_LOG_INFO("%s: n_expert_shared = %d\n", __func__, hparams.n_expert_shared);
-    LLAMA_LOG_INFO("%s: n_expert_groups = %d\n", __func__, hparams.n_expert_groups);
-    LLAMA_LOG_INFO("%s: n_group_used = %d\n", __func__, hparams.n_group_used);
     LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
     LLAMA_LOG_INFO("%s: expert_weights_norm = %d\n", __func__, hparams.expert_weights_norm);
     LLAMA_LOG_INFO("%s: expert_gating_func = %s\n", __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));
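
Here the two log lines move from the DeepSeek-specific block into the generic print_info section, mirroring the converter change. For context on what n_expert_groups / n_group_used mean, the sketch below illustrates group-limited expert routing in plain numpy; it is an illustration of the general technique, not llama.cpp's implementation, and scoring each group by its strongest expert is a simplification (some routers sum the top few expert scores per group instead).

import numpy as np

def group_limited_topk(scores, n_expert_groups, n_group_used, n_expert_used):
    # scores: router scores for all experts of one token, shape (n_expert,)
    n_expert = scores.shape[-1]
    groups = scores.reshape(n_expert_groups, n_expert // n_expert_groups)
    # rank groups by their strongest expert, keep the best n_group_used groups
    keep = np.argsort(groups.max(axis=-1))[-n_group_used:]
    masked = np.full_like(groups, -np.inf)
    masked[keep] = groups[keep]
    # ordinary top-k over the experts that survived group selection
    return np.argsort(masked.reshape(-1))[-n_expert_used:]

# e.g. 8 experts in 4 groups: route through the best 2 groups, pick 2 experts
print(group_limited_topk(np.random.rand(8), 4, 2, 2))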
