
Commit 20b91ba

CISC and theo77186 authored and committed
convert : enable expert group selection for all models with it (ggml-org#16691)
1 parent 484b273 commit 20b91ba

2 files changed: +8 additions, -4 deletions

convert_hf_to_gguf.py

Lines changed: 6 additions & 2 deletions
@@ -742,6 +742,12 @@ def set_gguf_parameters(self):
         if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
             self.gguf_writer.add_expert_used_count(n_experts_used)
             logger.info(f"gguf: experts used count = {n_experts_used}")
+        if (n_expert_groups := self.hparams.get("n_group")) is not None:
+            self.gguf_writer.add_expert_group_count(n_expert_groups)
+            logger.info(f"gguf: expert groups count = {n_expert_groups}")
+        if (n_group_used := self.hparams.get("topk_group")) is not None:
+            self.gguf_writer.add_expert_group_used_count(n_group_used)
+            logger.info(f"gguf: expert groups used count = {n_group_used}")

         if (head_dim := self.hparams.get("head_dim")) is not None:
             self.gguf_writer.add_key_length(head_dim)
@@ -8468,8 +8474,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
         self.gguf_writer.add_expert_count(hparams["num_experts"])
         self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
-        self.gguf_writer.add_expert_group_count(hparams["n_group"])
-        self.gguf_writer.add_expert_group_used_count(hparams["topk_group"])
         self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])

         if hparams["score_function"] == "sigmoid":

src/llama-model.cpp

Lines changed: 2 additions & 2 deletions
@@ -6409,6 +6409,8 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: n_ff = %s\n", __func__, print_f([&](uint32_t il) { return hparams.n_ff(il); }, hparams.n_layer).c_str());
         LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert);
         LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used);
+        LLAMA_LOG_INFO("%s: n_expert_groups = %d\n", __func__, hparams.n_expert_groups);
+        LLAMA_LOG_INFO("%s: n_group_used = %d\n", __func__, hparams.n_group_used);
         LLAMA_LOG_INFO("%s: causal attn = %d\n", __func__, hparams.causal_attn);
         LLAMA_LOG_INFO("%s: pooling type = %d\n", __func__, hparams.pooling_type);
         LLAMA_LOG_INFO("%s: rope type = %d\n", __func__, hparams.rope_type);
@@ -6513,8 +6515,6 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: n_ff_shexp = %d\n", __func__, hparams.n_ff_shexp);
         LLAMA_LOG_INFO("%s: n_expert_shared = %d\n", __func__, hparams.n_expert_shared);
-        LLAMA_LOG_INFO("%s: n_expert_groups = %d\n", __func__, hparams.n_expert_groups);
-        LLAMA_LOG_INFO("%s: n_group_used = %d\n", __func__, hparams.n_group_used);
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
         LLAMA_LOG_INFO("%s: expert_weights_norm = %d\n", __func__, hparams.expert_weights_norm);
         LLAMA_LOG_INFO("%s: expert_gating_func = %s\n", __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));
