Skip to content

Commit 9d3ef48

Browse files
authored (author name lost in page extraction)
convert : set expert gating func in base class (ggml-org#17279)
1 parent c7b7db0 commit 9d3ef48

File tree

1 file changed

+9
-34
lines changed

1 file changed

+9
-34
lines changed

convert_hf_to_gguf.py

Lines changed: 9 additions & 34 deletions

Original file line number | Diff line number | Diff line change
@@ -825,6 +825,15 @@ def set_gguf_parameters(self):
825825
self.gguf_writer.add_expert_group_used_count(n_group_used)
826826
logger.info(f"gguf: expert groups used count = {n_group_used}")
827827

828+
if (score_func := self.find_hparam(["score_function", "scoring_func", "score_func"], optional=True)) is not None:
829+
if score_func == "sigmoid":
830+
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
831+
elif score_func == "softmax":
832+
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
833+
else:
834+
raise ValueError(f"Unsupported expert score gating function value: {score_func}")
835+
logger.info(f"gguf: expert score gating function = {score_func}")
836+
828837
if (head_dim := self.hparams.get("head_dim")) is not None:
829838
self.gguf_writer.add_key_length(head_dim)
830839
self.gguf_writer.add_value_length(head_dim)
@@ -2553,15 +2562,6 @@ def set_gguf_parameters(self):
25532562
if (n_dense_layers := self.hparams.get("num_dense_layers")) is not None:
25542563
self.gguf_writer.add_leading_dense_block_count(n_dense_layers)
25552564

2556-
# Expert Gating Function
2557-
score_func = self.hparams.get("score_func")
2558-
if score_func == "sigmoid":
2559-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
2560-
elif score_func == "softmax":
2561-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
2562-
elif score_func is not None:
2563-
raise ValueError(f"Unsupported score_function value: {score_func}")
2564-
25652565
# Route normalization and scaling
25662566
if (route_norm := self.hparams.get("route_norm")) is not None:
25672567
self.gguf_writer.add_expert_weights_norm(route_norm)
@@ -7182,13 +7182,6 @@ def set_gguf_parameters(self):
71827182
self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
71837183
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
71847184

7185-
if hparams["scoring_func"] == "sigmoid":
7186-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
7187-
elif hparams["scoring_func"] == "softmax":
7188-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
7189-
else:
7190-
raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")
7191-
71927185
self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
71937186

71947187
rope_scaling = self.hparams.get("rope_scaling") or {}
@@ -7294,12 +7287,6 @@ def __init__(self, *args, **kwargs):
72947287

72957288
def set_gguf_parameters(self):
72967289
super().set_gguf_parameters()
7297-
if self.hparams["scoring_func"] == "sigmoid":
7298-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
7299-
elif self.hparams["scoring_func"] == "softmax":
7300-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
7301-
else:
7302-
raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
73037290

73047291
self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
73057292
self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))
@@ -7392,11 +7379,6 @@ def set_gguf_parameters(self):
73927379
self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
73937380
self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
73947381

7395-
if self.hparams["scoring_func"] == "noaux_tc":
7396-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
7397-
else:
7398-
raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
7399-
74007382
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
74017383
if name.endswith("e_score_correction_bias"):
74027384
name = name.replace("e_score_correction_bias", "e_score_correction.bias")
@@ -8717,13 +8699,6 @@ def set_gguf_parameters(self):
87178699
self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
87188700
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
87198701

8720-
if hparams["score_function"] == "sigmoid":
8721-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
8722-
elif hparams["score_function"] == "softmax":
8723-
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
8724-
else:
8725-
raise ValueError(f"Unsupported score_function value: {hparams['score_function']}")
8726-
87278702
if (nextn_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
87288703
self.gguf_writer.add_nextn_predict_layers(nextn_layers)
87298704

0 commit comments

Comments (0)