@@ -825,6 +825,15 @@ def set_gguf_parameters(self):
             self.gguf_writer.add_expert_group_used_count(n_group_used)
             logger.info(f"gguf: expert groups used count = {n_group_used}")
 
+        if (score_func := self.find_hparam(["score_function", "scoring_func", "score_func"], optional=True)) is not None:
+            if score_func == "sigmoid":
+                self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+            elif score_func == "softmax":
+                self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
+            else:
+                raise ValueError(f"Unsupported expert score gating function value: {score_func}")
+            logger.info(f"gguf: expert score gating function = {score_func}")
+
         if (head_dim := self.hparams.get("head_dim")) is not None:
             self.gguf_writer.add_key_length(head_dim)
             self.gguf_writer.add_value_length(head_dim)
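The added base-class block above replaces the per-model `scoring_func` handling removed in the hunks below: it probes the model config under any of the aliases `score_function`, `scoring_func`, or `score_func` and maps the value to a GGUF expert-gating enum, raising on anything it does not recognize. A minimal, self-contained sketch of that lookup, using an illustrative stand-in enum rather than the real `gguf.ExpertGatingFuncType` values:

```python
from enum import IntEnum


# Illustrative stand-in for gguf.ExpertGatingFuncType; the values are placeholders.
class ExpertGatingFuncType(IntEnum):
    SOFTMAX = 1
    SIGMOID = 2


def detect_gating_func(hparams: dict) -> ExpertGatingFuncType | None:
    # Mirrors find_hparam(["score_function", "scoring_func", "score_func"], optional=True):
    # the first alias present in the config wins.
    score_func = next((hparams[k] for k in ("score_function", "scoring_func", "score_func") if k in hparams), None)
    if score_func is None:
        return None  # config does not declare an expert gating function
    if score_func == "sigmoid":
        return ExpertGatingFuncType.SIGMOID
    if score_func == "softmax":
        return ExpertGatingFuncType.SOFTMAX
    raise ValueError(f"Unsupported expert score gating function value: {score_func}")


# Example: a config using the "scoring_func" alias, and one with no gating key at all.
assert detect_gating_func({"scoring_func": "sigmoid"}) == ExpertGatingFuncType.SIGMOID
assert detect_gating_func({}) is None
```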
@@ -2553,15 +2562,6 @@ def set_gguf_parameters(self):
         if (n_dense_layers := self.hparams.get("num_dense_layers")) is not None:
             self.gguf_writer.add_leading_dense_block_count(n_dense_layers)
 
-        # Expert Gating Function
-        score_func = self.hparams.get("score_func")
-        if score_func == "sigmoid":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        elif score_func == "softmax":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
-        elif score_func is not None:
-            raise ValueError(f"Unsupported score_function value: {score_func}")
-
         # Route normalization and scaling
         if (route_norm := self.hparams.get("route_norm")) is not None:
             self.gguf_writer.add_expert_weights_norm(route_norm)
@@ -7182,13 +7182,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
         self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
 
-        if hparams["scoring_func"] == "sigmoid":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        elif hparams["scoring_func"] == "softmax":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
-        else:
-            raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")
-
         self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
 
         rope_scaling = self.hparams.get("rope_scaling") or {}
@@ -7294,12 +7287,6 @@ def __init__(self, *args, **kwargs):
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        if self.hparams["scoring_func"] == "sigmoid":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        elif self.hparams["scoring_func"] == "softmax":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
-        else:
-            raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
 
         self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
         self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))
@@ -7392,11 +7379,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
         self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
 
-        if self.hparams["scoring_func"] == "noaux_tc":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        else:
-            raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
-
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
         if name.endswith("e_score_correction_bias"):
             name = name.replace("e_score_correction_bias", "e_score_correction.bias")
@@ -8717,13 +8699,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
         self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
 
-        if hparams["score_function"] == "sigmoid":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        elif hparams["score_function"] == "softmax":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
-        else:
-            raise ValueError(f"Unsupported score_function value: {hparams['score_function']}")
-
         if (nextn_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
             self.gguf_writer.add_nextn_predict_layers(nextn_layers)
 