@@ -189,10 +189,10 @@ def index_tensors(self, remote_hf_model_id: str | None = None) -> dict[str, Call
             return tensors
 
         prefix = "model" if not self.is_mistral_format else "consolidated"
-        part_names: list[str] = ModelBase.get_model_part_names(self.dir_model, prefix, ".safetensors")
+        part_names: set[str] = set(ModelBase.get_model_part_names(self.dir_model, prefix, ".safetensors"))
         is_safetensors: bool = len(part_names) > 0
         if not is_safetensors:
-            part_names = ModelBase.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
+            part_names = set(ModelBase.get_model_part_names(self.dir_model, "pytorch_model", ".bin"))
 
         tensor_names_from_index: set[str] = set()
 
@@ -209,6 +209,7 @@ def index_tensors(self, remote_hf_model_id: str | None = None) -> dict[str, Call
                if weight_map is None or not isinstance(weight_map, dict):
                    raise ValueError(f"Can't load 'weight_map' from {index_name!r}")
                tensor_names_from_index.update(weight_map.keys())
+                part_names |= set(weight_map.values())
            else:
                weight_map = {}
        else:
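
For context on this first pair of hunks: `part_names` changes from a `list` to a `set` so that shard files discovered by globbing and shard files named in the index deduplicate naturally, and the new union with `weight_map.values()` also picks up shards that only the index mentions. A minimal standalone sketch of that logic (the `my-model` directory and file layout here are hypothetical, not taken from the converter):

```python
import json
from pathlib import Path

model_dir = Path("my-model")  # hypothetical checkout of a sharded HF model

# shards found on disk by prefix, kept as a set so later unions deduplicate
part_names = {p.name for p in model_dir.glob("model*.safetensors")}

index_file = model_dir / "model.safetensors.index.json"
if index_file.is_file():
    with open(index_file, "r", encoding="utf-8") as f:
        # "weight_map" maps tensor name -> shard filename
        weight_map = json.load(f)["weight_map"]
    # union in shards that only the index references; duplicates collapse
    # instead of the same file being listed (and read) twice
    part_names |= set(weight_map.values())

print(sorted(part_names))
```
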
@@ -835,6 +836,15 @@ def set_gguf_parameters(self):
            self.gguf_writer.add_expert_group_used_count(n_group_used)
            logger.info(f"gguf: expert groups used count = {n_group_used}")
 
+        if (score_func := self.find_hparam(["score_function", "scoring_func", "score_func"], optional=True)) is not None:
+            if score_func == "sigmoid":
+                self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+            elif score_func == "softmax":
+                self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
+            else:
+                raise ValueError(f"Unsupported expert score gating function value: {score_func}")
+            logger.info(f"gguf: expert score gating function = {score_func}")
+
        if (head_dim := self.hparams.get("head_dim")) is not None:
            self.gguf_writer.add_key_length(head_dim)
            self.gguf_writer.add_value_length(head_dim)
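
The consolidated block above keys off `find_hparam`, which tries several spellings of the same hyperparameter in order, since different model configs name it `score_function`, `scoring_func`, or `score_func`. As a rough sketch of that lookup behavior (an assumption about what `find_hparam` does, not its actual implementation):

```python
from typing import Any

def find_hparam(hparams: dict[str, Any], keys: list[str], optional: bool = False) -> Any:
    # return the value of the first candidate key present in the config
    for key in keys:
        if key in hparams:
            return hparams[key]
    if optional:
        return None
    raise KeyError(f"could not find any of: {keys}")

# a DeepSeek-style config spells it "scoring_func"; other models differ
assert find_hparam({"scoring_func": "sigmoid"},
                   ["score_function", "scoring_func", "score_func"]) == "sigmoid"
# optional lookups return None for models with no gating function at all
assert find_hparam({}, ["score_function", "scoring_func", "score_func"], optional=True) is None
```
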
@@ -2563,15 +2573,6 @@ def set_gguf_parameters(self):
        if (n_dense_layers := self.hparams.get("num_dense_layers")) is not None:
            self.gguf_writer.add_leading_dense_block_count(n_dense_layers)
 
-        # Expert Gating Function
-        score_func = self.hparams.get("score_func")
-        if score_func == "sigmoid":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        elif score_func == "softmax":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
-        elif score_func is not None:
-            raise ValueError(f"Unsupported score_function value: {score_func}")
-
        # Route normalization and scaling
        if (route_norm := self.hparams.get("route_norm")) is not None:
            self.gguf_writer.add_expert_weights_norm(route_norm)
@@ -7192,13 +7193,6 @@ def set_gguf_parameters(self):
        self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
        self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
 
-        if hparams["scoring_func"] == "sigmoid":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        elif hparams["scoring_func"] == "softmax":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
-        else:
-            raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")
-
        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
 
        rope_scaling = self.hparams.get("rope_scaling") or {}
@@ -7304,12 +7298,6 @@ def __init__(self, *args, **kwargs):
 
    def set_gguf_parameters(self):
        super().set_gguf_parameters()
-        if self.hparams["scoring_func"] == "sigmoid":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        elif self.hparams["scoring_func"] == "softmax":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
-        else:
-            raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
 
        self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
        self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))
@@ -7402,11 +7390,6 @@ def set_gguf_parameters(self):
        self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
        self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
 
-        if self.hparams["scoring_func"] == "noaux_tc":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        else:
-            raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
-
    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        if name.endswith("e_score_correction_bias"):
            name = name.replace("e_score_correction_bias", "e_score_correction.bias")
@@ -8727,13 +8710,6 @@ def set_gguf_parameters(self):
        self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
        self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
 
-        if hparams["score_function"] == "sigmoid":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
-        elif hparams["score_function"] == "softmax":
-            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
-        else:
-            raise ValueError(f"Unsupported score_function value: {hparams['score_function']}")
-
        if (nextn_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
            self.gguf_writer.add_nextn_predict_layers(nextn_layers)
 
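
Taken together, the per-model hunks all delete the same copy-pasted mapping; each subclass now inherits it from the consolidated `set_gguf_parameters` shown in the first `@@ -835` hunk. A hedged sketch of the resulting subclass shape, assuming the shared logic lives in a common text-model base class (the diff does not show the class names; `MyMoEModel` and `TextModel` here are placeholders):

```python
class MyMoEModel(TextModel):  # hypothetical subclass for illustration
    def set_gguf_parameters(self):
        # the base class now reads score_function / scoring_func / score_func
        # and emits the expert gating function, so no per-model mapping is needed
        super().set_gguf_parameters()
        # only genuinely model-specific parameters remain in the subclass
        self.gguf_writer.add_expert_shared_count(self.hparams["num_shared_experts"])
```
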