@@ -7834,9 +7834,8 @@ def set_vocab(self):
78347834 def set_gguf_parameters (self ):
78357835 super ().set_gguf_parameters ()
78367836 hparams = self .hparams
7837- if (rope_dim := hparams .get ("head_dim" )) is None :
7838- rope_dim = hparams ["hidden_size" ] // hparams ["num_attention_heads" ]
78397837
7838+ rope_dim = int (hparams ['partial_rotary_factor' ] * hparams ['head_dim' ])
78407839 self .gguf_writer .add_rope_dimension_count (rope_dim )
78417840 rope_scaling = self .hparams .get ("rope_scaling" ) or {}
78427841 if rope_scaling .get ("rope_type" , rope_scaling .get ("type" )) == "yarn" and "factor" in rope_scaling :
@@ -7848,10 +7847,16 @@ def set_gguf_parameters(self):
78487847 self .gguf_writer .add_leading_dense_block_count (hparams ["first_k_dense_replace" ])
78497848 self .gguf_writer .add_vocab_size (hparams ["vocab_size" ])
78507849 self .gguf_writer .add_expert_feed_forward_length (hparams ["moe_intermediate_size" ])
7851- self .gguf_writer .add_expert_weights_scale (1.0 )
7850+ self .gguf_writer .add_expert_weights_scale (hparams [ "routed_scaling_factor" ] )
78527851 self .gguf_writer .add_expert_count (hparams ["num_experts" ])
78537852 self .gguf_writer .add_expert_shared_count (hparams ["num_shared_experts" ])
78547853 self .gguf_writer .add_expert_weights_norm (hparams ["norm_topk_prob" ])
7854+ if hparams ["score_function" ] == "sigmoid" :
7855+ self .gguf_writer .add_expert_gating_func (gguf .ExpertGatingFuncType .SIGMOID )
7856+ elif hparams ["score_function" ] == "softmax" :
7857+ self .gguf_writer .add_expert_gating_func (gguf .ExpertGatingFuncType .SOFTMAX )
7858+ else :
7859+ raise ValueError (f"Unsupported score_function value: { hparams ['score_function' ]} " )
78557860
78567861 _experts : list [dict [str , Tensor ]] | None = None
78577862
0 commit comments