@@ -2829,25 +2829,30 @@ def set_gguf_parameters(self):
28292829 self .gguf_writer .add_expert_used_count (self .hparams ["moe_k" ])
28302830 self .gguf_writer .add_moe_every_n_layers (self .hparams ["moe_layer_interval" ])
28312831
def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: int) -> gguf.GGMLQuantizationType | bool:
    """Choose a forced quantization type for a tensor, pinning expert tensors to F16.

    Only ``new_name`` is inspected here; ``name``, ``bid`` and ``n_dims`` are
    passed through untouched to the parent implementation.

    Returns:
        ``gguf.GGMLQuantizationType.F16`` when ``new_name`` contains the
        substring ``"experts"``; otherwise whatever the parent class's
        ``tensor_force_quant`` returns for the same arguments.
    """
    # Anything that is not an expert tensor keeps the parent's decision.
    if "experts" not in new_name:
        return super().tensor_force_quant(name, new_name, bid, n_dims)

    return gguf.GGMLQuantizationType.F16
2836+
28322837 def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
28332838 # Modify correction bias name as in DeepseekV2
28342839 if name .endswith ("e_score_correction_bias" ):
28352840 name = name .replace ("e_score_correction_bias" , "e_score_correction.bias" )
2836-
2841+
28372842 # skip Multi-Token Prediction (MTP) layers (again, same as DeepseekV2)
28382843 match = re .match (r"model.mtp_block.(\d+)" , name )
28392844 if match :
28402845 return []
2841-
2846+
28422847 # skip all other MTP tensors for now
28432848 match = re .match (r"model.mtp_emb_norm.(\d+)" , name )
28442849 if match :
28452850 return []
2846-
2851+
28472852 match = re .match (r"model.mtp_hidden_norm.(\d+)" , name )
28482853 if match :
28492854 return []
2850-
2855+
28512856 match = re .match (r"model.mtp_linear_proj.(\d+)" , name )
28522857 if match :
28532858 return []
@@ -2874,16 +2879,16 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
28742879 datas .append (self ._experts [bid ][ename_to_retrieve ])
28752880 del self ._experts [bid ][ename_to_retrieve ]
28762881
2877- data_torch = torch .stack (datas , dim = 0 )
2882+ data_torch = torch .stack (datas , dim = 0 )
28782883 merged_name = f"layers.{ bid } .mlp.experts.{ w_name } .weight"
28792884 new_name = self .map_tensor_name (merged_name )
28802885 tensors .append ((new_name , data_torch ))
2881-
2886+
28822887 return tensors
28832888 else :
28842889 return []
28852890 return [(self .map_tensor_name (name ), data_torch )]
2886-
2891+
28872892 def prepare_tensors (self ):
28882893 super ().prepare_tensors ()
28892894
0 commit comments