@@ -844,14 +844,14 @@ def get_vocab_base_pre(self, tokenizer) -> str:
     def _set_vocab_none(self) -> None:
         self.gguf_writer.add_tokenizer_model("none")
 
-    def _set_vocab_gpt2(self, load_merges=True) -> None:
+    def _set_vocab_gpt2(self) -> None:
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.gguf_writer.add_tokenizer_model("gpt2")
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
 
-        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
         special_vocab.add_to_gguf(self.gguf_writer)
 
     def _set_vocab_qwen(self):
@@ -6405,11 +6405,6 @@ def __init__(self, *args, **kwargs):
         self._tok_embd = None
 
     def set_vocab(self):
-        """
-        A self-contained vocab implementation for the HunYuan tiktoken-based tokenizer.
-        This method correctly generates tokens, types, and the required "fake" merges
-        to satisfy the llama.cpp GGUF loader.
-        """
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
 
@@ -6456,7 +6451,7 @@ def set_vocab(self):
         # 5. Add special tokens and chat templates
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
         special_vocab.add_to_gguf(self.gguf_writer)
-        # FIX for BOS token: Manually set the correct BOS token ID.
+        # FIX for BOS token: Overwrite incorrect id read from config.json
         self.gguf_writer.add_bos_token_id(127959)  # <|bos|>
 
     def set_gguf_parameters(self):
@@ -6478,11 +6473,11 @@ def set_gguf_parameters(self):
         assert all(n == moe_shared_expert[0] for n in moe_shared_expert)
         self.gguf_writer.add_expert_shared_count(moe_shared_expert[0])
 
-        self.gguf_writer.add_qk_norm(hparams.get("use_qk_norm", True))
-
         # Rope
         rope_scaling = hparams.get("rope_scaling", {})
         if rope_scaling.get("type") == "dynamic":
+            # Not sure if YARN is correct here, and the factor in the config is only 1 anyway
+            # but the release claims to scale to 256k, which would be a factor of 8
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
             self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
             self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["max_position_embeddings"])
@@ -6492,31 +6487,41 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if name == "model.embed_tokens.weight":
             self._tok_embd = data_torch.clone()
+
         if name == "lm_head.weight":
             if self.hparams.get("tie_word_embeddings", False):
                 logger.info("Skipping tied output layer 'lm_head.weight'")
                 return []
+
         if name.find("mlp.experts") != -1:
             n_experts = self.hparams["num_experts"]
             assert bid is not None
+
             if self._experts is None:
                 self._experts = [{} for _ in range(self.block_count)]
+
             self._experts[bid][name] = data_torch
+
             if len(self._experts[bid]) >= n_experts * 3:
+                # merge the experts into a single 3d tensor
                 tensors: list[tuple[str, Tensor]] = []
                 for w_name in ["down_proj", "gate_proj", "up_proj"]:
                     datas: list[Tensor] = []
+
                     for xid in range(n_experts):
                         ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
                         datas.append(self._experts[bid][ename])
                         del self._experts[bid][ename]
+
                     data_torch = torch.stack(datas, dim=0)
                     merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
                     new_name = self.map_tensor_name(merged_name)
                     tensors.append((new_name, data_torch))
+
                 return tensors
             else:
                 return []
+
         return [(self.map_tensor_name(name), data_torch)]
 
     def prepare_tensors(self):