@@ -6399,20 +6399,22 @@ class HunYuanMoEModel(TextModel):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        # FIX for tied embeddings: Capture the token embeddings.
+        # For handling tied embeddings
         self._tok_embd = None
 
     def set_vocab(self):
-        self._set_vocab_gpt2(load_merges=False)
-        # FIX for BOS token: Manually set the correct BOS token ID.
-        # The SpecialVocab helper gets incorrect id `bos_token_id: 1` from config.json.
-        self.gguf_writer.add_bos_token_id(127959)  # <|bos|>
-
-    def get_vocab_base(self) -> tuple[list[str], list[int], str]:
+        """
+        A self-contained vocab implementation for the HunYuan tiktoken-based tokenizer.
+        This method correctly generates tokens, types, and the required "fake" merges
+        to satisfy the llama.cpp GGUF loader.
+        """
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
 
-        # Fake merges
+        # 1. Get the pre-tokenizer identifier hash
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        # 2. Reverse-engineer the merges list from mergeable_ranks
         merges = []
         mergeable_ranks = tokenizer.mergeable_ranks
         for token, rank in mergeable_ranks.items():
@@ -6421,19 +6423,13 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
             merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
             if len(merged) == 2:
                 merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
-        self.gguf_writer.add_token_merges(merges)
 
+        # 3. Generate the tokens and toktypes lists
         vocab_size = self.hparams["vocab_size"]
-
         reverse_vocab = tokenizer.decoder
-        assert max(reverse_vocab.keys()) < tokenizer.vocab_size, tokenizer.vocab_size == vocab_size
-
-        tokpre = self.get_vocab_base_pre(tokenizer)
         special_token_ids = set(tokenizer.special_tokens.values())
-
         tokens: list[str] = []
         toktypes: list[int] = []
-
         for i in range(vocab_size):
             if i not in reverse_vocab:
                 tokens.append(f"[PAD{i}]")
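The merge recovery above leans on QwenModel.bpe to re-split each token using only lower-ranked merges. As a rough self-contained sketch of the idea (an illustration, not llama.cpp's exact helper): starting from single bytes, repeatedly apply the lowest-ranked merge strictly below max_rank; a token that ends up as exactly two parts yields the merge pair that originally produced it.

def recover_merge_parts(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int) -> list[bytes]:
    parts = [bytes([b]) for b in token]
    while len(parts) > 1:
        # Find the adjacent pair whose merged form has the lowest rank below max_rank.
        best_rank, best_idx = None, None
        for i in range(len(parts) - 1):
            rank = mergeable_ranks.get(parts[i] + parts[i + 1])
            if rank is not None and rank < max_rank and (best_rank is None or rank < best_rank):
                best_rank, best_idx = rank, i
        if best_idx is None:
            break  # no further merges available below max_rank
        parts = parts[:best_idx] + [parts[best_idx] + parts[best_idx + 1]] + parts[best_idx + 2:]
    return parts  # exactly two parts -> their pairing becomes a "fake" merge entry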
@@ -6446,30 +6442,42 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
             else:
                 toktypes.append(gguf.TokenType.NORMAL)
 
-        return tokens, toktypes, tokpre
+        # 4. Write all vocab-related fields to the GGUF writer
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_token_merges(merges)
+
+        # 5. Add special tokens and chat templates
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+        special_vocab.add_to_gguf(self.gguf_writer)
+        # FIX for BOS token: Manually set the correct BOS token ID.
+        self.gguf_writer.add_bos_token_id(127959)  # <|bos|>
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
+        hparams = self.hparams
 
-        self.gguf_writer.add_expert_count(self.hparams["num_experts"])
-        self.gguf_writer.add_expert_shared_feed_forward_length(self.hparams["intermediate_size"])
+        self.gguf_writer.add_expert_count(hparams["num_experts"])
+        self.gguf_writer.add_expert_shared_feed_forward_length(hparams["intermediate_size"])
 
-        moe_intermediate_size = self.hparams["moe_intermediate_size"]
+        moe_intermediate_size = hparams["moe_intermediate_size"]
         assert all(n == moe_intermediate_size[0] for n in moe_intermediate_size)
         self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size[0])
 
-        moe_topk = self.hparams["moe_topk"]
+        moe_topk = hparams["moe_topk"]
         assert all(topk == moe_topk[0] for topk in moe_topk)
         self.gguf_writer.add_expert_used_count(moe_topk[0])
 
-        moe_shared_expert = self.hparams["num_shared_expert"]
+        moe_shared_expert = hparams["num_shared_expert"]
         assert all(n == moe_shared_expert[0] for n in moe_shared_expert)
         self.gguf_writer.add_expert_shared_count(moe_shared_expert[0])
 
-        self.gguf_writer.add_qk_norm(self.hparams.get("use_qk_norm", True))
+        self.gguf_writer.add_qk_norm(hparams.get("use_qk_norm", True))
 
         # Rope
-        rope_scaling = self.hparams.get("rope_scaling", {})
+        rope_scaling = hparams.get("rope_scaling", {})
         if rope_scaling.get("type") == "dynamic":
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
             self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
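The asserts in set_gguf_parameters encode an assumption about HunYuan's config.json: the MoE fields arrive as per-layer lists that must be uniform across layers, since GGUF stores a single scalar and the code writes element 0. A hedged illustration with invented values:

# Hypothetical hparams of the shape this code expects (numbers invented):
hparams = {
    "moe_intermediate_size": [3072] * 24,  # one entry per layer, all equal
    "moe_topk": [8] * 24,
    "num_shared_expert": [1] * 24,
}
moe_topk = hparams["moe_topk"]
assert all(topk == moe_topk[0] for topk in moe_topk)  # uniform, so writing moe_topk[0] is safe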
@@ -6478,50 +6486,33 @@ def set_gguf_parameters(self):
     _experts: list[dict[str, Tensor]] | None = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        # FIX for tied embeddings: Capture the token embeddings.
         if name == "model.embed_tokens.weight":
             self._tok_embd = data_torch.clone()
-
-        # FIX for tied embeddings: Skip the lm_head if it's tied.
         if name == "lm_head.weight":
             if self.hparams.get("tie_word_embeddings", False):
                 logger.info("Skipping tied output layer 'lm_head.weight'")
                 return []
-
-        # process the experts separately
         if name.find("mlp.experts") != -1:
             n_experts = self.hparams["num_experts"]
             assert bid is not None
-
-            tensors: list[tuple[str, Tensor]] = []
-
             if self._experts is None:
                 self._experts = [{} for _ in range(self.block_count)]
-
             self._experts[bid][name] = data_torch
-
             if len(self._experts[bid]) >= n_experts * 3:
-                # merge the experts into a single 3d tensor
+                tensors: list[tuple[str, Tensor]] = []
                 for w_name in ["down_proj", "gate_proj", "up_proj"]:
                     datas: list[Tensor] = []
-
                     for xid in range(n_experts):
                         ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
                         datas.append(self._experts[bid][ename])
                         del self._experts[bid][ename]
-
                     data_torch = torch.stack(datas, dim=0)
-
                     merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
-
                     new_name = self.map_tensor_name(merged_name)
-
                     tensors.append((new_name, data_torch))
-
                 return tensors
             else:
                 return []
-
         return [(self.map_tensor_name(name), data_torch)]
 
     def prepare_tensors(self):
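The expert-merging branch in modify_tensors buffers each layer's per-expert 2-D matrices until all n_experts * 3 of them have arrived, then stacks them into one 3-D tensor per projection. A minimal standalone sketch of that stacking step (toy shapes, not HunYuan's real dimensions):

import torch

n_experts, d_ff, d_model = 4, 32, 16
# One [d_ff, d_model] matrix per expert, as they stream in from the checkpoint.
per_expert = [torch.randn(d_ff, d_model) for _ in range(n_experts)]
# torch.stack along dim=0 yields the single [n_experts, d_ff, d_model] tensor GGUF expects.
merged = torch.stack(per_expert, dim=0)
assert merged.shape == (n_experts, d_ff, d_model)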