@@ -842,14 +842,14 @@ def get_vocab_base_pre(self, tokenizer) -> str:
     def _set_vocab_none(self) -> None:
         self.gguf_writer.add_tokenizer_model("none")
 
-    def _set_vocab_gpt2(self) -> None:
+    def _set_vocab_gpt2(self, load_merges=True) -> None:
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.gguf_writer.add_tokenizer_model("gpt2")
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
 
-        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges)
         special_vocab.add_to_gguf(self.gguf_writer)
 
     def _set_vocab_qwen(self):
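Note: with load_merges left at its default of True, gguf.SpecialVocab reads the BPE merges from the tokenizer files on disk; passing False lets a subclass write the merges itself, as HunYuanMoEModel.get_vocab_base does below via add_token_merges. A minimal sketch of the two call paths (illustrative only, assuming the gguf.SpecialVocab(path, load_merges) signature used above):

    self._set_vocab_gpt2()                   # SpecialVocab loads merges from the tokenizer files
    self._set_vocab_gpt2(load_merges=False)  # merges are written explicitly via add_token_merges()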
@@ -6394,15 +6394,14 @@ def set_gguf_parameters(self):
 
 
 @ModelBase.register("HunYuanMoEV1ForCausalLM")
-class HunYuanMoEModel(LlamaModel):
+class HunYuanMoEModel(TextModel):
     model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE
-    undo_permute = False
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     def set_vocab(self):
-        self._set_vocab_gpt2()
+        self._set_vocab_gpt2(load_merges=False)
 
     def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         tokens: list[str] = []
@@ -6411,52 +6410,41 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
 
-        # merge logic is copied from QwenModel, maybe incorrect
         merges = []
-        vocab = {}
         mergeable_ranks = tokenizer.mergeable_ranks
         for token, rank in mergeable_ranks.items():
-            vocab[QwenModel.token_bytes_to_string(token)] = rank
             if len(token) == 1:
                 continue
+            # bpe() will decompose the token into its smallest parts and then
+            # re-merge them. If the token is a valid merge, bpe() will return
+            # the two pieces that were merged to create it.
             merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
             if len(merged) == 2:
                 merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
         self.gguf_writer.add_token_merges(merges)
 
+        vocab_size = self.hparams["vocab_size"]
+
         reverse_vocab = tokenizer.decoder
-        assert max(reverse_vocab.keys()) < tokenizer.vocab_size
+        assert max(reverse_vocab.keys()) < tokenizer.vocab_size == vocab_size
 
         tokpre = self.get_vocab_base_pre(tokenizer)
-        added_vocab = tokenizer.get_added_vocab()
+        special_token_ids = set(tokenizer.special_tokens.values())
 
-        added_tokens_decoder = tokenizer.added_tokens_decoder
+        tokens: list[str] = []
+        toktypes: list[int] = []
 
-        for i in range(tokenizer.vocab_size):
+        for i in range(vocab_size):
             if i not in reverse_vocab:
                 tokens.append(f"[PAD{i}]")
                 toktypes.append(gguf.TokenType.UNUSED)
             else:
-                token: str = reverse_vocab[i]
-                if token in added_vocab:
-                    # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
-                    # To avoid unexpected issues - we make sure to normalize non-normalized tokens
-                    if not added_tokens_decoder[i].normalized:
-                        previous_token = token
-                        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
-                        if previous_token != token:
-                            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
-
-                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
-                        toktypes.append(gguf.TokenType.CONTROL)
-                    else:
-                        # NOTE: this was added for Gemma.
-                        # Encoding and decoding the tokens above isn't sufficient for this case.
-                        token = token.replace(b"\xe2\x96\x81".decode("utf-8"), " ")  # pre-normalize user-defined spaces
-                        toktypes.append(gguf.TokenType.USER_DEFINED)
+                token = reverse_vocab[i]
+                tokens.append(token)
+                if i in special_token_ids:
+                    toktypes.append(gguf.TokenType.CONTROL)
                 else:
                     toktypes.append(gguf.TokenType.NORMAL)
-                tokens.append(token)
 
         return tokens, toktypes, tokpre
 
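The merge-recovery loop above leans on QwenModel.bpe: running byte-level BPE over a token while capping the usable merge ranks at the token's own rank stops the process one step short of re-forming the token, so a token that is itself a valid merge comes back as exactly the two pieces that produced it. A standalone sketch of that idea (the real helper lives on QwenModel in convert_hf_to_gguf.py; this re-implementation is illustrative only):

    def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int) -> list[bytes]:
        # Byte-level BPE over `token`, applying only merges ranked below max_rank:
        # start from single bytes and repeatedly merge the adjacent pair with the lowest rank.
        parts = [bytes([b]) for b in token]
        while True:
            best_rank, best_idx = None, None
            for i in range(len(parts) - 1):
                rank = mergeable_ranks.get(parts[i] + parts[i + 1])
                if rank is not None and rank < max_rank and (best_rank is None or rank < best_rank):
                    best_rank, best_idx = rank, i
            if best_idx is None:
                break
            parts = parts[:best_idx] + [parts[best_idx] + parts[best_idx + 1]] + parts[best_idx + 2:]
        return parts

    # Example: if mergeable_ranks maps b"a", b"b" and b"ab" (with b"ab" ranked after the
    # single bytes), then bpe(mergeable_ranks, b"ab", max_rank=rank_of_ab) returns
    # [b"a", b"b"], which becomes the merge entry "a b" after token_bytes_to_string.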
@@ -6474,6 +6462,25 @@ def set_gguf_parameters(self):
         assert all(topk == moe_topk[0] for topk in moe_topk)
         self.gguf_writer.add_expert_used_count(moe_topk[0])
 
+        moe_shared_expert = self.hparams["num_shared_expert"]
+        assert all(n == moe_shared_expert[0] for n in moe_shared_expert)
+        self.gguf_writer.add_expert_shared_count(moe_shared_expert[0])
+
+        self.gguf_writer.add_qk_norm(self.hparams.get("use_qk_norm", True))
+
+        # Rope
+        rope_scaling = self.hparams.get("rope_scaling", {})
+        if rope_scaling.get("type") == "dynamic":
+            logger.warning("Model uses 'dynamic' rope scaling, which is not yet supported in GGUF. "
+                           "The resulting model may not work correctly with contexts longer than the training length.")
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+        else:
+            # Fallback for other potential scaling types
+            # This part is inherited from TextModel and will handle standard rope_theta
+            pass
+
+    _experts: list[dict[str, Tensor]] | None = None
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # process the experts separately
         if name.find("mlp.experts") != -1:
@@ -6511,6 +6518,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return [(self.map_tensor_name(name), data_torch)]
 
+    def prepare_tensors(self):
+        super().prepare_tensors()
+        if self._experts is not None:
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
 ###### CONVERSION LOGIC ######
 
 
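The prepare_tensors check assumes the usual expert-handling pattern in this converter: modify_tensors buffers each layer's per-expert weights in self._experts until all experts for that layer have arrived, stacks them into a single 3D tensor, and removes them from the buffer; anything still left when prepare_tensors runs points to a tensor that was never matched. A rough sketch of the stacking step under assumed tensor names (the actual hparam keys and weight paths depend on the checkpoint):

    import torch

    def stack_expert_weights(buffered: dict[str, torch.Tensor], n_experts: int, bid: int, w_name: str) -> torch.Tensor:
        # Pop the per-expert 2D weights for one projection of layer `bid` out of the
        # buffer and stack them into a single [n_experts, rows, cols] tensor.
        datas = [buffered.pop(f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight")
                 for xid in range(n_experts)]
        return torch.stack(datas, dim=0)

Once every projection of every layer has been popped this way, the buffer is empty and the ValueError above never fires.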