@@ -684,6 +684,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "bba3b3366b646dbdded5dbc42d59598b849371afc42f7beafa914afaa5b70aa6":
+            # TODO: update ref
+            res = "hunyuan"
         if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
             # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
             res = "falcon-h1"
@@ -7531,6 +7534,108 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")


+@ModelBase.register("HunYuanDenseV1ForCausalLM")
+class HunYuanModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.HUNYUAN_V1_DENSE
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # For handling tied embeddings
+        self._tok_embd = None
+
+    def set_vocab(self):
+        if (self.dir_model / "tokenizer.json").is_file():
+            self._set_vocab_gpt2()
+        else:
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+            # 1. Get the pre-tokenizer identifier hash
+            tokpre = self.get_vocab_base_pre(tokenizer)
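+            # this should resolve to "hunyuan" via the new chkhsh entry added to get_vocab_base_pre above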
+
+            # 2. Reverse-engineer the merges list from mergeable_ranks
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.mergeable_ranks
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
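+                # QwenModel.bpe re-splits `token` using only merges of lower rank, so the result
+                # is normally the two pieces that this rank's merge rule joins back together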
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+            # 3. Generate the tokens and toktypes lists
+            vocab_size = self.hparams["vocab_size"]
+            assert tokenizer.vocab_size == vocab_size
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+            for i in range(vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
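+            # ids with no tokenizer entry are emitted as [PAD{i}] / UNUSED placeholders so the
+            # token list stays contiguous and exactly vocab_size entries long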
+
+            # 4. Write all vocab-related fields to the GGUF writer
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            # 5. Add special tokens and chat templates
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+            # FIX for BOS token: overwrite the incorrect id read from config.json
+            self.gguf_writer.add_bos_token_id(127958)  # <|bos|>
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        self.gguf_writer.add_expert_shared_feed_forward_length(hparams["intermediate_size"])
+
+        # Rope
+        rope_scaling = hparams.get("rope_scaling", {})
+        if rope_scaling.get("type") == "dynamic":
+            # HunYuan uses NTK-aware alpha-based scaling. Original implementation: https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
+            # alpha = 1000 corresponds to a usable context length of 256k (https://github.com/Tencent-Hunyuan/Hunyuan-A13B/blob/main/report/Hunyuan_A13B_Technical_Report.pdf)
+            alpha = rope_scaling.get("alpha", 50)
+            base = hparams.get("rope_theta", 10000.0)
+            dim = hparams["head_dim"]
+            scaled_base = base * (alpha ** (dim / (dim - 2)))
+            self.gguf_writer.add_rope_freq_base(scaled_base)
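+            # e.g. with the defaults above and an (assumed) head_dim of 128:
+            #   scaled_base = 10000 * 50 ** (128 / 126) ≈ 5.3e5
+            # the NTK alpha is thus folded into a fixed rope_freq_base and no runtime RoPE scaling is needed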
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+            self.gguf_writer.add_rope_scaling_factor(1)
+            # There is no consistent way to calculate ctx from alpha, and the config is incorrectly set to 32k
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(256 * 1024)  # 256k context length
+            self.gguf_writer.add_context_length(256 * 1024)  # 256k context length
+
+            # if any of our assumptions about the values are wrong, something has changed and this may need to be updated
+            assert alpha == 50 and base == 10000.0 and self.hparams["max_position_embeddings"] in [32 * 1024, 256 * 1024], \
+                "HunYuan dynamic RoPE scaling assumptions changed, please update the logic or context length manually"
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name == "model.embed_tokens.weight":
+            self._tok_embd = data_torch.clone()
+
+        if name == "lm_head.weight":
+            if self.hparams.get("tie_word_embeddings", False):
+                logger.info("Skipping tied output layer 'lm_head.weight'")
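+                # nothing is written for lm_head here; llama.cpp typically falls back to reusing
+                # token_embd.weight as the output projection when no output tensor is present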
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
 @ModelBase.register("SmolLM3ForCausalLM")
 class SmolLM3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.SMOLLM3