@@ -684,6 +684,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "bba3b3366b646dbdded5dbc42d59598b849371afc42f7beafa914afaa5b70aa6":
+            # ref: https://huggingface.co/tencent/Hunyuan-4B-Instruct
+            res = "hunyuan-dense"
         if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
             # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
             res = "falcon-h1"
@@ -7553,11 +7556,6 @@ def set_gguf_parameters(self):
 class HunYuanMoEModel(TextModel):
     model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # For handling tied embeddings
-        self._tok_embd = None
-
     def set_vocab(self):
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
@@ -7651,9 +7649,6 @@ def set_gguf_parameters(self):
     _experts: list[dict[str, Tensor]] | None = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        if name == "model.embed_tokens.weight":
-            self._tok_embd = data_torch.clone()
-
         if name == "lm_head.weight":
             if self.hparams.get("tie_word_embeddings", False):
                 logger.info("Skipping tied output layer 'lm_head.weight'")
@@ -7698,6 +7693,98 @@ def prepare_tensors(self):
             raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("HunYuanDenseV1ForCausalLM")
+class HunYuanModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.HUNYUAN_DENSE
+
+    def set_vocab(self):
+        if (self.dir_model / "tokenizer.json").is_file():
+            self._set_vocab_gpt2()
+        else:
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+            # 1. Get the pre-tokenizer identifier hash
+            tokpre = self.get_vocab_base_pre(tokenizer)
+
+            # 2. Reverse-engineer the merges list from mergeable_ranks
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.mergeable_ranks
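+            # Each multi-byte token is re-split with bpe() using only lower-ranked merges;
+            # a two-piece result is exactly the pair whose merge produced that token.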
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+            # 3. Generate the tokens and toktypes lists
+            vocab_size = self.hparams["vocab_size"]
+            assert tokenizer.vocab_size == vocab_size
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+            for i in range(vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            # 4. Write all vocab-related fields to the GGUF writer
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            # 5. Add special tokens and chat templates
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+            # FIX for BOS token: Overwrite incorrect id read from config.json
+            if self.hparams['hidden_size'] == 4096:
+                self.gguf_writer.add_bos_token_id(127958)  # only for 7b dense, fix <|bos|> token
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        # Rope
+        rope_scaling = hparams.get("rope_scaling", {})
+        if rope_scaling.get("type") == "dynamic":
+            # HunYuan uses NTK Aware Alpha based scaling. Original implementation: https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
+            # 1000 corresponds to a usable context length of 256k (https://github.com/Tencent-Hunyuan/Hunyuan-A13B/blob/main/report/Hunyuan_A13B_Technical_Report.pdf)
+            alpha = rope_scaling.get("alpha", 50)
+            base = hparams.get("rope_theta", 10000.0)
+            dim = hparams["head_dim"]
+            scaled_base = base * (alpha ** (dim / (dim - 2)))
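+            # Illustrative numbers (assumed, not read from any config): with alpha = 1000 and
+            # head_dim = 128 this gives scaled_base = 10000 * 1000 ** (128 / 126) ≈ 1.116e7.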
+            self.gguf_writer.add_rope_freq_base(scaled_base)
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+            self.gguf_writer.add_rope_scaling_factor(1)
+            # There is no consistent way to calculate ctx from alpha, and the config is incorrectly set to 32k
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(256 * 1024)  # 256k context length
+            self.gguf_writer.add_context_length(256 * 1024)  # 256k context length
+
+            # if any of our assumptions about the values are wrong, something has changed and this may need to be updated
+            assert base == 10000.0 and self.hparams["max_position_embeddings"] in [32 * 1024, 256 * 1024], \
+                "HunYuan dynamic RoPE scaling assumptions changed, please update the logic or context length manually"
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name == "lm_head.weight":
+            if self.hparams.get("tie_word_embeddings", False):
+                logger.info("Skipping tied output layer 'lm_head.weight'")
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
 @ModelBase.register("SmolLM3ForCausalLM")
 class SmolLM3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.SMOLLM3