@@ -684,6 +684,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "bba3b3366b646dbdded5dbc42d59598b849371afc42f7beafa914afaa5b70aa6":
+            # TODO: update ref
+            res = "hunyuan"
         if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
             # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
             res = "falcon-h1"
@@ -7531,6 +7534,108 @@ def prepare_tensors(self):
                     raise ValueError(f"Unprocessed experts: {experts}")


+@ModelBase.register("HunYuanDenseV1ForCausalLM")
+class HunYuanModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.HUNYUAN_V1_DENSE
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # For handling tied embeddings
+        self._tok_embd = None
+
+    def set_vocab(self):
+        if (self.dir_model / "tokenizer.json").is_file():
+            self._set_vocab_gpt2()
+        else:
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+            # 1. Get the pre-tokenizer identifier hash
+            tokpre = self.get_vocab_base_pre(tokenizer)
+
+            # 2. Reverse-engineer the merges list from mergeable_ranks
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.mergeable_ranks
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+            # 3. Generate the tokens and toktypes lists
+            vocab_size = self.hparams["vocab_size"]
+            assert tokenizer.vocab_size == vocab_size
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+            for i in range(vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            # 4. Write all vocab-related fields to the GGUF writer
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            # 5. Add special tokens and chat templates
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+            # FIX for BOS token: Overwrite incorrect id read from config.json
+            self.gguf_writer.add_bos_token_id(127958)  # <|bos|>
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        self.gguf_writer.add_expert_shared_feed_forward_length(hparams["intermediate_size"])
+
+        # Rope
+        rope_scaling = hparams.get("rope_scaling", {})
+        if rope_scaling.get("type") == "dynamic":
+            # HunYuan uses NTK Aware Alpha based scaling. Original implementation: https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
+            # 1000 corresponds to a usable context length of 256k (https://github.com/Tencent-Hunyuan/Hunyuan-A13B/blob/main/report/Hunyuan_A13B_Technical_Report.pdf)
+            alpha = rope_scaling.get("alpha", 50)
+            base = hparams.get("rope_theta", 10000.0)
+            dim = hparams["head_dim"]
+            scaled_base = base * (alpha ** (dim / (dim - 2)))
+            self.gguf_writer.add_rope_freq_base(scaled_base)
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+            self.gguf_writer.add_rope_scaling_factor(1)
+            # There is no consistent way to calculate ctx from alpha, and the config is incorrectly set to 32k
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(256 * 1024)  # 256k context length
+            self.gguf_writer.add_context_length(256 * 1024)  # 256k context length
+
+            # if any of our assumptions about the values are wrong, something has changed and this may need to be updated
+            assert alpha == 50 and base == 10000.0 and self.hparams["max_position_embeddings"] in [32 * 1024, 256 * 1024], \
+                "HunYuan dynamic RoPE scaling assumptions changed, please update the logic or context length manually"
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name == "model.embed_tokens.weight":
+            self._tok_embd = data_torch.clone()
+
+        if name == "lm_head.weight":
+            if self.hparams.get("tie_word_embeddings", False):
+                logger.info("Skipping tied output layer 'lm_head.weight'")
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
 @ModelBase.register("SmolLM3ForCausalLM")
 class SmolLM3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.SMOLLM3
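For context on the `scaled_base` computation in `set_gguf_parameters` above: rather than writing a RoPE scaling factor, the converter folds the NTK-aware alpha into the frequency base itself, which is why the GGUF then records `rope_scaling_type = NONE` and `rope_scaling_factor = 1`. The sketch below works the formula through once in isolation; the inputs (`rope_theta=10000.0`, `alpha=50`, `head_dim=128`) are assumed example values for illustration, not read from any real Hunyuan config.

```python
# Minimal sketch of the NTK-aware alpha scaling applied in the diff above.
# The input values are assumptions chosen for illustration only.
def ntk_alpha_scaled_base(base: float, alpha: float, head_dim: int) -> float:
    # base' = base * alpha^(d / (d - 2)), the scaled RoPE frequency base
    return base * (alpha ** (head_dim / (head_dim - 2)))

if __name__ == "__main__":
    scaled = ntk_alpha_scaled_base(base=10000.0, alpha=50, head_dim=128)
    print(f"scaled rope_freq_base = {scaled:.0f}")  # roughly 5.3e5 for these inputs
```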