Skip to content

Commit 4ea1c01

Browse files
committed
update new hunyuan dense v1 models
1 parent 81fe5f3 commit 4ea1c01

File tree

6 files changed

+172
-7
lines changed

6 files changed

+172
-7
lines changed

convert.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ class ModelType(Enum):
186186

187187
HunYuanDense = 0x1f00
188188
HunYuanMoEV1 = 0x1f01
189+
HunYuanDenseV1 = 0x1f02
189190

190191
MoonLight = 0x2000
191192

@@ -6789,6 +6790,28 @@ def get_weight_names(config):
67896790

67906791
return weight_names
67916792

6793+
class HunYuanDenseV1Converter(BaseConverter):
    """Converter for `HunYuanDenseV1ForCausalLM` checkpoints.

    Delegates to :class:`HunYuanDenseConverter` for the shared config and
    weight layout, then appends the explicit per-head dimension, which V1
    models may decouple from ``hidden_size / num_attention_heads``.
    """
    MODEL_TYPE = ModelType.HunYuanDenseV1

    @staticmethod
    def dump_config(f, config, ggml_type):
        # Checkpoints may carry the head size under either name; normalize
        # to `attention_head_dim` and require the two to agree when both set.
        if config.attention_head_dim is not None:
            assert config.head_dim == config.attention_head_dim
        else:
            config.attention_head_dim = config.head_dim

        # `head_dim` is serialized below and read unconditionally by the
        # loader; fail with a clear message instead of an opaque TypeError
        # from struct.pack when it is missing.
        assert config.head_dim is not None, 'HunYuanDenseV1: head_dim (or attention_head_dim) is required'

        HunYuanDenseConverter.dump_config(f, config, ggml_type)

        config_values = [
            config.head_dim,
        ]
        # Little-endian int32, appended after the base dense config.
        f.write(struct.pack("<i", *config_values))

    @staticmethod
    def get_weight_names(config):
        # Dense V1 shares the exact tensor layout of the original dense model.
        weight_names = HunYuanDenseConverter.get_weight_names(config)
        return weight_names
6814+
67926815
class HunYuanMoEV1Converter(BaseConverter):
67936816
MODEL_TYPE = ModelType.HunYuanMoEV1
67946817

@@ -7758,6 +7781,10 @@ def main():
77587781
(isinstance(config.num_experts, list) and max(config.num_experts) > 1)):
77597782
raise Exception('HunYuanForCausalLM: only dense model is supported')
77607783
HunYuanDenseConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
7784+
elif arch == 'HunYuanDenseV1ForCausalLM':
7785+
assert config.use_mla is None
7786+
config.use_mla = False
7787+
HunYuanDenseV1Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
77617788
elif arch == 'HunYuanMoEV1ForCausalLM':
77627789
HunYuanMoEV1Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
77637790
elif arch == 'InstellaForCausalLM':

docs/models.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,11 @@
8080
* [x] v3.2: [Instruct-2B](https://huggingface.co/ibm-granite/granite-3.2-2b-instruct), [Instruct-2B](https://huggingface.co/ibm-granite/granite-3.2-8b-instruct), [Instruct-8B](https://huggingface.co/ibm-granite/granite-3.2-8b-instruct/tree/0276d996f60d5eb0b376b6d06622042d4ef3eb4b)
8181

8282
* HunYuan (`HunYuanForCausalLM`)
83-
* [x] Dense: [Instruct-7B](https://huggingface.co/tencent/Hunyuan-7B-Instruct)
83+
* [x] ~~Dense: [Instruct-7B](https://huggingface.co/tencent/Hunyuan-7B-Instruct)~~ (original release no longer available)
84+
* [x] Dense: [0.5B-Instruct](https://huggingface.co/tencent/Hunyuan-0.5B-Instruct/tree/9ec1774c379d7dde3f2d7ddd3286cde88949e181),
85+
[1.8B-Instruct](https://huggingface.co/tencent/Hunyuan-1.8B-Instruct/tree/21ab9fd367ee99ba8001d34a182252ddb2ed255c),
86+
[4B-Instruct](https://huggingface.co/tencent/Hunyuan-4B-Instruct/tree/3a419720cb283ece18dc6baac1b2484418cf525f),
87+
[7B-Instruct](https://huggingface.co/tencent/Hunyuan-7B-Instruct/tree/e256110382dc42f4e2f4d97afc9f8bea5a907a4a)
8488
* [x] MoE: [A13B-Instruct](https://huggingface.co/tencent/Hunyuan-A13B-Instruct/tree/202c9758065873e0ac7c80211e6275593f165442)
8589

8690
* Instella (`InstellaForCausalLM`)

models/hunyuan.cpp

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ namespace chatllm::hunyuan::dense
6969
{
7070
}
7171

72-
ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config)
73-
: BaseModelForConditionalGeneration(MODEL_TYPE_HUNYUAN_DENSE, config, runtime_config), config(config)
72+
ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type, int head_dim)
73+
: BaseModelForConditionalGeneration(type, config, runtime_config), config(config)
7474
{
7575
const size_t tensor_ovhd = ggml_tensor_overhead();
7676
const size_t num_tensors = 2 + config.num_hidden_layers * 14;
@@ -81,6 +81,7 @@ namespace chatllm::hunyuan::dense
8181
transformer = new ModelClass(&w_ctx_, config, nullptr,
8282
config.hidden_size, config.num_attention_heads,
8383
config.intermediate_size, config.num_key_value_heads,
84+
head_dim,
8485
config.max_length);
8586

8687
for (int i = 0; i < config.num_hidden_layers; i++)
@@ -91,6 +92,11 @@ namespace chatllm::hunyuan::dense
9192
}
9293
}
9394

95+
ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type)
96+
: ConditionalGeneration(config, runtime_config, type, config.hidden_size / config.num_attention_heads)
97+
{
98+
}
99+
94100
void ConditionalGeneration::load(ModelLoader &loader)
95101
{
96102
auto transformer = get_typed_transformer<ModelClass>();
@@ -122,6 +128,103 @@ namespace chatllm::hunyuan::dense
122128
}
123129
}
124130

131+
namespace chatllm::hunyuan::dense_v1
132+
{
133+
struct Config : dense::Config
134+
{
135+
int head_dim;
136+
};
137+
138+
class ChatHistoryEncoder : public BaseHistoryEncoder
139+
{
140+
public:
141+
void append_sys_prompt(std::vector<int> &ids) const override;
142+
void append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const override;
143+
void append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override;
144+
void append_ai_opening(int round_idx, std::vector<int> &ids) const override;
145+
};
146+
147+
static ChatHistoryEncoder _chat_encoder;
148+
149+
class Tokenizer : public BaseTokenizer
150+
{
151+
public:
152+
Tokenizer(const BaseConfig &config)
153+
: BaseTokenizer(config, &_chat_encoder)
154+
{}
155+
156+
size_t load(tokenizer::DataReader *buffer, int n_vocab) override
157+
{
158+
tp = new tokenizer::BPEProcessor2(
159+
{
160+
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
161+
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
162+
}
163+
);
164+
size_t size = tp->Load(buffer, n_vocab);
165+
166+
hy_User_token_id = tp->PieceToId("<|hy_User|>");
167+
hy_Assistant_token_id = tp->PieceToId("<|hy_Assistant|>");
168+
bos_token_id = tp->PieceToId("<|hy_begin▁of▁sentence|>");
169+
eos_token_id = tp->PieceToId("<|hy_place▁holder▁no▁2|>");
170+
171+
terminate_ids.insert(eos_token_id);
172+
173+
tp->OverrideTokenDecoding(tp->PieceToId("<think>"), "<think>");
174+
tp->OverrideTokenDecoding(tp->PieceToId("</think>"), "</think>");
175+
176+
return size;
177+
}
178+
179+
public:
180+
int hy_User_token_id;
181+
int hy_Assistant_token_id;
182+
};
183+
184+
void ChatHistoryEncoder::append_sys_prompt(std::vector<int> &ids) const
185+
{
186+
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
187+
188+
ids.push_back(tok->bos_token_id);
189+
190+
if (tok->get_system_prompt().size() > 0)
191+
{
192+
tok->encode(tok->get_system_prompt(), ids);
193+
}
194+
}
195+
196+
void ChatHistoryEncoder::append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const
197+
{
198+
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
199+
200+
append_ai_opening(round_idx, ids);
201+
tok->encode(ai, ids);
202+
ids.push_back(tok->eos_token_id);
203+
}
204+
205+
void ChatHistoryEncoder::append_user(int round_idx, const std::string &user, std::vector<int> &ids) const
206+
{
207+
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
208+
209+
ids.push_back(tok->hy_User_token_id);
210+
tok->encode(user, ids);
211+
}
212+
213+
void ChatHistoryEncoder::append_ai_opening(int round_idx, std::vector<int> &ids) const
214+
{
215+
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
216+
ids.push_back(tok->hy_Assistant_token_id);
217+
}
218+
219+
class ConditionalGeneration : public dense::ConditionalGeneration
220+
{
221+
public:
222+
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config)
223+
: dense::ConditionalGeneration(config, runtime_config, MODEL_TYPE_HUNYUAN_DENSE_V1, config.head_dim)
224+
{}
225+
};
226+
}
227+
125228
namespace chatllm::hunyuan::moe_v1
126229
{
127230
template <class HunyuanMoEMLP> class HunyuanMoEBlock : public LMBlock1<RMSNorm, dense::HunyuanSelfAttention, RMSNorm, HunyuanMoEMLP>
@@ -248,5 +351,6 @@ namespace chatllm::hunyuan::moe_v1
248351
namespace chatllm
249352
{
250353
REGISTER_MODEL_LOADER(HUNYUAN_DENSE, hunyuan::dense, 1);
354+
REGISTER_MODEL_LOADER(HUNYUAN_DENSE_V1, hunyuan::dense_v1, 1);
251355
REGISTER_MODEL_LOADER(HUNYUAN_MOE_V1, hunyuan::moe_v1, 1);
252356
}

models/hunyuan.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,19 @@ namespace chatllm::hunyuan::dense
4343
class HunyuanBlock : public LMBlock1<RMSNorm, HunyuanSelfAttention, RMSNorm, SiLUMLP>
4444
{
4545
public:
46-
HunyuanBlock(InitContext *ctx, int hidden_size, int num_attention_heads, int intermediate_size, int num_kv_heads, int max_length)
47-
: LMBlock1(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, max_length)
46+
HunyuanBlock(InitContext *ctx, int hidden_size, int num_attention_heads, int intermediate_size, int num_kv_heads, int head_dim, int max_length)
47+
: LMBlock1(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, head_dim, max_length)
4848
{}
4949
};
5050

5151
class ConditionalGeneration : public BaseModelForConditionalGeneration
5252
{
5353
public:
54-
typedef Model<Config, Embedding, RMSNorm, HunyuanBlock, int, int, int, int, int> ModelClass;
54+
typedef Model<Config, Embedding, RMSNorm, HunyuanBlock, int, int, int, int, int, int> ModelClass;
5555
public:
56-
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config);
56+
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = MODEL_TYPE_HUNYUAN_DENSE);
57+
58+
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type, int head_dim);
5759

5860
void load(ModelLoader &loader) override;
5961

scripts/models.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2434,6 +2434,33 @@
24342434
"default": "7b",
24352435
"license": "Tencent License",
24362436
"variants": {
2437+
"0.5b": {
2438+
"default": "q8",
2439+
"quantized": {
2440+
"q8": {
2441+
"size": 576757664,
2442+
"url": "chatllm_quantized_hunyuan/hunyuan-dense-v1-0.5b.bin"
2443+
}
2444+
}
2445+
},
2446+
"1.8b": {
2447+
"default": "q8",
2448+
"quantized": {
2449+
"q8": {
2450+
"size": 1907337760,
2451+
"url": "chatllm_quantized_hunyuan/hunyuan-dense-v1-1.8b.bin"
2452+
}
2453+
}
2454+
},
2455+
"4b": {
2456+
"default": "q8",
2457+
"quantized": {
2458+
"q8": {
2459+
"size": 4490205600,
2460+
"url": "chatllm_quantized_hunyuan/hunyuan-dense-v1-4b.bin"
2461+
}
2462+
}
2463+
},
24372464
"7b": {
24382465
"default": "q8",
24392466
"quantized": {

src/models_priv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ namespace chatllm
146146

147147
MODEL_TYPE_HUNYUAN_DENSE = 0x1f00,
148148
MODEL_TYPE_HUNYUAN_MOE_V1 = 0x1f01,
149+
MODEL_TYPE_HUNYUAN_DENSE_V1 = 0x1f02,
149150

150151
MODEL_TYPE_MOONLIGHT = 0x2000,
151152

0 commit comments

Comments
 (0)