Skip to content

Commit 4ea1c01

Browse files
committed
update new hunyuan dense v1 models
1 parent 81fe5f3 commit 4ea1c01

File tree

6 files changed

+172
-7
lines changed

6 files changed

+172
-7
lines changed

convert.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ class ModelType(Enum):
186186

187187
HunYuanDense = 0x1f00
188188
HunYuanMoEV1 = 0x1f01
189+
HunYuanDenseV1 = 0x1f02
189190

190191
MoonLight = 0x2000
191192

@@ -6789,6 +6790,28 @@ def get_weight_names(config):
67896790

67906791
return weight_names
67916792

6793+
class HunYuanDenseV1Converter(BaseConverter):
    """Converter for `HunYuanDenseV1ForCausalLM` checkpoints.

    Delegates to :class:`HunYuanDenseConverter` for the shared config and
    weight layout, then appends the explicit per-head dimension, which V1
    models may decouple from ``hidden_size / num_attention_heads``.
    """
    MODEL_TYPE = ModelType.HunYuanDenseV1

    @staticmethod
    def dump_config(f, config, ggml_type):
        # Checkpoints may carry the head size under either name; normalize
        # to `attention_head_dim` and require the two to agree when both set.
        if config.attention_head_dim is not None:
            assert config.head_dim == config.attention_head_dim
        else:
            config.attention_head_dim = config.head_dim

        # `head_dim` is serialized below and read unconditionally by the
        # loader; fail with a clear message instead of an opaque TypeError
        # from struct.pack when it is missing.
        assert config.head_dim is not None, 'HunYuanDenseV1: head_dim (or attention_head_dim) is required'

        HunYuanDenseConverter.dump_config(f, config, ggml_type)

        config_values = [
            config.head_dim,
        ]
        # Little-endian int32, appended after the base dense config.
        f.write(struct.pack("<i", *config_values))

    @staticmethod
    def get_weight_names(config):
        # Dense V1 shares the exact tensor layout of the original dense model.
        weight_names = HunYuanDenseConverter.get_weight_names(config)
        return weight_names
6814+
67926815
class HunYuanMoEV1Converter(BaseConverter):
67936816
MODEL_TYPE = ModelType.HunYuanMoEV1
67946817

@@ -7758,6 +7781,10 @@ def main():
77587781
(isinstance(config.num_experts, list) and max(config.num_experts) > 1)):
77597782
raise Exception('HunYuanForCausalLM: only dense model is supported')
77607783
HunYuanDenseConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
7784+
elif arch == 'HunYuanDenseV1ForCausalLM':
7785+
assert config.use_mla is None
7786+
config.use_mla = False
7787+
HunYuanDenseV1Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
77617788
elif arch == 'HunYuanMoEV1ForCausalLM':
77627789
HunYuanMoEV1Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
77637790
elif arch == 'InstellaForCausalLM':

docs/models.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,11 @@
8080
* [x] v3.2: [Instruct-2B](https://huggingface.co/ibm-granite/granite-3.2-2b-instruct), [Instruct-2B](https://huggingface.co/ibm-granite/granite-3.2-8b-instruct), [Instruct-8B](https://huggingface.co/ibm-granite/granite-3.2-8b-instruct/tree/0276d996f60d5eb0b376b6d06622042d4ef3eb4b)
8181

8282
* HunYuan (`HunYuanForCausalLM`)
83-
* [x] Dense: [Instruct-7B](https://huggingface.co/tencent/Hunyuan-7B-Instruct)
83+
* [x] ~~Dense: [Instruct-7B](https://huggingface.co/tencent/Hunyuan-7B-Instruct)~~ (original release no longer available)
84+
* [x] Dense: [0.5B-Instruct](https://huggingface.co/tencent/Hunyuan-0.5B-Instruct/tree/9ec1774c379d7dde3f2d7ddd3286cde88949e181),
85+
[1.8B-Instruct](https://huggingface.co/tencent/Hunyuan-1.8B-Instruct/tree/21ab9fd367ee99ba8001d34a182252ddb2ed255c),
86+
[4B-Instruct](https://huggingface.co/tencent/Hunyuan-4B-Instruct/tree/3a419720cb283ece18dc6baac1b2484418cf525f),
87+
[7B-Instruct](https://huggingface.co/tencent/Hunyuan-7B-Instruct/tree/e256110382dc42f4e2f4d97afc9f8bea5a907a4a)
8488
* [x] MoE: [A13B-Instruct](https://huggingface.co/tencent/Hunyuan-A13B-Instruct/tree/202c9758065873e0ac7c80211e6275593f165442)
8589

8690
* Instella (`InstellaForCausalLM`)

models/hunyuan.cpp

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ namespace chatllm::hunyuan::dense
6969
{
7070
}
7171

72-
ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config)
73-
: BaseModelForConditionalGeneration(MODEL_TYPE_HUNYUAN_DENSE, config, runtime_config), config(config)
72+
ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type, int head_dim)
73+
: BaseModelForConditionalGeneration(type, config, runtime_config), config(config)
7474
{
7575
const size_t tensor_ovhd = ggml_tensor_overhead();
7676
const size_t num_tensors = 2 + config.num_hidden_layers * 14;
@@ -81,6 +81,7 @@ namespace chatllm::hunyuan::dense
8181
transformer = new ModelClass(&w_ctx_, config, nullptr,
8282
config.hidden_size, config.num_attention_heads,
8383
config.intermediate_size, config.num_key_value_heads,
84+
head_dim,
8485
config.max_length);
8586

8687
for (int i = 0; i < config.num_hidden_layers; i++)
@@ -91,6 +92,11 @@ namespace chatllm::hunyuan::dense
9192
}
9293
}
9394

95+
ConditionalGeneration::ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type)
96+
: ConditionalGeneration(config, runtime_config, type, config.hidden_size / config.num_attention_heads)
97+
{
98+
}
99+
94100
void ConditionalGeneration::load(ModelLoader &loader)
95101
{
96102
auto transformer = get_typed_transformer<ModelClass>();
@@ -122,6 +128,103 @@ namespace chatllm::hunyuan::dense
122128
}
123129
}
124130

131+
namespace chatllm::hunyuan::dense_v1
132+
{
133+
struct Config : dense::Config
134+
{
135+
int head_dim;
136+
};
137+
138+
class ChatHistoryEncoder : public BaseHistoryEncoder
139+
{
140+
public:
141+
void append_sys_prompt(std::vector<int> &ids) const override;
142+
void append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const override;
143+
void append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override;
144+
void append_ai_opening(int round_idx, std::vector<int> &ids) const override;
145+
};
146+
147+
static ChatHistoryEncoder _chat_encoder;
148+
149+
class Tokenizer : public BaseTokenizer
150+
{
151+
public:
152+
Tokenizer(const BaseConfig &config)
153+
: BaseTokenizer(config, &_chat_encoder)
154+
{}
155+
156+
size_t load(tokenizer::DataReader *buffer, int n_vocab) override
157+
{
158+
tp = new tokenizer::BPEProcessor2(
159+
{
160+
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
161+
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
162+
}
163+
);
164+
size_t size = tp->Load(buffer, n_vocab);
165+
166+
hy_User_token_id = tp->PieceToId("<|hy_User|>");
167+
hy_Assistant_token_id = tp->PieceToId("<|hy_Assistant|>");
168+
bos_token_id = tp->PieceToId("<|hy_begin▁of▁sentence|>");
169+
eos_token_id = tp->PieceToId("<|hy_place▁holder▁no▁2|>");
170+
171+
terminate_ids.insert(eos_token_id);
172+
173+
tp->OverrideTokenDecoding(tp->PieceToId("<think>"), "<think>");
174+
tp->OverrideTokenDecoding(tp->PieceToId("</think>"), "</think>");
175+
176+
return size;
177+
}
178+
179+
public:
180+
int hy_User_token_id;
181+
int hy_Assistant_token_id;
182+
};
183+
184+
void ChatHistoryEncoder::append_sys_prompt(std::vector<int> &ids) const
185+
{
186+
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
187+
188+
ids.push_back(tok->bos_token_id);
189+
190+
if (tok->get_system_prompt().size() > 0)
191+
{
192+
tok->encode(tok->get_system_prompt(), ids);
193+
}
194+
}
195+
196+
void ChatHistoryEncoder::append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const
197+
{
198+
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
199+
200+
append_ai_opening(round_idx, ids);
201+
tok->encode(ai, ids);
202+
ids.push_back(tok->eos_token_id);
203+
}
204+
205+
void ChatHistoryEncoder::append_user(int round_idx, const std::string &user, std::vector<int> &ids) const
206+
{
207+
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
208+
209+
ids.push_back(tok->hy_User_token_id);
210+
tok->encode(user, ids);
211+
}
212+
213+
void ChatHistoryEncoder::append_ai_opening(int round_idx, std::vector<int> &ids) const
214+
{
215+
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
216+
ids.push_back(tok->hy_Assistant_token_id);
217+
}
218+
219+
class ConditionalGeneration : public dense::ConditionalGeneration
220+
{
221+
public:
222+
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config)
223+
: dense::ConditionalGeneration(config, runtime_config, MODEL_TYPE_HUNYUAN_DENSE_V1, config.head_dim)
224+
{}
225+
};
226+
}
227+
125228
namespace chatllm::hunyuan::moe_v1
126229
{
127230
template <class HunyuanMoEMLP> class HunyuanMoEBlock : public LMBlock1<RMSNorm, dense::HunyuanSelfAttention, RMSNorm, HunyuanMoEMLP>
@@ -248,5 +351,6 @@ namespace chatllm::hunyuan::moe_v1
248351
namespace chatllm
249352
{
250353
REGISTER_MODEL_LOADER(HUNYUAN_DENSE, hunyuan::dense, 1);
354+
REGISTER_MODEL_LOADER(HUNYUAN_DENSE_V1, hunyuan::dense_v1, 1);
251355
REGISTER_MODEL_LOADER(HUNYUAN_MOE_V1, hunyuan::moe_v1, 1);
252356
}

models/hunyuan.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,19 @@ namespace chatllm::hunyuan::dense
4343
class HunyuanBlock : public LMBlock1<RMSNorm, HunyuanSelfAttention, RMSNorm, SiLUMLP>
4444
{
4545
public:
46-
HunyuanBlock(InitContext *ctx, int hidden_size, int num_attention_heads, int intermediate_size, int num_kv_heads, int max_length)
47-
: LMBlock1(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, max_length)
46+
HunyuanBlock(InitContext *ctx, int hidden_size, int num_attention_heads, int intermediate_size, int num_kv_heads, int head_dim, int max_length)
47+
: LMBlock1(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, head_dim, max_length)
4848
{}
4949
};
5050

5151
class ConditionalGeneration : public BaseModelForConditionalGeneration
5252
{
5353
public:
54-
typedef Model<Config, Embedding, RMSNorm, HunyuanBlock, int, int, int, int, int> ModelClass;
54+
typedef Model<Config, Embedding, RMSNorm, HunyuanBlock, int, int, int, int, int, int> ModelClass;
5555
public:
56-
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config);
56+
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = MODEL_TYPE_HUNYUAN_DENSE);
57+
58+
ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type, int head_dim);
5759

5860
void load(ModelLoader &loader) override;
5961

scripts/models.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2434,6 +2434,33 @@
24342434
"default": "7b",
24352435
"license": "Tencent License",
24362436
"variants": {
2437+
"0.5b": {
2438+
"default": "q8",
2439+
"quantized": {
2440+
"q8": {
2441+
"size": 576757664,
2442+
"url": "chatllm_quantized_hunyuan/hunyuan-dense-v1-0.5b.bin"
2443+
}
2444+
}
2445+
},
2446+
"1.8b": {
2447+
"default": "q8",
2448+
"quantized": {
2449+
"q8": {
2450+
"size": 1907337760,
2451+
"url": "chatllm_quantized_hunyuan/hunyuan-dense-v1-1.8b.bin"
2452+
}
2453+
}
2454+
},
2455+
"4b": {
2456+
"default": "q8",
2457+
"quantized": {
2458+
"q8": {
2459+
"size": 4490205600,
2460+
"url": "chatllm_quantized_hunyuan/hunyuan-dense-v1-4b.bin"
2461+
}
2462+
}
2463+
},
24372464
"7b": {
24382465
"default": "q8",
24392466
"quantized": {

src/models_priv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ namespace chatllm
146146

147147
MODEL_TYPE_HUNYUAN_DENSE = 0x1f00,
148148
MODEL_TYPE_HUNYUAN_MOE_V1 = 0x1f01,
149+
MODEL_TYPE_HUNYUAN_DENSE_V1 = 0x1f02,
149150

150151
MODEL_TYPE_MOONLIGHT = 0x2000,
151152

0 commit comments

Comments
 (0)