@@ -69,8 +69,8 @@ namespace chatllm::hunyuan::dense
6969 {
7070 }
7171
72- ConditionalGeneration::ConditionalGeneration (const Config &config, const RuntimeConfig &runtime_config)
73- : BaseModelForConditionalGeneration(MODEL_TYPE_HUNYUAN_DENSE , config, runtime_config), config(config)
72+ ConditionalGeneration::ConditionalGeneration (const Config &config, const RuntimeConfig &runtime_config, ModelType type, int head_dim )
73+ : BaseModelForConditionalGeneration(type , config, runtime_config), config(config)
7474 {
7575 const size_t tensor_ovhd = ggml_tensor_overhead ();
7676 const size_t num_tensors = 2 + config.num_hidden_layers * 14 ;
@@ -81,6 +81,7 @@ namespace chatllm::hunyuan::dense
8181 transformer = new ModelClass (&w_ctx_, config, nullptr ,
8282 config.hidden_size , config.num_attention_heads ,
8383 config.intermediate_size , config.num_key_value_heads ,
84+ head_dim,
8485 config.max_length );
8586
8687 for (int i = 0 ; i < config.num_hidden_layers ; i++)
@@ -91,6 +92,11 @@ namespace chatllm::hunyuan::dense
9192 }
9293 }
9394
95+ ConditionalGeneration::ConditionalGeneration (const Config &config, const RuntimeConfig &runtime_config, ModelType type)
96+ : ConditionalGeneration(config, runtime_config, type, config.hidden_size / config.num_attention_heads)
97+ {
98+ }
99+
94100 void ConditionalGeneration::load (ModelLoader &loader)
95101 {
96102 auto transformer = get_typed_transformer<ModelClass>();
@@ -122,6 +128,103 @@ namespace chatllm::hunyuan::dense
122128 }
123129}
124130
131+ namespace chatllm ::hunyuan::dense_v1
132+ {
133+ struct Config : dense::Config
134+ {
135+ int head_dim;
136+ };
137+
138+ class ChatHistoryEncoder : public BaseHistoryEncoder
139+ {
140+ public:
141+ void append_sys_prompt (std::vector<int > &ids) const override ;
142+ void append_ai (int round_idx, const std::string &ai, std::vector<int > &ids) const override ;
143+ void append_user (int round_idx, const std::string &user, std::vector<int > &ids) const override ;
144+ void append_ai_opening (int round_idx, std::vector<int > &ids) const override ;
145+ };
146+
147+ static ChatHistoryEncoder _chat_encoder;
148+
149+ class Tokenizer : public BaseTokenizer
150+ {
151+ public:
152+ Tokenizer (const BaseConfig &config)
153+ : BaseTokenizer(config, &_chat_encoder)
154+ {}
155+
156+ size_t load (tokenizer::DataReader *buffer, int n_vocab) override
157+ {
158+ tp = new tokenizer::BPEProcessor2 (
159+ {
160+ // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
161+ " (?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\ r\\ n\\ p{L}\\ p{N}]?\\ p{L}+|\\ p{N}| ?[^\\ s\\ p{L}\\ p{N}]+[\\ r\\ n]*|\\ s*[\\ r\\ n]+|\\ s+(?!\\ S)|\\ s+" ,
162+ }
163+ );
164+ size_t size = tp->Load (buffer, n_vocab);
165+
166+ hy_User_token_id = tp->PieceToId (" <|hy_User|>" );
167+ hy_Assistant_token_id = tp->PieceToId (" <|hy_Assistant|>" );
168+ bos_token_id = tp->PieceToId (" <|hy_begin▁of▁sentence|>" );
169+ eos_token_id = tp->PieceToId (" <|hy_place▁holder▁no▁2|>" );
170+
171+ terminate_ids.insert (eos_token_id);
172+
173+ tp->OverrideTokenDecoding (tp->PieceToId (" <think>" ), " <think>" );
174+ tp->OverrideTokenDecoding (tp->PieceToId (" </think>" ), " </think>" );
175+
176+ return size;
177+ }
178+
179+ public:
180+ int hy_User_token_id;
181+ int hy_Assistant_token_id;
182+ };
183+
184+ void ChatHistoryEncoder::append_sys_prompt (std::vector<int > &ids) const
185+ {
186+ Tokenizer *tok = dynamic_cast <Tokenizer *>(tokenizer);
187+
188+ ids.push_back (tok->bos_token_id );
189+
190+ if (tok->get_system_prompt ().size () > 0 )
191+ {
192+ tok->encode (tok->get_system_prompt (), ids);
193+ }
194+ }
195+
196+ void ChatHistoryEncoder::append_ai (int round_idx, const std::string &ai, std::vector<int > &ids) const
197+ {
198+ Tokenizer *tok = dynamic_cast <Tokenizer *>(tokenizer);
199+
200+ append_ai_opening (round_idx, ids);
201+ tok->encode (ai, ids);
202+ ids.push_back (tok->eos_token_id );
203+ }
204+
205+ void ChatHistoryEncoder::append_user (int round_idx, const std::string &user, std::vector<int > &ids) const
206+ {
207+ Tokenizer *tok = dynamic_cast <Tokenizer *>(tokenizer);
208+
209+ ids.push_back (tok->hy_User_token_id );
210+ tok->encode (user, ids);
211+ }
212+
213+ void ChatHistoryEncoder::append_ai_opening (int round_idx, std::vector<int > &ids) const
214+ {
215+ Tokenizer *tok = dynamic_cast <Tokenizer *>(tokenizer);
216+ ids.push_back (tok->hy_Assistant_token_id );
217+ }
218+
219+ class ConditionalGeneration : public dense ::ConditionalGeneration
220+ {
221+ public:
222+ ConditionalGeneration (const Config &config, const RuntimeConfig &runtime_config)
223+ : dense::ConditionalGeneration(config, runtime_config, MODEL_TYPE_HUNYUAN_DENSE_V1, config.head_dim)
224+ {}
225+ };
226+ }
227+
125228namespace chatllm ::hunyuan::moe_v1
126229{
127230 template <class HunyuanMoEMLP > class HunyuanMoEBlock : public LMBlock1 <RMSNorm, dense::HunyuanSelfAttention, RMSNorm, HunyuanMoEMLP>
@@ -248,5 +351,6 @@ namespace chatllm::hunyuan::moe_v1
248351namespace chatllm
249352{
250353 REGISTER_MODEL_LOADER (HUNYUAN_DENSE, hunyuan::dense, 1 );
354+ REGISTER_MODEL_LOADER (HUNYUAN_DENSE_V1, hunyuan::dense_v1, 1 );
251355 REGISTER_MODEL_LOADER (HUNYUAN_MOE_V1, hunyuan::moe_v1, 1 );
252356}
0 commit comments