@@ -238,8 +238,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_OLMOE, "olmoe" },
     { LLM_ARCH_OPENELM, "openelm" },
     { LLM_ARCH_ARCTIC, "arctic" },
-    { LLM_ARCH_DEEPSEEK2, "deepseek2" },
     { LLM_ARCH_DEEPSEEK, "deepseek" },
+    { LLM_ARCH_DEEPSEEK2, "deepseek2" },
     { LLM_ARCH_CHATGLM, "chatglm" },
     { LLM_ARCH_BITNET, "bitnet" },
     { LLM_ARCH_T5, "t5" },
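The reorder above is cosmetic: std::map is keyed, so initializer order has no effect on lookups. A minimal sketch illustrating this (the two-value enum is a placeholder, not the real llm_arch definition):

#include <iostream>
#include <map>

enum llm_arch { LLM_ARCH_DEEPSEEK, LLM_ARCH_DEEPSEEK2 };

static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_DEEPSEEK,  "deepseek"  },
    { LLM_ARCH_DEEPSEEK2, "deepseek2" },
};

int main() {
    // Resolves by key regardless of where the entry sits in the initializer.
    std::cout << LLM_ARCH_NAMES.at(LLM_ARCH_DEEPSEEK2) << "\n"; // "deepseek2"
}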
@@ -1291,25 +1291,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         },
     },
     {
-        LLM_ARCH_DEEPSEEK2,
+        LLM_ARCH_DEEPSEEK,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
             { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-            { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
-            { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-            { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
-            { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
-            { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
-            { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
@@ -1320,23 +1318,25 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         },
     },
     {
-        LLM_ARCH_DEEPSEEK,
+        LLM_ARCH_DEEPSEEK2,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
             { LLM_TENSOR_OUTPUT, "output" },
-            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
+            { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
+            { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
+            { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
+            { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-            { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
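The "%d" in these name templates is filled with the block (layer) index when tensors are looked up in the GGUF file. A minimal sketch of that expansion, assuming a hypothetical helper (llm_format is not the real llama.cpp function name):

#include <cstdio>
#include <string>

// Hypothetical stand-in for llama.cpp's internal tensor-name formatting.
static std::string llm_format(const char * tmpl, int bid) {
    char buf[128];
    std::snprintf(buf, sizeof(buf), tmpl, bid); // substitute the layer index
    return buf;
}

// llm_format("blk.%d.attn_kv_a_mqa", 3) -> "blk.3.attn_kv_a_mqa"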
@@ -6088,36 +6088,36 @@ static void llm_load_hparams(
                     model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_DEEPSEEK2:
+        case LLM_ARCH_DEEPSEEK:
             {
-                bool is_lite = (hparams.n_layer == 27);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
-                if (!is_lite) {
-                    ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
-                }
-                ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
                 ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
 
                 switch (hparams.n_layer) {
-                    case 27: model.type = e_model::MODEL_16B; break;
-                    case 60: model.type = e_model::MODEL_236B; break;
+                    case 28: model.type = e_model::MODEL_20B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_DEEPSEEK:
+        case LLM_ARCH_DEEPSEEK2:
             {
+                bool is_lite = (hparams.n_layer == 27);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
+                if (!is_lite) {
+                    ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
+                }
+                ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
                 ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
 
                 switch (hparams.n_layer) {
-                    case 28: model.type = e_model::MODEL_20B; break;
+                    case 27: model.type = e_model::MODEL_16B; break;
+                    case 60: model.type = e_model::MODEL_236B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
@@ -7099,21 +7099,21 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
 
     LLAMA_LOG_INFO("%s: max token length = %d\n", __func__, vocab.max_token_len);
 
-    if (model.arch == LLM_ARCH_DEEPSEEK2) {
+    if (model.arch == LLM_ARCH_DEEPSEEK) {
         LLAMA_LOG_INFO("%s: n_layer_dense_lead = %d\n", __func__, hparams.n_layer_dense_lead);
-        LLAMA_LOG_INFO("%s: n_lora_q = %d\n", __func__, hparams.n_lora_q);
-        LLAMA_LOG_INFO("%s: n_lora_kv = %d\n", __func__, hparams.n_lora_kv);
         LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: n_expert_shared = %d\n", __func__, hparams.n_expert_shared);
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
-        LLAMA_LOG_INFO("%s: rope_yarn_log_mul = %.4f\n", __func__, hparams.rope_yarn_log_mul);
     }
 
-    if (model.arch == LLM_ARCH_DEEPSEEK) {
+    if (model.arch == LLM_ARCH_DEEPSEEK2) {
         LLAMA_LOG_INFO("%s: n_layer_dense_lead = %d\n", __func__, hparams.n_layer_dense_lead);
+        LLAMA_LOG_INFO("%s: n_lora_q = %d\n", __func__, hparams.n_lora_q);
+        LLAMA_LOG_INFO("%s: n_lora_kv = %d\n", __func__, hparams.n_lora_kv);
         LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: n_expert_shared = %d\n", __func__, hparams.n_expert_shared);
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
+        LLAMA_LOG_INFO("%s: rope_yarn_log_mul = %.4f\n", __func__, hparams.rope_yarn_log_mul);
     }
 
     if (model.arch == LLM_ARCH_QWEN2MOE) {
@@ -22121,32 +22121,6 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|im_start|>assistant\n";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
-        // GigaChat template
-        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
-
-        // Handle system message if present
-        if (has_system) {
-            ss << "<s>" << chat[0]->content << "<|message_sep|>";
-        } else {
-            ss << "<s>";
-        }
-
-        // Process remaining messages
-        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
-            std::string role(chat[i]->role);
-            if (role == "user") {
-                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
-                   << "available functions<|role_sep|>[]<|message_sep|>";
-            } else if (role == "assistant") {
-                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
-            }
-        }
-
-        // Add generation prompt if needed
-        if (add_ass) {
-            ss << "assistant<|role_sep|>";
-        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
         // Official mistral 'v7' template
         // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
@@ -22450,6 +22424,32 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|start_of_role|>assistant<|end_of_role|>\n";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
+        // GigaChat template
+        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
+
+        // Handle system message if present
+        if (has_system) {
+            ss << "<s>" << chat[0]->content << "<|message_sep|>";
+        } else {
+            ss << "<s>";
+        }
+
+        // Process remaining messages
+        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
+            std::string role(chat[i]->role);
+            if (role == "user") {
+                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
+                   << "available functions<|role_sep|>[]<|message_sep|>";
+            } else if (role == "assistant") {
+                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
+            }
+        }
+
+        // Add generation prompt if needed
+        if (add_ass) {
+            ss << "assistant<|role_sep|>";
+        }
     } else {
         // template not supported
         return -1;
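For context, a minimal standalone sketch of what the relocated GigaChat branch produces. The msg struct below stands in for llama_chat_message; the formatting loop mirrors the diff above:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct msg { std::string role; std::string content; };

static std::string format_gigachat(const std::vector<msg> & chat, bool add_ass) {
    std::ostringstream ss;
    bool has_system = !chat.empty() && chat[0].role == "system";

    // BOS token first; a system message, if present, follows it directly.
    ss << "<s>";
    if (has_system) {
        ss << chat[0].content << "<|message_sep|>";
    }

    // Each user turn also advertises an (empty) list of available functions.
    for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
        if (chat[i].role == "user") {
            ss << "user<|role_sep|>" << chat[i].content << "<|message_sep|>"
               << "available functions<|role_sep|>[]<|message_sep|>";
        } else if (chat[i].role == "assistant") {
            ss << "assistant<|role_sep|>" << chat[i].content << "<|message_sep|>";
        }
    }

    if (add_ass) {
        ss << "assistant<|role_sep|>"; // generation prompt
    }
    return ss.str();
}

int main() {
    std::vector<msg> chat = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!" },
    };
    std::cout << format_gigachat(chat, /*add_ass=*/true) << "\n";
    // Prints:
    // <s>You are a helpful assistant.<|message_sep|>user<|role_sep|>Hello!<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>
}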