@@ -238,8 +238,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_OLMOE,           "olmoe"        },
     { LLM_ARCH_OPENELM,         "openelm"      },
     { LLM_ARCH_ARCTIC,          "arctic"       },
-    { LLM_ARCH_DEEPSEEK2,       "deepseek2"    },
     { LLM_ARCH_DEEPSEEK,        "deepseek"     },
+    { LLM_ARCH_DEEPSEEK2,       "deepseek2"    },
     { LLM_ARCH_CHATGLM,         "chatglm"      },
     { LLM_ARCH_BITNET,          "bitnet"       },
     { LLM_ARCH_T5,              "t5"           },
@@ -1291,25 +1291,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         },
     },
     {
-        LLM_ARCH_DEEPSEEK2,
+        LLM_ARCH_DEEPSEEK,
         {
             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
             { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ROPE_FREQS,         "rope_freqs" },
             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
-            { LLM_TENSOR_ATTN_Q_A_NORM,      "blk.%d.attn_q_a_norm" },
-            { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
-            { LLM_TENSOR_ATTN_Q_A,           "blk.%d.attn_q_a" },
-            { LLM_TENSOR_ATTN_Q_B,           "blk.%d.attn_q_b" },
-            { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
-            { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,      "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
             { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
-            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
             { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
-            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
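
// Sketch, not from this commit: the "%d" in the per-block patterns above is
// substituted with the block index when tensor names are resolved, which a
// plain snprintf can illustrate without llama.cpp's internal name helpers.
#include <cstdio>
#include <string>

static std::string tensor_name(const char * pattern, int block_id) {
    char buf[128]; // GGUF tensor names are short, so this is generous
    std::snprintf(buf, sizeof(buf), pattern, block_id);
    return buf;
}

// tensor_name("blk.%d.attn_kv_a_mqa", 3) -> "blk.3.attn_kv_a_mqa"
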
@@ -1320,23 +1318,25 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         },
     },
     {
-        LLM_ARCH_DEEPSEEK,
+        LLM_ARCH_DEEPSEEK2,
         {
             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
             { LLM_TENSOR_OUTPUT,             "output" },
-            { LLM_TENSOR_ROPE_FREQS,         "rope_freqs" },
             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q_A_NORM,      "blk.%d.attn_q_a_norm" },
+            { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
-            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
-            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_Q_A,           "blk.%d.attn_q_a" },
+            { LLM_TENSOR_ATTN_Q_B,           "blk.%d.attn_q_b" },
+            { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
+            { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
-            { LLM_TENSOR_ATTN_ROT_EMBD,      "blk.%d.attn_rot_embd" },
-            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
             { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
-            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
@@ -6088,36 +6088,36 @@ static void llm_load_hparams(
                     model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_DEEPSEEK2:
+        case LLM_ARCH_DEEPSEEK:
             {
-                bool is_lite = (hparams.n_layer == 27);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
-                if (!is_lite) {
-                    ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
-                }
-                ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
                 ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
 
                 switch (hparams.n_layer) {
-                    case 27: model.type = e_model::MODEL_16B; break;
-                    case 60: model.type = e_model::MODEL_236B; break;
+                    case 28: model.type = e_model::MODEL_20B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_DEEPSEEK:
+        case LLM_ARCH_DEEPSEEK2:
             {
+                bool is_lite = (hparams.n_layer == 27);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
+                if (!is_lite) {
+                    ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
+                }
+                ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
                 ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
 
                 switch (hparams.n_layer) {
+                    case 27: model.type = e_model::MODEL_16B; break;
+                    case 60: model.type = e_model::MODEL_236B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
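
// Sketch, not from this commit: in the LLM_ARCH_DEEPSEEK2 case above, is_lite
// keys off n_layer == 27 (DeepSeek-V2-Lite, mapped to MODEL_16B) because Lite
// GGUFs carry no attention.q_lora_rank key and ml.get_key() treats keys as
// required by default. Assuming get_key()'s optional third parameter
// (required = true by default), the same effect could be had inside that case
// without the flag:

hparams.n_lora_q = 0;
ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q, /*required=*/ false);

// A missing key then leaves n_lora_q at 0 instead of aborting the load.
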
@@ -7099,21 +7099,21 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
 
     LLAMA_LOG_INFO("%s: max token length = %d\n", __func__, vocab.max_token_len);
 
-    if (model.arch == LLM_ARCH_DEEPSEEK2) {
+    if (model.arch == LLM_ARCH_DEEPSEEK) {
         LLAMA_LOG_INFO("%s: n_layer_dense_lead   = %d\n",     __func__, hparams.n_layer_dense_lead);
-        LLAMA_LOG_INFO("%s: n_lora_q             = %d\n",     __func__, hparams.n_lora_q);
-        LLAMA_LOG_INFO("%s: n_lora_kv            = %d\n",     __func__, hparams.n_lora_kv);
         LLAMA_LOG_INFO("%s: n_ff_exp             = %d\n",     __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: n_expert_shared      = %d\n",     __func__, hparams.n_expert_shared);
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n",   __func__, hparams.expert_weights_scale);
-        LLAMA_LOG_INFO("%s: rope_yarn_log_mul    = %.4f\n",   __func__, hparams.rope_yarn_log_mul);
     }
 
-    if (model.arch == LLM_ARCH_DEEPSEEK) {
+    if (model.arch == LLM_ARCH_DEEPSEEK2) {
         LLAMA_LOG_INFO("%s: n_layer_dense_lead   = %d\n",     __func__, hparams.n_layer_dense_lead);
+        LLAMA_LOG_INFO("%s: n_lora_q             = %d\n",     __func__, hparams.n_lora_q);
+        LLAMA_LOG_INFO("%s: n_lora_kv            = %d\n",     __func__, hparams.n_lora_kv);
         LLAMA_LOG_INFO("%s: n_ff_exp             = %d\n",     __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: n_expert_shared      = %d\n",     __func__, hparams.n_expert_shared);
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n",   __func__, hparams.expert_weights_scale);
+        LLAMA_LOG_INFO("%s: rope_yarn_log_mul    = %.4f\n",   __func__, hparams.rope_yarn_log_mul);
     }
 
     if (model.arch == LLM_ARCH_QWEN2MOE) {
@@ -22121,32 +22121,6 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|im_start|>assistant\n";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
-        // GigaChat template
-        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
-
-        // Handle system message if present
-        if (has_system) {
-            ss << "<s>" << chat[0]->content << "<|message_sep|>";
-        } else {
-            ss << "<s>";
-        }
-
-        // Process remaining messages
-        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
-            std::string role(chat[i]->role);
-            if (role == "user") {
-                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
-                << "available functions<|role_sep|>[]<|message_sep|>";
-            } else if (role == "assistant") {
-                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
-            }
-        }
-
-        // Add generation prompt if needed
-        if (add_ass) {
-            ss << "assistant<|role_sep|>";
-        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
         // Official mistral 'v7' template
         // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
@@ -22450,6 +22424,32 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|start_of_role|>assistant<|end_of_role|>\n";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
+        // GigaChat template
+        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
+
+        // Handle system message if present
+        if (has_system) {
+            ss << "<s>" << chat[0]->content << "<|message_sep|>";
+        } else {
+            ss << "<s>";
+        }
+
+        // Process remaining messages
+        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
+            std::string role(chat[i]->role);
+            if (role == "user") {
+                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
+                << "available functions<|role_sep|>[]<|message_sep|>";
+            } else if (role == "assistant") {
+                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
+            }
+        }
+
+        // Add generation prompt if needed
+        if (add_ass) {
+            ss << "assistant<|role_sep|>";
+        }
     } else {
         // template not supported
         return -1;
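
// Sketch, not from this commit: exercising the GigaChat branch above through
// the public llama_chat_apply_template() API. This assumes the short name
// "gigachat" is registered for LLM_CHAT_TEMPLATE_GIGACHAT and that a null
// model makes the function use tmpl exactly as given.
#include <cstdio>
#include "llama.h"

int main() {
    const llama_chat_message chat[] = {
        { "system", "You are helpful." },
        { "user",   "Hi"               },
    };
    char buf[512];
    const int32_t n = llama_chat_apply_template(
        nullptr, "gigachat", chat, 2, /*add_ass=*/ true, buf, sizeof(buf));
    if (n < 0 || n >= (int32_t) sizeof(buf)) {
        return 1; // unsupported template or buffer too small
    }
    // Per the branch above, this prints:
    // <s>You are helpful.<|message_sep|>user<|role_sep|>Hi<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>
    printf("%.*s\n", n, buf);
    return 0;
}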