Skip to content

Commit b32159c

Browse files
committed
fix order of deepseek and deepseek2, move gigachat template to the end of func
1 parent 66e59b0 commit b32159c

File tree

1 file changed

+59
-59
lines changed

1 file changed

+59
-59
lines changed

src/llama.cpp

Lines changed: 59 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
238238
{ LLM_ARCH_OLMOE, "olmoe" },
239239
{ LLM_ARCH_OPENELM, "openelm" },
240240
{ LLM_ARCH_ARCTIC, "arctic" },
241-
{ LLM_ARCH_DEEPSEEK2, "deepseek2" },
242241
{ LLM_ARCH_DEEPSEEK, "deepseek" },
242+
{ LLM_ARCH_DEEPSEEK2, "deepseek2" },
243243
{ LLM_ARCH_CHATGLM, "chatglm" },
244244
{ LLM_ARCH_BITNET, "bitnet" },
245245
{ LLM_ARCH_T5, "t5" },
@@ -1291,25 +1291,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
12911291
},
12921292
},
12931293
{
1294-
LLM_ARCH_DEEPSEEK2,
1294+
LLM_ARCH_DEEPSEEK,
12951295
{
12961296
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
12971297
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
12981298
{ LLM_TENSOR_OUTPUT, "output" },
1299+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
12991300
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1300-
{ LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
1301-
{ LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
13021301
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1303-
{ LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
1304-
{ LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
1305-
{ LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1306-
{ LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1302+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1303+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
13071304
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1305+
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1306+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
13081307
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
13091308
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1310-
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
13111309
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1312-
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1310+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
13131311
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
13141312
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
13151313
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
@@ -1320,23 +1318,25 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
13201318
},
13211319
},
13221320
{
1323-
LLM_ARCH_DEEPSEEK,
1321+
LLM_ARCH_DEEPSEEK2,
13241322
{
13251323
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
13261324
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
13271325
{ LLM_TENSOR_OUTPUT, "output" },
1328-
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
13291326
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1327+
{ LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
1328+
{ LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
13301329
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1331-
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1332-
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1330+
{ LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
1331+
{ LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
1332+
{ LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1333+
{ LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
13331334
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1334-
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1335-
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
13361335
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
13371336
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1338-
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
13391337
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1338+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1339+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
13401340
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
13411341
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
13421342
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
@@ -6088,36 +6088,36 @@ static void llm_load_hparams(
60886088
model.type = e_model::MODEL_UNKNOWN;
60896089
}
60906090
} break;
6091-
case LLM_ARCH_DEEPSEEK2:
6091+
case LLM_ARCH_DEEPSEEK:
60926092
{
6093-
bool is_lite = (hparams.n_layer == 27);
60946093
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
60956094
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
6096-
if (!is_lite) {
6097-
ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
6098-
}
6099-
ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
61006095
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
61016096
ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
61026097
ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
6103-
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
61046098

61056099
switch (hparams.n_layer) {
6106-
case 27: model.type = e_model::MODEL_16B; break;
6107-
case 60: model.type = e_model::MODEL_236B; break;
6100+
case 28: model.type = e_model::MODEL_20B; break;
61086101
default: model.type = e_model::MODEL_UNKNOWN;
61096102
}
61106103
} break;
6111-
case LLM_ARCH_DEEPSEEK:
6104+
case LLM_ARCH_DEEPSEEK2:
61126105
{
6106+
bool is_lite = (hparams.n_layer == 27);
61136107
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
61146108
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
6109+
if (!is_lite) {
6110+
ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
6111+
}
6112+
ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
61156113
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
61166114
ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
61176115
ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
6116+
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
61186117

61196118
switch (hparams.n_layer) {
6120-
case 28: model.type = e_model::MODEL_20B; break;
6119+
case 27: model.type = e_model::MODEL_16B; break;
6120+
case 60: model.type = e_model::MODEL_236B; break;
61216121
default: model.type = e_model::MODEL_UNKNOWN;
61226122
}
61236123
} break;
@@ -7099,21 +7099,21 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
70997099

71007100
LLAMA_LOG_INFO("%s: max token length = %d\n", __func__, vocab.max_token_len);
71017101

7102-
if (model.arch == LLM_ARCH_DEEPSEEK2) {
7102+
if (model.arch == LLM_ARCH_DEEPSEEK) {
71037103
LLAMA_LOG_INFO("%s: n_layer_dense_lead = %d\n", __func__, hparams.n_layer_dense_lead);
7104-
LLAMA_LOG_INFO("%s: n_lora_q = %d\n", __func__, hparams.n_lora_q);
7105-
LLAMA_LOG_INFO("%s: n_lora_kv = %d\n", __func__, hparams.n_lora_kv);
71067104
LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp);
71077105
LLAMA_LOG_INFO("%s: n_expert_shared = %d\n", __func__, hparams.n_expert_shared);
71087106
LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
7109-
LLAMA_LOG_INFO("%s: rope_yarn_log_mul = %.4f\n", __func__, hparams.rope_yarn_log_mul);
71107107
}
71117108

7112-
if (model.arch == LLM_ARCH_DEEPSEEK) {
7109+
if (model.arch == LLM_ARCH_DEEPSEEK2) {
71137110
LLAMA_LOG_INFO("%s: n_layer_dense_lead = %d\n", __func__, hparams.n_layer_dense_lead);
7111+
LLAMA_LOG_INFO("%s: n_lora_q = %d\n", __func__, hparams.n_lora_q);
7112+
LLAMA_LOG_INFO("%s: n_lora_kv = %d\n", __func__, hparams.n_lora_kv);
71147113
LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp);
71157114
LLAMA_LOG_INFO("%s: n_expert_shared = %d\n", __func__, hparams.n_expert_shared);
71167115
LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
7116+
LLAMA_LOG_INFO("%s: rope_yarn_log_mul = %.4f\n", __func__, hparams.rope_yarn_log_mul);
71177117
}
71187118

71197119
if (model.arch == LLM_ARCH_QWEN2MOE) {
@@ -22121,32 +22121,6 @@ static int32_t llama_chat_apply_template_internal(
2212122121
if (add_ass) {
2212222122
ss << "<|im_start|>assistant\n";
2212322123
}
22124-
} else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
22125-
// GigaChat template
22126-
bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
22127-
22128-
// Handle system message if present
22129-
if (has_system) {
22130-
ss << "<s>" << chat[0]->content << "<|message_sep|>";
22131-
} else {
22132-
ss << "<s>";
22133-
}
22134-
22135-
// Process remaining messages
22136-
for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
22137-
std::string role(chat[i]->role);
22138-
if (role == "user") {
22139-
ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
22140-
<< "available functions<|role_sep|>[]<|message_sep|>";
22141-
} else if (role == "assistant") {
22142-
ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
22143-
}
22144-
}
22145-
22146-
// Add generation prompt if needed
22147-
if (add_ass) {
22148-
ss << "assistant<|role_sep|>";
22149-
}
2215022124
} else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
2215122125
// Official mistral 'v7' template
2215222126
// See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
@@ -22450,6 +22424,32 @@ static int32_t llama_chat_apply_template_internal(
2245022424
if (add_ass) {
2245122425
ss << "<|start_of_role|>assistant<|end_of_role|>\n";
2245222426
}
22427+
} else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
22428+
// GigaChat template
22429+
bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
22430+
22431+
// Handle system message if present
22432+
if (has_system) {
22433+
ss << "<s>" << chat[0]->content << "<|message_sep|>";
22434+
} else {
22435+
ss << "<s>";
22436+
}
22437+
22438+
// Process remaining messages
22439+
for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
22440+
std::string role(chat[i]->role);
22441+
if (role == "user") {
22442+
ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
22443+
<< "available functions<|role_sep|>[]<|message_sep|>";
22444+
} else if (role == "assistant") {
22445+
ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
22446+
}
22447+
}
22448+
22449+
// Add generation prompt if needed
22450+
if (add_ass) {
22451+
ss << "assistant<|role_sep|>";
22452+
}
2245322453
} else {
2245422454
// template not supported
2245522455
return -1;

0 commit comments

Comments
 (0)