Skip to content

Commit 41a235b

Browse files
committed
Fix chat template detection
Changes suggested by @ubergarm - ikawrakow#668 (comment)
1 parent d97ebef commit 41a235b

File tree

1 file changed

+22
-12
lines changed

1 file changed

+22
-12
lines changed

src/llama.cpp

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1740,8 +1740,8 @@ enum llm_chat_template {
17401740
LLM_CHAT_TEMPLATE_DEEPSEEK_3,
17411741
LLM_CHAT_TEMPLATE_COMMAND_R,
17421742
LLM_CHAT_TEMPLATE_LLAMA_3,
1743-
LLM_CHAT_TEMPLATE_CHATGML_3,
1744-
LLM_CHAT_TEMPLATE_CHATGML_4,
1743+
LLM_CHAT_TEMPLATE_CHATGLM_3,
1744+
LLM_CHAT_TEMPLATE_CHATGLM_4,
17451745
LLM_CHAT_TEMPLATE_MINICPM,
17461746
LLM_CHAT_TEMPLATE_EXAONE_3,
17471747
LLM_CHAT_TEMPLATE_RWKV_WORLD,
@@ -1781,8 +1781,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
17811781
{ "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
17821782
{ "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
17831783
{ "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
1784-
{ "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
1785-
{ "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 },
1784+
{ "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 },
1785+
{ "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 },
17861786
{ "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
17871787
{ "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
17881788
{ "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
@@ -20712,7 +20712,17 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
2071220712
// - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
2071320713
// - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
2071420714
//
20715-
GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
20715+
//GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
20716+
// allow any count for GLM4-MoE, but still enforce for all others
20717+
if (model.arch != LLM_ARCH_GLM4_MOE) {
20718+
GGML_ASSERT(
20719+
qs.n_attention_wv == 0
20720+
|| qs.n_attention_wv == (int)model.hparams.n_layer
20721+
|| qs.n_attention_wv == 3 * (int)model.hparams.n_layer
20722+
|| model.arch == LLM_ARCH_DECI
20723+
&& "n_attention_wv is unexpected"
20724+
);
20725+
}
2071620726

2071720727
size_t total_size_org = 0;
2071820728
size_t total_size_new = 0;
@@ -23841,6 +23851,11 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
2384123851
return LLM_CHAT_TEMPLATE_LLAMA_2;
2384223852
}
2384323853
}
23854+
} else if (tmpl_contains("[gMASK]sop")) {
23855+
// chatglm3-6b
23856+
return LLM_CHAT_TEMPLATE_CHATGLM_3;
23857+
} else if (tmpl_contains("[gMASK]<sop>")) {
23858+
return LLM_CHAT_TEMPLATE_CHATGLM_4;
2384423859
} else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
2384523860
return LLM_CHAT_TEMPLATE_PHI_3;
2384623861
} else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
@@ -23873,11 +23888,6 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
2387323888
return LLM_CHAT_TEMPLATE_COMMAND_R;
2387423889
} else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
2387523890
return LLM_CHAT_TEMPLATE_LLAMA_3;
23876-
} else if (tmpl_contains("[gMASK]sop")) {
23877-
// chatglm3-6b
23878-
return LLM_CHAT_TEMPLATE_CHATGML_3;
23879-
} else if (tmpl_contains("[gMASK]<sop>")) {
23880-
return LLM_CHAT_TEMPLATE_CHATGML_4;
2388123891
} else if (tmpl_contains(LU8("<用户>"))) {
2388223892
// MiniCPM-3B-OpenHermes-2.5-v2-GGUF
2388323893
return LLM_CHAT_TEMPLATE_MINICPM;
@@ -24160,7 +24170,7 @@ static int32_t llama_chat_apply_template_internal(
2416024170
if (add_ass) {
2416124171
ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
2416224172
}
24163-
} else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3) {
24173+
} else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
2416424174
// chatglm3-6b
2416524175
ss << "[gMASK]" << "sop";
2416624176
for (auto message : chat) {
@@ -24170,7 +24180,7 @@ static int32_t llama_chat_apply_template_internal(
2417024180
if (add_ass) {
2417124181
ss << "<|assistant|>";
2417224182
}
24173-
} else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4) {
24183+
} else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
2417424184
ss << "[gMASK]" << "<sop>";
2417524185
for (auto message : chat) {
2417624186
std::string role(message->role);

0 commit comments

Comments (0)