Commit 63284c2

Merge pull request #20 from Thireus/glm-4.5
Glm 4.5
2 parents: fca1387 + 9b4139b

File tree

1 file changed (+12 -22 lines)

src/llama.cpp

Lines changed: 12 additions & 22 deletions
@@ -1740,8 +1740,8 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_DEEPSEEK_3,
     LLM_CHAT_TEMPLATE_COMMAND_R,
     LLM_CHAT_TEMPLATE_LLAMA_3,
-    LLM_CHAT_TEMPLATE_CHATGML_3,
-    LLM_CHAT_TEMPLATE_CHATGML_4,
+    LLM_CHAT_TEMPLATE_CHATGLM_3,
+    LLM_CHAT_TEMPLATE_CHATGLM_4,
     LLM_CHAT_TEMPLATE_MINICPM,
     LLM_CHAT_TEMPLATE_EXAONE_3,
     LLM_CHAT_TEMPLATE_RWKV_WORLD,
@@ -1781,8 +1781,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "deepseek3",  LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
     { "command-r",  LLM_CHAT_TEMPLATE_COMMAND_R },
     { "llama3",     LLM_CHAT_TEMPLATE_LLAMA_3 },
-    { "chatglm3",   LLM_CHAT_TEMPLATE_CHATGML_3 },
-    { "chatglm4",   LLM_CHAT_TEMPLATE_CHATGML_4 },
+    { "chatglm3",   LLM_CHAT_TEMPLATE_CHATGLM_3 },
+    { "chatglm4",   LLM_CHAT_TEMPLATE_CHATGLM_4 },
     { "minicpm",    LLM_CHAT_TEMPLATE_MINICPM },
     { "exaone3",    LLM_CHAT_TEMPLATE_EXAONE_3 },
     { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
@@ -20712,17 +20712,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
     // - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
     //
-    //GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
-    // allow any count for GLM4-MoE, but still enforce for all others
-    if (model.arch != LLM_ARCH_GLM4_MOE) {
-        GGML_ASSERT(
-            qs.n_attention_wv == 0
-            || qs.n_attention_wv == (int)model.hparams.n_layer
-            || qs.n_attention_wv == 3 * (int)model.hparams.n_layer
-            || model.arch == LLM_ARCH_DECI
-            && "n_attention_wv is unexpected"
-        );
-    }
+    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");

     size_t total_size_org = 0;
     size_t total_size_new = 0;
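
A side note on the reverted assertion (an inference, not part of the commit message): the removed multi-line form leaned on C++ operator precedence. The condition names a-d below are placeholders, not from the source:

    // && binds tighter than ||, so without parentheses the message literal
    // attaches only to the last disjunct:
    //     GGML_ASSERT(a || b || c || d && "msg");  // parses as a || b || c || (d && "msg")
    // A string literal is a non-null pointer and therefore always truthy, so
    // the check behaves the same either way, but the conventional idiom
    // parenthesizes the whole condition, as the restored one-liner does:
    //     GGML_ASSERT((a || b || c || d) && "msg");
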
@@ -23851,6 +23841,11 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
                 return LLM_CHAT_TEMPLATE_LLAMA_2;
             }
         }
+    } else if (tmpl_contains("[gMASK]sop")) {
+        // chatglm3-6b
+        return LLM_CHAT_TEMPLATE_CHATGLM_3;
+    } else if (tmpl_contains("[gMASK]<sop>")) {
+        return LLM_CHAT_TEMPLATE_CHATGLM_4;
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
         return LLM_CHAT_TEMPLATE_PHI_3;
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
@@ -23883,11 +23878,6 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_COMMAND_R;
     } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
         return LLM_CHAT_TEMPLATE_LLAMA_3;
-    } else if (tmpl_contains("[gMASK]sop")) {
-        // chatglm3-6b
-        return LLM_CHAT_TEMPLATE_CHATGML_3;
-    } else if (tmpl_contains("[gMASK]<sop>")) {
-        return LLM_CHAT_TEMPLATE_CHATGML_4;
     } else if (tmpl_contains(LU8("<用户>"))) {
         // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
         return LLM_CHAT_TEMPLATE_MINICPM;
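
Why moving these two checks earlier matters (an inference, not stated in the commit): llama_chat_detect_template() is a first-match-wins chain of substring tests, so the specific GLM markers must be tested before broader ones; a GLM template that also contains a generic token like <|assistant|> would otherwise be caught by an earlier branch. A self-contained sketch of the pattern, with hypothetical names:

    #include <cstdio>
    #include <string>

    // Hypothetical reduction of the detection chain: tests run in order and
    // the first hit wins, so the specific "[gMASK]<sop>" marker must be
    // checked before the broader "<|assistant|>" marker.
    static const char * detect(const std::string & tmpl) {
        auto contains = [&](const char * s) { return tmpl.find(s) != std::string::npos; };
        if (contains("[gMASK]<sop>"))  return "chatglm4"; // specific marker first
        if (contains("<|assistant|>")) return "generic";  // broader marker later
        return "unknown";
    }

    int main() {
        // This template contains both markers; ordering decides the result.
        std::printf("%s\n", detect("[gMASK]<sop>...<|assistant|>")); // prints "chatglm4"
    }
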
@@ -24170,7 +24160,7 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
         // chatglm3-6b
         ss << "[gMASK]" << "sop";
         for (auto message : chat) {
@@ -24180,7 +24170,7 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|assistant|>";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
         ss << "[gMASK]" << "<sop>";
         for (auto message : chat) {
             std::string role(message->role);
