@@ -1740,8 +1740,8 @@ enum llm_chat_template {
17401740 LLM_CHAT_TEMPLATE_DEEPSEEK_3,
17411741 LLM_CHAT_TEMPLATE_COMMAND_R,
17421742 LLM_CHAT_TEMPLATE_LLAMA_3,
1743- LLM_CHAT_TEMPLATE_CHATGML_3 ,
1744- LLM_CHAT_TEMPLATE_CHATGML_4 ,
1743+ LLM_CHAT_TEMPLATE_CHATGLM_3 ,
1744+ LLM_CHAT_TEMPLATE_CHATGLM_4 ,
17451745 LLM_CHAT_TEMPLATE_MINICPM,
17461746 LLM_CHAT_TEMPLATE_EXAONE_3,
17471747 LLM_CHAT_TEMPLATE_RWKV_WORLD,
@@ -1781,8 +1781,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
17811781 { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
17821782 { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
17831783 { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
1784- { "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
1785- { "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 },
1784+ { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 },
1785+ { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 },
17861786 { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
17871787 { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
17881788 { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
@@ -20712,17 +20712,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
2071220712 // - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
2071320713 // - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
2071420714 //
20715- //GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
20716- // allow any count for GLM4-MoE, but still enforce for all others
20717- if (model.arch != LLM_ARCH_GLM4_MOE) {
20718- GGML_ASSERT(
20719- qs.n_attention_wv == 0
20720- || qs.n_attention_wv == (int)model.hparams.n_layer
20721- || qs.n_attention_wv == 3 * (int)model.hparams.n_layer
20722- || model.arch == LLM_ARCH_DECI
20723- && "n_attention_wv is unexpected"
20724- );
20725- }
20715+ GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
2072620716
2072720717 size_t total_size_org = 0;
2072820718 size_t total_size_new = 0;
@@ -23851,6 +23841,11 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
2385123841 return LLM_CHAT_TEMPLATE_LLAMA_2;
2385223842 }
2385323843 }
23844+ } else if (tmpl_contains("[gMASK]sop")) {
23845+ // chatglm3-6b
23846+ return LLM_CHAT_TEMPLATE_CHATGLM_3;
23847+ } else if (tmpl_contains("[gMASK]<sop>")) {
23848+ return LLM_CHAT_TEMPLATE_CHATGLM_4;
2385423849 } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
2385523850 return LLM_CHAT_TEMPLATE_PHI_3;
2385623851 } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
@@ -23883,11 +23878,6 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
2388323878 return LLM_CHAT_TEMPLATE_COMMAND_R;
2388423879 } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
2388523880 return LLM_CHAT_TEMPLATE_LLAMA_3;
23886- } else if (tmpl_contains("[gMASK]sop")) {
23887- // chatglm3-6b
23888- return LLM_CHAT_TEMPLATE_CHATGML_3;
23889- } else if (tmpl_contains("[gMASK]<sop>")) {
23890- return LLM_CHAT_TEMPLATE_CHATGML_4;
2389123881 } else if (tmpl_contains(LU8("<用户>"))) {
2389223882 // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
2389323883 return LLM_CHAT_TEMPLATE_MINICPM;
@@ -24170,7 +24160,7 @@ static int32_t llama_chat_apply_template_internal(
2417024160 if (add_ass) {
2417124161 ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
2417224162 }
24173- } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3 ) {
24163+ } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3 ) {
2417424164 // chatglm3-6b
2417524165 ss << "[gMASK]" << "sop";
2417624166 for (auto message : chat) {
@@ -24180,7 +24170,7 @@ static int32_t llama_chat_apply_template_internal(
2418024170 if (add_ass) {
2418124171 ss << "<|assistant|>";
2418224172 }
24183- } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4 ) {
24173+ } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4 ) {
2418424174 ss << "[gMASK]" << "<sop>";
2418524175 for (auto message : chat) {
2418624176 std::string role(message->role);
0 commit comments