File tree Expand file tree Collapse file tree 4 files changed +14
-4
lines changed Expand file tree Collapse file tree 4 files changed +14
-4
lines changed Original file line number Diff line number Diff line change @@ -66,6 +66,7 @@ enum llm_arch {
6666 LLM_ARCH_DEEPSEEK2,
6767 LLM_ARCH_CHATGLM,
6868 LLM_ARCH_GLM4,
69+ LLM_ARCH_GLM4_MOE,
6970 LLM_ARCH_BITNET,
7071 LLM_ARCH_T5,
7172 LLM_ARCH_T5ENCODER,
Original file line number Diff line number Diff line change @@ -749,8 +749,10 @@ ggml_tensor * llm_graph_context::build_ffn(
749749
750750 if (down) {
751751 cur = build_lora_mm (down, cur);
752- if (arch == LLM_ARCH_GLM4) {
753- // GLM4 seems to have numerical issues with half-precision accumulators
752+ if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE) {
753+ // GLM4 FFNs seem to have numerical issues with half-precision accumulators
754+ // -- ref: https://github.com/ggml-org/llama.cpp/pull/13101
755+ // (GLM4_MOE uses some GLM4 FFNs, so we need to match it too)
754756 ggml_mul_mat_set_prec (cur, GGML_PREC_F32);
755757 }
756758 }
@@ -1391,8 +1393,10 @@ ggml_tensor * llm_graph_context::build_attn(
13911393
13921394 if (wo) {
13931395 cur = build_lora_mm (wo, cur);
1394- if (arch == LLM_ARCH_GLM4) {
1395- // GLM4 seems to have numerical issues with half-precision accumulators
1396+ if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE) {
1397+ // GLM4 seems to have numerical issues with half-precision accumulators
1398+ // -- ref: https://github.com/ggml-org/llama.cpp/pull/13101
1399+ // (GLM4_MOE is matched as well, so this `wo` matmul is also forced to F32 for it)
13961400 ggml_mul_mat_set_prec (cur, GGML_PREC_F32);
13971401 }
13981402 }
Original file line number Diff line number Diff line change @@ -111,6 +111,8 @@ const char * llm_type_name(llm_type type) {
111111 case LLM_TYPE_30B_A3B: return "30B.A3B";
112112 case LLM_TYPE_235B_A22B: return "235B.A22B";
113113 case LLM_TYPE_300B_A47B: return "300B.A47B";
114+ case LLM_TYPE_355B_A32B: return "355B.A32B (GLM-4.5)";
115+ case LLM_TYPE_106B_A12B: return "106B.A12B (GLM-4.5)";
114116 case LLM_TYPE_E2B: return "E2B";
115117 case LLM_TYPE_E4B: return "E4B";
116118 default: return "?B";
@@ -18153,6 +18155,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
1815318155 case LLM_ARCH_PLM:
1815418156 case LLM_ARCH_CHATGLM:
1815518157 case LLM_ARCH_GLM4:
18158+ case LLM_ARCH_GLM4_MOE:
1815618159 case LLM_ARCH_GRANITE:
1815718160 case LLM_ARCH_GRANITE_MOE:
1815818161 case LLM_ARCH_GRANITE_HYBRID:
Original file line number Diff line number Diff line change @@ -103,6 +103,8 @@ enum llm_type {
103103 LLM_TYPE_30B_A3B,
104104 LLM_TYPE_235B_A22B,
105105 LLM_TYPE_300B_A47B, // Ernie MoE big
106+ LLM_TYPE_355B_A32B, // GLM-4.5
107+ LLM_TYPE_106B_A12B, // GLM-4.5-Air
106108 LLM_TYPE_E2B,
107109 LLM_TYPE_E4B,
108110};
You can’t perform that action at this time.
0 commit comments