Skip to content

Commit 2586ae5

Browse files
committed
initial GLM-4.5 integration
1 parent 69d1c58 commit 2586ae5

File tree

4 files changed

+14
-4
lines changed

4 files changed

+14
-4
lines changed

src/llama-arch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ enum llm_arch {
6666
LLM_ARCH_DEEPSEEK2,
6767
LLM_ARCH_CHATGLM,
6868
LLM_ARCH_GLM4,
69+
LLM_ARCH_GLM4_MOE,
6970
LLM_ARCH_BITNET,
7071
LLM_ARCH_T5,
7172
LLM_ARCH_T5ENCODER,

src/llama-graph.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -749,8 +749,10 @@ ggml_tensor * llm_graph_context::build_ffn(
749749

750750
if (down) {
751751
cur = build_lora_mm(down, cur);
752-
if (arch == LLM_ARCH_GLM4) {
753-
// GLM4 seems to have numerical issues with half-precision accumulators
752+
if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE) {
753+
// GLM4 FFNs seem to have numerical issues with half-precision accumulators
754+
// -- ref: https://github.com/ggml-org/llama.cpp/pull/13101
755+
// (GLM4_MOE uses some GLM4 FFNs, so we need to match it too)
754756
ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
755757
}
756758
}
@@ -1391,8 +1393,10 @@ ggml_tensor * llm_graph_context::build_attn(
13911393

13921394
if (wo) {
13931395
cur = build_lora_mm(wo, cur);
1394-
if (arch == LLM_ARCH_GLM4) {
1395-
// GLM4 seems to have numerical issues with half-precision accumulators
1396+
if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE) {
1397+
// GLM4's attention output projection seems to have numerical issues with half-precision accumulators
1398+
// -- ref: https://github.com/ggml-org/llama.cpp/pull/13101
1399+
// (GLM4_MOE uses some GLM4 FFNs, so we need to match it too)
13961400
ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
13971401
}
13981402
}

src/llama-model.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ const char * llm_type_name(llm_type type) {
111111
case LLM_TYPE_30B_A3B: return "30B.A3B";
112112
case LLM_TYPE_235B_A22B: return "235B.A22B";
113113
case LLM_TYPE_300B_A47B: return "300B.A47B";
114+
case LLM_TYPE_355B_A32B: return "355B.A32B (GLM-4.5)";
115+
case LLM_TYPE_106B_A12B: return "106B.A12B (GLM-4.5)";
114116
case LLM_TYPE_E2B: return "E2B";
115117
case LLM_TYPE_E4B: return "E4B";
116118
default: return "?B";
@@ -18153,6 +18155,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
1815318155
case LLM_ARCH_PLM:
1815418156
case LLM_ARCH_CHATGLM:
1815518157
case LLM_ARCH_GLM4:
18158+
case LLM_ARCH_GLM4_MOE:
1815618159
case LLM_ARCH_GRANITE:
1815718160
case LLM_ARCH_GRANITE_MOE:
1815818161
case LLM_ARCH_GRANITE_HYBRID:

src/llama-model.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ enum llm_type {
103103
LLM_TYPE_30B_A3B,
104104
LLM_TYPE_235B_A22B,
105105
LLM_TYPE_300B_A47B, // Ernie MoE big
106+
LLM_TYPE_355B_A32B, // GLM-4.5
107+
LLM_TYPE_106B_A12B, // GLM-4.5-Air
106108
LLM_TYPE_E2B,
107109
LLM_TYPE_E4B,
108110
};

0 commit comments

Comments
 (0)