Commit 7c8fc01

sammcj and CISC authored
model: glm 4.5 apply suggestions from code review
Co-authored-by: Sigbjørn Skjæret <[email protected]>
1 parent c429c1a commit 7c8fc01

8 files changed: +27, -27 lines

convert_hf_to_gguf.py

Lines changed: 1 addition & 1 deletion
@@ -6644,7 +6644,7 @@ def set_gguf_parameters(self):
 
         # NextN/MTP prediction layers
         if (num_nextn_predict_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
-            self.gguf_writer.add_num_nextn_predict_layers(num_nextn_predict_layers)
+            self.gguf_writer.add_nextn_predict_layers(num_nextn_predict_layers)
 
     _experts: list[dict[str, Tensor]] | None = None
 
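
A quick way to sanity-check the converted metadata is to read the key back. The sketch below is illustrative only: the output file name and the "glm4moe" arch string are assumptions, and the key string is built from Keys.LLM.NEXTN_PREDICT_LAYERS as defined in gguf-py below.

    from gguf import GGUFReader
    from gguf.constants import Keys

    reader = GGUFReader("glm-4.5.gguf")  # hypothetical converted file
    key = Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch="glm4moe")  # arch string assumed
    field = reader.get_field(key)
    if field is not None:
        # for a scalar uint32 the value sits in the last part of the field
        print(key, "=", int(field.parts[-1][0]))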

gguf-py/gguf/constants.py

Lines changed: 7 additions & 7 deletions
@@ -105,7 +105,7 @@ class LLM:
         EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
         EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
         MOE_EVERY_N_LAYERS = "{arch}.moe_every_n_layers"
-        NUM_NEXTN_PREDICT_LAYERS = "{arch}.num_nextn_predict_layers"
+        NEXTN_PREDICT_LAYERS = "{arch}.num_nextn_predict_layers"
         POOLING_TYPE = "{arch}.pooling_type"
         LOGIT_SCALE = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"

@@ -940,12 +940,12 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.A_MM_NORM_PRE: "mm.a.norm_pre",
     MODEL_TENSOR.A_MM_NORM_MID: "mm.a.norm_mid",
     # NextN/MTP
-    MODEL_TENSOR.NEXTN_EH_PROJ: "blk.{bid}.eh_proj",
-    MODEL_TENSOR.NEXTN_EMBED_TOKENS: "blk.{bid}.embed_tokens",
-    MODEL_TENSOR.NEXTN_ENORM: "blk.{bid}.enorm",
-    MODEL_TENSOR.NEXTN_HNORM: "blk.{bid}.hnorm",
-    MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD: "blk.{bid}.shared_head.head",
-    MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM: "blk.{bid}.shared_head.norm",
+    MODEL_TENSOR.NEXTN_EH_PROJ: "blk.{bid}.nextn.eh_proj",
+    MODEL_TENSOR.NEXTN_EMBED_TOKENS: "blk.{bid}.nextn.embed_tokens",
+    MODEL_TENSOR.NEXTN_ENORM: "blk.{bid}.nextn.enorm",
+    MODEL_TENSOR.NEXTN_HNORM: "blk.{bid}.nextn.hnorm",
+    MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD: "blk.{bid}.nextn.shared_head_head",
+    MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM: "blk.{bid}.nextn.shared_head_norm",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
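
For illustration, the renamed templates expand to per-layer names like the following; the block index used here is a hypothetical example, not taken from the diff.

    from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

    bid = 46  # hypothetical NextN block index
    print(TENSOR_NAMES[MODEL_TENSOR.NEXTN_EH_PROJ].format(bid=bid))
    # -> blk.46.nextn.eh_proj (previously blk.46.eh_proj)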

gguf-py/gguf/gguf_writer.py

Lines changed: 2 additions & 2 deletions
@@ -753,8 +753,8 @@ def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
     def add_moe_every_n_layers(self, value: int) -> None:
         self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)
 
-    def add_num_nextn_predict_layers(self, count: int) -> None:
-        self.add_uint32(Keys.LLM.NUM_NEXTN_PREDICT_LAYERS.format(arch=self.arch), count)
+    def add_nextn_predict_layers(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch=self.arch), count)
 
     def add_swin_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
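
For context, a minimal sketch of calling the renamed helper from a converter; the output path and the "glm4moe" arch string below are hypothetical.

    from gguf import GGUFWriter

    writer = GGUFWriter("glm-4.5-demo.gguf", "glm4moe")  # hypothetical path and arch string
    writer.add_nextn_predict_layers(1)  # stages the uint32 KV; nothing is written yet
    # a real conversion would then add tensors and call write_header_to_file(),
    # write_kv_data_to_file(), write_tensors_to_file() and close()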

gguf-py/gguf/tensor_mapping.py

Lines changed: 6 additions & 6 deletions
@@ -1351,27 +1351,27 @@ class TensorNameMap:
 
         # NextN/MTP tensors for GLM4_MOE
         MODEL_TENSOR.NEXTN_EH_PROJ: (
-            "model.layers.{bid}.eh_proj.weight",
+            "model.layers.{bid}.eh_proj",
         ),
 
         MODEL_TENSOR.NEXTN_EMBED_TOKENS: (
-            "model.layers.{bid}.embed_tokens.weight",
+            "model.layers.{bid}.embed_tokens",
         ),
 
         MODEL_TENSOR.NEXTN_ENORM: (
-            "model.layers.{bid}.enorm.weight",
+            "model.layers.{bid}.enorm",
         ),
 
         MODEL_TENSOR.NEXTN_HNORM: (
-            "model.layers.{bid}.hnorm.weight",
+            "model.layers.{bid}.hnorm",
         ),
 
         MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD: (
-            "model.layers.{bid}.shared_head.head.weight",
+            "model.layers.{bid}.shared_head.head",
         ),
 
         MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM: (
-            "model.layers.{bid}.shared_head.norm.weight",
+            "model.layers.{bid}.shared_head.norm",
         ),
     }
 
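Dropping the ".weight" suffix from these entries lines up with how names are resolved through TensorNameMap.get_name() with try_suffixes, which strips the suffix before the lookup and re-appends it afterwards. A minimal sketch, with a hypothetical block count and layer index:

    from gguf import MODEL_ARCH, get_tensor_name_map

    name_map = get_tensor_name_map(MODEL_ARCH.GLM4_MOE, 47)  # hypothetical block count
    hf_name = "model.layers.46.eh_proj.weight"               # hypothetical NextN layer index
    print(name_map.get_name(hf_name, try_suffixes=(".weight", ".bias")))
    # -> blk.46.nextn.eh_proj.weight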

src/llama-arch.cpp

Lines changed: 7 additions & 7 deletions
@@ -126,7 +126,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
     { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
     { LLM_KV_MOE_EVERY_N_LAYERS, "%s.moe_every_n_layers" },
-    { LLM_KV_NUM_NEXTN_PREDICT_LAYERS, "%s.num_nextn_predict_layers" },
+    { LLM_KV_NEXTN_PREDICT_LAYERS, "%s.nextn_predict_layers" },
     { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
     { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
     { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },

@@ -1417,12 +1417,12 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
            { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
            // NextN/MTP tensors - preserved but unused (in final layer, dynamic layer number)
-           { LLM_TENSOR_NEXTN_EH_PROJ, "blk.%d.eh_proj" },
-           { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.embed_tokens" },
-           { LLM_TENSOR_NEXTN_ENORM, "blk.%d.enorm" },
-           { LLM_TENSOR_NEXTN_HNORM, "blk.%d.hnorm" },
-           { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.shared_head.head" },
-           { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.shared_head.norm" },
+           { LLM_TENSOR_NEXTN_EH_PROJ, "blk.%d.nextn.eh_proj" },
+           { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.nextn.embed_tokens" },
+           { LLM_TENSOR_NEXTN_ENORM, "blk.%d.nextn.enorm" },
+           { LLM_TENSOR_NEXTN_HNORM, "blk.%d.nextn.hnorm" },
+           { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
+           { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
         },
     },
     {

src/llama-arch.h

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ enum llm_kv {
     LLM_KV_EXPERT_WEIGHTS_NORM,
     LLM_KV_EXPERT_GATING_FUNC,
     LLM_KV_MOE_EVERY_N_LAYERS,
-    LLM_KV_NUM_NEXTN_PREDICT_LAYERS,
+    LLM_KV_NEXTN_PREDICT_LAYERS,
     LLM_KV_POOLING_TYPE,
     LLM_KV_LOGIT_SCALE,
     LLM_KV_DECODER_START_TOKEN_ID,

src/llama-hparams.h

Lines changed: 2 additions & 2 deletions
@@ -72,8 +72,8 @@ struct llama_hparams {
     float    expert_weights_scale = 0.0;
     bool     expert_weights_norm  = false;
     uint32_t expert_gating_func   = LLAMA_EXPERT_GATING_FUNC_TYPE_NONE;
-    uint32_t moe_every_n_layers       = 0;
-    uint32_t num_nextn_predict_layers = 0;
+    uint32_t moe_every_n_layers   = 0;
+    uint32_t nextn_predict_layers = 0;
 
     float f_norm_eps;
     float f_norm_rms_eps;

src/llama-kv-cache-unified.cpp

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ llama_kv_cache_unified::llama_kv_cache_unified(
     }
     if (model.arch == LLM_ARCH_GLM4_MOE) {
         // GLM-4.5: Only process up to last layer, skip final NextN layer
-        n_layer_cache = hparams.n_layer - 1;
+        n_layer_cache = hparams.n_layer - hparams.nextn_predict_layers;
     }
 
     // create a context for each buffer type
