
Commit f5df812

Apply suggestions from code review

1 parent 0bb9f28
4 files changed: +6 −7 lines

convert_hf_to_gguf.py
Lines changed: 1 addition & 1 deletion

@@ -6588,7 +6588,7 @@ class Glm4MoeModel(TextModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         # GLM4_MOE has num_hidden_layers + 1 actual layers (including NextN layer)
-        self.block_count = self.hparams["num_hidden_layers"] + 1
+        self.block_count = self.hparams["num_hidden_layers"] + self.hparams.get("num_nextn_predict_layers", 0)
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)

     def set_vocab(self):
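
For context, a minimal sketch of what this block-count change does, assuming hparams is the parsed HF config dict; the values below are illustrative, mirroring the GLM-4.5-Air case noted in src/llama-model.cpp (46 layers + 1 NextN layer):

# Illustrative config values (GLM-4.5-Air: 46 hidden layers + 1 NextN layer).
hparams = {"num_hidden_layers": 46, "num_nextn_predict_layers": 1}

# Before: one extra NextN layer was hardcoded.
block_count_old = hparams["num_hidden_layers"] + 1  # 47

# After: the count comes from the config, defaulting to 0 so configs
# without the key (i.e. no NextN/MTP layers) still convert.
block_count_new = hparams["num_hidden_layers"] + hparams.get("num_nextn_predict_layers", 0)  # 47

assert block_count_old == block_count_new == 47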

gguf-py/gguf/constants.py
Lines changed: 1 addition & 1 deletion

@@ -105,7 +105,7 @@ class LLM:
         EXPERT_WEIGHTS_NORM     = "{arch}.expert_weights_norm"
         EXPERT_GATING_FUNC      = "{arch}.expert_gating_func"
         MOE_EVERY_N_LAYERS      = "{arch}.moe_every_n_layers"
-        NEXTN_PREDICT_LAYERS    = "{arch}.num_nextn_predict_layers"
+        NEXTN_PREDICT_LAYERS    = "{arch}.nextn_predict_layers"
         POOLING_TYPE            = "{arch}.pooling_type"
         LOGIT_SCALE             = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID  = "{arch}.decoder_start_token_id"
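
The renamed constant is a per-architecture key template; a small sketch of how it expands (the arch name "glm4moe" is assumed here for illustration):

# The {arch} placeholder is filled with the model architecture name;
# "glm4moe" is an assumed example.
NEXTN_PREDICT_LAYERS = "{arch}.nextn_predict_layers"
key = NEXTN_PREDICT_LAYERS.format(arch="glm4moe")
# key == "glm4moe.nextn_predict_layers"
# (before this commit: "glm4moe.num_nextn_predict_layers")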

src/llama-model.cpp
Lines changed: 1 addition & 2 deletions

@@ -1439,7 +1439,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 }

                 // NextN/MTP parameters
-                ml.get_key(LLM_KV_NEXTN_PREDICT_LAYERS, hparams.nextn_predict_layers, false);
+                ml.get_key(LLM_KV_NEXTN_PREDICT_LAYERS, hparams.nextn_predict_layers, false);

                 switch (hparams.n_layer) {
                     case 47: type = LLM_TYPE_106B_A12B; break; // GLM-4.5-Air (46 layers + 1 NextN layer)

@@ -4394,7 +4394,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), { n_embd, n_vocab }, TENSOR_DUPLICATED);
                 }

-
                 // Load ALL tensors including NextN layer to satisfy total tensor count
                 // but only PROCESS up to last layer (skipping final NextN layer) in forward pass
                 for (int i = 0; i < n_layer; ++i) {
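
The comment in the second hunk describes a load-all/process-fewer split; a schematic Python sketch of that idea (not the C++ implementation), with counts again mirroring GLM-4.5-Air:

# Schematic only: every block is materialized so the tensor count matches
# the file, but the forward pass stops before the trailing NextN layer.
n_layer = 47               # total blocks, including the NextN layer
nextn_predict_layers = 1   # read from the nextn_predict_layers key

loaded  = [f"blk.{i}" for i in range(n_layer)]       # load ALL 47 blocks
forward = loaded[:n_layer - nextn_predict_layers]    # process only 46
assert len(forward) == 46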

src/llama-vocab.cpp
Lines changed: 3 additions & 3 deletions

@@ -2185,7 +2185,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|fim▁begin|>" // DeepSeek
                     || t.first == "<PRE>"
                     || t.first == "▁<PRE>" // CodeLlama
-                    || t.first == "<|code_prefix|>" // GLM4_MOE
+                    || t.first == "<|code_prefix|>" // GLM-4.5
                 ) {
                 special_fim_pre_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {

@@ -2205,7 +2205,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|fim▁hole|>" // DeepSeek
                     || t.first == "<SUF>"
                     || t.first == "▁<SUF>" // CodeLlama
-                    || t.first == "<|code_suffix|>" // GLM4_MOE
+                    || t.first == "<|code_suffix|>" // GLM-4.5
                 ) {
                 special_fim_suf_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {

@@ -2225,7 +2225,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|fim▁end|>" // DeepSeek
                     || t.first == "<MID>"
                     || t.first == "▁<MID>" // CodeLlama
-                    || t.first == "<|code_middle|>" // GLM4_MOE
+                    || t.first == "<|code_middle|>" // GLM-4.5
                 ) {
                 special_fim_mid_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
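
These three hunks only retag the comments from the internal arch name to the model name; for reference, a dictionary-style sketch of which FIM special-token IDs the annotated tokens feed (token strings and ID names taken from the diff context above):

# Sketch of the mapping the annotated lines implement in
# llama_vocab::impl::load; keys and values come from the diff context.
GLM45_FIM_TOKENS = {
    "<|code_prefix|>": "special_fim_pre_id",
    "<|code_suffix|>": "special_fim_suf_id",
    "<|code_middle|>": "special_fim_mid_id",
}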
