Commit 9652812

feat: support GLM 4.5 family of models
1 parent: 6b478bb

2 files changed (+15, −14 lines)


src/llama-arch.cpp (7 additions, 7 deletions)

@@ -1414,13 +1414,13 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         { LLM_TENSOR_FFN_GATE_SHEXP,         "blk.%d.ffn_gate_shexp" },
         { LLM_TENSOR_FFN_DOWN_SHEXP,         "blk.%d.ffn_down_shexp" },
         { LLM_TENSOR_FFN_UP_SHEXP,           "blk.%d.ffn_up_shexp" },
-        // NextN/MTP tensors - preserved but unused (treated as output tensors)
-        { LLM_TENSOR_NEXTN_EH_PROJ,          "blk.46.eh_proj" },
-        { LLM_TENSOR_NEXTN_EMBED_TOKENS,     "blk.46.embed_tokens" },
-        { LLM_TENSOR_NEXTN_ENORM,            "blk.46.enorm" },
-        { LLM_TENSOR_NEXTN_HNORM,            "blk.46.hnorm" },
-        { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.46.shared_head.head" },
-        { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.46.shared_head.norm" },
+        // NextN/MTP tensors - preserved but unused (in final layer, dynamic layer number)
+        { LLM_TENSOR_NEXTN_EH_PROJ,          "blk.%d.eh_proj" },
+        { LLM_TENSOR_NEXTN_EMBED_TOKENS,     "blk.%d.embed_tokens" },
+        { LLM_TENSOR_NEXTN_ENORM,            "blk.%d.enorm" },
+        { LLM_TENSOR_NEXTN_HNORM,            "blk.%d.hnorm" },
+        { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.shared_head.head" },
+        { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.shared_head.norm" },
     },
 },
 {
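Not part of the commit: a minimal standalone sketch of how a printf-style "blk.%d." pattern resolves to a concrete per-layer tensor name once a layer index is supplied, which is why "blk.%d.eh_proj" can cover every GLM 4.5 variant while the old "blk.46.eh_proj" only matched a model whose NextN layer happened to be block 46. The helper format_blk_name and the example indices are illustrative assumptions, not llama.cpp's actual tn() implementation.

#include <cstdio>
#include <string>

// Hypothetical stand-in for the formatting done by llama.cpp's tensor-name
// helper: substitute the block (layer) index into a "blk.%d.*" pattern.
static std::string format_blk_name(const char * pattern, int layer) {
    char buf[128];
    std::snprintf(buf, sizeof(buf), pattern, layer);
    return std::string(buf);
}

int main() {
    // The same pattern now covers any depth; 46 is only the index the old
    // code hard-coded, 92 is an arbitrary example of a deeper model.
    std::printf("%s\n", format_blk_name("blk.%d.eh_proj", 46).c_str()); // prints blk.46.eh_proj
    std::printf("%s\n", format_blk_name("blk.%d.eh_proj", 92).c_str()); // prints blk.92.eh_proj
    return 0;
}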

src/llama-model.cpp (8 additions, 7 deletions)

@@ -4386,13 +4386,14 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), { n_embd, n_vocab }, TENSOR_DUPLICATED);
         }

-        // NextN/MTP tensors (preserved but unused) - treated as output tensors
-        create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ), { 2 * n_embd, n_embd }, TENSOR_NOT_REQUIRED);
-        create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED);
-        create_tensor(tn(LLM_TENSOR_NEXTN_ENORM), { n_embd }, TENSOR_NOT_REQUIRED);
-        create_tensor(tn(LLM_TENSOR_NEXTN_HNORM), { n_embd }, TENSOR_NOT_REQUIRED);
-        create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED);
-        create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM), { n_embd }, TENSOR_NOT_REQUIRED);
+        // NextN/MTP tensors (preserved but unused) - in final layer (dynamic layer number)
+        const int final_layer = n_layer - 1; // NextN tensors are in the last layer
+        create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "weight", final_layer), { 2 * n_embd, n_embd }, TENSOR_NOT_REQUIRED);
+        create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", final_layer), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED);
+        create_tensor(tn(LLM_TENSOR_NEXTN_ENORM, "weight", final_layer), { n_embd }, TENSOR_NOT_REQUIRED);
+        create_tensor(tn(LLM_TENSOR_NEXTN_HNORM, "weight", final_layer), { n_embd }, TENSOR_NOT_REQUIRED);
+        create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", final_layer), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED);
+        create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", final_layer), { n_embd }, TENSOR_NOT_REQUIRED);

         for (int i = 0; i < n_layer; ++i) {
             auto & layer = layers[i];
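The two details carrying this hunk are the final_layer = n_layer - 1 index (the NextN/MTP tensors sit in the last block, wherever that is for a given GLM 4.5 variant) and the TENSOR_NOT_REQUIRED flag (a GGUF without these tensors still loads). The following standalone sketch mirrors that "optional tensor" idea under stated assumptions: fake_gguf, load_optional, and the depth of 47 are illustrative, not llama.cpp's actual create_tensor machinery.

#include <cstdio>
#include <map>
#include <string>

// Illustrative stand-in for a GGUF tensor table; not llama.cpp's real loader.
struct fake_gguf {
    std::map<std::string, int> tensors; // tensor name -> dummy handle

    // Mirrors the assumed TENSOR_NOT_REQUIRED behaviour: a missing tensor is
    // skipped instead of aborting the whole model load.
    int load_optional(const std::string & name) const {
        auto it = tensors.find(name);
        if (it == tensors.end()) {
            std::printf("optional tensor %s not present, skipping\n", name.c_str());
            return -1;
        }
        return it->second;
    }
};

int main() {
    const int n_layer     = 47;          // example depth only; GLM 4.5 variants differ
    const int final_layer = n_layer - 1; // NextN/MTP tensors sit in the last block

    char name[64];
    std::snprintf(name, sizeof(name), "blk.%d.eh_proj", final_layer);

    fake_gguf f;            // empty table: the NextN tensor is absent
    f.load_optional(name);  // reports the skip; loading would continue normally
    return 0;
}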
