
Commit 8cb89e1

bartowski1182 authored and pwilkin committed
model : support GLM 4.6 (make a few NextN/MTP tensors not required) (ggml-org#16359)
* Make a few GLM tensors not required

  layer.nextn.shared_head_head and layer.nextn.embed_tokens are both excluded from GLM 4.6, resulting in the model not loading after conversion/quantization. This marks those tensors as not required, which makes it work.

* Update llama-model.cpp

  layer.nextn.shared_head_norm is also not required, in case of future models.
1 parent b0f974b commit 8cb89e1

File tree

1 file changed: +5 -3 lines changed


src/llama-model.cpp

Lines changed: 5 additions & 3 deletions
@@ -5006,11 +5006,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // NextN/MTP tensors (preserved but unused) - conditionally load for last nextn_predict_layers
                     if (hparams.nextn_predict_layers > 0 && static_cast<uint32_t>(i) >= n_layer - hparams.nextn_predict_layers) {
                         layer.nextn.eh_proj = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "weight", i), { 2 * n_embd, n_embd }, flags);
-                        layer.nextn.embed_tokens = create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", i), { n_embd, n_vocab }, flags);
                         layer.nextn.enorm = create_tensor(tn(LLM_TENSOR_NEXTN_ENORM, "weight", i), { n_embd }, flags);
                         layer.nextn.hnorm = create_tensor(tn(LLM_TENSOR_NEXTN_HNORM, "weight", i), { n_embd }, flags);
-                        layer.nextn.shared_head_head = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", i), { n_embd, n_vocab }, flags);
-                        layer.nextn.shared_head_norm = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", i), { n_embd }, flags);
+
+                        // Optional tensors
+                        layer.nextn.embed_tokens = create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", i), { n_embd, n_vocab }, flags | TENSOR_NOT_REQUIRED);
+                        layer.nextn.shared_head_head = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", i), { n_embd, n_vocab }, flags | TENSOR_NOT_REQUIRED);
+                        layer.nextn.shared_head_norm = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", i), { n_embd }, flags | TENSOR_NOT_REQUIRED);
                     }
                 }
             }
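For context, the reason this change fixes GLM 4.6 loading is the TENSOR_NOT_REQUIRED flag: a tensor created with it is allowed to be absent from the converted model file, whereas a missing required tensor aborts the load. Below is a minimal sketch of that pattern, assuming a simplified loader; the types and signatures here (Tensor, TensorFlags, this create_tensor, the weights map) are illustrative stand-ins, not llama.cpp's actual internals.

    // Minimal sketch of the optional-tensor pattern (illustrative types and names, not llama.cpp's API).
    #include <cstdint>
    #include <map>
    #include <stdexcept>
    #include <string>

    struct Tensor { /* weights, shape, ... */ };

    enum TensorFlags : uint32_t {
        TENSOR_DEFAULT      = 0,
        TENSOR_NOT_REQUIRED = 1u << 0,   // a missing tensor is tolerated instead of failing the load
    };

    // Look up a tensor by name in the model file's weight map.
    // A missing required tensor throws (this is what stopped GLM 4.6 from loading);
    // a tensor flagged TENSOR_NOT_REQUIRED simply comes back as nullptr.
    Tensor * create_tensor(std::map<std::string, Tensor> & weights,
                           const std::string & name, uint32_t flags) {
        auto it = weights.find(name);
        if (it != weights.end()) {
            return &it->second;
        }
        if (flags & TENSOR_NOT_REQUIRED) {
            return nullptr;              // e.g. nextn.embed_tokens, absent from GLM 4.6 exports
        }
        throw std::runtime_error("missing required tensor: " + name);
    }

Since the code comment in the diff notes these NextN/MTP tensors are "preserved but unused", nothing reads them at inference time, so a nullptr for a missing one is harmless and the flag alone is enough to let a GLM 4.6 conversion load.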
