@@ -4388,12 +4388,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
             // NextN/MTP tensors (preserved but unused) - in final layer (dynamic layer number)
             const int final_layer = n_layer - 1; // NextN tensors are in the last layer
-            create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "weight", final_layer), { 2 * n_embd, n_embd }, TENSOR_NOT_REQUIRED);
-            create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", final_layer), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED);
-            create_tensor(tn(LLM_TENSOR_NEXTN_ENORM, "weight", final_layer), { n_embd }, TENSOR_NOT_REQUIRED);
-            create_tensor(tn(LLM_TENSOR_NEXTN_HNORM, "weight", final_layer), { n_embd }, TENSOR_NOT_REQUIRED);
-            create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", final_layer), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED);
-            create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", final_layer), { n_embd }, TENSOR_NOT_REQUIRED);
+            create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, final_layer), { 2 * n_embd, n_embd }, TENSOR_NOT_REQUIRED);
+            create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, final_layer), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED);
+            create_tensor(tn(LLM_TENSOR_NEXTN_ENORM, final_layer), { n_embd }, TENSOR_NOT_REQUIRED);
+            create_tensor(tn(LLM_TENSOR_NEXTN_HNORM, final_layer), { n_embd }, TENSOR_NOT_REQUIRED);
+            create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, final_layer), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED);
+            create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, final_layer), { n_embd }, TENSOR_NOT_REQUIRED);
 
             for (int i = 0; i < n_layer; ++i) {
                 auto & layer = layers[i];