@@ -126,7 +126,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_WEIGHTS_NORM,          "%s.expert_weights_norm"        },
     { LLM_KV_EXPERT_GATING_FUNC,           "%s.expert_gating_func"         },
     { LLM_KV_MOE_EVERY_N_LAYERS,           "%s.moe_every_n_layers"         },
-    { LLM_KV_NUM_NEXTN_PREDICT_LAYERS,     "%s.num_nextn_predict_layers"   },
+    { LLM_KV_NEXTN_PREDICT_LAYERS,         "%s.nextn_predict_layers"       },
     { LLM_KV_POOLING_TYPE,                 "%s.pooling_type"               },
     { LLM_KV_LOGIT_SCALE,                  "%s.logit_scale"                },
     { LLM_KV_DECODER_START_TOKEN_ID,       "%s.decoder_start_token_id"     },
@@ -1417,12 +1417,12 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_SHEXP,           "blk.%d.ffn_up_shexp" },
             { LLM_TENSOR_FFN_EXP_PROBS_B,        "blk.%d.exp_probs_b" },
             // NextN/MTP tensors - preserved but unused (in final layer, dynamic layer number)
-            { LLM_TENSOR_NEXTN_EH_PROJ,          "blk.%d.eh_proj" },
-            { LLM_TENSOR_NEXTN_EMBED_TOKENS,     "blk.%d.embed_tokens" },
-            { LLM_TENSOR_NEXTN_ENORM,            "blk.%d.enorm" },
-            { LLM_TENSOR_NEXTN_HNORM,            "blk.%d.hnorm" },
-            { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.shared_head.head" },
-            { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.shared_head.norm" },
+            { LLM_TENSOR_NEXTN_EH_PROJ,          "blk.%d.nextn.eh_proj" },
+            { LLM_TENSOR_NEXTN_EMBED_TOKENS,     "blk.%d.nextn.embed_tokens" },
+            { LLM_TENSOR_NEXTN_ENORM,            "blk.%d.nextn.enorm" },
+            { LLM_TENSOR_NEXTN_HNORM,            "blk.%d.nextn.hnorm" },
+            { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
+            { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
         },
     },
     {
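
For context, the `%s` in the KV patterns is substituted with the architecture name and the `%d` in the tensor patterns with a block index when names are resolved. Below is a minimal sketch of that expansion, assuming plain snprintf-style formatting (llama.cpp's own `LLM_KV`/`LLM_TN` helpers wrap this; the architecture string "glm4moe" and layer index 46 are illustrative only):

#include <cstdio>
#include <string>

// Hypothetical helpers mirroring how a "%s"/"%d" pattern becomes a concrete
// GGUF key or tensor name; not the actual llama.cpp API.
static std::string resolve(const char * fmt, const char * arch) {
    char buf[256];
    snprintf(buf, sizeof(buf), fmt, arch);
    return buf;
}

static std::string resolve(const char * fmt, int layer) {
    char buf[256];
    snprintf(buf, sizeof(buf), fmt, layer);
    return buf;
}

int main() {
    // KV key after the rename, e.g. "glm4moe.nextn_predict_layers"
    printf("%s\n", resolve("%s.nextn_predict_layers", "glm4moe").c_str());
    // Tensor name after the rename, e.g. "blk.46.nextn.eh_proj"
    printf("%s\n", resolve("blk.%d.nextn.eh_proj", 46).c_str());
    return 0;
}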