Commit 592e2f5

Rename a tensor
1 parent 798d015 commit 592e2f5

4 files changed, +7 -7 lines changed

convert_hf_to_gguf.py
Lines changed: 1 addition & 1 deletion

@@ -2333,7 +2333,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if ".attn_norm_a." in name and bid is not None:
             return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_NORM, bid), data_torch)]
         elif ".attn_norm_b." in name and bid is not None:
-            return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_NORM_2, bid), data_torch)]
+            return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_POST_NORM, bid), data_torch)]
         elif ".ffn_norm_a." in name and bid is not None:
             return [(self.format_tensor_name(gguf.MODEL_TENSOR.FFN_NORM, bid), data_torch)]
         elif ".ffn_norm_b." in name and bid is not None:

gguf-py/gguf/constants.py
Lines changed: 1 addition & 1 deletion

@@ -2581,7 +2581,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.ATTN_POST_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
         MODEL_TENSOR.ATTN_V,
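For reference, a minimal Python sketch (not part of this commit, and assuming the usual TENSOR_NAMES mapping in gguf-py/gguf/constants.py) of how the renamed enum member is expected to resolve to the on-disk tensor name that the C++ side looks up:

import gguf

# Assumed entry in gguf-py TENSOR_NAMES (hypothetical excerpt):
#   MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm"
fmt = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_POST_NORM]
print(fmt.format(bid=0))  # expected: blk.0.post_attention_norm, matching "blk.%d.post_attention_norm" in src/llama-arch.cpp below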

src/llama-arch.cpp
Lines changed: 1 addition & 1 deletion

@@ -327,7 +327,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
             { LLM_TENSOR_OUTPUT,         "output" },
             { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
-            { LLM_TENSOR_ATTN_NORM_2,    "blk.%d.attn_norm_2" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
             { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
             { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
             { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },

src/llama-model.cpp
Lines changed: 4 additions & 4 deletions

@@ -5617,8 +5617,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     auto & layer = layers[i];

                     // dual attention normalization
-                    layer.attn_norm   = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
-                    layer.attn_norm_2 = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd}, 0);
+                    layer.attn_norm      = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
+                    layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), {n_embd}, 0);

                     // attention projections
                     layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0);

@@ -18247,9 +18247,9 @@ struct llm_build_afmoe : public llm_graph_context {

                // dual attention normalization (post)
                cur = build_norm(cur,
-                        model.layers[il].attn_norm_2, NULL,
+                        model.layers[il].attn_post_norm, NULL,
                        LLM_NORM_RMS, il);
-                cb(cur, "attn_norm_2", il);
+                cb(cur, "attn_post_norm", il);

                if (il == n_layer - 1 && inp_out_ids) {
                    cur = ggml_get_rows(ctx0, cur, inp_out_ids);
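For illustration only, a small PyTorch sketch (hypothetical shapes and weights, not the author's code) of the dual pre/post attention RMS normalization pattern that the renamed attn_post_norm tensor participates in:

import torch

def rms_norm(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # RMS normalization, the operation selected by LLM_NORM_RMS in build_norm
    return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps) * weight

n_embd = 8
x = torch.randn(4, n_embd)              # hypothetical hidden states
attn_norm = torch.ones(n_embd)          # blk.%d.attn_norm (pre-attention weight)
attn_post_norm = torch.ones(n_embd)     # blk.%d.post_attention_norm (post-attention weight, renamed here)

h = rms_norm(x, attn_norm)              # normalize before attention
# ... attention block would run here ...
h = rms_norm(h, attn_post_norm)         # normalize the attention output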
