Commit 592e2f5

Rename a tensor
1 parent 798d015 commit 592e2f5

4 files changed, +7 -7 lines changed

convert_hf_to_gguf.py
Lines changed: 1 addition & 1 deletion

@@ -2333,7 +2333,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if ".attn_norm_a." in name and bid is not None:
             return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_NORM, bid), data_torch)]
         elif ".attn_norm_b." in name and bid is not None:
-            return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_NORM_2, bid), data_torch)]
+            return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_POST_NORM, bid), data_torch)]
         elif ".ffn_norm_a." in name and bid is not None:
             return [(self.format_tensor_name(gguf.MODEL_TENSOR.FFN_NORM, bid), data_torch)]
         elif ".ffn_norm_b." in name and bid is not None:

gguf-py/gguf/constants.py
Lines changed: 1 addition & 1 deletion

@@ -2581,7 +2581,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.ATTN_POST_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
         MODEL_TENSOR.ATTN_V,
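For reference, a minimal Python sketch (not part of this commit, and assuming the usual TENSOR_NAMES mapping in gguf-py/gguf/constants.py) of how the renamed enum member is expected to resolve to the on-disk tensor name that the C++ side looks up:

import gguf

# Assumed entry in gguf-py TENSOR_NAMES (hypothetical excerpt):
#   MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm"
fmt = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_POST_NORM]
print(fmt.format(bid=0))  # expected: blk.0.post_attention_norm, matching "blk.%d.post_attention_norm" in src/llama-arch.cpp below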

src/llama-arch.cpp
Lines changed: 1 addition & 1 deletion

@@ -327,7 +327,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
             { LLM_TENSOR_OUTPUT,         "output" },
             { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
-            { LLM_TENSOR_ATTN_NORM_2,    "blk.%d.attn_norm_2" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
             { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
             { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
             { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },

src/llama-model.cpp
Lines changed: 4 additions & 4 deletions

@@ -5617,8 +5617,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     auto & layer = layers[i];

                     // dual attention normalization
-                    layer.attn_norm   = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
-                    layer.attn_norm_2 = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd}, 0);
+                    layer.attn_norm      = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
+                    layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), {n_embd}, 0);

                     // attention projections
                     layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0);

@@ -18247,9 +18247,9 @@ struct llm_build_afmoe : public llm_graph_context {

                // dual attention normalization (post)
                cur = build_norm(cur,
-                        model.layers[il].attn_norm_2, NULL,
+                        model.layers[il].attn_post_norm, NULL,
                        LLM_NORM_RMS, il);
-                cb(cur, "attn_norm_2", il);
+                cb(cur, "attn_post_norm", il);

                if (il == n_layer - 1 && inp_out_ids) {
                    cur = ggml_get_rows(ctx0, cur, inp_out_ids);
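For illustration only, a small PyTorch sketch (hypothetical shapes and weights, not the author's code) of the dual pre/post attention RMS normalization pattern that the renamed attn_post_norm tensor participates in:

import torch

def rms_norm(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # RMS normalization, the operation selected by LLM_NORM_RMS in build_norm
    return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps) * weight

n_embd = 8
x = torch.randn(4, n_embd)              # hypothetical hidden states
attn_norm = torch.ones(n_embd)          # blk.%d.attn_norm (pre-attention weight)
attn_post_norm = torch.ones(n_embd)     # blk.%d.post_attention_norm (post-attention weight, renamed here)

h = rms_norm(x, attn_norm)              # normalize before attention
# ... attention block would run here ...
h = rms_norm(h, attn_post_norm)         # normalize the attention output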
