
Commit 7786520

Use ATTN_K/Q_NORM for k,q weights to prevent quantization

Parent: 149b98c

3 files changed (+7, -7 lines)

gguf-py/gguf/tensor_mapping.py (2 additions, 2 deletions)

@@ -177,7 +177,6 @@ class TensorNameMap:
             "transformer.layer.{bid}.attention.q_lin",                     # distillbert
             "transformer.h.{bid}.attn.q_proj",                             # gpt-j
             "model.layers.layers.{bid}.self_attn.q_proj",                  # plamo
-            "model.layers.layers.{bid}.mixer.q",                           # plamo2
             "model.layers.{bid}.attention.wq",                             # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.query",  # Grok
             "transformer.h.{bid}.attn.attention.q_proj",                   # exaone
@@ -194,7 +193,6 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.k_proj",                             # gpt-j
             "transformer.h.{bid}.attn.k",                                  # refact
             "model.layers.layers.{bid}.self_attn.k_proj",                  # plamo
-            "model.layers.layers.{bid}.mixer.k",                           # plamo2
             "model.layers.{bid}.attention.wk",                             # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.key",    # Grok
             "transformer.h.{bid}.attn.attention.k_proj",                   # exaone
@@ -472,6 +470,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.q_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",             # openelm
+            "model.layers.layers.{bid}.mixer.q",                # plamo2
         ),

         MODEL_TENSOR.ATTN_K_NORM: (
@@ -481,6 +480,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.k_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",             # openelm
+            "model.layers.layers.{bid}.mixer.k",                # plamo2
         ),

         MODEL_TENSOR.ROPE_FREQS: (
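The gguf-py change moves the plamo2 per-head q/k weights from the ATTN_Q/ATTN_K mappings to ATTN_Q_NORM/ATTN_K_NORM, so the converter writes them under the *_norm GGUF names. A minimal sketch of how the mapping resolves after this change, assuming gguf-py from this branch is importable and MODEL_ARCH.PLAMO2 is defined there:

import gguf

# Build the source-name -> GGUF-name map for a one-block plamo2 model.
name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.PLAMO2, n_blocks=1)

# After this commit the mixer.q/mixer.k tensors should resolve to the
# attn_{q,k}_norm names rather than attn_{q,k}.
print(name_map.get_name("model.layers.layers.0.mixer.q.weight",
                        try_suffixes=(".weight", ".bias")))  # expected: blk.0.attn_q_norm.weight
print(name_map.get_name("model.layers.layers.0.mixer.k.weight",
                        try_suffixes=(".weight", ".bias")))  # expected: blk.0.attn_k_norm.weight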

src/llama-arch.cpp (3 additions, 3 deletions)

@@ -34,7 +34,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_PHI3,            "phi3"         },
     { LLM_ARCH_PHIMOE,          "phimoe"       },
     { LLM_ARCH_PLAMO,           "plamo"        },
-    { LLM_ARCH_PLAMO2,          "plamo2"       },
+    { LLM_ARCH_PLAMO2,          "plamo2"       },
     { LLM_ARCH_CODESHELL,       "codeshell"    },
     { LLM_ARCH_ORION,           "orion"        },
     { LLM_ARCH_INTERNLM2,       "internlm2"    },
@@ -788,8 +788,8 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
-           { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
-           { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+           { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+           { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
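The llama-arch.cpp side registers the matching per-architecture tensor names, so the loader now looks up blk.%d.attn_q_norm / blk.%d.attn_k_norm instead of blk.%d.attn_q / blk.%d.attn_k for these weights. These C++ format strings have to agree with the names gguf-py writes; a small check of that correspondence, assuming the gguf package re-exports its constants as usual:

import gguf

# The Python-side name templates used by the converter for these tensor kinds.
print(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q_NORM].format(bid=0))  # blk.0.attn_q_norm
print(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_K_NORM].format(bid=0))  # blk.0.attn_k_norm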

src/llama-model.cpp (2 additions, 2 deletions)

@@ -2898,8 +2898,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                    const int64_t v_proj_dim = v_num_heads * v_dim;

                    layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, q_proj_dim + k_proj_dim + v_proj_dim}, 0);
-                   layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {head_dim, num_attention_heads}, 0);
-                   layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {head_dim, k_num_heads}, 0);
+                   layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {head_dim, num_attention_heads}, 0);
+                   layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {head_dim, k_num_heads}, 0);
                    layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {q_num_heads * v_dim, n_embd}, 0);
                }
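On the C++ side, layer.wq and layer.wk for plamo2 are now created from the *_NORM tensor names, which, per the commit message, is meant to keep these small per-head weights out of quantization. One way to inspect the result in a converted or quantized file is gguf-py's GGUFReader; "plamo2.gguf" below is a hypothetical path, not something produced by this commit:

from gguf import GGUFReader

reader = GGUFReader("plamo2.gguf")  # hypothetical output of the convert/quantize step
for t in reader.tensors:
    if "attn_q_norm" in t.name or "attn_k_norm" in t.name:
        # tensor_type is the ggml quantization type; the intent of this commit
        # is that these tensors stay in a non-quantized type (e.g. F32).
        print(t.name, t.tensor_type.name, tuple(t.shape))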
