[fix] fix for EOG (end-of-generation) token

im0qianqian · im0qianqian · commit b359533f0dad · 2025-09-14T19:06:36.000+08:00
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -7860,14 +7860,6 @@ def set_gguf_parameters(self):
 
     _experts: list[dict[str, Tensor]] | None = None
 
-    @staticmethod
-    def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
-        if n_head_kv is not None and n_head != n_head_kv:
-            n_head = n_head_kv
-        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape))
-
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if name.endswith("query_key_value.weight"):
             n_head = self.hparams["num_attention_heads"]
@@ -7878,8 +7870,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             q, k, v = data_torch.split([n_head * head_dim, n_kv_head * head_dim, n_kv_head * head_dim], dim=-2)
 
             return [
-                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), self.permute(q, n_head, n_head)),
-                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), self.permute(k, n_head, n_kv_head)),
+                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), q),
+                (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), k),
                 (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v)
             ]
         elif name.find("mlp.experts") != -1:
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
@@ -2327,6 +2327,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "_<EOT>"
                     || t.first == "<|end_of_text|>"
                     || t.first == "<end_of_utterance>" // smoldocling
+                    || t.first == "<|role_end|>" // Ling v2
                ) {
                 special_eog_ids.insert(t.second);
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {