Skip to content

Commit 711d5e6

Browse files
authored
convert : fix Qwen3-Embedding pre-tokenizer hash (#15030)
1 parent f738989 commit 711d5e6

File tree

2 files changed: 4 additions and 3 deletions.

convert_hf_to_gguf.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -702,6 +702,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
702702
if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890":
703703
# ref: https://huggingface.co/moonshotai/Kimi-K2-Base
704704
res = "kimi-k2"
705+
if chkhsh == "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c":
706+
# ref: https://huggingface.co/Qwen/Qwen3-Embedding-0.6B
707+
res = "qwen2"
705708
if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
706709
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
707710
res = "llama-bpe"
@@ -849,9 +852,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
849852
if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
850853
# ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
851854
res = "exaone4"
852-
if chkhsh == "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c":
853-
# ref: https://huggingface.co/Qwen/Qwen3-Embedding-8B
854-
res = "qwen2"
855855

856856
if res is None:
857857
logger.warning("\n")

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -147,6 +147,7 @@ class TOKENIZER_TYPE(IntEnum):
147147
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"},
148148
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
149149
{"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
150+
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B", "chkhsh": "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c"},
150151
]
151152

152153

0 commit comments

Comments
 (0)