Skip to content

Commit 34360eb

Browse files
committed
Fix how to load special token id to gguf
1 parent 6afd3be commit 34360eb

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

convert_hf_to_gguf.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3551,14 +3551,21 @@ def set_vocab(self):
3551 3551
self.gguf_writer.add_token_types(toktypes)
3552 3552

3553 3553
# Add special tokens from config
3554-
if "bos_token_id" in tokenizer_config:
3555-
self.gguf_writer.add_bos_token_id(tokenizer_config["bos_token_id"])
3556-
if "eos_token_id" in tokenizer_config:
3557-
self.gguf_writer.add_eos_token_id(tokenizer_config["eos_token_id"])
3558-
if "pad_token_id" in tokenizer_config:
3559-
self.gguf_writer.add_pad_token_id(tokenizer_config["pad_token_id"])
3560-
if "unk_token_id" in tokenizer_config:
3561-
self.gguf_writer.add_unk_token_id(tokenizer_config["unk_token_id"])
3554+
if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] is not None:
3555+
token_id = tokens.index(tokenizer_config["bos_token"].encode("utf-8"))
3556+
self.gguf_writer.add_bos_token_id(token_id)
3557+
if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] is not None:
3558+
token_id = tokens.index(tokenizer_config["eos_token"].encode("utf-8"))
3559+
self.gguf_writer.add_eos_token_id(token_id)
3560+
if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] is not None:
3561+
token_id = tokens.index(tokenizer_config["pad_token"].encode("utf-8"))
3562+
self.gguf_writer.add_pad_token_id(token_id)
3563+
if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] is not None:
3564+
token_id = tokens.index(tokenizer_config["sep_token"].encode("utf-8"))
3565+
self.gguf_writer.add_sep_token_id(token_id)
3566+
if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] is not None:
3567+
token_id = tokens.index(tokenizer_config["unk_token"].encode("utf-8"))
3568+
self.gguf_writer.add_unk_token_id(token_id)
3562 3569

3563 3570
self.gguf_writer.add_add_space_prefix(False)
3564 3571

0 commit comments

Comments
 (0)