Skip to content

Commit cf87c76

Browse files
authored
type fix
1 parent 301ba77 commit cf87c76

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

convert_hf_to_gguf.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2649,19 +2649,19 @@ def set_vocab(self):
26492649
tokenizer = json.load(f)
26502650

26512651
vocab_size = tokenizer["vocab_size"]
2652-
tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
2652+
tokens: list[str] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
26532653
scores: list[float] = [-10000.0] * vocab_size
26542654
toktypes: list[int] = [gguf.TokenType.UNUSED] * vocab_size
26552655

2656-
def decode_grok_token(token: dict, toktype: gguf.TokenType) -> tuple[gguf.TokenType, int, bytes]:
2657-
tokid = token["token"]
2658-
tokb = token["bytes"]
2656+
def decode_grok_token(token: dict, toktype: gguf.TokenType) -> tuple[gguf.TokenType, int, str]:
2657+
tokid: int = token["token"]
2658+
tokb: list[int] = token["bytes"]
26592659
try:
26602660
tokc = bytes(tokb).decode("utf-8")
26612661
except Exception:
26622662
tokc = None
2663-
if len(tokb) == 1 or not tokc:
2664-
return gguf.TokenType.BYTE, tokid, "<0x{:02X}>".format(tokb[0]).encode("utf-8")
2663+
if len(tokb) == 1 or tokc is None:
2664+
return gguf.TokenType.BYTE, tokid, "<0x{:02X}>".format(tokb[0])
26652665
else:
26662666
return toktype, tokid, tokc
26672667

0 commit comments

Comments
 (0)