Commit b20bd26: "tokenization working"
1 parent 34cc679

1 file changed: convert_hf_to_gguf.py (8 additions, 6 deletions)

@@ -346,6 +346,8 @@ def prepare_tensors(self):
                     data_qtype = gguf.GGMLQuantizationType.BF16
                 elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0:
                     data_qtype = gguf.GGMLQuantizationType.Q8_0
+                elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_0:
+                    data_qtype = gguf.GGMLQuantizationType.Q4_0
                 elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ1_0:
                     data_qtype = gguf.GGMLQuantizationType.TQ1_0
                 elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ2_0:
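
For context on what the new branch selects: ggml's Q4_0 packs weights in blocks of 32, each block carrying one scale d and 32 4-bit codes, dequantized as roughly d * (q - 8). A simplified NumPy round-trip sketch (a toy illustration, not the ggml kernel, which derives d from the signed max of the block):

    import numpy as np

    def q4_0_roundtrip(block: np.ndarray) -> np.ndarray:
        # 32 float weights share one scale; each is stored as a 4-bit code.
        assert block.shape == (32,)
        amax = float(np.abs(block).max())
        d = amax / 8 if amax > 0 else 1.0            # per-block scale
        q = np.clip(np.round(block / d) + 8, 0, 15)  # codes in [0, 15]
        return d * (q - 8)                           # dequantized weights

    w = np.linspace(-1.0, 1.0, 32)
    print(np.abs(q4_0_roundtrip(w) - w).max())       # small reconstruction error
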
@@ -6419,7 +6421,7 @@ def set_vocab(self):
         vocab = {}
         mergeable_ranks = tokenizer.mergeable_ranks
         for token, rank in mergeable_ranks.items():
-            #vocab[QwenModel.token_bytes_to_string(token)] = rank
+            vocab[QwenModel.token_bytes_to_string(token)] = rank
             if len(token) == 1:
                 continue
             merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
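
The uncommented line relies on tiktoken-style mergeable_ranks, which map raw byte sequences to BPE ranks. QwenModel.token_bytes_to_string turns those bytes into the printable stand-in alphabet that GPT-2-style vocabularies use for dict keys; a minimal sketch of the idea, assuming the GPT-2 helper from transformers (not copied from this repo):

    from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode

    def token_bytes_to_string(b: bytes) -> str:
        # Each raw byte maps to a printable unicode character, so arbitrary
        # byte sequences become lossless string keys.
        byte_encoder = bytes_to_unicode()  # dict: byte value -> unicode char
        return "".join(byte_encoder[x] for x in b)

    print(token_bytes_to_string(b"hello"))      # 'hello'
    print(token_bytes_to_string("é".encode()))  # 'Ã©' (the two UTF-8 bytes)
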
@@ -6428,9 +6430,8 @@ def set_vocab(self):
 
         # 3. Generate the tokens and toktypes lists
         vocab_size = self.hparams["vocab_size"]
-        special_token_ids = set(tokenizer.special_tokens.values())
-        reverse_vocab = tokenizer.decoder
-        #reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_token_ids}.items()}
+        special_tokens = tokenizer.special_tokens
+        reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
         tokens: list[str] = []
         toktypes: list[int] = []
         for i in range(vocab_size):
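
The rewritten reverse_vocab merges the regular BPE vocab with the special tokens (both map token string -> id) and inverts the result to id -> token, which is what the loop below indexes by position. On toy data:

    vocab = {"hello": 0, "world": 1}
    special_tokens = {"<|endoftext|>": 2}

    reverse_vocab = {id_: tok for tok, id_ in {**vocab, **special_tokens}.items()}
    assert reverse_vocab == {0: "hello", 1: "world", 2: "<|endoftext|>"}

If the same token string occurred in both dicts, the {**vocab, **special_tokens} merge would keep the special_tokens id, since later keys win.
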
@@ -6440,7 +6441,7 @@ def set_vocab(self):
             else:
                 token = reverse_vocab[i]
                 tokens.append(token)
-                if i in special_token_ids:
+                if i in special_tokens.values():
                     toktypes.append(gguf.TokenType.CONTROL)
                 else:
                     toktypes.append(gguf.TokenType.NORMAL)
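
One side effect of dropping the precomputed set: `i in special_tokens.values()` scans the values on every iteration, making the loop O(vocab_size * n_special) instead of O(vocab_size). Harmless at conversion time, but hoisting the ids back into a set restores constant-time lookups; a minimal sketch with illustrative ids:

    special_tokens = {"<|endoftext|>": 151643, "<|im_start|>": 151644}
    special_ids = set(special_tokens.values())  # build once, before the loop

    for i in (151642, 151643):
        print(i, "CONTROL" if i in special_ids else "NORMAL")
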
@@ -6614,7 +6615,7 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "q4_0", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
         help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
     )
     parser.add_argument(
@@ -6746,6 +6747,7 @@ def main() -> None:
         "f32": gguf.LlamaFileType.ALL_F32,
         "f16": gguf.LlamaFileType.MOSTLY_F16,
         "bf16": gguf.LlamaFileType.MOSTLY_BF16,
+        "q4_0": gguf.LlamaFileType.MOSTLY_Q4_0,
         "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
         "tq1_0": gguf.LlamaFileType.MOSTLY_TQ1_0,
         "tq2_0": gguf.LlamaFileType.MOSTLY_TQ2_0,
