Skip to content

Commit 958eea6

Browse files
authored
remove dead padding code that got readded in merge
1 parent 5030d48 commit 958eea6

File tree

1 file changed

+0
-8
lines changed

1 file changed

+0
-8
lines changed

convert_hf_to_gguf.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3870,14 +3870,6 @@ def _xlmroberta_set_vocab(self) -> None:
3870 3870
scores[token_id] = score
3871 3871
toktypes[token_id] = toktype
3872 3872

3873-
if vocab_size > len(tokens):
3874-
pad_count = vocab_size - len(tokens)
3875-
logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
3876-
for i in range(1, pad_count + 1):
3877-
tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
3878-
scores.append(-1000.0)
3879-
toktypes.append(SentencePieceTokenTypes.UNUSED)
3880-
3881 3873
if isinstance(tokenizer, SentencePieceProcessor):
3882 3874
# realign tokens (see HF tokenizer code)
3883 3875
tokens = [b'<s>', b'<pad>', b'</s>', b'<unk>'] + tokens[3:-1]

0 commit comments

Comments
 (0)