Skip to content

Commit 78b8015

Browse files
authored
[Bugfix] Relax tokenizer regex for mixtral to include 'tokenizer.model' (vllm-project#25964)
Signed-off-by: Bowen Bao <[email protected]>
1 parent 831b124 commit 78b8015

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

vllm/transformers_utils/tokenizers/mistral.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,15 +122,21 @@ def list_local_repo_files(repo_id: str, revision: Optional[str]) -> list[str]:
122122

123123

124124
def find_tokenizer_file(files: list[str]):
125+
# Accept both versioned (tokenizer.model.v3) and unversioned
126+
# (tokenizer.model) forms, plus tekken.json and tokenizer.mm.model
127+
# variants. Previous pattern only matched the versioned variants.
125128
file_pattern = re.compile(
126-
r"^tokenizer\.model\.v.*$|^tekken\.json$|^tokenizer\.mm\.model\.v.*$")
129+
r"^tokenizer\.model(\.v.*)?|tekken\.json|tokenizer\.mm\.model(\.v.*)?$"
130+
)
127131

128132
matched_files = [file for file in files if file_pattern.match(file)]
129133
if len(matched_files) > 1:
130-
raise OSError(
131-
f"Found {len(matched_files)} files matching the "
132-
f"pattern: `{file_pattern.pattern}`. Make sure only one Mistral "
133-
f"tokenizer is present in {files}.")
134+
logger.warning(
135+
"Multiple files matched pattern `%s`: %s. Using %s.",
136+
file_pattern.pattern,
137+
matched_files,
138+
matched_files[0],
139+
)
134140
elif len(matched_files) == 0:
135141
raise OSError(
136142
f"Found {len(matched_files)} files matching the "

0 commit comments

Comments
 (0)