Skip to content

Commit 97e4fd9

Browse files
committed
Fail if tokenizer.json not found
1 parent a811641 commit 97e4fd9

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

exllamav2/tokenizer/tokenizer.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,17 @@ def __init__(
135135
self.unspecial_piece_to_id = {}
136136

137137
tokenizer_json_path = os.path.join(self.config.model_dir, "tokenizer.json")
138-
if os.path.exists(tokenizer_json_path):
139-
with open(tokenizer_json_path, encoding = "utf8") as f:
140-
tokenizer_json = json.load(f)
141-
if "added_tokens" in tokenizer_json:
142-
for v in tokenizer_json["added_tokens"]:
143-
if v["special"]:
144-
self.extended_piece_to_id[v["content"]] = v["id"]
145-
else:
146-
self.unspecial_piece_to_id[v["content"]] = v["id"]
138+
if not os.path.exists(tokenizer_json_path):
139+
raise ValueError(" ## Model does not include a tokenizer.json file. SentencePiece-only tokenizers are no longer supported")
140+
141+
with open(tokenizer_json_path, encoding = "utf8") as f:
142+
tokenizer_json = json.load(f)
143+
if "added_tokens" in tokenizer_json:
144+
for v in tokenizer_json["added_tokens"]:
145+
if v["special"]:
146+
self.extended_piece_to_id[v["content"]] = v["id"]
147+
else:
148+
self.unspecial_piece_to_id[v["content"]] = v["id"]
147149

148150
# Attempt to load tokenizer_config.json
149151

0 commit comments

Comments (0)