1 file changed: +11 -9 lines changed
Original file line number | Diff line number | Diff line change
@@ -135,15 +135,17 @@ def __init__(
135
135
self .unspecial_piece_to_id = {}
136
136
137
137
tokenizer_json_path = os .path .join (self .config .model_dir , "tokenizer.json" )
138
- if os .path .exists (tokenizer_json_path ):
139
- with open (tokenizer_json_path , encoding = "utf8" ) as f :
140
- tokenizer_json = json .load (f )
141
- if "added_tokens" in tokenizer_json :
142
- for v in tokenizer_json ["added_tokens" ]:
143
- if v ["special" ]:
144
- self .extended_piece_to_id [v ["content" ]] = v ["id" ]
145
- else :
146
- self .unspecial_piece_to_id [v ["content" ]] = v ["id" ]
138
+ if not os .path .exists (tokenizer_json_path ):
139
+ raise ValueError (" ## Model does not include a tokenizer.json file. SentencePiece-only tokenizers are no longer supported" )
140
+
141
+ with open (tokenizer_json_path , encoding = "utf8" ) as f :
142
+ tokenizer_json = json .load (f )
143
+ if "added_tokens" in tokenizer_json :
144
+ for v in tokenizer_json ["added_tokens" ]:
145
+ if v ["special" ]:
146
+ self .extended_piece_to_id [v ["content" ]] = v ["id" ]
147
+ else :
148
+ self .unspecial_piece_to_id [v ["content" ]] = v ["id" ]
147
149
148
150
# Attempt to load tokenizer_config.json
149
151
You can’t perform that action at this time.
0 commit comments