Skip to content

Commit 8c6111f

Browse files
committed
fix: handle KeyError in token encoding mapping with fallback
1 parent f7a75b1 commit 8c6111f

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

modules/helpers.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,14 @@ def num_tokens_from_string(string: str, model: str = "gpt-4o-mini") -> int:
218218
219219
See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
220220
"""
221-
encoding_name = tiktoken.encoding_name_for_model(model_name=model)
221+
222+
try:
223+
encoding_name = tiktoken.encoding_name_for_model(model_name=model)
224+
except KeyError as e:
225+
logging.error("Couldn't map %s to tokenizer: %s", model, str(e))
226+
# workaround until https://github.com/openai/tiktoken/issues/395 is fixed
227+
encoding_name = "o200k_base"
228+
222229
encoding = tiktoken.get_encoding(encoding_name)
223230
return len(encoding.encode(string))
224231

0 commit comments

Comments (0)