55from collections .abc import AsyncIterator
66
77import pipmaster as pm
8+ import tiktoken
89
910# install specific modules
1011if not pm .is_installed ("openai" ):
@@ -74,6 +75,30 @@ class InvalidResponseError(Exception):
7475 pass
7576
7677
# Module-level cache for tiktoken encodings
_TIKTOKEN_ENCODING_CACHE: dict[str, Any] = {}


def _get_tiktoken_encoding_for_model(model: str) -> Any:
    """Return the cached tiktoken encoding for *model*, computing it on first use.

    Unknown model names fall back to the ``cl100k_base`` encoding so token
    counting never fails outright.

    Args:
        model: The model name to get encoding for.

    Returns:
        The tiktoken encoding for the model.
    """
    # Fast path: encoding already resolved for this model.
    try:
        return _TIKTOKEN_ENCODING_CACHE[model]
    except KeyError:
        pass

    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken has no mapping for this model name; use the common default.
        logger.debug(
            f"Encoding for model '{model}' not found, falling back to cl100k_base"
        )
        encoding = tiktoken.get_encoding("cl100k_base")

    _TIKTOKEN_ENCODING_CACHE[model] = encoding
    return encoding
100+
101+
77102def create_openai_async_client (
78103 api_key : str | None = None ,
79104 base_url : str | None = None ,
@@ -695,15 +720,17 @@ async def openai_embed(
695720 base_url : str | None = None ,
696721 api_key : str | None = None ,
697722 embedding_dim : int | None = None ,
723+ max_token_size : int | None = None ,
698724 client_configs : dict [str , Any ] | None = None ,
699725 token_tracker : Any | None = None ,
700726 use_azure : bool = False ,
701727 azure_deployment : str | None = None ,
702728 api_version : str | None = None ,
703729) -> np .ndarray :
704- """Generate embeddings for a list of texts using OpenAI's API.
730+ """Generate embeddings for a list of texts using OpenAI's API with automatic text truncation .
705731
706- This function supports both standard OpenAI and Azure OpenAI services.
732+ This function supports both standard OpenAI and Azure OpenAI services. It automatically
733+ truncates texts that exceed the model's token limit to prevent API errors.
707734
708735 Args:
709736 texts: List of texts to embed.
@@ -719,6 +746,10 @@ async def openai_embed(
719746 The dimension is controlled by the @wrap_embedding_func_with_attrs decorator.
720747 Manually passing a different value will trigger a warning and be ignored.
721748 When provided (by EmbeddingFunc), it will be passed to the OpenAI API for dimension reduction.
749+ max_token_size: Maximum tokens per text. Texts exceeding this limit will be truncated.
750+ **IMPORTANT**: This parameter is automatically injected by the EmbeddingFunc wrapper
751+ when the underlying function signature supports it (via inspect.signature check).
752+ The value is controlled by the @wrap_embedding_func_with_attrs decorator.
722753 client_configs: Additional configuration options for the AsyncOpenAI/AsyncAzureOpenAI client.
723754 These will override any default configurations but will be overridden by
724755 explicit parameters (api_key, base_url). Supports proxy configuration,
@@ -740,6 +771,35 @@ async def openai_embed(
740771 RateLimitError: If the OpenAI API rate limit is exceeded.
741772 APITimeoutError: If the OpenAI API request times out.
742773 """
774+ # Apply text truncation if max_token_size is provided
775+ if max_token_size is not None and max_token_size > 0 :
776+ encoding = _get_tiktoken_encoding_for_model (model )
777+ truncated_texts = []
778+ truncation_count = 0
779+
780+ for text in texts :
781+ if not text :
782+ truncated_texts .append (text )
783+ continue
784+
785+ tokens = encoding .encode (text )
786+ if len (tokens ) > max_token_size :
787+ truncated_tokens = tokens [:max_token_size ]
788+ truncated_texts .append (encoding .decode (truncated_tokens ))
789+ truncation_count += 1
790+ logger .debug (
791+ f"Text truncated from { len (tokens )} to { max_token_size } tokens"
792+ )
793+ else :
794+ truncated_texts .append (text )
795+
796+ if truncation_count > 0 :
797+ logger .info (
798+ f"Truncated { truncation_count } /{ len (texts )} texts to fit token limit ({ max_token_size } )"
799+ )
800+
801+ texts = truncated_texts
802+
743803 # Create the OpenAI client (supports both OpenAI and Azure)
744804 openai_async_client = create_openai_async_client (
745805 api_key = api_key ,
0 commit comments