diff --git a/singlestoredb/ai/chat.py b/singlestoredb/ai/chat.py
index 4878c96c..e9e55fca 100644
--- a/singlestoredb/ai/chat.py
+++ b/singlestoredb/ai/chat.py
@@ -58,11 +58,13 @@ def SingleStoreChatFactory(
         model_name=model_name,
         name='',
         connection_url=base_url,
+        internal_connection_url=base_url,
         project_id='',
         hosting_platform=hosting_platform,
     )
 
     if base_url is not None:
         info.connection_url = base_url
+        info.internal_connection_url = base_url
     if hosting_platform is not None:
         info.hosting_platform = hosting_platform
@@ -98,7 +100,7 @@ def SingleStoreChatFactory(
     cfg = Config(**cfg_kwargs)
     client = boto3.client(
         'bedrock-runtime',
-        endpoint_url=info.connection_url,
+        endpoint_url=info.internal_connection_url,
         region_name='us-east-1',
         aws_access_key_id='placeholder',
         aws_secret_access_key='placeholder',
@@ -138,7 +140,7 @@ def _inject_headers(request: Any, **_ignored: Any) -> None:
 
     return ChatBedrockConverse(
         model_id=model_name,
-        endpoint_url=info.connection_url,
+        endpoint_url=info.internal_connection_url,
         region_name='us-east-1',
         aws_access_key_id='placeholder',
         aws_secret_access_key='placeholder',
@@ -152,7 +154,7 @@ def _inject_headers(request: Any, **_ignored: Any) -> None:
     token = api_key if api_key is not None else token_env
 
     openai_kwargs = dict(
-        base_url=info.connection_url,
+        base_url=info.internal_connection_url,
         api_key=token,
         model=model_name,
         streaming=streaming,
diff --git a/singlestoredb/ai/embeddings.py b/singlestoredb/ai/embeddings.py
index fe23331c..092dd88b 100644
--- a/singlestoredb/ai/embeddings.py
+++ b/singlestoredb/ai/embeddings.py
@@ -57,11 +57,13 @@ def SingleStoreEmbeddingsFactory(
         model_name=model_name,
         name='',
         connection_url=base_url,
+        internal_connection_url=base_url,
         project_id='',
         hosting_platform=hosting_platform,
     )
 
     if base_url is not None:
         info.connection_url = base_url
+        info.internal_connection_url = base_url
     if hosting_platform is not None:
         info.hosting_platform = hosting_platform
@@ -97,7 +99,7 @@ def SingleStoreEmbeddingsFactory(
     cfg = Config(**cfg_kwargs)
     client = boto3.client(
         'bedrock-runtime',
-        endpoint_url=info.connection_url,
+        endpoint_url=info.internal_connection_url,
         region_name='us-east-1',
         aws_access_key_id='placeholder',
         aws_secret_access_key='placeholder',
@@ -129,7 +131,7 @@ def _inject_headers(request: Any, **_ignored: Any) -> None:
 
     return BedrockEmbeddings(
         model_id=model_name,
-        endpoint_url=info.connection_url,
+        endpoint_url=info.internal_connection_url,
         region_name='us-east-1',
         aws_access_key_id='placeholder',
         aws_secret_access_key='placeholder',
@@ -142,7 +144,7 @@ def _inject_headers(request: Any, **_ignored: Any) -> None:
     token = api_key if api_key is not None else token_env
 
     openai_kwargs = dict(
-        base_url=info.connection_url,
+        base_url=info.internal_connection_url,
         api_key=token,
         model=model_name,
     )
diff --git a/singlestoredb/management/inference_api.py b/singlestoredb/management/inference_api.py
index 404fa1c5..be9568ff 100644
--- a/singlestoredb/management/inference_api.py
+++ b/singlestoredb/management/inference_api.py
@@ -152,6 +152,7 @@ class InferenceAPIInfo(object):
     model_name: str
     name: str
     connection_url: str
+    internal_connection_url: str
     project_id: str
     hosting_platform: str
     _manager: Optional['InferenceAPIManager']
@@ -162,12 +163,14 @@ def __init__(
         model_name: str,
         name: str,
         connection_url: str,
+        internal_connection_url: str,
         project_id: str,
         hosting_platform: str,
         manager: Optional['InferenceAPIManager'] = None,
     ):
         self.service_id = service_id
         self.connection_url = connection_url
+        self.internal_connection_url = internal_connection_url
         self.model_name = model_name
         self.name = name
         self.project_id = project_id
@@ -198,6 +201,8 @@ def from_dict(
             model_name=obj['modelName'],
             name=obj['name'],
             connection_url=obj['connectionURL'],
+            # Fall back to the public URL for API responses that predate
+            # internalConnectionURL, so from_dict stays backward compatible.
+            internal_connection_url=obj.get('internalConnectionURL', obj['connectionURL']),
             hosting_platform=obj['hostingPlatform'],
         )
         return out