Azure
diff --git a/‎sdk/cosmos/azure-cosmos/CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎sdk/cosmos/azure-cosmos/CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py‎
Lines changed: 5 additions & 1 deletion b/‎sdk/cosmos/azure-cosmos/azure/cosmos/_constants.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py‎
Lines changed: 9 additions & 0 deletions b/‎sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎sdk/cosmos/azure-cosmos/azure/cosmos/_inference_auth_policy.py‎
Lines changed: 71 additions & 0 deletions b/‎sdk/cosmos/azure-cosmos/azure/cosmos/_inference_auth_policy.py‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎sdk/cosmos/azure-cosmos/azure/cosmos/_inference_service.py‎
Lines changed: 220 additions & 0 deletions b/‎sdk/cosmos/azure-cosmos/azure/cosmos/_inference_service.py‎
Lines changed: 220 additions & 0 deletions
@@ -4,6 +4,7 @@
 
 #### Features Added
 * Added ability to return a tuple of a DatabaseProxy/ContainerProxy with the associated database/container properties when creating or reading databases/containers through `return_properties` parameter. See [PR 41742](https://github.com/Azure/azure-sdk-for-python/pull/41742)
+* Added a new API for Semantic Reranking. See [PR 42991](https://github.com/Azure/azure-sdk-for-python/pull/42991)
 #### Breaking Changes
 
 #### Bugs Fixed
 
@@ -26,6 +26,8 @@
 from typing import Dict
 from typing_extensions import Literal
 
+# cspell:ignore reranker
+
 
 class _Constants:
     """Constants used in the azure-cosmos package"""
@@ -53,10 +55,12 @@ class _Constants:
     MAX_ITEM_BUFFER_VS_CONFIG_DEFAULT: int = 50000
     SESSION_TOKEN_FALSE_PROGRESS_MERGE_CONFIG: str = "AZURE_COSMOS_SESSION_TOKEN_FALSE_PROGRESS_MERGE"
     SESSION_TOKEN_FALSE_PROGRESS_MERGE_CONFIG_DEFAULT: str = "True"
-    CIRCUIT_BREAKER_ENABLED_CONFIG: str =  "AZURE_COSMOS_ENABLE_CIRCUIT_BREAKER"
+    CIRCUIT_BREAKER_ENABLED_CONFIG: str = "AZURE_COSMOS_ENABLE_CIRCUIT_BREAKER"
     CIRCUIT_BREAKER_ENABLED_CONFIG_DEFAULT: str = "False"
     AAD_SCOPE_OVERRIDE: str = "AZURE_COSMOS_AAD_SCOPE_OVERRIDE"
     AAD_DEFAULT_SCOPE: str = "https://cosmos.azure.com/.default"
+    # Default AAD scope requested when acquiring tokens for the inference service.
+    INFERENCE_SERVICE_DEFAULT_SCOPE: str = "https://dbinference.azure.com/.default"
+    # Name of the environment variable that holds the semantic reranker inference endpoint.
+    SEMANTIC_RERANKER_INFERENCE_ENDPOINT: str = "AZURE_COSMOS_SEMANTIC_RERANKER_INFERENCE_ENDPOINT"
 
     # Database Account Retry Policy constants
     AZURE_COSMOS_HEALTH_CHECK_MAX_RETRIES: str = "AZURE_COSMOS_HEALTH_CHECK_MAX_RETRIES"
 
@@ -71,6 +71,7 @@
 from ._request_object import RequestObject
 from ._retry_utility import ConnectionRetryPolicy
 from ._routing import routing_map_provider, routing_range
+from ._inference_service import _InferenceService
 from .documents import ConnectionPolicy, DatabaseAccount
 from .partition_key import (
     _Undefined,
@@ -236,6 +237,10 @@ def __init__( # pylint: disable=too-many-statements
             policies=policies
         )
 
+        self._inference_service: Optional[_InferenceService] = None
+        if self.aad_credentials:
+            self._inference_service = _InferenceService(self)
+
         # Query compatibility mode.
         # Allows to specify compatibility mode used by client when making query requests. Should be removed when
         # application/sql is no longer supported.
@@ -302,6 +307,10 @@ def _set_client_consistency_level(
         else:
             self.session = None
 
+    def _get_inference_service(self) -> Optional[_InferenceService]:
+        """Return the inference service attached to this client connection, if any.
+
+        :returns: The stored inference service instance, or None when none was created.
+        :rtype: Optional[_InferenceService]
+        """
+        inference_service = self._inference_service
+        return inference_service
+
     @property
     def Session(self) -> Optional[_session.Session]:
         """Gets the session object from the client.
 
@@ -0,0 +1,71 @@
+# The MIT License (MIT)
+# Copyright (c) 2014 Microsoft Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from typing import TypeVar, Any, MutableMapping, cast
+
+from azure.core.pipeline import PipelineRequest
+from azure.core.pipeline.policies import BearerTokenCredentialPolicy
+from azure.core.pipeline.transport import HttpRequest as LegacyHttpRequest
+from azure.core.rest import HttpRequest
+from azure.core.credentials import AccessToken
+
+HTTPRequestType = TypeVar("HTTPRequestType", HttpRequest, LegacyHttpRequest)
+
+
+class InferenceServiceBearerTokenPolicy(BearerTokenCredentialPolicy):
+    """Bearer-token authentication policy used for inference service requests.
+
+    After the parent policy has acquired (or refreshed) its token, this policy writes the
+    ``Authorization`` header in the plain ``Bearer <token>`` form.
+    """
+
+    @staticmethod
+    def _update_headers(headers: MutableMapping[str, str], token: str) -> None:
+        """Set the ``Authorization`` header to the plain bearer form of the given token.
+
+        :param MutableMapping[str, str] headers: The HTTP request headers to mutate in place.
+        :param str token: The OAuth token string.
+        """
+        bearer_value = f"Bearer {token}"
+        headers["Authorization"] = bearer_value
+
+    def _apply_cached_token(self, request: PipelineRequest[HTTPRequestType]) -> None:
+        """Write the token currently cached on this policy into the request's headers.
+
+        :param ~azure.core.pipeline.PipelineRequest request: the request
+        """
+        # The parent method that ran just before this one is relied on to have populated self._token.
+        cached = cast(AccessToken, self._token)
+        self._update_headers(request.http_request.headers, cached.token)
+
+    def on_request(self, request: PipelineRequest[HTTPRequestType]) -> None:
+        """Authorize the outgoing request before it is sent.
+
+        Delegates to the parent policy first, then rewrites the ``Authorization`` header
+        from the cached token.
+
+        :param ~azure.core.pipeline.PipelineRequest request: the request
+        """
+        super().on_request(request)
+        self._apply_cached_token(request)
+
+    def authorize_request(self, request: PipelineRequest[HTTPRequestType], *scopes: str, **kwargs: Any) -> None:
+        """Acquire a token for the given scopes and authorize the request with it.
+
+        Positional scopes and keyword arguments are forwarded unchanged to the parent
+        implementation; afterwards the ``Authorization`` header is rewritten from the cached token.
+
+        :param ~azure.core.pipeline.PipelineRequest request: the request
+        :param str scopes: required scopes of authentication
+        """
+        super().authorize_request(request, *scopes, **kwargs)
+        self._apply_cached_token(request)
@@ -0,0 +1,220 @@
+# The MIT License (MIT)
+# Copyright (c) 2014 Microsoft Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import json
+import os
+import urllib
+from typing import Any, cast, Dict, List, Optional
+from urllib3.util.retry import Retry
+
+from azure.core import PipelineClient
+from azure.core.exceptions import DecodeError
+from azure.core.pipeline.policies import (ContentDecodePolicy, CustomHookPolicy, DistributedTracingPolicy,
+                                          HeadersPolicy, HTTPPolicy, NetworkTraceLoggingPolicy, ProxyPolicy,
+                                          UserAgentPolicy)
+from azure.core.pipeline.transport import HttpRequest
+from azure.core.utils import CaseInsensitiveDict
+
+from . import exceptions
+from ._constants import _Constants as Constants
+from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy
+from ._cosmos_responses import CosmosDict
+from ._inference_auth_policy import InferenceServiceBearerTokenPolicy
+from ._retry_utility import ConnectionRetryPolicy
+from .http_constants import HttpHeaders
+
+
+# cspell:ignore rerank reranker reranking
+# pylint: disable=protected-access,line-too-long
+
+
+class _InferenceService:
+    """Internal client for the semantic reranking inference service.
+
+    Builds a dedicated azure-core pipeline whose credentials, retry configuration, proxy, user agent and
+    diagnostics-logging settings are taken from the owning client connection, and exposes ``rerank`` to
+    post reranking requests through that pipeline.
+    """
+
+    TOTAL_RETRIES = 3
+    RETRY_BACKOFF_MAX = 120  # seconds
+    RETRY_AFTER_STATUS_CODES = frozenset([429, 500])
+    RETRY_BACKOFF_FACTOR = 0.8
+    inference_service_default_scope = Constants.INFERENCE_SERVICE_DEFAULT_SCOPE
+    # Evaluated once, when this module is imported; __init__ re-reads the environment if this is unset.
+    semantic_reranking_inference_endpoint = os.environ.get(Constants.SEMANTIC_RERANKER_INFERENCE_ENDPOINT)
+    # Keys copied from ``semantic_reranking_options`` into the request body, in this order.
+    _RERANK_OPTION_KEYS = ("return_documents", "top_k", "batch_size", "sort")
+
+    def __init__(self, cosmos_client_connection):
+        """Initialize inference service with credentials and endpoint information.
+
+        Construction never fails because of a missing endpoint; that condition is reported by ``rerank``.
+
+        :param cosmos_client_connection: Client connection that supplies the AAD credentials, connection
+            policy, user agent and diagnostics-logging setting used by this service.
+        :type cosmos_client_connection: CosmosClientConnection
+        """
+        self._client_connection = cosmos_client_connection
+        self._aad_credentials = self._client_connection.aad_credentials
+        self._token_scope = self.inference_service_default_scope
+
+        # The class attribute is captured at import time, so also consult the environment now: a variable
+        # exported after import is then still honored.
+        endpoint = self.semantic_reranking_inference_endpoint or os.environ.get(
+            Constants.SEMANTIC_RERANKER_INFERENCE_ENDPOINT)
+        # Remember whether an endpoint exists so rerank() can fail with a clear message instead of
+        # sending a request to the literal URL "None/inference/semanticReranking".
+        self._endpoint_configured = bool(endpoint)
+        self._inference_endpoint = f"{endpoint}/inference/semanticReranking"
+        self._inference_pipeline_client = self._create_inference_pipeline_client()
+
+    def _create_inference_pipeline_client(self) -> PipelineClient:
+        """Create a pipeline for inference requests.
+
+        :returns: A PipelineClient configured for inference calls.
+        :rtype: ~azure.core.PipelineClient
+        :raises TypeError: If the connection policy's retry configuration is not a pipeline policy,
+            an int, or a urllib3 ``Retry``.
+        """
+        auth_policy = InferenceServiceBearerTokenPolicy(self._aad_credentials, self._token_scope)
+
+        connection_policy = self._client_connection.connection_policy
+        retry_config = connection_policy.ConnectionRetryConfiguration
+        if isinstance(retry_config, HTTPPolicy):
+            # Copy whichever retry settings the supplied policy exposes, falling back to this class's defaults.
+            retry_policy = ConnectionRetryPolicy(
+                retry_total=getattr(retry_config, 'retry_total', self.TOTAL_RETRIES),
+                retry_connect=getattr(retry_config, 'retry_connect', None),
+                retry_read=getattr(retry_config, 'retry_read', None),
+                retry_status=getattr(retry_config, 'retry_status', None),
+                retry_backoff_max=getattr(retry_config, 'retry_backoff_max', self.RETRY_BACKOFF_MAX),
+                retry_on_status_codes=getattr(retry_config, 'retry_on_status_codes',
+                                              self.RETRY_AFTER_STATUS_CODES),
+                retry_backoff_factor=getattr(retry_config, 'retry_backoff_factor', self.RETRY_BACKOFF_FACTOR)
+            )
+        elif isinstance(retry_config, int):
+            # The azure-core retry policy reads ``retry_total``; a ``total`` keyword would be ignored.
+            retry_policy = ConnectionRetryPolicy(retry_total=retry_config)
+        elif isinstance(retry_config, Retry):
+            # Convert a urllib3 retry policy to a Pipeline policy
+            # urllib3 leaves status_forcelist as None unless the caller sets it, so guard the list() call.
+            status_forcelist = retry_config.status_forcelist
+            retry_policy = ConnectionRetryPolicy(
+                retry_total=retry_config.total,
+                retry_connect=retry_config.connect,
+                retry_read=retry_config.read,
+                retry_status=retry_config.status,
+                retry_backoff_max=retry_config.DEFAULT_BACKOFF_MAX,
+                retry_on_status_codes=list(status_forcelist or []),
+                retry_backoff_factor=retry_config.backoff_factor
+            )
+        else:
+            raise TypeError(
+                "Unsupported retry policy. Must be an azure.cosmos.ConnectionRetryPolicy, int, or urllib3.Retry")
+
+        proxies = {}
+        proxy_config = connection_policy.ProxyConfiguration
+        if proxy_config and proxy_config.Host:
+            host = proxy_config.Host
+            url = urllib.parse.urlparse(host)
+            # Append the separately configured port only when the host string does not already carry one.
+            proxy = host if url.port else host + ":" + str(proxy_config.Port)
+            proxies.update({url.scheme: proxy})
+
+        self._user_agent: str = self._client_connection._user_agent
+        policies = [
+            HeadersPolicy(),
+            ProxyPolicy(proxies=proxies),
+            UserAgentPolicy(base_user_agent=self._user_agent),
+            ContentDecodePolicy(),
+            retry_policy,
+            auth_policy,
+            CustomHookPolicy(),
+            NetworkTraceLoggingPolicy(),
+            DistributedTracingPolicy(),
+            CosmosHttpLoggingPolicy(
+                enable_diagnostics_logging=self._client_connection._enable_diagnostics_logging,
+            ),
+        ]
+
+        return PipelineClient(
+            base_url=self._inference_endpoint,
+            policies=policies
+        )
+
+    def rerank(
+        self,
+        reranking_context: str,
+        documents: List[str],
+        semantic_reranking_options: Optional[Dict[str, Any]] = None,
+    ) -> CosmosDict:
+        """Rerank documents using the semantic reranking service.
+
+        :param reranking_context: Query / context string used to score documents.
+        :type reranking_context: str
+        :param documents: List of document strings to rerank.
+        :type documents: List[str]
+        :param semantic_reranking_options: Optional dictionary of tuning parameters. Supported keys:
+            * return_documents (bool): Include original document text in results. Default True.
+            * top_k (int): Limit number of scored documents returned.
+            * batch_size (int): Batch size for internal scoring operations.
+            * sort (bool): If True (default) results are ordered by descending score.
+            Any other keys are ignored.
+        :type semantic_reranking_options: Optional[Dict[str, Any]]
+        :returns: Reranking result payload.
+        :rtype: ~azure.cosmos.CosmosDict[str, Any]
+        :raises ~azure.cosmos.exceptions.CosmosHttpResponseError: On HTTP or service error, when the
+            inference endpoint is not configured, or when the response body cannot be decoded.
+        """
+        try:
+            if not self._endpoint_configured:
+                # Raised inside the try block so callers still receive CosmosHttpResponseError.
+                raise ValueError(
+                    "Semantic reranking inference endpoint is not configured. Set the "
+                    f"{Constants.SEMANTIC_RERANKER_INFERENCE_ENDPOINT} environment variable.")
+
+            body: Dict[str, Any] = {
+                "query": reranking_context,
+                "documents": documents,
+            }
+
+            # Forward only the recognized tuning options, and only when the caller supplied them.
+            options = semantic_reranking_options or {}
+            body.update({key: options[key] for key in self._RERANK_OPTION_KEYS if key in options})
+
+            headers = {
+                HttpHeaders.ContentType: "application/json"
+            }
+
+            request = HttpRequest(
+                method="POST",
+                url=self._inference_endpoint,
+                headers=headers,
+                data=json.dumps(body, separators=(",", ":"))
+            )
+
+            pipeline_response = self._inference_pipeline_client._pipeline.run(request)
+            response = pipeline_response.http_response
+            response_headers = cast(CaseInsensitiveDict, response.headers)
+
+            data = response.body()
+            if data:
+                data = data.decode("utf-8")
+
+            if response.status_code >= 400:
+                raise exceptions.CosmosHttpResponseError(message=data, response=response)
+
+            result = None
+            if data:
+                try:
+                    result = json.loads(data)
+                except Exception as e:
+                    raise DecodeError(
+                        message="Failed to decode JSON data: {}".format(e),
+                        response=response,
+                        error=e) from e
+
+            return CosmosDict(result, response_headers=response_headers)
+
+        except (exceptions.CosmosHttpResponseError, exceptions.CosmosResourceNotFoundError):
+            # Service errors already have the documented exception type; propagate them untouched.
+            raise
+        except Exception as e:
+            # Everything else (transport failures, decode errors, missing endpoint) is wrapped.
+            raise exceptions.CosmosHttpResponseError(
+                message=f"Semantic reranking failed: {str(e)}",
+                response=None
+            ) from e