Commit 9b589f5

Add "api-key" HTTP request header when using AzureKeyCredential (Azure#37888)

1 parent: b75ca6c
File tree

11 files changed (+112, -36 lines)

sdk/ai/azure-ai-inference/CHANGELOG.md
Lines changed: 2 additions & 1 deletion

@@ -4,7 +4,8 @@
 
 ### Features Added
 
-* Support for tracing. Please find more information in the package [README.md](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md).
+* Support for OpenTelemetry tracing. Please find more information in the package [README.md](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md).
+* When constructing clients using input `credential` of type `AzureKeyCredential`, two HTTP request headers are sent simultaneously for authentication: `Authorization: Bearer <key>` and `api-key: <key>` (previously only the first one was sent). This supports different inference services, removing the need for the application to explicitly specify an additional HTTP request header.
 
 ### Breaking Changes
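The dual-header behavior described in the entry above can be sketched in a few lines. `build_auth_headers` is a hypothetical helper for illustration only, not part of the SDK; the two header names come from the changelog entry itself:

```python
def build_auth_headers(key: str) -> dict:
    # "Authorization: Bearer <key>" is set by the SDK's auto-generated code;
    # "api-key: <key>" is the header this commit adds. Both carry the same key.
    return {
        "Authorization": f"Bearer {key}",
        "api-key": key,
    }

print(build_auth_headers("example-key"))
```

Sending both headers means the same client construction works against endpoints that expect either scheme.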

sdk/ai/azure-ai-inference/README.md
Lines changed: 5 additions & 8 deletions

@@ -41,7 +41,7 @@ Studio.
 * An [OpenAI Model from the catalog](https://oai.azure.com/resource/models) deployed through Azure OpenAI Studio.
 * The endpoint URL of your model, in the form `https://<your-resource-name>.openai.azure.com/openai/deployments/<your-deployment-name>`, where `your-resource-name` is your globally unique AOAI resource name, and `your-deployment-name` is your AI Model deployment name.
 * Depending on your authentication preference, you either need an API key to authenticate against the service, or Entra ID credentials. The API key is a 32-character string.
-* An api-version. Latest preview or GA version listed in the `Data plane - inference` row in [the API Specs table](https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs). At the time of writing, the latest GA version was "2024-06-01".
+* An api-version. Latest preview or GA version listed in the `Data plane - inference` row in [the API Specs table](https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions). At the time of writing, the latest GA version was "2024-06-01".

### Install the package

@@ -60,11 +60,9 @@ pip install --upgrade azure-ai-inference
 If you want to install the Azure AI Inference package with support for OpenTelemetry based tracing, use the following command:

 ```bash
-pip install azure-ai-inference[trace]
+pip install azure-ai-inference[opentelemetry]
 ```

-
-
 ## Key concepts

 ### Create and authenticate a client directly, using API key or GitHub token

@@ -91,9 +89,8 @@ client = ChatCompletionsClient(
 # For Azure OpenAI endpoint
 client = ChatCompletionsClient(
     endpoint=endpoint,  # Of the form https://<your-resource-name>.openai.azure.com/openai/deployments/<your-deployment-name>
-    credential=AzureKeyCredential(""),  # Pass in an empty value.
-    headers={"api-key": key},
-    api_version="2024-06-01",  # AOAI api-version. Update as needed.
+    credential=AzureKeyCredential(key),
+    api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
 )
 ```

@@ -146,7 +143,7 @@ client = ChatCompletionsClient(
     endpoint=endpoint,
     credential=DefaultAzureCredential(exclude_interactive_browser_credential=False),
     credential_scopes=["https://cognitiveservices.azure.com/.default"],
-    api_version="2024-06-01",  # AOAI api-version. Update as needed.
+    api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
 )
 ```

sdk/ai/azure-ai-inference/assets.json
Lines changed: 1 addition & 1 deletion

@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/ai/azure-ai-inference",
-  "Tag": "python/ai/azure-ai-inference_19a0adafc6"
+  "Tag": "python/ai/azure-ai-inference_3934744053"
 }

sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py
Lines changed: 42 additions & 1 deletion

@@ -14,7 +14,8 @@
 4. Add support for get_model_info, while caching the result (all clients)
 5. Add support for chat completion streaming (ChatCompletionsClient client only)
 6. Add support for friendly print of result objects (__str__ method) (all clients)
-7. Add support for load() method in ImageUrl class (see /models/_patch.py).
+7. Add support for load() method in ImageUrl class (see /models/_patch.py)
+8. Add support for sending two auth headers for api-key auth (all clients)

 """
 import json

@@ -245,8 +246,22 @@ def __init__(
         self._model = model
         self._model_extras = model_extras

+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
         super().__init__(endpoint, credential, **kwargs)

+
     @overload
     def complete(
         self,

@@ -724,6 +739,19 @@ def __init__(
         self._model = model
         self._model_extras = model_extras

+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
         super().__init__(endpoint, credential, **kwargs)

     @overload

@@ -1007,6 +1035,19 @@ def __init__(
         self._model = model
         self._model_extras = model_extras

+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
         super().__init__(endpoint, credential, **kwargs)

     @overload
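The header-injection block that each client's `__init__` gains can be isolated as a standalone function. This is a self-contained sketch: `AzureKeyCredential` is stubbed here so the example runs without azure-core installed, and `inject_api_key_header` is an illustrative name, not an SDK API:

```python
class AzureKeyCredential:
    """Stand-in for azure.core.credentials.AzureKeyCredential, stubbed
    so this sketch runs without the azure-core package installed."""
    def __init__(self, key: str):
        self.key = key


def inject_api_key_header(credential, **kwargs) -> dict:
    # Mirrors the block added in each client's __init__: for key auth,
    # merge an "api-key" header into any caller-supplied headers, but
    # never overwrite a value the caller set explicitly.
    if isinstance(credential, AzureKeyCredential):
        headers = kwargs.pop("headers", {})
        if "api-key" not in headers:
            headers["api-key"] = credential.key
        kwargs["headers"] = headers
    return kwargs


kwargs = inject_api_key_header(AzureKeyCredential("secret"))
print(kwargs["headers"])  # {'api-key': 'secret'}
```

Popping and re-assigning `headers` on `kwargs` lets the merged dict flow into `super().__init__`, while non-key credentials (e.g. token credentials) pass through untouched.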

sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py
Lines changed: 39 additions & 0 deletions

@@ -229,6 +229,19 @@ def __init__(
         self._model = model
         self._model_extras = model_extras

+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
         super().__init__(endpoint, credential, **kwargs)

     @overload

@@ -707,6 +720,19 @@ def __init__(
         self._model = model
         self._model_extras = model_extras

+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
         super().__init__(endpoint, credential, **kwargs)

     @overload

@@ -990,6 +1016,19 @@ def __init__(
         self._model = model
         self._model_extras = model_extras

+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
         super().__init__(endpoint, credential, **kwargs)

     @overload

sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py
Lines changed: 4 additions & 5 deletions

@@ -14,7 +14,7 @@
     Entra ID authentication.
     2. Update `api_version` (the AOAI REST API version) as needed.
     See the "Data plane - inference" row in the table here for the latest AOAI api-version:
-    https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs
+    https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
     3. Set one or two environment variables, depending on your authentication method:
     * AZURE_OPENAI_CHAT_ENDPOINT - Your AOAI endpoint URL, with partial path, in the form
     https://<your-unique-resource-name>.openai.azure.com/openai/deployments/<your-deployment-name>

@@ -55,9 +55,8 @@ async def sample_chat_completions_streaming_azure_openai_async():

         client = ChatCompletionsClient(
             endpoint=endpoint,
-            credential=AzureKeyCredential(""),  # Pass in an empty value.
-            headers={"api-key": key},
-            api_version="2024-06-01",  # AOAI api-version. Update as needed.
+            credential=AzureKeyCredential(key),
+            api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
         )

     else:  # Entra ID authentication

@@ -67,7 +66,7 @@ async def sample_chat_completions_streaming_azure_openai_async():
             endpoint=endpoint,
             credential=DefaultAzureCredential(exclude_interactive_browser_credential=False),
             credential_scopes=["https://cognitiveservices.azure.com/.default"],
-            api_version="2024-06-01",  # AOAI api-version. Update as needed.
+            api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
         )

     response = await client.complete(

sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py
Lines changed: 4 additions & 5 deletions

@@ -14,7 +14,7 @@
     Entra ID authentication.
     2. Update `api_version` (the AOAI REST API version) as needed.
     See the "Data plane - inference" row in the table here for the latest AOAI api-version:
-    https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs
+    https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
    3. Set one or two environment variables, depending on your authentication method:
     * AZURE_OPENAI_CHAT_ENDPOINT - Your AOAI endpoint URL, with partial path, in the form
     https://<your-unique-resource-name>.openai.azure.com/openai/deployments/<your-deployment-name>

@@ -54,9 +54,8 @@ def sample_chat_completions_azure_openai():

         client = ChatCompletionsClient(
             endpoint=endpoint,
-            credential=AzureKeyCredential(""),  # Pass in an empty value.
-            headers={"api-key": key},
-            api_version="2024-06-01",  # AOAI api-version. Update as needed.
+            credential=AzureKeyCredential(key),
+            api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
         )

     else:  # Entra ID authentication

@@ -66,7 +65,7 @@ def sample_chat_completions_azure_openai():
             endpoint=endpoint,
             credential=DefaultAzureCredential(exclude_interactive_browser_credential=False),
             credential_scopes=["https://cognitiveservices.azure.com/.default"],
-            api_version="2024-06-01",  # AOAI api-version. Update as needed.
+            api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
         )

     response = client.complete(

sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py
Lines changed: 2 additions & 3 deletions

@@ -112,9 +112,8 @@ def get_flight_info(origin_city: str, destination_city: str):
     # Create a chat completions client for Azure OpenAI endpoint
     client = ChatCompletionsClient(
         endpoint=endpoint,
-        credential=AzureKeyCredential(""),  # Pass in an empty value
-        headers={"api-key": key},
-        api_version="2024-06-01",  # AOAI api-version. Update as needed.
+        credential=AzureKeyCredential(key),
+        api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
     )
 else:
     # Create a chat completions client for Serverless API endpoint or Managed Compute endpoint

sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py
Lines changed: 4 additions & 5 deletions

@@ -14,7 +14,7 @@
     Entra ID authentication.
     2. Update `api_version` (the AOAI REST API version) as needed.
     See the "Data plane - inference" row in the table here for the latest AOAI api-version:
-    https://learn.microsoft.com/azure/ai-services/openai/reference#api-specs
+    https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
     3. Set one or two environment variables, depending on your authentication method:
     * AZURE_OPENAI_EMBEDDINGS_ENDPOINT - Your AOAI endpoint URL, with partial path, in the form
     https://<your-unique-resource-name>.openai.azure.com/openai/deployments/<your-deployment-name>

@@ -53,9 +53,8 @@ def sample_embeddings_azure_openai():

         client = EmbeddingsClient(
             endpoint=endpoint,
-            credential=AzureKeyCredential(""),  # Pass in an empty value.
-            headers={"api-key": key},
-            api_version="2024-06-01",  # AOAI api-version. Update as needed.
+            credential=AzureKeyCredential(key),
+            api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
         )

     else:  # Entra ID authentication

@@ -65,7 +64,7 @@ def sample_embeddings_azure_openai():
             endpoint=endpoint,
             credential=DefaultAzureCredential(exclude_interactive_browser_credential=False),
             credential_scopes=["https://cognitiveservices.azure.com/.default"],
-            api_version="2024-06-01",  # AOAI api-version. Update as needed.
+            api_version="2024-06-01",  # Azure OpenAI api-version. See https://aka.ms/azsdk/azure-ai-inference/azure-openai-api-versions
         )

     response = client.embed(input=["first phrase", "second phrase", "third phrase"])

sdk/ai/azure-ai-inference/setup.py
Lines changed: 3 additions & 3 deletions

@@ -35,7 +35,7 @@
     license="MIT License",
     author="Microsoft Corporation",
     author_email="[email protected]",
-    url="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk",
+    url="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference",
     keywords="azure, azure sdk",
     classifiers=[
         "Development Status :: 4 - Beta",

@@ -68,7 +68,7 @@
         "typing-extensions>=4.6.0",
     ],
     python_requires=">=3.8",
-    extras_require={
-        'trace': ['azure-core-tracing-opentelemetry']
+    extras_require={
+        'opentelemetry': ['azure-core-tracing-opentelemetry']
     }
 )
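Renaming the extra from `trace` to `opentelemetry` changes the pip command users type, as the README hunk above shows. A small sketch of how an `extras_require` key maps to pip's bracket syntax (`install_command` is an illustrative helper, not a setuptools API):

```python
# The extras table from setup.py after this commit.
extras_require = {"opentelemetry": ["azure-core-tracing-opentelemetry"]}

def install_command(package: str, extra: str) -> str:
    # Quotes keep shells (e.g. zsh) from glob-expanding the square brackets.
    return f'pip install "{package}[{extra}]"'

for extra in extras_require:
    print(install_command("azure-ai-inference", extra))
# pip install "azure-ai-inference[opentelemetry]"
```

The extra's name is purely a label chosen by the package author, which is why it could be renamed here without touching the dependency it pulls in.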

0 commit comments