
Commit d7db35d

Python: Fix Azure AI Inference connector model_extras duplication (#13066)
### Motivation and Context

Addresses #13005

### Description

1. Remove the `extra_parameters` key from `AzureAIInferenceChatPromptExecutionSettings` before calling the chat completion API, to avoid an unknown-parameter error (see the usage sketch below).
2. Allow setting the API version on the `AzureAIInferenceXXX` connectors.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
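For readers skimming the diff, here is a minimal usage sketch of the two changes described above. The connector and settings classes follow the API shown in the diffs below; the endpoint, key, and model id are placeholders, not values from this commit.

# Minimal usage sketch of the two fixes; endpoint/api_key/model id are placeholders.
import asyncio

from semantic_kernel.connectors.ai.azure_ai_inference import (
    AzureAIInferenceChatCompletion,
    AzureAIInferenceChatPromptExecutionSettings,
)
from semantic_kernel.contents import ChatHistory


async def main() -> None:
    # Change 2: the API version can now be passed explicitly
    # (or supplied via the AZURE_AI_INFERENCE_API_VERSION environment variable).
    service = AzureAIInferenceChatCompletion(
        ai_model_id="<model-deployment-name>",
        endpoint="<your-azure-ai-inference-endpoint>",
        api_key="<your-api-key>",
        api_version="2024-10-21",
    )

    # Change 1: extra_parameters are forwarded once, via `model_extras`; the connector
    # now strips the key from the regular settings dict, so the service no longer
    # rejects it as an unknown parameter.
    settings = AzureAIInferenceChatPromptExecutionSettings(
        extra_parameters={"reasoning_effort": "medium"},
    )

    history = ChatHistory()
    history.add_user_message("Why is the sky blue, in one sentence?")
    response = await service.get_chat_message_content(chat_history=history, settings=settings)
    print(response)


if __name__ == "__main__":
    asyncio.run(main())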
1 parent 06d6494 commit d7db35d

File tree: 8 files changed (+130, -3 lines)
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
# Copyright (c) Microsoft. All rights reserved.

import asyncio

from semantic_kernel.connectors.ai.azure_ai_inference import (
    AzureAIInferenceChatCompletion,
    AzureAIInferenceChatPromptExecutionSettings,
)
from semantic_kernel.contents import ChatHistory

"""
This sample demonstrates how to use reasoning models with the Azure AI Inference service.
"""

chat_service = AzureAIInferenceChatCompletion(
    ai_model_id="gpt-5-mini",
    # You must specify the endpoint and api_key or configure them via environment variables:
    # AZURE_AI_INFERENCE_ENDPOINT
    # AZURE_AI_INFERENCE_API_KEY
    endpoint="...",
    api_key="...",
)
request_settings = AzureAIInferenceChatPromptExecutionSettings(
    extra_parameters={
        "reasoning_effort": "medium",
        "verbosity": "medium",
    },
)

# Create a ChatHistory object
chat_history = ChatHistory()

# This is the developer message that gives the chatbot its personality.
developer_message = """
As an assistant supporting the user,
you recognize all user input
as questions or consultations and answer them.
"""
# The developer message was newly introduced for reasoning models such as OpenAI’s o1 and o1-mini.
# A `system` message cannot be used with reasoning models.
chat_history.add_developer_message(developer_message)


async def chat() -> bool:
    try:
        user_input = input("User:> ")
    except KeyboardInterrupt:
        print("\n\nExiting chat...")
        return False
    except EOFError:
        print("\n\nExiting chat...")
        return False

    if user_input == "exit":
        print("\n\nExiting chat...")
        return False

    chat_history.add_user_message(user_input)

    # Get the chat message content from the chat completion service.
    response = await chat_service.get_chat_message_content(
        chat_history=chat_history,
        settings=request_settings,
    )
    if response:
        print(f"Reasoning model:> {response}")

        # Add the chat message to the chat history to keep track of the conversation.
        chat_history.add_message(response)

    return True


async def main() -> None:
    # Start the chat loop. The chat loop will continue until the user types "exit".
    chatting = True
    while chatting:
        chatting = await chat()


# Sample output:
# User:> Why is the sky blue in one sentence?
# Reasoning model:> The sky appears blue because air molecules in the atmosphere scatter shorter-wavelength (blue)
# light more efficiently than longer-wavelength (red) light.


if __name__ == "__main__":
    asyncio.run(main())

python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_settings.py

Lines changed: 4 additions & 0 deletions
@@ -4,6 +4,7 @@

 from pydantic import SecretStr

+from semantic_kernel.connectors.ai.open_ai.const import DEFAULT_AZURE_API_VERSION
 from semantic_kernel.kernel_pydantic import HttpsUrl, KernelBaseSettings
 from semantic_kernel.utils.feature_stage_decorator import experimental

@@ -29,9 +30,12 @@ class AzureAIInferenceSettings(KernelBaseSettings):
         This value can be found in the Keys & Endpoint section when examining
         your resource from the Azure portal. You can use either KEY1 or KEY2.
         (Env var AZURE_AI_INFERENCE_API_KEY)
+    - api_version: str | None - The API version to use. The default value is "2024-10-21".
+        (Env var AZURE_AI_INFERENCE_API_VERSION)
     """

     env_prefix: ClassVar[str] = "AZURE_AI_INFERENCE_"

     endpoint: HttpsUrl
     api_key: SecretStr | None = None
+    api_version: str = DEFAULT_AZURE_API_VERSION
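For context, a small sketch (not part of this commit) of how the new setting can be supplied through the environment instead of the constructor; the endpoint and key values are placeholders.

# Environment-based configuration sketch; endpoint/key values are placeholders.
import os

from semantic_kernel.connectors.ai.azure_ai_inference import AzureAIInferenceChatCompletion

os.environ["AZURE_AI_INFERENCE_ENDPOINT"] = "<your-azure-ai-inference-endpoint>"
os.environ["AZURE_AI_INFERENCE_API_KEY"] = "<your-api-key>"
os.environ["AZURE_AI_INFERENCE_API_VERSION"] = "2024-10-21"  # optional; defaults to DEFAULT_AZURE_API_VERSION

# AzureAIInferenceSettings reads the AZURE_AI_INFERENCE_* variables (see env_prefix above),
# so no api_version argument is needed on the connector.
service = AzureAIInferenceChatCompletion(ai_model_id="<model-deployment-name>")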

python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py

Lines changed: 6 additions & 0 deletions
@@ -47,6 +47,7 @@ def __init__(
         client_type: AzureAIInferenceClientType,
         api_key: str | None = None,
         endpoint: str | None = None,
+        api_version: str | None = None,
         env_file_path: str | None = None,
         env_file_encoding: str | None = None,
         client: ChatCompletionsClient | EmbeddingsClient | None = None,
@@ -60,11 +61,13 @@
         The following environment variables are used:
         - AZURE_AI_INFERENCE_API_KEY
         - AZURE_AI_INFERENCE_ENDPOINT
+        - AZURE_AI_INFERENCE_API_VERSION

         Args:
             client_type (AzureAIInferenceClientType): The client type to use.
             api_key (str | None): The API key for the Azure AI Inference service deployment. (Optional)
             endpoint (str | None): The endpoint of the Azure AI Inference service deployment. (Optional)
+            api_version (str | None): The API version to use. (Optional)
             env_file_path (str | None): The path to the environment file. (Optional)
             env_file_encoding (str | None): The encoding of the environment file. (Optional)
             client (ChatCompletionsClient | None): The Azure AI Inference client to use. (Optional)
@@ -81,6 +84,7 @@
         azure_ai_inference_settings = AzureAIInferenceSettings(
             api_key=api_key,
             endpoint=endpoint,
+            api_version=api_version,
             env_file_path=env_file_path,
             env_file_encoding=env_file_encoding,
         )
@@ -93,6 +97,7 @@
                 endpoint=endpoint,
                 credential=AzureKeyCredential(azure_ai_inference_settings.api_key.get_secret_value()),
                 user_agent=SEMANTIC_KERNEL_USER_AGENT,
+                api_version=azure_ai_inference_settings.api_version,
             )
         else:
             if credential is None:
@@ -102,6 +107,7 @@
                 endpoint=endpoint,
                 credential=credential,
                 user_agent=SEMANTIC_KERNEL_USER_AGENT,
+                api_version=azure_ai_inference_settings.api_version,
             )

         args: dict[str, Any] = {

python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py

Lines changed: 10 additions & 0 deletions
@@ -66,6 +66,7 @@ def __init__(
         ai_model_id: str,
         api_key: str | None = None,
         endpoint: str | None = None,
+        api_version: str | None = None,
         service_id: str | None = None,
         env_file_path: str | None = None,
         env_file_encoding: str | None = None,
@@ -78,11 +79,13 @@
         The following environment variables are used:
         - AZURE_AI_INFERENCE_API_KEY
         - AZURE_AI_INFERENCE_ENDPOINT
+        - AZURE_AI_INFERENCE_API_VERSION

         Args:
             ai_model_id: (str): A string that is used to identify the model such as the model name. (Required)
             api_key (str | None): The API key for the Azure AI Inference service deployment. (Optional)
             endpoint (str | None): The endpoint of the Azure AI Inference service deployment. (Optional)
+            api_version (str | None): The API version to use. (Optional)
             service_id (str | None): Service ID for the chat completion service. (Optional)
             env_file_path (str | None): The path to the environment file. (Optional)
             env_file_encoding (str | None): The encoding of the environment file. (Optional)
@@ -99,6 +102,7 @@ def __init__(
             "client_type": AzureAIInferenceClientType.ChatCompletions,
             "client": client,
             "endpoint": endpoint,
+            "api_version": api_version,
             "env_file_path": env_file_path,
             "env_file_encoding": env_file_encoding,
         }
@@ -139,6 +143,9 @@ async def _inner_get_chat_message_contents(
         assert isinstance(self.client, ChatCompletionsClient)  # nosec
         with AzureAIInferenceTracing():
             settings_dict = settings.prepare_settings_dict()
+            # Remove the extra parameters since it will be passed in via the `model_extras` param
+            settings_dict.pop("extra_parameters", None)
+
             self._handle_structured_output(settings, settings_dict)
             response: ChatCompletions = await self.client.complete(
                 messages=self._prepare_chat_history_for_request(chat_history),
@@ -165,6 +172,9 @@ async def _inner_get_streaming_chat_message_contents(
         assert isinstance(self.client, ChatCompletionsClient)  # nosec
         with AzureAIInferenceTracing():
             settings_dict = settings.prepare_settings_dict()
+            # Remove the extra parameters since it will be passed in via the `model_extras` param
+            settings_dict.pop("extra_parameters", None)
+
             self._handle_structured_output(settings, settings_dict)
             response: AsyncStreamingChatCompletions = await self.client.complete(
                 stream=True,
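To make the duplication concrete: before this change, `extra_parameters` stayed inside the prepared settings dict and was also passed as `model_extras`, so the same key reached the client call twice. A self-contained toy illustration of the pattern follows; the `complete` function here is a hypothetical stand-in for the SDK client, and the setting values are made up.

# Toy illustration of the duplication the pop above avoids; `complete` is a stand-in, not the real SDK call.
def complete(*, messages, model=None, model_extras=None, **kwargs):
    if "extra_parameters" in kwargs:
        # Mirrors the service rejecting the duplicated/unknown parameter.
        raise TypeError("complete() got an unexpected keyword argument 'extra_parameters'")
    return {"messages": messages, "model": model, "model_extras": model_extras, **kwargs}


settings_dict = {"temperature": 0.7, "extra_parameters": {"reasoning_effort": "medium"}}
extras = settings_dict.get("extra_parameters")

# The fix: drop the key so the values are forwarded only once, via model_extras.
settings_dict.pop("extra_parameters", None)

result = complete(messages=["Why is the sky blue?"], model="<model-id>", model_extras=extras, **settings_dict)
print(result["model_extras"])  # {'reasoning_effort': 'medium'}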

python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_text_embedding.py

Lines changed: 4 additions & 0 deletions
@@ -35,6 +35,7 @@ def __init__(
         ai_model_id: str,
         api_key: str | None = None,
         endpoint: str | None = None,
+        api_version: str | None = None,
         service_id: str | None = None,
         env_file_path: str | None = None,
         env_file_encoding: str | None = None,
@@ -46,11 +47,13 @@
         The following environment variables are used:
         - AZURE_AI_INFERENCE_API_KEY
         - AZURE_AI_INFERENCE_ENDPOINT
+        - AZURE_AI_INFERENCE_API_VERSION

         Args:
             ai_model_id: (str): A string that is used to identify the model such as the model name. (Required)
             api_key (str | None): The API key for the Azure AI Inference service deployment. (Optional)
             endpoint (str | None): The endpoint of the Azure AI Inference service deployment. (Optional)
+            api_version (str | None): The API version to use. (Optional)
             service_id (str | None): Service ID for the chat completion service. (Optional)
             env_file_path (str | None): The path to the environment file. (Optional)
             env_file_encoding (str | None): The encoding of the environment file. (Optional)
@@ -65,6 +68,7 @@
             client_type=AzureAIInferenceClientType.Embeddings,
             api_key=api_key,
             endpoint=endpoint,
+            api_version=api_version,
             env_file_path=env_file_path,
             env_file_encoding=env_file_encoding,
             client=client,

python/semantic_kernel/connectors/ai/open_ai/settings/azure_open_ai_settings.py

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ class AzureOpenAISettings(KernelBaseSettings):
         your resource from the Azure portal, the endpoint should end in openai.azure.com.
         If both base_url and endpoint are supplied, base_url will be used.
         (Env var AZURE_OPENAI_ENDPOINT)
-    - api_version: str | None - The API version to use. The default value is "2024-02-01".
+    - api_version: str | None - The API version to use. The default value is "2024-10-21".
         (Env var AZURE_OPENAI_API_VERSION)
     - token_endpoint: str - The token endpoint to use to retrieve the authentication token.
         The default value is "https://cognitiveservices.azure.com/.default".

python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py

Lines changed: 9 additions & 2 deletions
@@ -66,6 +66,15 @@ def test_azure_ai_inference_chat_completion_init_with_service_id(
     assert isinstance(azure_ai_inference.client, ChatCompletionsClient)


+def test_azure_ai_inference_chat_completion_init_with_api_version(azure_ai_inference_unit_test_env, model_id) -> None:
+    """Test initialization of AzureAIInferenceChatCompletion with api_version"""
+    azure_ai_inference = AzureAIInferenceChatCompletion(model_id, api_version="2024-02-15-test")
+
+    assert azure_ai_inference.ai_model_id == model_id
+    assert isinstance(azure_ai_inference.client, ChatCompletionsClient)
+    assert azure_ai_inference.client._config.api_version == "2024-02-15-test"
+
+
 @pytest.mark.parametrize(
     "azure_ai_inference_client",
     [AzureAIInferenceChatCompletion.__name__],
@@ -210,7 +219,6 @@ async def test_azure_ai_inference_chat_completion_with_extra_parameters(
         messages=[UserMessage(content=user_message_content)],
         model=model_id,
         model_extras=settings.extra_parameters,
-        **settings.prepare_settings_dict(),
     )
     assert len(responses) == 1
     assert responses[0].role == "assistant"
@@ -506,7 +514,6 @@ async def test_azure_ai_inference_streaming_chat_completion_with_extra_parameter
         messages=[UserMessage(content=user_message_content)],
         model=model_id,
         model_extras=settings.extra_parameters,
-        **settings.prepare_settings_dict(),
     )


python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_text_embedding.py

Lines changed: 9 additions & 0 deletions
@@ -52,6 +52,15 @@ def test_azure_ai_inference_text_embedding_init_with_service_id(
     assert isinstance(azure_ai_inference.client, EmbeddingsClient)


+def test_azure_ai_inference_text_embedding_init_with_api_version(azure_ai_inference_unit_test_env, model_id) -> None:
+    """Test initialization of AzureAIInferenceTextEmbedding with api_version"""
+    azure_ai_inference = AzureAIInferenceTextEmbedding(model_id, api_version="2024-02-15-test")
+
+    assert azure_ai_inference.ai_model_id == model_id
+    assert isinstance(azure_ai_inference.client, EmbeddingsClient)
+    assert azure_ai_inference.client._config.api_version == "2024-02-15-test"
+
+
 @pytest.mark.parametrize(
     "azure_ai_inference_client",
     [AzureAIInferenceTextEmbedding.__name__],

0 commit comments