chore: Update Azure default model to gpt-4.1-mini (#10167)

vblagoje · web-flow · commit adcd4ae3619d · 2025-12-03T14:13:49.000+01:00
* Update Azure default model to gpt-4.1-mini

* Add version

* Version updates

* Experiment

* Update experiment with 2024-12-01-preview

* Fix test

* Add reno note

* Fix typo
diff --git a/haystack/components/generators/azure.py b/haystack/components/generators/azure.py
@@ -40,7 +40,7 @@ class AzureOpenAIGenerator(OpenAIGenerator):
     client = AzureOpenAIGenerator(
         azure_endpoint="<Your Azure endpoint e.g. `https://your-company.azure.openai.com/>",
         api_key=Secret.from_token("<your-api-key>"),
-        azure_deployment="<this a model name, e.g.  gpt-4o-mini>")
+        azure_deployment="<this is a model name, e.g. gpt-4.1-mini>")
     response = client.run("What's Natural Language Processing? Be brief.")
     print(response)
     ```
@@ -49,7 +49,7 @@ class AzureOpenAIGenerator(OpenAIGenerator):
     >> {'replies': ['Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
     >> the interaction between computers and human language. It involves enabling computers to understand, interpret,
     >> and respond to natural human language in a way that is both meaningful and useful.'], 'meta': [{'model':
-    >> 'gpt-4o-mini', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 16,
+    >> 'gpt-4.1-mini', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 16,
     >> 'completion_tokens': 49, 'total_tokens': 65}}]}
     ```
     """
@@ -58,8 +58,8 @@ class AzureOpenAIGenerator(OpenAIGenerator):
     def __init__(  # pylint: disable=too-many-positional-arguments  # noqa: PLR0913
         self,
         azure_endpoint: Optional[str] = None,
-        api_version: Optional[str] = "2023-05-15",
-        azure_deployment: Optional[str] = "gpt-4o-mini",
+        api_version: Optional[str] = "2024-12-01-preview",
+        azure_deployment: Optional[str] = "gpt-4.1-mini",
         api_key: Optional[Secret] = Secret.from_env_var("AZURE_OPENAI_API_KEY", strict=False),
         azure_ad_token: Optional[Secret] = Secret.from_env_var("AZURE_OPENAI_AD_TOKEN", strict=False),
         organization: Optional[str] = None,
@@ -77,7 +77,7 @@ def __init__(  # pylint: disable=too-many-positional-arguments  # noqa: PLR0913
         Initialize the Azure OpenAI Generator.
 
         :param azure_endpoint: The endpoint of the deployed model, for example `https://example-resource.azure.openai.com/`.
-        :param api_version: The version of the API to use. Defaults to 2023-05-15.
+        :param api_version: The version of the API to use. Defaults to 2024-12-01-preview.
         :param azure_deployment: The deployment of the model, usually the model name.
         :param api_key: The API key to use for authentication.
         :param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id).
@@ -144,7 +144,7 @@ def __init__(  # pylint: disable=too-many-positional-arguments  # noqa: PLR0913
         self.azure_endpoint = azure_endpoint
         self.azure_deployment = azure_deployment
         self.organization = organization
-        self.model: str = azure_deployment or "gpt-4o-mini"
+        self.model: str = azure_deployment or "gpt-4.1-mini"
         self.timeout = timeout if timeout is not None else float(os.environ.get("OPENAI_TIMEOUT", "30.0"))
         self.max_retries = max_retries if max_retries is not None else int(os.environ.get("OPENAI_MAX_RETRIES", "5"))
         self.http_client_kwargs = http_client_kwargs
diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py
@@ -53,7 +53,7 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     client = AzureOpenAIChatGenerator(
         azure_endpoint="<Your Azure endpoint e.g. `https://your-company.azure.openai.com/>",
         api_key=Secret.from_token("<your-api-key>"),
-        azure_deployment="<this a model name, e.g. gpt-4o-mini>")
+        azure_deployment="<this is a model name, e.g. gpt-4.1-mini>")
     response = client.run(messages)
     print(response)
     ```
@@ -64,7 +64,7 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
         "Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
          enabling computers to understand, interpret, and generate human language in a way that is useful.")],
          _name=None,
-         _meta={'model': 'gpt-4o-mini', 'index': 0, 'finish_reason': 'stop',
+         _meta={'model': 'gpt-4.1-mini', 'index': 0, 'finish_reason': 'stop',
          'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]
     }
     ```
@@ -75,8 +75,8 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     def __init__(  # pylint: disable=too-many-positional-arguments
         self,
         azure_endpoint: Optional[str] = None,
-        api_version: Optional[str] = "2023-05-15",
-        azure_deployment: Optional[str] = "gpt-4o-mini",
+        api_version: Optional[str] = "2024-12-01-preview",
+        azure_deployment: Optional[str] = "gpt-4.1-mini",
         api_key: Optional[Secret] = Secret.from_env_var("AZURE_OPENAI_API_KEY", strict=False),
         azure_ad_token: Optional[Secret] = Secret.from_env_var("AZURE_OPENAI_AD_TOKEN", strict=False),
         organization: Optional[str] = None,
@@ -95,7 +95,7 @@ def __init__(  # pylint: disable=too-many-positional-arguments
         Initialize the Azure OpenAI Chat Generator component.
 
         :param azure_endpoint: The endpoint of the deployed model, for example `"https://example-resource.azure.openai.com/"`.
-        :param api_version: The version of the API to use. Defaults to 2023-05-15.
+        :param api_version: The version of the API to use. Defaults to 2024-12-01-preview.
         :param azure_deployment: The deployment of the model, usually the model name.
         :param api_key: The API key to use for authentication.
         :param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id).
@@ -173,7 +173,7 @@ def __init__(  # pylint: disable=too-many-positional-arguments
         self.azure_endpoint = azure_endpoint
         self.azure_deployment = azure_deployment
         self.organization = organization
-        self.model = azure_deployment or "gpt-4o-mini"
+        self.model = azure_deployment or "gpt-4.1-mini"
         self.timeout = timeout if timeout is not None else float(os.environ.get("OPENAI_TIMEOUT", "30.0"))
         self.max_retries = max_retries if max_retries is not None else int(os.environ.get("OPENAI_MAX_RETRIES", "5"))
         self.default_headers = default_headers or {}
diff --git a/releasenotes/notes/azure-default-model-upgrade-52af3fe333338b8c.yaml b/releasenotes/notes/azure-default-model-upgrade-52af3fe333338b8c.yaml
@@ -0,0 +1,4 @@
+---
+upgrade:
+  - |
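+    Updated the default Azure OpenAI model from ``gpt-4o-mini`` to ``gpt-4.1-mini`` and the default API version from ``2023-05-15`` to ``2024-12-01-preview`` for both ``AzureOpenAIGenerator`` and ``AzureOpenAIChatGenerator``. If you rely on these defaults, make sure your Azure resource has a deployment named ``gpt-4.1-mini``, or pass ``azure_deployment`` and ``api_version`` explicitly to keep the previous values.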
diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py
@@ -79,7 +79,7 @@ def test_init_default(self, monkeypatch):
         monkeypatch.setenv("AZURE_OPENAI_API_KEY", "test-api-key")
         component = AzureOpenAIChatGenerator(azure_endpoint="some-non-existing-endpoint")
         assert component.client.api_key == "test-api-key"
-        assert component.azure_deployment == "gpt-4o-mini"
+        assert component.azure_deployment == "gpt-4.1-mini"
         assert component.streaming_callback is None
         assert not component.generation_kwargs
 
@@ -100,7 +100,7 @@ def test_init_with_parameters(self, tools):
             azure_ad_token_provider=default_azure_ad_token_provider,
         )
         assert component.client.api_key == "test-api-key"
-        assert component.azure_deployment == "gpt-4o-mini"
+        assert component.azure_deployment == "gpt-4.1-mini"
         assert component.streaming_callback is print_streaming_chunk
         assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
         assert component.tools == tools
@@ -121,7 +121,7 @@ def test_init_with_0_max_retries(self, tools):
             max_retries=0,
         )
         assert component.client.api_key == "test-api-key"
-        assert component.azure_deployment == "gpt-4o-mini"
+        assert component.azure_deployment == "gpt-4.1-mini"
         assert component.streaming_callback is print_streaming_chunk
         assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
         assert component.tools == tools
@@ -138,9 +138,9 @@ def test_to_dict_default(self, monkeypatch):
             "init_parameters": {
                 "api_key": {"env_vars": ["AZURE_OPENAI_API_KEY"], "strict": False, "type": "env_var"},
                 "azure_ad_token": {"env_vars": ["AZURE_OPENAI_AD_TOKEN"], "strict": False, "type": "env_var"},
-                "api_version": "2023-05-15",
+                "api_version": "2024-12-01-preview",
                 "azure_endpoint": "some-non-existing-endpoint",
-                "azure_deployment": "gpt-4o-mini",
+                "azure_deployment": "gpt-4.1-mini",
                 "organization": None,
                 "streaming_callback": None,
                 "generation_kwargs": {},
@@ -177,9 +177,9 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
             "init_parameters": {
                 "api_key": {"env_vars": ["ENV_VAR"], "strict": False, "type": "env_var"},
                 "azure_ad_token": {"env_vars": ["ENV_VAR1"], "strict": False, "type": "env_var"},
-                "api_version": "2023-05-15",
+                "api_version": "2024-12-01-preview",
                 "azure_endpoint": "some-non-existing-endpoint",
-                "azure_deployment": "gpt-4o-mini",
+                "azure_deployment": "gpt-4.1-mini",
                 "organization": None,
                 "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                 "timeout": 2.5,
@@ -222,9 +222,9 @@ def test_from_dict(self, monkeypatch):
             "init_parameters": {
                 "api_key": {"env_vars": ["AZURE_OPENAI_API_KEY"], "strict": False, "type": "env_var"},
                 "azure_ad_token": {"env_vars": ["AZURE_OPENAI_AD_TOKEN"], "strict": False, "type": "env_var"},
-                "api_version": "2023-05-15",
+                "api_version": "2024-12-01-preview",
                 "azure_endpoint": "some-non-existing-endpoint",
-                "azure_deployment": "gpt-4o-mini",
+                "azure_deployment": "gpt-4.1-mini",
                 "organization": None,
                 "streaming_callback": None,
                 "generation_kwargs": {},
@@ -252,9 +252,9 @@ def test_from_dict(self, monkeypatch):
 
         assert generator.api_key == Secret.from_env_var("AZURE_OPENAI_API_KEY", strict=False)
         assert generator.azure_ad_token == Secret.from_env_var("AZURE_OPENAI_AD_TOKEN", strict=False)
-        assert generator.api_version == "2023-05-15"
+        assert generator.api_version == "2024-12-01-preview"
         assert generator.azure_endpoint == "some-non-existing-endpoint"
-        assert generator.azure_deployment == "gpt-4o-mini"
+        assert generator.azure_deployment == "gpt-4.1-mini"
         assert generator.organization is None
         assert generator.streaming_callback is None
         assert generator.generation_kwargs == {}
@@ -282,9 +282,9 @@ def test_pipeline_serialization_deserialization(self, tmp_path, monkeypatch):
                     "type": "haystack.components.generators.chat.azure.AzureOpenAIChatGenerator",
                     "init_parameters": {
                         "azure_endpoint": "some-non-existing-endpoint",
-                        "azure_deployment": "gpt-4o-mini",
+                        "azure_deployment": "gpt-4.1-mini",
                         "organization": None,
-                        "api_version": "2023-05-15",
+                        "api_version": "2024-12-01-preview",
                         "streaming_callback": None,
                         "generation_kwargs": {},
                         "timeout": 30.0,
@@ -341,7 +341,7 @@ def test_live_run(self):
         assert len(results["replies"]) == 1
         message: ChatMessage = results["replies"][0]
         assert "Paris" in message.text
-        assert "gpt-4o-mini" in message.meta["model"]
+        assert "gpt-4.1-mini" in message.meta["model"]
         assert message.meta["finish_reason"] == "stop"
 
     @pytest.mark.integration
@@ -544,7 +544,7 @@ def test_init_should_also_create_async_client_with_same_args(self, tools):
             tools_strict=True,
         )
         assert component.async_client.api_key == "test-api-key"
-        assert component.azure_deployment == "gpt-4o-mini"
+        assert component.azure_deployment == "gpt-4.1-mini"
         assert component.streaming_callback is print_streaming_chunk
         assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
         assert component.tools == tools
@@ -567,7 +567,7 @@ async def test_live_run_async(self):
         assert len(results["replies"]) == 1
         message: ChatMessage = results["replies"][0]
         assert "Paris" in message.text
-        assert "gpt-4o" in message.meta["model"]
+        assert "gpt-4.1-mini" in message.meta["model"]
         assert message.meta["finish_reason"] == "stop"
 
     @pytest.mark.integration
diff --git a/test/components/generators/test_azure.py b/test/components/generators/test_azure.py
@@ -19,7 +19,7 @@ def test_init_default(self, monkeypatch):
         monkeypatch.setenv("AZURE_OPENAI_API_KEY", "test-api-key")
         component = AzureOpenAIGenerator(azure_endpoint="some-non-existing-endpoint")
         assert component.client.api_key == "test-api-key"
-        assert component.azure_deployment == "gpt-4o-mini"
+        assert component.azure_deployment == "gpt-4.1-mini"
         assert component.streaming_callback is None
         assert not component.generation_kwargs
 
@@ -33,13 +33,13 @@ def test_init_with_parameters(self):
         component = AzureOpenAIGenerator(
             api_key=Secret.from_token("fake-api-key"),
             azure_endpoint="some-non-existing-endpoint",
-            azure_deployment="gpt-4o-mini",
+            azure_deployment="gpt-4.1-mini",
             streaming_callback=print_streaming_chunk,
             generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
             azure_ad_token_provider=default_azure_ad_token_provider,
         )
         assert component.client.api_key == "fake-api-key"
-        assert component.azure_deployment == "gpt-4o-mini"
+        assert component.azure_deployment == "gpt-4.1-mini"
         assert component.streaming_callback is print_streaming_chunk
         assert component.timeout == 30.0
         assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
@@ -51,14 +51,14 @@ def test_init_with_0_max_retries(self):
         component = AzureOpenAIGenerator(
             api_key=Secret.from_token("fake-api-key"),
             azure_endpoint="some-non-existing-endpoint",
-            azure_deployment="gpt-4o-mini",
+            azure_deployment="gpt-4.1-mini",
             streaming_callback=print_streaming_chunk,
             generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
             azure_ad_token_provider=default_azure_ad_token_provider,
             max_retries=0,
         )
         assert component.client.api_key == "fake-api-key"
-        assert component.azure_deployment == "gpt-4o-mini"
+        assert component.azure_deployment == "gpt-4.1-mini"
         assert component.streaming_callback is print_streaming_chunk
         assert component.timeout == 30.0
         assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
@@ -74,8 +74,8 @@ def test_to_dict_default(self, monkeypatch):
             "init_parameters": {
                 "api_key": {"env_vars": ["AZURE_OPENAI_API_KEY"], "strict": False, "type": "env_var"},
                 "azure_ad_token": {"env_vars": ["AZURE_OPENAI_AD_TOKEN"], "strict": False, "type": "env_var"},
-                "azure_deployment": "gpt-4o-mini",
-                "api_version": "2023-05-15",
+                "azure_deployment": "gpt-4.1-mini",
+                "api_version": "2024-12-01-preview",
                 "streaming_callback": None,
                 "azure_endpoint": "some-non-existing-endpoint",
                 "organization": None,
@@ -109,8 +109,8 @@ def test_to_dict_with_parameters(self, monkeypatch):
             "init_parameters": {
                 "api_key": {"env_vars": ["ENV_VAR"], "strict": False, "type": "env_var"},
                 "azure_ad_token": {"env_vars": ["ENV_VAR1"], "strict": False, "type": "env_var"},
-                "azure_deployment": "gpt-4o-mini",
-                "api_version": "2023-05-15",
+                "azure_deployment": "gpt-4.1-mini",
+                "api_version": "2024-12-01-preview",
                 "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                 "azure_endpoint": "some-non-existing-endpoint",
                 "organization": None,
@@ -131,8 +131,8 @@ def test_from_dict_defaults(self, monkeypatch):
             "init_parameters": {
                 "api_key": {"env_vars": ["AZURE_OPENAI_API_KEY"], "strict": False, "type": "env_var"},
                 "azure_ad_token": {"env_vars": ["AZURE_OPENAI_AD_TOKEN"], "strict": False, "type": "env_var"},
-                "azure_deployment": "gpt-4o-mini",
-                "api_version": "2023-05-15",
+                "azure_deployment": "gpt-4.1-mini",
+                "api_version": "2024-12-01-preview",
                 "streaming_callback": None,
                 "azure_endpoint": "some-non-existing-endpoint",
                 "organization": None,
@@ -148,8 +148,8 @@ def test_from_dict_defaults(self, monkeypatch):
         component = AzureOpenAIGenerator.from_dict(data)
         assert component.api_key == Secret.from_env_var("AZURE_OPENAI_API_KEY", strict=False)
         assert component.azure_ad_token == Secret.from_env_var("AZURE_OPENAI_AD_TOKEN", strict=False)
-        assert component.azure_deployment == "gpt-4o-mini"
-        assert component.api_version == "2023-05-15"
+        assert component.azure_deployment == "gpt-4.1-mini"
+        assert component.api_version == "2024-12-01-preview"
         assert component.streaming_callback is None
         assert component.azure_endpoint == "some-non-existing-endpoint"
         assert component.organization is None
@@ -187,7 +187,7 @@ def test_live_run(self):
         assert "Paris" in response
 
         metadata = results["meta"][0]
-        assert "gpt-4o-mini" in metadata["model"]
+        assert "gpt-4.1-mini" in metadata["model"]
         assert metadata["finish_reason"] == "stop"
 
         assert "usage" in metadata

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +---
 +upgrade:
 +  - |
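 +    Updated the default Azure OpenAI model from ``gpt-4o-mini`` to ``gpt-4.1-mini`` and the default API version from ``2023-05-15`` to ``2024-12-01-preview`` for both ``AzureOpenAIGenerator`` and ``AzureOpenAIChatGenerator``. If you rely on these defaults, make sure your Azure resource has a deployment named ``gpt-4.1-mini``, or pass ``azure_deployment`` and ``api_version`` explicitly to keep the previous values.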