fix: to_dict in OpenAIResponsesChatGenerator and json_schema for structured outputs (#10043)

Amnah199 · web-flow · commit 564780b768e4 · 2025-11-11T12:16:52.000+01:00
* Fix to dict and json schema support

* Update Azure Responses

* Add tests

* Fix tests

* Fix tests

* remove print

* Change model

* Add a new test

* Loosen tests
diff --git a/haystack/components/generators/chat/azure_responses.py b/haystack/components/generators/chat/azure_responses.py
@@ -97,16 +97,19 @@ def __init__(
                 comprising the top 10% probability mass are considered.
             - `previous_response_id`: The ID of the previous response.
                 Use this to create multi-turn conversations.
-            - `text_format`: A JSON schema or a Pydantic model that enforces the structure of the model's response.
+            - `text_format`: A Pydantic model that enforces the structure of the model's response.
                 If provided, the output will always be validated against this
                 format (unless the model returns a tool call).
                 For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
+            - `text`: A JSON schema that enforces the structure of the model's response.
+                If provided, the output will always be validated against this
+                format (unless the model returns a tool call).
                 Notes:
-                - This parameter accepts Pydantic models and JSON schemas for latest models starting from GPT-4o.
-                  Older models only support basic version of structured outputs through `{"type": "json_object"}`.
-                  For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
-                - For structured outputs with streaming,
-                  the `text_format` must be a JSON schema and not a Pydantic model.
+                - Both JSON Schema and Pydantic models are supported for latest models starting from GPT-4o.
+                - If both are provided, `text_format` takes precedence and json schema passed to `text` is ignored.
+                - Currently, this component doesn't support streaming for structured outputs.
+                - Older models only support basic version of structured outputs through `{"type": "json_object"}`.
+                    For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
             - `reasoning`: A dictionary of parameters for reasoning. For example:
                 - `summary`: The summary of the reasoning.
                 - `effort`: The level of effort to put into the reasoning. Can be `low`, `medium` or `high`.
@@ -161,20 +164,21 @@ def to_dict(self) -> dict[str, Any]:
             else None
         )
 
-        # If the response format is a Pydantic model, it's converted to openai's json schema format
+        # If the text format is a Pydantic model, it's converted to openai's json schema format
         # If it's already a json schema, it's left as is
         generation_kwargs = self.generation_kwargs.copy()
-        response_format = generation_kwargs.get("response_format")
-        if response_format and issubclass(response_format, BaseModel):
+        text_format = generation_kwargs.pop("text_format", None)
+        if text_format and isinstance(text_format, type) and issubclass(text_format, BaseModel):
             json_schema = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": response_format.__name__,
+                "format": {
+                    "type": "json_schema",
+                    "name": text_format.__name__,
                     "strict": True,
-                    "schema": to_strict_json_schema(response_format),
-                },
+                    "schema": to_strict_json_schema(text_format),
+                }
             }
-            generation_kwargs["response_format"] = json_schema
+            # json schema needs to be passed to text parameter instead of text_format
+            generation_kwargs["text"] = json_schema
 
         # OpenAI/MCP tools are passed as list of dictionaries
         serialized_tools: Union[dict[str, Any], list[dict[str, Any]], None]
diff --git a/haystack/components/generators/chat/openai_responses.py b/haystack/components/generators/chat/openai_responses.py
@@ -116,16 +116,19 @@ def __init__(
                 comprising the top 10% probability mass are considered.
             - `previous_response_id`: The ID of the previous response.
                 Use this to create multi-turn conversations.
-            - `text_format`: A JSON schema or a Pydantic model that enforces the structure of the model's response.
+            - `text_format`: A Pydantic model that enforces the structure of the model's response.
                 If provided, the output will always be validated against this
                 format (unless the model returns a tool call).
                 For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
+            - `text`: A JSON schema that enforces the structure of the model's response.
+                If provided, the output will always be validated against this
+                format (unless the model returns a tool call).
                 Notes:
-                - This parameter accepts Pydantic models and JSON schemas for latest models starting from GPT-4o.
-                  Older models only support basic version of structured outputs through `{"type": "json_object"}`.
-                  For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
-                - For structured outputs with streaming,
-                  the `text_format` must be a JSON schema and not a Pydantic model.
+                - Both JSON Schema and Pydantic models are supported for latest models starting from GPT-4o.
+                - If both are provided, `text_format` takes precedence and json schema passed to `text` is ignored.
+                - Currently, this component doesn't support streaming for structured outputs.
+                - Older models only support basic version of structured outputs through `{"type": "json_object"}`.
+                    For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
             - `reasoning`: A dictionary of parameters for reasoning. For example:
                 - `summary`: The summary of the reasoning.
                 - `effort`: The level of effort to put into the reasoning. Can be `low`, `medium` or `high`.
@@ -215,20 +218,21 @@ def to_dict(self) -> dict[str, Any]:
         """
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         generation_kwargs = self.generation_kwargs.copy()
-        response_format = generation_kwargs.get("text_format")
+        text_format = generation_kwargs.pop("text_format", None)
 
         # If the response format is a Pydantic model, it's converted to openai's json schema format
         # If it's already a json schema, it's left as is
-        if response_format and issubclass(response_format, BaseModel):
+        if text_format and isinstance(text_format, type) and issubclass(text_format, BaseModel):
             json_schema = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": response_format.__name__,
+                "format": {
+                    "type": "json_schema",
+                    "name": text_format.__name__,
                     "strict": True,
-                    "schema": to_strict_json_schema(response_format),
-                },
+                    "schema": to_strict_json_schema(text_format),
+                }
             }
-            generation_kwargs["text_format"] = json_schema
+            # json schema needs to be passed to text parameter instead of text_format
+            generation_kwargs["text"] = json_schema
 
         # OpenAI/MCP tools are passed as list of dictionaries
         serialized_tools: Union[dict[str, Any], list[dict[str, Any]], None]
@@ -434,8 +438,6 @@ def _prepare_api_call(  # noqa: PLR0913
         # update generation kwargs by merging with the generation kwargs passed to the run method
         generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
 
-        text_format = generation_kwargs.pop("text_format", None)
-
         # adapt ChatMessage(s) to the format expected by the OpenAI API
         openai_formatted_messages: list[dict[str, Any]] = []
         for message in messages:
@@ -468,16 +470,12 @@ def _prepare_api_call(  # noqa: PLR0913
 
         base_args = {"model": self.model, "input": openai_formatted_messages, **openai_tools, **generation_kwargs}
 
-        if text_format and issubclass(text_format, BaseModel):
-            return {
-                **base_args,
-                "stream": streaming_callback is not None,
-                "text_format": text_format,
-                "openai_endpoint": "parse",
-            }
+        # if both `text_format` and `text` are provided, `text_format` takes precedence
+        # and json schema passed to `text` is ignored
+        if generation_kwargs.get("text_format") or generation_kwargs.get("text"):
+            return {**base_args, "stream": streaming_callback is not None, "openai_endpoint": "parse"}
         # we pass a key `openai_endpoint` as a hint to the run method to use the create or parse endpoint
         # this key will be removed before the API call is made
-
         return {**base_args, "stream": streaming_callback is not None, "openai_endpoint": "create"}
 
     def _handle_stream_response(self, responses: Stream, callback: SyncStreamingCallbackT) -> list[ChatMessage]:
diff --git a/test/components/generators/chat/test_azure_responses.py b/test/components/generators/chat/test_azure_responses.py
@@ -74,7 +74,7 @@ def tools():
     return [weather_tool, message_extractor_tool]
 
 
-class TestAzureOpenAIChatGenerator:
+class TestAzureOpenAIResponsesChatGenerator:
     def test_init_default(self, monkeypatch):
         monkeypatch.setenv("AZURE_OPENAI_API_KEY", "test-api-key")
         component = AzureOpenAIResponsesChatGenerator(azure_endpoint="some-non-existing-endpoint")
@@ -143,7 +143,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
             generation_kwargs={
                 "max_completion_tokens": 10,
                 "some_test_param": "test-params",
-                "response_format": calendar_event_model,
+                "text_format": calendar_event_model,
             },
             http_client_kwargs={"proxy": "http://localhost:8080"},
         )
@@ -161,9 +161,9 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
                 "generation_kwargs": {
                     "max_completion_tokens": 10,
                     "some_test_param": "test-params",
-                    "response_format": {
-                        "type": "json_schema",
-                        "json_schema": {
+                    "text": {
+                        "format": {
+                            "type": "json_schema",
                             "name": "CalendarEvent",
                             "strict": True,
                             "schema": {
@@ -177,7 +177,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
                                 "type": "object",
                                 "additionalProperties": False,
                             },
-                        },
+                        }
                     },
                 },
                 "tools": None,
@@ -393,12 +393,12 @@ def test_live_run_with_tools(self, tools):
         reason="Export an env var called AZURE_OPENAI_API_KEY containing the Azure OpenAI API key to run this test.",
     )
     @pytest.mark.integration
-    def test_live_run_with_response_format(self):
+    def test_live_run_with_text_format(self, calendar_event_model):
         chat_messages = [
             ChatMessage.from_user("The marketing summit takes place on October12th at the Hilton Hotel downtown.")
         ]
         component = AzureOpenAIResponsesChatGenerator(
-            azure_deployment="gpt-4o-mini", generation_kwargs={"text_format": CalendarEvent}
+            azure_deployment="gpt-4o-mini", generation_kwargs={"text_format": calendar_event_model}
         )
         results = component.run(chat_messages)
         assert len(results["replies"]) == 1
@@ -409,6 +409,42 @@ def test_live_run_with_response_format(self):
         assert isinstance(msg["event_location"], str)
         assert message.meta["status"] == "completed"
 
+    @pytest.mark.skipif(
+        not os.environ.get("AZURE_OPENAI_API_KEY", None),
+        reason="Export an env var called AZURE_OPENAI_API_KEY containing the Azure OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration
+    # So far from documentation, responses.parse only supports BaseModel
+    def test_live_run_with_text_format_json_schema(self):
+        json_schema = {
+            "format": {
+                "type": "json_schema",
+                "name": "person",
+                "strict": True,
+                "schema": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string", "minLength": 1},
+                        "age": {"type": "number", "minimum": 0, "maximum": 130},
+                    },
+                    "required": ["name", "age"],
+                    "additionalProperties": False,
+                },
+            }
+        }
+        chat_messages = [ChatMessage.from_user("Jane 54 years old")]
+        component = AzureOpenAIResponsesChatGenerator(
+            azure_deployment="gpt-4o-mini", generation_kwargs={"text": json_schema}
+        )
+        results = component.run(chat_messages)
+        assert len(results["replies"]) == 1
+        message: ChatMessage = results["replies"][0]
+        msg = json.loads(message.text)
+        assert "Jane" in msg["name"]
+        assert msg["age"] == 54
+        assert message.meta["status"] == "completed"
+        assert message.meta["usage"]["output_tokens"] > 0
+
     def test_to_dict_with_toolset(self, tools, monkeypatch):
         """Test that the AzureOpenAIChatGenerator can be serialized to a dictionary with a Toolset."""
         monkeypatch.setenv("AZURE_OPENAI_API_KEY", "test-api-key")
@@ -532,7 +568,7 @@ def warm_up(self):
         assert len(warm_up_calls) == initial_count
 
 
-class TestAzureOpenAIChatGeneratorAsync:
+class TestAzureOpenAIResponsesChatGeneratorAsync:
     def test_init_should_also_create_async_client_with_same_args(self, tools):
         component = AzureOpenAIResponsesChatGenerator(
             api_key=Secret.from_token("test-api-key"),
diff --git a/test/components/generators/chat/test_openai_responses.py b/test/components/generators/chat/test_openai_responses.py
@@ -236,9 +236,9 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
                 "generation_kwargs": {
                     "max_tokens": 10,
                     "some_test_param": "test-params",
-                    "text_format": {
-                        "type": "json_schema",
-                        "json_schema": {
+                    "text": {
+                        "format": {
+                            "type": "json_schema",
                             "name": "CalendarEvent",
                             "strict": True,
                             "schema": {
@@ -252,7 +252,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
                                 "type": "object",
                                 "additionalProperties": False,
                             },
-                        },
+                        }
                     },
                 },
                 "tools": [
@@ -585,6 +585,40 @@ def test_live_run_with_text_format(self, calendar_event_model):
         assert isinstance(msg["event_date"], str)
         assert isinstance(msg["event_location"], str)
 
+    @pytest.mark.skipif(
+        not os.environ.get("OPENAI_API_KEY", None),
+        reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration
+    # So far from documentation, responses.parse only supports BaseModel
+    def test_live_run_with_text_format_json_schema(self):
+        json_schema = {
+            "format": {
+                "type": "json_schema",
+                "name": "person",
+                "strict": True,
+                "schema": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string", "minLength": 1},
+                        "age": {"type": "number", "minimum": 0, "maximum": 130},
+                    },
+                    "required": ["name", "age"],
+                    "additionalProperties": False,
+                },
+            }
+        }
+        chat_messages = [ChatMessage.from_user("Jane 54 years old")]
+        component = OpenAIResponsesChatGenerator(generation_kwargs={"text": json_schema})
+        results = component.run(chat_messages)
+        assert len(results["replies"]) == 1
+        message: ChatMessage = results["replies"][0]
+        msg = json.loads(message.text)
+        assert "Jane" in msg["name"]
+        assert msg["age"] == 54
+        assert message.meta["status"] == "completed"
+        assert message.meta["usage"]["output_tokens"] > 0
+
     @pytest.mark.skipif(
         not os.environ.get("OPENAI_API_KEY", None),
         reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
@@ -609,6 +643,26 @@ def test_live_run_with_text_format_and_streaming(self, calendar_event_model):
         assert isinstance(msg["event_date"], str)
         assert isinstance(msg["event_location"], str)
 
+    @pytest.mark.skipif(
+        not os.environ.get("OPENAI_API_KEY", None),
+        reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration
+    def test_live_run_with_ser_deser_and_text_format(self, calendar_event_model):
+        chat_messages = [
+            ChatMessage.from_user("The marketing summit takes place on October12th at the Hilton Hotel downtown.")
+        ]
+        component = OpenAIResponsesChatGenerator(generation_kwargs={"text_format": calendar_event_model})
+        serialized = component.to_dict()
+        deser = OpenAIResponsesChatGenerator.from_dict(serialized)
+        results = deser.run(chat_messages)
+        assert len(results["replies"]) == 1
+        message: ChatMessage = results["replies"][0]
+        msg = json.loads(message.text)
+        assert "Marketing Summit" in msg["event_name"]
+        assert isinstance(msg["event_date"], str)
+        assert isinstance(msg["event_location"], str)
+
     def test_run_with_wrong_model(self):
         mock_client = MagicMock()
         mock_client.responses.create.side_effect = OpenAIError("Invalid model name")
@@ -710,15 +764,12 @@ def test_live_run_with_tools_streaming(self, tools):
         assert not message.text
         assert message.tool_calls
         tool_calls = message.tool_calls
-        assert len(tool_calls) == 2
+        assert len(tool_calls) > 0
 
         for tool_call in tool_calls:
             assert isinstance(tool_call, ToolCall)
             assert tool_call.tool_name == "weather"
 
-        arguments = [tool_call.arguments for tool_call in tool_calls]
-        assert sorted(arguments, key=lambda x: x["city"]) == [{"city": "Berlin"}, {"city": "Paris"}]
-
     @pytest.mark.skipif(
         not os.environ.get("OPENAI_API_KEY", None),
         reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",