From ccb818af9e48505a3f1c1af104bf938775579e51 Mon Sep 17 00:00:00 2001
From: Tim Conley
Date: Mon, 21 Jul 2025 12:45:17 -0700
Subject: [PATCH 1/6] Making summary generation more robust, generally summarize as the last message type input

---
 .../openai_agents/_temporal_model_stub.py    | 35 +++++++++++++------
 temporalio/contrib/openai_agents/workflow.py |  2 +-
 tests/contrib/openai_agents/test_openai.py   |  1 +
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/temporalio/contrib/openai_agents/_temporal_model_stub.py b/temporalio/contrib/openai_agents/_temporal_model_stub.py
index b053c259b..5b994361f 100644
--- a/temporalio/contrib/openai_agents/_temporal_model_stub.py
+++ b/temporalio/contrib/openai_agents/_temporal_model_stub.py
@@ -3,6 +3,12 @@
 import logging
 from typing import Optional
 
+from openai.types.responses import (
+    ResponseOutputMessage,
+    ResponseOutputText,
+    ResponseOutputTextParam,
+)
+
 from temporalio import workflow
 from temporalio.contrib.openai_agents._model_parameters import ModelActivityParameters
 
@@ -54,7 +60,7 @@ def __init__(
     async def get_response(
         self,
         system_instructions: Optional[str],
-        input: Union[str, list[TResponseInputItem], dict[str, str]],
+        input: Union[str, list[TResponseInputItem]],
         model_settings: ModelSettings,
         tools: list[Tool],
         output_schema: Optional[AgentOutputSchemaBase],
@@ -65,7 +71,7 @@ async def get_response(
         prompt: Optional[ResponsePromptParam],
     ) -> ModelResponse:
         def get_summary(
-            input: Union[str, list[TResponseInputItem], dict[str, str]],
+            input: Union[str, list[TResponseInputItem]],
         ) -> str:
             ### Activity summary shown in the UI
             try:
@@ -73,15 +79,22 @@ def get_summary(
                 if isinstance(input, str):
                     return input[:max_size]
                 elif isinstance(input, list):
-                    seq_input = cast(Sequence[Any], input)
-                    last_item = seq_input[-1]
-                    if isinstance(last_item, dict):
-                        return last_item.get("content", "")[:max_size]
-                    elif hasattr(last_item, "content"):
-                        return str(getattr(last_item, "content"))[:max_size]
-                    return str(last_item)[:max_size]
-                elif isinstance(input, dict):
-                    return input.get("content", "")[:max_size]
+                    content: Any = [
+                        item
+                        for item in input
+                        if (item.get("type") or "message") == "message"
+                    ][-1]
+                    if isinstance(content, dict):
+                        content = content.get("content", "")
+                    elif hasattr(content, "content"):
+                        content = getattr(content, "content")
+
+                    if isinstance(content, list):
+                        content = content[-1]
+
+                    if isinstance(content, dict) and content.get("text") is not None:
+                        content = content.get("text")
+                    return str(content)[:max_size]
             except Exception as e:
                 logger.error(f"Error getting summary: {e}")
                 return ""
diff --git a/temporalio/contrib/openai_agents/workflow.py b/temporalio/contrib/openai_agents/workflow.py
index 50eba0b9e..76969fc8d 100644
--- a/temporalio/contrib/openai_agents/workflow.py
+++ b/temporalio/contrib/openai_agents/workflow.py
@@ -134,7 +134,7 @@ async def run_activity(ctx: RunContextWrapper[Any], input: str) -> Any:
                 cancellation_type=cancellation_type,
                 activity_id=activity_id,
                 versioning_intent=versioning_intent,
-                summary=summary,
+                summary=summary or schema.description,
                 priority=priority,
             )
         try:
diff --git a/tests/contrib/openai_agents/test_openai.py b/tests/contrib/openai_agents/test_openai.py
index 57dc5c252..31362956f 100644
--- a/tests/contrib/openai_agents/test_openai.py
+++ b/tests/contrib/openai_agents/test_openai.py
@@ -965,6 +965,7 @@ async def test_agents_as_tools_workflow(client: Client, use_local_model: bool):
             .activity_task_completed_event_attributes.result.payloads[0]
             .data.decode()
         )
+        assert False
 
 
 class AirlineAgentContext(BaseModel):

From 2d826745198bacbc55272158c658d951118b3b9e Mon Sep 17 00:00:00 2001
From: Tim Conley
Date: Tue, 22 Jul 2025 09:19:16 -0700
Subject: [PATCH 2/6] Remove assert

---
 tests/contrib/openai_agents/test_openai.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/contrib/openai_agents/test_openai.py b/tests/contrib/openai_agents/test_openai.py
index 31362956f..57dc5c252 100644
--- a/tests/contrib/openai_agents/test_openai.py
+++ b/tests/contrib/openai_agents/test_openai.py
@@ -965,7 +965,6 @@ async def test_agents_as_tools_workflow(client: Client, use_local_model: bool):
             .activity_task_completed_event_attributes.result.payloads[0]
             .data.decode()
         )
-        assert False
 
 
 class AirlineAgentContext(BaseModel):

From 4f02aa9ec7e9a3c4832dbae018689e5c3061fa9a Mon Sep 17 00:00:00 2001
From: Tim Conley
Date: Tue, 22 Jul 2025 09:20:33 -0700
Subject: [PATCH 3/6] Remove unused imports

---
 temporalio/contrib/openai_agents/_temporal_model_stub.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/temporalio/contrib/openai_agents/_temporal_model_stub.py b/temporalio/contrib/openai_agents/_temporal_model_stub.py
index 5b994361f..e8dfed887 100644
--- a/temporalio/contrib/openai_agents/_temporal_model_stub.py
+++ b/temporalio/contrib/openai_agents/_temporal_model_stub.py
@@ -3,18 +3,12 @@
 import logging
 from typing import Optional
 
-from openai.types.responses import (
-    ResponseOutputMessage,
-    ResponseOutputText,
-    ResponseOutputTextParam,
-)
-
 from temporalio import workflow
 from temporalio.contrib.openai_agents._model_parameters import ModelActivityParameters
 
 logger = logging.getLogger(__name__)
 
-from typing import Any, AsyncIterator, Sequence, Union, cast
+from typing import Any, AsyncIterator, Union, cast
 
 from agents import (
     AgentOutputSchema,

From 4ff18a30f78d6af1c3e90a7f6a692c717edf9a98 Mon Sep 17 00:00:00 2001
From: Tim Conley
Date: Tue, 22 Jul 2025 09:25:40 -0700
Subject: [PATCH 4/6] Extend timeout for test stability

---
 tests/contrib/openai_agents/test_openai.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/contrib/openai_agents/test_openai.py b/tests/contrib/openai_agents/test_openai.py
index 57dc5c252..15bb6edc8 100644
--- a/tests/contrib/openai_agents/test_openai.py
+++ b/tests/contrib/openai_agents/test_openai.py
@@ -694,7 +694,8 @@ async def test_research_workflow(client: Client, use_local_model: bool):
         response_index = 0
 
     model_params = ModelActivityParameters(
-        start_to_close_timeout=timedelta(seconds=120)
+        start_to_close_timeout=timedelta(seconds=120),
+        schedule_to_close_timeout=timedelta(seconds=120),
     )
     with set_open_ai_agent_temporal_overrides(model_params):
         model_activity = ModelActivity(

From 241a0a929bb6705c2d9ab421a7f19458318c6cd2 Mon Sep 17 00:00:00 2001
From: Tim Conley
Date: Thu, 24 Jul 2025 09:36:13 -0700
Subject: [PATCH 5/6] Unit test summary, explicitly handle some edge cases

---
 .../openai_agents/_temporal_model_stub.py  | 62 ++++++++++---------
 tests/contrib/openai_agents/test_openai.py | 41 +++++++++++-
 2 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/temporalio/contrib/openai_agents/_temporal_model_stub.py b/temporalio/contrib/openai_agents/_temporal_model_stub.py
index e8dfed887..e9ea8ec0c 100644
--- a/temporalio/contrib/openai_agents/_temporal_model_stub.py
+++ b/temporalio/contrib/openai_agents/_temporal_model_stub.py
@@ -64,35 +64,6 @@ async def get_response(
         previous_response_id: Optional[str],
         prompt: Optional[ResponsePromptParam],
     ) -> ModelResponse:
-        def get_summary(
-            input: Union[str, list[TResponseInputItem]],
-        ) -> str:
-            ### Activity summary shown in the UI
-            try:
-                max_size = 100
-                if isinstance(input, str):
-                    return input[:max_size]
-                elif isinstance(input, list):
-                    content: Any = [
-                        item
-                        for item in input
-                        if (item.get("type") or "message") == "message"
-                    ][-1]
-                    if isinstance(content, dict):
-                        content = content.get("content", "")
-                    elif hasattr(content, "content"):
-                        content = getattr(content, "content")
-
-                    if isinstance(content, list):
-                        content = content[-1]
-
-                    if isinstance(content, dict) and content.get("text") is not None:
-                        content = content.get("text")
-                    return str(content)[:max_size]
-            except Exception as e:
-                logger.error(f"Error getting summary: {e}")
-                return ""
-
         def make_tool_info(tool: Tool) -> ToolInput:
             if isinstance(tool, (FileSearchTool, WebSearchTool)):
                 return tool
@@ -157,7 +128,7 @@ def make_tool_info(tool: Tool) -> ToolInput:
         return await workflow.execute_activity_method(
             ModelActivity.invoke_model_activity,
             activity_input,
-            summary=self.model_params.summary_override or get_summary(input),
+            summary=self.model_params.summary_override or _extract_summary(input),
             task_queue=self.model_params.task_queue,
             schedule_to_close_timeout=self.model_params.schedule_to_close_timeout,
             schedule_to_start_timeout=self.model_params.schedule_to_start_timeout,
@@ -183,3 +154,34 @@ def stream_response(
         prompt: ResponsePromptParam | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         raise NotImplementedError("Temporal model doesn't support streams yet")
+
+
+def _extract_summary(input: Union[str, list[TResponseInputItem]]) -> str:
+    ### Activity summary shown in the UI
+    try:
+        max_size = 100
+        if isinstance(input, str):
+            return input[:max_size]
+        elif isinstance(input, list):
+            # Find all message inputs, which are reasonably summarizable
+            messages: list[TResponseInputItem] = [
+                item for item in input if (item.get("type") or "message") == "message"
+            ]
+            if not messages:
+                return ""
+
+            content: Any = messages[-1].get("content", "")
+
+            # In the case of multiple contents, take the last one
+            if isinstance(content, list):
+                if not content:
+                    return ""
+                content = content[-1]
+
+            # Take the text field from the content if present
+            if isinstance(content, dict) and content.get("text") is not None:
+                content = content.get("text")
+            return str(content)[:max_size]
+    except Exception as e:
+        logger.error(f"Error getting summary: {e}")
+        return ""
diff --git a/tests/contrib/openai_agents/test_openai.py b/tests/contrib/openai_agents/test_openai.py
index 61ae12bb2..2998da99f 100644
--- a/tests/contrib/openai_agents/test_openai.py
+++ b/tests/contrib/openai_agents/test_openai.py
@@ -42,12 +42,16 @@
 )
 from openai import APIStatusError, AsyncOpenAI, BaseModel
 from openai.types.responses import (
+    EasyInputMessageParam,
     ResponseFunctionToolCall,
+    ResponseFunctionToolCallParam,
     ResponseFunctionWebSearch,
+    ResponseInputTextParam,
     ResponseOutputMessage,
     ResponseOutputText,
 )
 from openai.types.responses.response_function_web_search import ActionSearch
+from openai.types.responses.response_input_item_param import Message
 from openai.types.responses.response_prompt_param import ResponsePromptParam
 from pydantic import ConfigDict, Field, TypeAdapter
 
@@ -61,6 +65,7 @@
     TestModel,
     TestModelProvider,
 )
+from temporalio.contrib.openai_agents._temporal_model_stub import _extract_summary
 from temporalio.contrib.pydantic import pydantic_data_converter
 from temporalio.exceptions import ApplicationError, CancelledError
 from temporalio.testing import WorkflowEnvironment
@@ -1706,7 +1711,7 @@ class WorkflowToolModel(StaticTestModel):
             id="",
             content=[
                 ResponseOutputText(
-                    text="",
+                    text="Workflow tool was used",
                     annotations=[],
                     type="output_text",
                 )
@@ -1877,3 +1882,37 @@ async def test_chat_completions_model(client: Client):
         execution_timeout=timedelta(seconds=10),
     )
     await workflow_handle.result()
+
+
+def test_summary_extraction():
+    input: list[TResponseInputItem] = [
+        EasyInputMessageParam(
+            content="First message",
+            role="user",
+        )
+    ]
+
+    assert _extract_summary(input) == "First message"
+
+    input.append(
+        Message(
+            content=[
+                ResponseInputTextParam(
+                    text="Second message",
+                    type="input_text",
+                )
+            ],
+            role="user",
+        )
+    )
+    assert _extract_summary(input) == "Second message"
+
+    input.append(
+        ResponseFunctionToolCallParam(
+            arguments="",
+            call_id="",
+            name="",
+            type="function_call",
+        )
+    )
+    assert _extract_summary(input) == "Second message"

From d80fd6cd116872c869bed74304c7dda15204343d Mon Sep 17 00:00:00 2001
From: Tim Conley
Date: Thu, 24 Jul 2025 10:00:52 -0700
Subject: [PATCH 6/6] Small fix

---
 temporalio/contrib/openai_agents/_temporal_model_stub.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/temporalio/contrib/openai_agents/_temporal_model_stub.py b/temporalio/contrib/openai_agents/_temporal_model_stub.py
index e9ea8ec0c..e4b7664ca 100644
--- a/temporalio/contrib/openai_agents/_temporal_model_stub.py
+++ b/temporalio/contrib/openai_agents/_temporal_model_stub.py
@@ -165,7 +165,7 @@ def _extract_summary(input: Union[str, list[TResponseInputItem]]) -> str:
         elif isinstance(input, list):
             # Find all message inputs, which are reasonably summarizable
             messages: list[TResponseInputItem] = [
-                item for item in input if (item.get("type") or "message") == "message"
+                item for item in input if item.get("type", "message") == "message"
             ]
             if not messages:
                 return ""
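
For reference, the snippet below is a minimal usage sketch of the _extract_summary helper introduced in this series. It is not part of the patches: it assumes the patched package is importable, uses plain dicts as stand-ins for the OpenAI typed-dict input items used in the real tests, and the expected values follow the final behavior from PATCH 6/6.

# Standalone sketch (not part of the patches above); plain dicts stand in
# for the OpenAI TypedDict input items.
from temporalio.contrib.openai_agents._temporal_model_stub import _extract_summary

# A plain string input is returned as-is, truncated to at most 100 characters.
assert _extract_summary("Book a flight to SFO") == "Book a flight to SFO"

# For list input, the last message-type item is summarized; non-message
# items such as function_call entries are skipped.
items = [
    {"type": "message", "role": "user", "content": "First message"},
    {"type": "function_call", "call_id": "", "name": "", "arguments": ""},
]
assert _extract_summary(items) == "First message"

# Structured content falls back to the "text" field of the last content part.
items.append(
    {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": "Second message"}],
    }
)
assert _extract_summary(items) == "Second message"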