Merge branch 'main' into openai/early_failure

tconley1428 · web-flow · commit 8428da681262 · 2025-07-24T11:37:56.000-07:00
diff --git a/temporalio/contrib/openai_agents/_heartbeat_decorator.py b/temporalio/contrib/openai_agents/_heartbeat_decorator.py
@@ -24,7 +24,10 @@ async def wrapper(*args: Any, **kwargs: Any) -> Any:
             if heartbeat_task:
                 heartbeat_task.cancel()
                 # Wait for heartbeat cancellation to complete
-                await heartbeat_task
+                try:
+                    await heartbeat_task
+                except asyncio.CancelledError:
+                    pass
 
     return cast(F, wrapper)
 
diff --git a/temporalio/contrib/openai_agents/_temporal_model_stub.py b/temporalio/contrib/openai_agents/_temporal_model_stub.py
@@ -8,7 +8,7 @@
 
 logger = logging.getLogger(__name__)
 
-from typing import Any, AsyncIterator, Sequence, Union, cast
+from typing import Any, AsyncIterator, Union, cast
 
 from agents import (
     AgentOutputSchema,
@@ -54,7 +54,7 @@ def __init__(
     async def get_response(
         self,
         system_instructions: Optional[str],
-        input: Union[str, list[TResponseInputItem], dict[str, str]],
+        input: Union[str, list[TResponseInputItem]],
         model_settings: ModelSettings,
         tools: list[Tool],
         output_schema: Optional[AgentOutputSchemaBase],
@@ -64,28 +64,6 @@ async def get_response(
         previous_response_id: Optional[str],
         prompt: Optional[ResponsePromptParam],
     ) -> ModelResponse:
-        def get_summary(
-            input: Union[str, list[TResponseInputItem], dict[str, str]],
-        ) -> str:
-            ### Activity summary shown in the UI
-            try:
-                max_size = 100
-                if isinstance(input, str):
-                    return input[:max_size]
-                elif isinstance(input, list):
-                    seq_input = cast(Sequence[Any], input)
-                    last_item = seq_input[-1]
-                    if isinstance(last_item, dict):
-                        return last_item.get("content", "")[:max_size]
-                    elif hasattr(last_item, "content"):
-                        return str(getattr(last_item, "content"))[:max_size]
-                    return str(last_item)[:max_size]
-                elif isinstance(input, dict):
-                    return input.get("content", "")[:max_size]
-            except Exception as e:
-                logger.error(f"Error getting summary: {e}")
-            return ""
-
         def make_tool_info(tool: Tool) -> ToolInput:
             if isinstance(tool, (FileSearchTool, WebSearchTool)):
                 return tool
@@ -150,7 +128,7 @@ def make_tool_info(tool: Tool) -> ToolInput:
         return await workflow.execute_activity_method(
             ModelActivity.invoke_model_activity,
             activity_input,
-            summary=self.model_params.summary_override or get_summary(input),
+            summary=self.model_params.summary_override or _extract_summary(input),
             task_queue=self.model_params.task_queue,
             schedule_to_close_timeout=self.model_params.schedule_to_close_timeout,
             schedule_to_start_timeout=self.model_params.schedule_to_start_timeout,
@@ -176,3 +154,34 @@ def stream_response(
         prompt: ResponsePromptParam | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         raise NotImplementedError("Temporal model doesn't support streams yet")
+
+
+def _extract_summary(input: Union[str, list[TResponseInputItem]]) -> str:
+    # Activity summary shown in the UI: at most max_size chars, "" on any error.
+    try:
+        max_size = 100  # truncation limit (characters) for the returned summary
+        if isinstance(input, str):
+            return input[:max_size]
+        elif isinstance(input, list):
+            # Keep only message items; an item with no "type" key counts as a message
+            messages: list[TResponseInputItem] = [
+                item for item in input if item.get("type", "message") == "message"
+            ]
+            if not messages:
+                return ""
+
+            content: Any = messages[-1].get("content", "")  # last message in the list
+
+            # Content may be a list of parts; only the last part is summarized
+            if isinstance(content, list):
+                if not content:
+                    return ""
+                content = content[-1]
+
+            # Prefer the "text" field of a dict part; otherwise str() the content as-is
+            if isinstance(content, dict) and content.get("text") is not None:
+                content = content.get("text")
+            return str(content)[:max_size]
+    except Exception as e:  # best-effort: log and fall through to the empty summary
+        logger.error(f"Error getting summary: {e}")
+    return ""
diff --git a/temporalio/contrib/openai_agents/workflow.py b/temporalio/contrib/openai_agents/workflow.py
@@ -134,7 +134,7 @@ async def run_activity(ctx: RunContextWrapper[Any], input: str) -> Any:
             cancellation_type=cancellation_type,
             activity_id=activity_id,
             versioning_intent=versioning_intent,
-            summary=summary,
+            summary=summary or schema.description,
             priority=priority,
         )
         try:
diff --git a/tests/contrib/openai_agents/test_openai.py b/tests/contrib/openai_agents/test_openai.py
@@ -1,9 +1,10 @@
+import asyncio
 import json
 import os
 import uuid
 from dataclasses import dataclass
 from datetime import timedelta
-from typing import Any, Optional, Union, no_type_check
+from typing import Any, AsyncIterator, Optional, Union, no_type_check
 
 import nexusrpc
 import pytest
@@ -39,15 +40,20 @@
     HandoffOutputItem,
     ToolCallItem,
     ToolCallOutputItem,
+    TResponseStreamEvent,
 )
 from openai import APIStatusError, AsyncOpenAI, BaseModel
 from openai.types.responses import (
+    EasyInputMessageParam,
     ResponseFunctionToolCall,
+    ResponseFunctionToolCallParam,
     ResponseFunctionWebSearch,
+    ResponseInputTextParam,
     ResponseOutputMessage,
     ResponseOutputText,
 )
 from openai.types.responses.response_function_web_search import ActionSearch
+from openai.types.responses.response_input_item_param import Message
 from openai.types.responses.response_prompt_param import ResponsePromptParam
 from pydantic import ConfigDict, Field, TypeAdapter
 
@@ -61,6 +67,7 @@
     TestModel,
     TestModelProvider,
 )
+from temporalio.contrib.openai_agents._temporal_model_stub import _extract_summary
 from temporalio.contrib.pydantic import pydantic_data_converter
 from temporalio.exceptions import ApplicationError, CancelledError
 from temporalio.testing import WorkflowEnvironment
@@ -70,25 +77,16 @@
 from tests.helpers import new_worker
 from tests.helpers.nexus import create_nexus_endpoint, make_nexus_endpoint_name
 
-response_index: int = 0
-
 
 class StaticTestModel(TestModel):
     __test__ = False
     responses: list[ModelResponse] = []
 
-    def response(self):
-        global response_index
-        response = self.responses[response_index]
-        response_index += 1
-        return response
-
     def __init__(
         self,
     ) -> None:
-        global response_index
-        response_index = 0
-        super().__init__(self.response)
+        self._responses = iter(self.responses)
+        super().__init__(lambda: next(self._responses))
 
 
 class TestHelloModel(StaticTestModel):
@@ -687,7 +685,8 @@ async def test_research_workflow(client: Client, use_local_model: bool):
     new_config["plugins"] = [
         openai_agents.OpenAIAgentsPlugin(
             model_params=ModelActivityParameters(
-                start_to_close_timeout=timedelta(seconds=30)
+                start_to_close_timeout=timedelta(seconds=120),
+                schedule_to_close_timeout=timedelta(seconds=120),
             ),
             model_provider=TestModelProvider(TestResearchModel())
             if use_local_model
@@ -1340,9 +1339,6 @@ async def test_customer_service_workflow(client: Client, use_local_model: bool):
             )
 
 
-guardrail_response_index: int = 0
-
-
 class InputGuardrailModel(OpenAIResponsesModel):
     __test__ = False
     responses: list[ModelResponse] = [
@@ -1431,11 +1427,9 @@ def __init__(
         model: str,
         openai_client: AsyncOpenAI,
     ) -> None:
-        global response_index
-        response_index = 0
-        global guardrail_response_index
-        guardrail_response_index = 0
         super().__init__(model, openai_client)
+        self._responses = iter(self.responses)
+        self._guardrail_responses = iter(self.guardrail_responses)
 
     async def get_response(
         self,
@@ -1453,15 +1447,9 @@ async def get_response(
             system_instructions
             == "Check if the user is asking you to do their math homework."
         ):
-            global guardrail_response_index
-            response = self.guardrail_responses[guardrail_response_index]
-            guardrail_response_index += 1
-            return response
+            return next(self._guardrail_responses)
         else:
-            global response_index
-            response = self.responses[response_index]
-            response_index += 1
-            return response
+            return next(self._responses)
 
 
 ### 1. An agent-based guardrail that is triggered if the user is asking to do math homework
@@ -1705,7 +1693,7 @@ class WorkflowToolModel(StaticTestModel):
                     id="",
                     content=[
                         ResponseOutputText(
-                            text="",
+                            text="Workflow tool was used",
                             annotations=[],
                             type="output_text",
                         )
@@ -1876,3 +1864,117 @@ async def test_chat_completions_model(client: Client):
             execution_timeout=timedelta(seconds=10),
         )
         await workflow_handle.result()
+
+
+class WaitModel(Model):  # test double: sleeps 1s, then returns a fixed reply
+    async def get_response(
+        self,
+        system_instructions: Union[str, None],
+        input: Union[str, list[TResponseInputItem]],
+        model_settings: ModelSettings,
+        tools: list[Tool],
+        output_schema: Union[AgentOutputSchemaBase, None],
+        handoffs: list[Handoff],
+        tracing: ModelTracing,
+        *,
+        previous_response_id: Union[str, None],
+        prompt: Union[ResponsePromptParam, None] = None,
+    ) -> ModelResponse:
+        activity.logger.info("Waiting")
+        await asyncio.sleep(1.0)  # twice the 0.5s heartbeat_timeout set in test_heartbeat
+        activity.logger.info("Returning")
+        return ModelResponse(  # fixed single-message reply; all arguments are ignored
+            output=[
+                ResponseOutputMessage(
+                    id="",
+                    content=[
+                        ResponseOutputText(
+                            text="test", annotations=[], type="output_text"
+                        )
+                    ],
+                    role="assistant",
+                    status="completed",
+                    type="message",
+                )
+            ],
+            usage=Usage(),
+            response_id=None,
+        )
+
+    def stream_response(
+        self,
+        system_instructions: Optional[str],
+        input: Union[str, list[TResponseInputItem]],
+        model_settings: ModelSettings,
+        tools: list[Tool],
+        output_schema: Optional[AgentOutputSchemaBase],
+        handoffs: list[Handoff],
+        tracing: ModelTracing,
+        *,
+        previous_response_id: Optional[str],
+        prompt: Optional[ResponsePromptParam],
+    ) -> AsyncIterator[TResponseStreamEvent]:
+        raise NotImplementedError()  # streaming is not implemented by this test double
+
+
+async def test_heartbeat(client: Client, env: WorkflowEnvironment):
+    if env.supports_time_skipping:
+        pytest.skip("Relies on real timing, skip.")
+
+    new_config = client.config()
+    new_config["plugins"] = [
+        openai_agents.OpenAIAgentsPlugin(
+            model_params=ModelActivityParameters(
+                heartbeat_timeout=timedelta(seconds=0.5),  # shorter than WaitModel's 1s sleep
+            ),
+            model_provider=TestModelProvider(WaitModel()),
+        )
+    ]
+    client = Client(**new_config)  # rebinds `client` to one carrying the plugin config
+
+    async with new_worker(
+        client,
+        HelloWorldAgent,
+    ) as worker:
+        workflow_handle = await client.start_workflow(
+            HelloWorldAgent.run,
+            "Tell me about recursion in programming.",
+            id=f"workflow-tool-{uuid.uuid4()}",
+            task_queue=worker.task_queue,
+            execution_timeout=timedelta(seconds=5.0),
+        )
+        await workflow_handle.result()  # no assertion: finishing without raising is the pass
+
+
+def test_summary_extraction():
+    input: list[TResponseInputItem] = [
+        EasyInputMessageParam(
+            content="First message",
+            role="user",
+        )
+    ]
+
+    assert _extract_summary(input) == "First message"  # plain-string content
+
+    input.append(
+        Message(
+            content=[
+                ResponseInputTextParam(
+                    text="Second message",
+                    type="input_text",
+                )
+            ],
+            role="user",
+        )
+    )
+    assert _extract_summary(input) == "Second message"  # text of the last content part
+
+    input.append(
+        ResponseFunctionToolCallParam(
+            arguments="",
+            call_id="",
+            name="",
+            type="function_call",
+        )
+    )
+    assert _extract_summary(input) == "Second message"  # function_call item is skipped

Original file line number	Diff line number	Diff line change
`@@ -134,7 +134,7 @@ async def run_activity(ctx: RunContextWrapper[Any], input: str) -> Any:`
`134`	`134`	`cancellation_type=cancellation_type,`
`135`	`135`	`activity_id=activity_id,`
`136`	`136`	`versioning_intent=versioning_intent,`
`137`		`- summary=summary,`
	`137`	`+ summary=summary or schema.description,`
`138`	`138`	`priority=priority,`
`139`	`139`	`)`
`140`	`140`	`try:`