bug: preserve usage tracking on streaming errors

habema · habema · commit b32bb910bd19 · 2025-10-23T19:45:59.000+03:00
diff --git a/src/agents/__init__.py b/src/agents/__init__.py
@@ -20,6 +20,7 @@
     MaxTurnsExceeded,
     ModelBehaviorError,
     OutputGuardrailTripwireTriggered,
+    RunError,
     RunErrorDetails,
     ToolInputGuardrailTripwireTriggered,
     ToolOutputGuardrailTripwireTriggered,
@@ -212,6 +213,8 @@ def enable_verbose_stdout_logging():
     "OutputGuardrailTripwireTriggered",
     "ToolInputGuardrailTripwireTriggered",
     "ToolOutputGuardrailTripwireTriggered",
+    "RunError",
+    "RunErrorDetails",
     "DynamicPromptFunction",
     "GenerateDynamicPromptData",
     "Prompt",
diff --git a/src/agents/exceptions.py b/src/agents/exceptions.py
@@ -129,3 +129,20 @@ def __init__(self, guardrail: ToolOutputGuardrail[Any], output: ToolGuardrailFun
         self.guardrail = guardrail
         self.output = output
         super().__init__(f"Tool output guardrail {guardrail.__class__.__name__} triggered tripwire")
+
+
+class RunError(AgentsException):
+    """Wrapper exception for non-AgentsException errors that occur during agent runs.
+
+    This exception wraps external errors (API errors, connection failures, etc.) to ensure
+    that run data including usage information is preserved and accessible.
+    """
+
+    original_exception: Exception
+    """The original exception that was raised."""
+
+    def __init__(self, original_exception: Exception):
+        self.original_exception = original_exception
+        super().__init__(str(original_exception))
+        # Set the cause explicitly: callers may store this wrapper without `raise ... from`
+        self.__cause__ = original_exception
diff --git a/src/agents/result.py b/src/agents/result.py
@@ -15,6 +15,7 @@
     AgentsException,
     InputGuardrailTripwireTriggered,
     MaxTurnsExceeded,
+    RunError,
     RunErrorDetails,
 )
 from .guardrail import InputGuardrailResult, OutputGuardrailResult
@@ -299,23 +300,40 @@ def _check_errors(self):
         if self._run_impl_task and self._run_impl_task.done():
             run_impl_exc = self._run_impl_task.exception()
             if run_impl_exc and isinstance(run_impl_exc, Exception):
-                if isinstance(run_impl_exc, AgentsException) and run_impl_exc.run_data is None:
-                    run_impl_exc.run_data = self._create_error_details()
-                self._stored_exception = run_impl_exc
+                if isinstance(run_impl_exc, AgentsException):
+                    # For AgentsException, attach run_data if missing
+                    if run_impl_exc.run_data is None:
+                        run_impl_exc.run_data = self._create_error_details()
+                    self._stored_exception = run_impl_exc
+                else:
+                    # For non-AgentsException, wrap it to preserve run_data
+                    wrapped_exc = RunError(run_impl_exc)
+                    wrapped_exc.run_data = self._create_error_details()
+                    self._stored_exception = wrapped_exc
 
         if self._input_guardrails_task and self._input_guardrails_task.done():
             in_guard_exc = self._input_guardrails_task.exception()
             if in_guard_exc and isinstance(in_guard_exc, Exception):
-                if isinstance(in_guard_exc, AgentsException) and in_guard_exc.run_data is None:
-                    in_guard_exc.run_data = self._create_error_details()
-                self._stored_exception = in_guard_exc
+                if isinstance(in_guard_exc, AgentsException):
+                    if in_guard_exc.run_data is None:
+                        in_guard_exc.run_data = self._create_error_details()
+                    self._stored_exception = in_guard_exc
+                else:
+                    wrapped_exc = RunError(in_guard_exc)
+                    wrapped_exc.run_data = self._create_error_details()
+                    self._stored_exception = wrapped_exc
 
         if self._output_guardrails_task and self._output_guardrails_task.done():
             out_guard_exc = self._output_guardrails_task.exception()
             if out_guard_exc and isinstance(out_guard_exc, Exception):
-                if isinstance(out_guard_exc, AgentsException) and out_guard_exc.run_data is None:
-                    out_guard_exc.run_data = self._create_error_details()
-                self._stored_exception = out_guard_exc
+                if isinstance(out_guard_exc, AgentsException):
+                    if out_guard_exc.run_data is None:
+                        out_guard_exc.run_data = self._create_error_details()
+                    self._stored_exception = out_guard_exc
+                else:
+                    wrapped_exc = RunError(out_guard_exc)
+                    wrapped_exc.run_data = self._create_error_details()
+                    self._stored_exception = wrapped_exc
 
     def _cleanup_tasks(self):
         if self._run_impl_task and not self._run_impl_task.done():
diff --git a/src/agents/run.py b/src/agents/run.py
@@ -35,6 +35,7 @@
     MaxTurnsExceeded,
     ModelBehaviorError,
     OutputGuardrailTripwireTriggered,
+    RunError,
     RunErrorDetails,
     UserError,
 )
@@ -702,6 +703,19 @@ async def run(
                     output_guardrail_results=[],
                 )
                 raise
+            except Exception as exc:
+                # Wrap non-AgentsException to preserve run_data including usage
+                wrapped_exc = RunError(exc)
+                wrapped_exc.run_data = RunErrorDetails(
+                    input=original_input,
+                    new_items=generated_items,
+                    raw_responses=model_responses,
+                    last_agent=current_agent,
+                    context_wrapper=context_wrapper,
+                    input_guardrail_results=input_guardrail_results,
+                    output_guardrail_results=[],
+                )
+                raise wrapped_exc from exc
             finally:
                 if current_span:
                     current_span.finish(reset_current=True)
diff --git a/tests/test_run_hooks.py b/tests/test_run_hooks.py
@@ -172,6 +172,8 @@ async def test_async_run_hooks_with_agent_hooks_with_llm():
 
 @pytest.mark.asyncio
 async def test_run_hooks_llm_error_non_streaming(monkeypatch):
+    from agents import RunError
+
     hooks = RunHooksForTests()
     model = FakeModel()
     agent = Agent(name="A", model=model, tools=[get_function_tool("f", "res")], handoffs=[])
@@ -181,9 +183,16 @@ async def boom(*args, **kwargs):
 
     monkeypatch.setattr(FakeModel, "get_response", boom, raising=True)
 
-    with pytest.raises(RuntimeError, match="boom"):
+    with pytest.raises(RunError) as exc_info:
         await Runner.run(agent, input="hello", hooks=hooks)
 
+    # Verify the original exception is preserved
+    assert isinstance(exc_info.value.original_exception, RuntimeError)
+    assert str(exc_info.value.original_exception) == "boom"
+    # Verify run_data is attached
+    assert exc_info.value.run_data is not None
+    assert exc_info.value.run_data.context_wrapper is not None
+
     # Current behavior is that hooks will not fire on LLM failure
     assert hooks.events["on_agent_start"] == 1
     assert hooks.events["on_llm_start"] == 1
@@ -229,16 +238,26 @@ async def test_streamed_run_hooks_llm_error(monkeypatch):
     Verify that when the streaming path raises, we still emit on_llm_start
     but do NOT emit on_llm_end (current behavior), and the exception propagates.
     """
+    from agents import RunError
+
     hooks = RunHooksForTests()
     agent = Agent(name="A", model=BoomModel(), tools=[get_function_tool("f", "res")], handoffs=[])
 
     stream = Runner.run_streamed(agent, input="hello", hooks=hooks)
 
-    # Consuming the stream should surface the exception
-    with pytest.raises(RuntimeError, match="stream blew up"):
+    # Consuming the stream should surface the exception (wrapped in RunError to preserve usage data)
+    with pytest.raises(RunError) as exc_info:
         async for _ in stream.stream_events():
             pass
 
+    # Verify the original exception is preserved and accessible
+    assert isinstance(exc_info.value.original_exception, RuntimeError)
+    assert str(exc_info.value.original_exception) == "stream blew up"
+    # Verify run_data is attached with usage information
+    assert exc_info.value.run_data is not None
+    assert exc_info.value.run_data.context_wrapper is not None
+    assert exc_info.value.run_data.context_wrapper.usage is not None
+
     # Current behavior: success-only on_llm_end; ensure starts fired but ends did not.
     assert hooks.events["on_agent_start"] == 1
     assert hooks.events["on_llm_start"] == 1
diff --git a/tests/test_tracing_errors.py b/tests/test_tracing_errors.py
@@ -15,6 +15,7 @@
     MaxTurnsExceeded,
     ModelBehaviorError,
     RunContextWrapper,
+    RunError,
     Runner,
     TResponseInputItem,
 )
@@ -39,9 +40,12 @@ async def test_single_turn_model_error():
         name="test_agent",
         model=model,
     )
-    with pytest.raises(ValueError):
+    with pytest.raises(RunError) as exc_info:
         await Runner.run(agent, input="first_test")
 
+    # Verify the original exception is preserved
+    assert isinstance(exc_info.value.original_exception, ValueError)
+
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -92,9 +96,12 @@ async def test_multi_turn_no_handoffs():
         ]
     )
 
-    with pytest.raises(ValueError):
+    with pytest.raises(RunError) as exc_info:
         await Runner.run(agent, input="first_test")
 
+    # Verify the original exception is preserved
+    assert isinstance(exc_info.value.original_exception, ValueError)
+
     assert fetch_normalized_spans() == snapshot(
         [
             {
diff --git a/tests/test_tracing_errors_streamed.py b/tests/test_tracing_errors_streamed.py
@@ -18,6 +18,7 @@
     OutputGuardrail,
     OutputGuardrailTripwireTriggered,
     RunContextWrapper,
+    RunError,
     Runner,
     TResponseInputItem,
 )
@@ -42,11 +43,14 @@ async def test_single_turn_model_error():
         name="test_agent",
         model=model,
     )
-    with pytest.raises(ValueError):
+    with pytest.raises(RunError) as exc_info:
         result = Runner.run_streamed(agent, input="first_test")
         async for _ in result.stream_events():
             pass
 
+    # Verify the original exception is preserved
+    assert isinstance(exc_info.value.original_exception, ValueError)
+
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -98,11 +102,14 @@ async def test_multi_turn_no_handoffs():
         ]
     )
 
-    with pytest.raises(ValueError):
+    with pytest.raises(RunError) as exc_info:
         result = Runner.run_streamed(agent, input="first_test")
         async for _ in result.stream_events():
             pass
 
+    # Verify the original exception is preserved
+    assert isinstance(exc_info.value.original_exception, ValueError)
+
     assert fetch_normalized_spans() == snapshot(
         [
             {

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,7 @@`
`15`	`15`	`MaxTurnsExceeded,`
`16`	`16`	`ModelBehaviorError,`
`17`	`17`	`RunContextWrapper,`
	`18`	`+ RunError,`
`18`	`19`	`Runner,`
`19`	`20`	`TResponseInputItem,`
`20`	`21`	`)`
`@@ -39,9 +40,12 @@ async def test_single_turn_model_error():`
`39`	`40`	`name="test_agent",`
`40`	`41`	`model=model,`
`41`	`42`	`)`
`42`		`- with pytest.raises(ValueError):`
	`43`	`+ with pytest.raises(RunError) as exc_info:`
`43`	`44`	`await Runner.run(agent, input="first_test")`
`44`	`45`
	`46`	`+ # Verify the original exception is preserved`
	`47`	`+ assert isinstance(exc_info.value.original_exception, ValueError)`
	`48`	`+`
`45`	`49`	`assert fetch_normalized_spans() == snapshot(`
`46`	`50`	`[`
`47`	`51`	`{`
`@@ -92,9 +96,12 @@ async def test_multi_turn_no_handoffs():`
`92`	`96`	`]`
`93`	`97`	`)`
`94`	`98`
`95`		`- with pytest.raises(ValueError):`
	`99`	`+ with pytest.raises(RunError) as exc_info:`
`96`	`100`	`await Runner.run(agent, input="first_test")`
`97`	`101`
	`102`	`+ # Verify the original exception is preserved`
	`103`	`+ assert isinstance(exc_info.value.original_exception, ValueError)`
	`104`	`+`
`98`	`105`	`assert fetch_normalized_spans() == snapshot(`
`99`	`106`	`[`
`100`	`107`	`{`
Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@`
`18`	`18`	`OutputGuardrail,`
`19`	`19`	`OutputGuardrailTripwireTriggered,`
`20`	`20`	`RunContextWrapper,`
	`21`	`+ RunError,`
`21`	`22`	`Runner,`
`22`	`23`	`TResponseInputItem,`
`23`	`24`	`)`
`@@ -42,11 +43,14 @@ async def test_single_turn_model_error():`
`42`	`43`	`name="test_agent",`
`43`	`44`	`model=model,`
`44`	`45`	`)`
`45`		`- with pytest.raises(ValueError):`
	`46`	`+ with pytest.raises(RunError) as exc_info:`
`46`	`47`	`result = Runner.run_streamed(agent, input="first_test")`
`47`	`48`	`async for _ in result.stream_events():`
`48`	`49`	`pass`
`49`	`50`
	`51`	`+ # Verify the original exception is preserved`
	`52`	`+ assert isinstance(exc_info.value.original_exception, ValueError)`
	`53`	`+`
`50`	`54`	`assert fetch_normalized_spans() == snapshot(`
`51`	`55`	`[`
`52`	`56`	`{`
`@@ -98,11 +102,14 @@ async def test_multi_turn_no_handoffs():`
`98`	`102`	`]`
`99`	`103`	`)`
`100`	`104`
`101`		`- with pytest.raises(ValueError):`
	`105`	`+ with pytest.raises(RunError) as exc_info:`
`102`	`106`	`result = Runner.run_streamed(agent, input="first_test")`
`103`	`107`	`async for _ in result.stream_events():`
`104`	`108`	`pass`
`105`	`109`
	`110`	`+ # Verify the original exception is preserved`
	`111`	`+ assert isinstance(exc_info.value.original_exception, ValueError)`
	`112`	`+`
`106`	`113`	`assert fetch_normalized_spans() == snapshot(`
`107`	`114`	`[`
`108`	`115`	`{`