Record agent run attributes in case of streaming and exception (#1610)

alexmojaki · web-flow · commit ea2bbc55e9f4 · 2025-04-28T13:55:23.000+02:00
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -2,14 +2,13 @@
 
 import asyncio
 import dataclasses
-import json
 from collections.abc import AsyncIterator, Awaitable, Iterator, Sequence
 from contextlib import asynccontextmanager, contextmanager
 from contextvars import ContextVar
 from dataclasses import field
 from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Union, cast
 
-from opentelemetry.trace import Span, Tracer
+from opentelemetry.trace import Tracer
 from typing_extensions import TypeGuard, TypeVar, assert_never
 
 from pydantic_graph import BaseNode, Graph, GraphRunContext
@@ -24,7 +23,6 @@
     result,
     usage as _usage,
 )
-from .models.instrumented import InstrumentedModel
 from .result import OutputDataT, ToolOutput
 from .settings import ModelSettings, merge_model_settings
 from .tools import RunContext, Tool, ToolDefinition
@@ -95,7 +93,6 @@ class GraphAgentDeps(Generic[DepsT, OutputDataT]):
     function_tools: dict[str, Tool[DepsT]] = dataclasses.field(repr=False)
     mcp_servers: Sequence[MCPServer] = dataclasses.field(repr=False)
 
-    run_span: Span
     tracer: Tracer
 
 
@@ -498,39 +495,12 @@ def _handle_final_result(
         final_result: result.FinalResult[NodeRunEndT],
         tool_responses: list[_messages.ModelRequestPart],
     ) -> End[result.FinalResult[NodeRunEndT]]:
-        run_span = ctx.deps.run_span
-        usage = ctx.state.usage
         messages = ctx.state.message_history
 
         # For backwards compatibility, append a new ModelRequest using the tool returns and retries
         if tool_responses:
             messages.append(_messages.ModelRequest(parts=tool_responses))
 
-        run_span.set_attributes(
-            {
-                **usage.opentelemetry_attributes(),
-                'all_messages_events': json.dumps(
-                    [InstrumentedModel.event_to_dict(e) for e in InstrumentedModel.messages_to_otel_events(messages)]
-                ),
-                'final_result': final_result.output
-                if isinstance(final_result.output, str)
-                else json.dumps(InstrumentedModel.serialize_any(final_result.output)),
-            }
-        )
-        run_span.set_attributes(
-            {
-                'logfire.json_schema': json.dumps(
-                    {
-                        'type': 'object',
-                        'properties': {
-                            'all_messages_events': {'type': 'array'},
-                            'final_result': {'type': 'object'},
-                        },
-                    }
-                ),
-            }
-        )
-
         return End(final_result)
 
     async def _handle_text_response(
diff --git a/pydantic_ai_slim/pydantic_ai/agent.py b/pydantic_ai_slim/pydantic_ai/agent.py
@@ -2,6 +2,7 @@
 
 import dataclasses
 import inspect
+import json
 import warnings
 from collections.abc import AsyncIterator, Awaitable, Iterator, Sequence
 from contextlib import AbstractAsyncContextManager, AsyncExitStack, asynccontextmanager, contextmanager
@@ -600,9 +601,10 @@ async def main():
         )
 
         # Build the initial state
+        usage = usage or _usage.Usage()
         state = _agent_graph.GraphAgentState(
             message_history=message_history[:] if message_history else [],
-            usage=usage or _usage.Usage(),
+            usage=usage,
             retries=0,
             run_step=0,
         )
@@ -656,7 +658,6 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
             output_validators=output_validators,
             function_tools=self._function_tools,
             mcp_servers=self._mcp_servers,
-            run_span=run_span,
             tracer=tracer,
             get_instructions=get_instructions,
         )
@@ -669,14 +670,51 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
             system_prompt_dynamic_functions=self._system_prompt_dynamic_functions,
         )
 
-        async with graph.iter(
-            start_node,
-            state=state,
-            deps=graph_deps,
-            span=use_span(run_span, end_on_exit=True) if run_span.is_recording() else None,
-            infer_name=False,
-        ) as graph_run:
-            yield AgentRun(graph_run)
+        try:
+            async with graph.iter(
+                start_node,
+                state=state,
+                deps=graph_deps,
+                span=use_span(run_span) if run_span.is_recording() else None,
+                infer_name=False,
+            ) as graph_run:
+                agent_run = AgentRun(graph_run)
+                yield agent_run
+                if (final_result := agent_run.result) is not None and run_span.is_recording():
+                    run_span.set_attribute(
+                        'final_result',
+                        (
+                            final_result.output
+                            if isinstance(final_result.output, str)
+                            else json.dumps(InstrumentedModel.serialize_any(final_result.output))
+                        ),
+                    )
+        finally:
+            try:
+                if run_span.is_recording():
+                    run_span.set_attributes(self._run_span_end_attributes(state, usage))
+            finally:
+                run_span.end()
+
+    def _run_span_end_attributes(self, state: _agent_graph.GraphAgentState, usage: _usage.Usage):  # attributes the caller sets on the run span just before ending it
+        return {
+            **usage.opentelemetry_attributes(),  # usage-derived attributes are merged in first
+            'all_messages_events': json.dumps(  # message history converted to OTel events, JSON-encoded
+                [
+                    InstrumentedModel.event_to_dict(e)
+                    for e in InstrumentedModel.messages_to_otel_events(state.message_history)
+                ]
+            ),
+            'logfire.json_schema': json.dumps(  # static schema; 'final_result' itself is set by the caller, only when a result exists
+                {
+                    'type': 'object',
+                    'properties': {
+                        'all_messages_events': {'type': 'array'},
+                        'final_result': {'type': 'object'},
+                    },
+                }
+            ),
+        }
 
     @overload
     def run_sync(
diff --git a/tests/models/test_fallback.py b/tests/models/test_fallback.py
@@ -216,6 +216,10 @@ async def test_first_failed_instrumented_stream(capfire: CaptureLogfire) -> None
                     'agent_name': 'agent',
                     'logfire.msg': 'agent run',
                     'logfire.span_type': 'span',
+                    'gen_ai.usage.input_tokens': 50,
+                    'gen_ai.usage.output_tokens': 2,
+                    'all_messages_events': '[{"content": "input", "role": "user", "gen_ai.message.index": 0, "event.name": "gen_ai.user.message"}, {"role": "assistant", "content": "hello world", "gen_ai.message.index": 1, "event.name": "gen_ai.assistant.message"}]',
+                    'logfire.json_schema': '{"type": "object", "properties": {"all_messages_events": {"type": "array"}, "final_result": {"type": "object"}}}',
                 },
             },
         ]
@@ -236,6 +240,82 @@ def test_all_failed() -> None:
     assert exceptions[0].body == {'error': 'test error'}
 
 
+@pytest.mark.skipif(not logfire_imports_successful(), reason='logfire not installed')
+def test_all_failed_instrumented(capfire: CaptureLogfire) -> None:  # instrumented variant of test_all_failed: also checks the exported spans
+    fallback_model = FallbackModel(failure_model, failure_model)  # both slots use failure_model, so no fallback can succeed (see assertions below)
+    agent = Agent(model=fallback_model, instrument=True)
+    with pytest.raises(ExceptionGroup) as exc_info:
+        agent.run_sync('hello')
+    assert 'All models from FallbackModel failed' in exc_info.value.args[0]
+    exceptions = exc_info.value.exceptions
+    assert len(exceptions) == 2
+    assert isinstance(exceptions[0], ModelHTTPError)
+    assert exceptions[0].status_code == 500
+    assert exceptions[0].model_name == 'test-function-model'
+    assert exceptions[0].body == {'error': 'test error'}
+    assert capfire.exporter.exported_spans_as_dict() == snapshot(  # expected spans: the chat request span (child), then the agent run span (root)
+        [
+            {
+                'name': 'chat fallback:function:failure_response:,function:failure_response:',
+                'context': {'trace_id': 1, 'span_id': 3, 'is_remote': False},
+                'parent': {'trace_id': 1, 'span_id': 1, 'is_remote': False},
+                'start_time': 2000000000,
+                'end_time': 4000000000,
+                'attributes': {
+                    'gen_ai.operation.name': 'chat',
+                    'gen_ai.system': 'fallback:function,function',
+                    'gen_ai.request.model': 'fallback:function:failure_response:,function:failure_response:',
+                    'model_request_parameters': '{"function_tools": [], "allow_text_output": true, "output_tools": []}',
+                    'logfire.json_schema': '{"type": "object", "properties": {"model_request_parameters": {"type": "object"}}}',
+                    'logfire.span_type': 'span',
+                    'logfire.msg': 'chat fallback:function:failure_response:,function:failure_response:',
+                    'logfire.level_num': 17,
+                },
+                'events': [
+                    {
+                        'name': 'exception',
+                        'timestamp': 3000000000,
+                        'attributes': {
+                            'exception.type': 'pydantic_ai.exceptions.FallbackExceptionGroup',
+                            'exception.message': 'All models from FallbackModel failed (2 sub-exceptions)',
+                            'exception.stacktrace': '+------------------------------------',
+                            'exception.escaped': 'False',
+                        },
+                    }
+                ],
+            },
+            {
+                'name': 'agent run',
+                'context': {'trace_id': 1, 'span_id': 1, 'is_remote': False},
+                'parent': None,
+                'start_time': 1000000000,
+                'end_time': 6000000000,
+                'attributes': {  # no 'final_result' attribute: the run raised before producing a result
+                    'model_name': 'fallback:function:failure_response:,function:failure_response:',
+                    'agent_name': 'agent',
+                    'logfire.msg': 'agent run',
+                    'logfire.span_type': 'span',
+                    'all_messages_events': '[{"content": "hello", "role": "user", "gen_ai.message.index": 0, "event.name": "gen_ai.user.message"}]',  # message history is recorded although the run raised
+                    'logfire.json_schema': '{"type": "object", "properties": {"all_messages_events": {"type": "array"}, "final_result": {"type": "object"}}}',
+                    'logfire.level_num': 17,
+                },
+                'events': [
+                    {
+                        'name': 'exception',
+                        'timestamp': 5000000000,
+                        'attributes': {
+                            'exception.type': 'pydantic_ai.exceptions.FallbackExceptionGroup',
+                            'exception.message': 'All models from FallbackModel failed (2 sub-exceptions)',
+                            'exception.stacktrace': '+------------------------------------',
+                            'exception.escaped': 'False',
+                        },
+                    }
+                ],
+            },
+        ]
+    )
+
+
 async def success_response_stream(_model_messages: list[ModelMessage], _agent_info: AgentInfo) -> AsyncIterator[str]:
     yield 'hello '
     yield 'world'
diff --git a/uv.lock b/uv.lock