feat(agent): Update structured output tracing to agent level

kazmer97 · kazmer97 · commit f2e46b9b4eb1 · 2025-08-16T12:50:30.000+01:00
diff --git a/src/strands/agent/agent.py b/src/strands/agent/agent.py
@@ -510,101 +510,84 @@ def capture_structured_output_hook(event: AfterToolInvocationEvent) -> None:
                         if tool_input:
                             captured_result = output_model(**tool_input)
 
-        # Add the callback temporarily (use add_callback, not add_hook)
         self.hooks.add_callback(AfterToolInvocationEvent, capture_structured_output_hook)
         added_callback = capture_structured_output_hook
 
-        try:
-            with self.tracer.tracer.start_as_current_span(
-                "execute_structured_output", kind=trace_api.SpanKind.CLIENT
-            ) as structured_output_span:
-                try:
-                    if not self.messages and not prompt:
-                        raise ValueError("No conversation history or prompt provided")
-
-                    # Create temporary messages array if prompt is provided
-                    message: Message
-                    if prompt:
-                        content: list[ContentBlock] = [{"text": prompt}] if isinstance(prompt, str) else prompt
-                        message = {"role": "user", "content": content}
-                    else:
-                        # Use existing conversation history
-                        message = {
-                            "role": "user",
-                            "content": [
-                                {
-                                    "text": "Please provide the information from our conversation in the requested "
-                                    "structured format."
-                                }
-                            ],
-                        }
-
-                    structured_output_span.set_attributes(
-                        {
-                            "gen_ai.system": "strands-agents",
-                            "gen_ai.agent.name": self.name,
-                            "gen_ai.agent.id": self.agent_id,
-                            "gen_ai.operation.name": "execute_structured_output",
-                        }
-                    )
-
-                    # Add tracing for messages
-                    messages_to_trace = self.messages if not prompt else self.messages + [message]
-                    for msg in messages_to_trace:
-                        structured_output_span.add_event(
-                            f"gen_ai.{msg['role']}.message",
-                            attributes={"role": msg["role"], "content": serialize(msg["content"])},
-                        )
+        # Build the user message used for the trace span, the event loop run, and the fallback model call
+        message: Message
+        if prompt:
+            content: list[ContentBlock] = [{"text": prompt}] if isinstance(prompt, str) else prompt
+            message = {"role": "user", "content": content}
+        else:
+            # Use existing conversation history
+            message = {
+                "role": "user",
+                "content": [
+                    {"text": "Please provide the information from our conversation in the requested structured format."}
+                ],
+            }
 
-                    if self.system_prompt:
-                        structured_output_span.add_event(
-                            "gen_ai.system.message",
-                            attributes={"role": "system", "content": serialize([{"text": self.system_prompt}])},
-                        )
+        # Start agent trace span (same as stream_async)
+        self.trace_span = self._start_agent_trace_span(message)
 
-                    invocation_state = {
-                        "structured_output_mode": True,
-                        "structured_output_model": output_model,
-                    }
+        try:
+            with trace_api.use_span(self.trace_span):
+                if not self.messages and not prompt:
+                    raise ValueError("No conversation history or prompt provided")
 
-                    # Run the event loop
-                    async for event in self._run_loop(message=message, invocation_state=invocation_state):
-                        if "stop" in event:
-                            break
+                invocation_state = {
+                    "structured_output_mode": True,
+                    "structured_output_model": output_model,
+                }
 
-                    # Return the captured structured result if we got it from the tool
-                    if captured_result:
-                        structured_output_span.add_event(
-                            "gen_ai.choice", attributes={"message": serialize(captured_result.model_dump())}
+                # Run the event loop
+                async for event in self._run_loop(message=message, invocation_state=invocation_state):
+                    if "stop" in event:
+                        break
+
+                # Return the captured structured result if we got it from the tool
+                if captured_result:
+                    self._end_agent_trace_span(
+                        response=AgentResult(
+                            message={"role": "assistant", "content": [{"text": str(captured_result)}]},
+                            stop_reason="end_turn",
+                            metrics=self.event_loop_metrics,
+                            state={},
                         )
-                        return captured_result
-
-                    # Fallback: Use the original model.structured_output approach
-                    # This maintains backward compatibility with existing tests and implementations
-                    # Use original_messages to get clean message state, or self.messages if preserve_conversation=True
-                    base_messages = original_messages if original_messages is not None else self.messages
-                    temp_messages = base_messages if not prompt else base_messages + [message]
+                    )
+                    return captured_result
 
-                    events = self.model.structured_output(output_model, temp_messages, system_prompt=self.system_prompt)
-                    async for event in events:
-                        if "callback" in event:
-                            self.callback_handler(**cast(dict, event["callback"]))
+                # Fallback: Use the original model.structured_output approach
+                # This maintains backward compatibility with existing tests and implementations
+                # Use original_messages to get clean message state, or self.messages if preserve_conversation=True
+                base_messages = original_messages if original_messages is not None else self.messages
+                temp_messages = base_messages if not prompt else base_messages + [message]
 
-                    structured_output_span.add_event(
-                        "gen_ai.choice", attributes={"message": serialize(event["output"].model_dump())}
+                events = self.model.structured_output(output_model, temp_messages, system_prompt=self.system_prompt)
+                async for event in events:
+                    if "callback" in event:
+                        self.callback_handler(**cast(dict, event["callback"]))
+
+                self._end_agent_trace_span(
+                    response=AgentResult(
+                        message={"role": "assistant", "content": [{"text": str(event["output"])}]},
+                        stop_reason="end_turn",
+                        metrics=self.event_loop_metrics,
+                        state={},
                     )
-                    return cast(T, event["output"])
-
-                except Exception as e:
-                    structured_output_span.record_exception(e)
-                    raise
+                )
+                return cast(T, event["output"])
 
+        except Exception as e:
+            self._end_agent_trace_span(error=e)
+            raise
         finally:
             # Clean up what we added - remove the callback
-            if added_callback is not None and AfterToolInvocationEvent in self.hooks._registered_callbacks:
-                callbacks = self.hooks._registered_callbacks[AfterToolInvocationEvent]
-                if added_callback in callbacks:
-                    callbacks.remove(added_callback)
+            if added_callback is not None:
+                with suppress(ValueError, KeyError):
+                    callbacks = self.hooks._registered_callbacks.get(AfterToolInvocationEvent, [])
+                    if added_callback in callbacks:
+                        callbacks.remove(added_callback)
 
             # Remove the tool we added
             if added_tool_name:
diff --git a/tests/strands/agent/test_agent.py b/tests/strands/agent/test_agent.py
@@ -992,13 +992,12 @@ def test_agent_callback_handler_custom_handler_used():
 
 
 def test_agent_structured_output(agent, system_prompt, user, agenerator):
-    # Setup mock tracer and span
-    mock_strands_tracer = unittest.mock.MagicMock()
-    mock_otel_tracer = unittest.mock.MagicMock()
+    # Mock the agent tracing methods instead of direct OpenTelemetry calls
+    agent._start_agent_trace_span = unittest.mock.Mock()
+    agent._end_agent_trace_span = unittest.mock.Mock()
     mock_span = unittest.mock.MagicMock()
-    mock_strands_tracer.tracer = mock_otel_tracer
-    mock_otel_tracer.start_as_current_span.return_value.__enter__.return_value = mock_span
-    agent.tracer = mock_strands_tracer
+    agent._start_agent_trace_span.return_value = mock_span
+    agent.trace_span = mock_span
 
     agent.model.structured_output = unittest.mock.Mock(return_value=agenerator([{"output": user}]))
 
@@ -1019,34 +1018,19 @@ def test_agent_structured_output(agent, system_prompt, user, agenerator):
         type(user), [{"role": "user", "content": [{"text": prompt}]}], system_prompt=system_prompt
     )
 
-    mock_span.set_attributes.assert_called_once_with(
-        {
-            "gen_ai.system": "strands-agents",
-            "gen_ai.agent.name": "Strands Agents",
-            "gen_ai.agent.id": "default",
-            "gen_ai.operation.name": "execute_structured_output",
-        }
-    )
-
-    mock_span.add_event.assert_any_call(
-        "gen_ai.user.message",
-        attributes={"role": "user", "content": '[{"text": "Jane Doe is 30 years old and her email is jane@doe.com"}]'},
-    )
-
-    mock_span.add_event.assert_called_with(
-        "gen_ai.choice",
-        attributes={"message": json.dumps(user.model_dump())},
-    )
+    # Verify agent-level tracing was called
+    agent._start_agent_trace_span.assert_called_once()
+    agent._end_agent_trace_span.assert_called_once()
 
 
 def test_agent_structured_output_multi_modal_input(agent, system_prompt, user, agenerator):
-    # Setup mock tracer and span
-    mock_strands_tracer = unittest.mock.MagicMock()
-    mock_otel_tracer = unittest.mock.MagicMock()
+    # Mock the agent tracing methods instead of direct OpenTelemetry calls
+    agent._start_agent_trace_span = unittest.mock.Mock()
+    agent._end_agent_trace_span = unittest.mock.Mock()
     mock_span = unittest.mock.MagicMock()
-    mock_strands_tracer.tracer = mock_otel_tracer
-    mock_otel_tracer.start_as_current_span.return_value.__enter__.return_value = mock_span
-    agent.tracer = mock_strands_tracer
+    agent._start_agent_trace_span.return_value = mock_span
+    agent.trace_span = mock_span
+
     agent.model.structured_output = unittest.mock.Mock(return_value=agenerator([{"output": user}]))
 
     prompt = [
@@ -1076,10 +1060,9 @@ def test_agent_structured_output_multi_modal_input(agent, system_prompt, user, a
         type(user), [{"role": "user", "content": prompt}], system_prompt=system_prompt
     )
 
-    mock_span.add_event.assert_called_with(
-        "gen_ai.choice",
-        attributes={"message": json.dumps(user.model_dump())},
-    )
+    # Verify agent-level tracing was called
+    agent._start_agent_trace_span.assert_called_once()
+    agent._end_agent_trace_span.assert_called_once()
 
 
 @pytest.mark.asyncio