Add better tracing for sync_provider

danielmillerp · danielmillerp · commit 1500161afa6c · 2025-11-03T18:14:56.000-05:00
diff --git a/src/agentex/lib/adk/providers/_modules/sync_provider.py b/src/agentex/lib/adk/providers/_modules/sync_provider.py
@@ -109,10 +109,12 @@ async def get_response(
 
                 response = await self.original_model.get_response(**kwargs)
 
-                # Set span output
-                if span:
+                # Set span output with structured data
+                if span and response:
+                    new_items, final_output = _extract_response_items(response)
                     span.output = {
-                        "response": str(response) if response else None,
+                        "new_items": new_items,
+                        "final_output": final_output if final_output else None,
                     }
 
                 return response
@@ -160,7 +162,9 @@ async def stream_response(
         # Wrap the streaming in a tracing span if tracer is available
         if self.tracer and self.trace_id:
             trace = self.tracer.trace(self.trace_id)
-            async with trace.span(
+
+            # Manually start the span instead of using context manager
+            span = await trace.start_span(
                 parent_id=self.parent_span_id,
                 name="run_agent_streamed",
                 input={
@@ -172,7 +176,9 @@ async def stream_response(
                     "handoffs": [str(h) for h in handoffs] if handoffs else [],
                     "previous_response_id": previous_response_id,
                 },
-            ) as span:
+            )
+
+            try:
                 # Get the stream from the original model
                 stream_kwargs = {
                     "system_instructions": system_instructions,
@@ -193,23 +199,110 @@ async def stream_response(
                 # Get the stream response from the original model and yield each event
                 stream_response = self.original_model.stream_response(**stream_kwargs)
 
-                # Pass through each event from the original stream
-                event_count = 0
-                final_output = None
+                # Pass through each event from the original stream and track items
+                new_items = []
+                final_response_text = ""
+                current_text_item = None
+                tool_call_map = {}  # Map call_id to tool name
+
                 async for event in stream_response:
-                    event_count += 1
-                    # Track the final output if available
-                    if hasattr(event, 'type') and event.type == 'raw_response_event':
-                        if hasattr(event.data, 'output'):
-                            final_output = event.data.output
+                    # Track reasoning, tool calls, and responses from run_item_stream_event
+                    if hasattr(event, 'type') and event.type == 'run_item_stream_event':
+                        if hasattr(event, 'item'):
+                            item = event.item
+
+                            # Handle reasoning items
+                            if item.type == 'reasoning_item':
+                                reasoning_summary = []
+                                if hasattr(item, 'raw_item') and hasattr(item.raw_item, 'summary'):
+                                    for summary_part in item.raw_item.summary:
+                                        if hasattr(summary_part, 'text'):
+                                            reasoning_summary.append({
+                                                "text": summary_part.text,
+                                                "type": "summary_text"
+                                            })
+
+                                new_items.append({
+                                    "id": getattr(item.raw_item, 'id', None),
+                                    "type": "reasoning",
+                                    "status": getattr(item.raw_item, 'status', None),
+                                    "content": None,
+                                    "summary": reasoning_summary if reasoning_summary else None,
+                                })
+
+                            # Handle tool call items
+                            elif item.type == 'tool_call_item':
+                                call_id, tool_name, tool_arguments = _extract_tool_call_info(item.raw_item)
+                                tool_call_map[call_id] = tool_name
+
+                                new_items.append({
+                                    "id": getattr(item.raw_item, 'id', None),
+                                    "name": tool_name,
+                                    "type": "function_call",
+                                    "status": getattr(item.raw_item, 'status', 'completed'),
+                                    "call_id": call_id,
+                                    "arguments": str(tool_arguments) if isinstance(tool_arguments, dict) else tool_arguments,
+                                })
+
+                            # Handle tool output items
+                            elif item.type == 'tool_call_output_item':
+                                call_id, tool_name, content = _extract_tool_response_info(tool_call_map, item.raw_item)
+
+                                new_items.append({
+                                    "type": "function_call_output",
+                                    "output": content,
+                                    "call_id": call_id,
+                                })
+
+                    # Accumulate text deltas to build final response
+                    # Note: OpenAI Agents SDK can emit events in different formats
+                    if hasattr(event, 'type') and event.type == 'response.output_text.delta':
+                        # Direct event type from OpenAI Agents SDK (observed in practice)
+                        if hasattr(event, 'delta'):
+                            final_response_text += event.delta
+
+                    # Handle raw_response_event wrapper (alternative event format, kept for compatibility)
+                    elif hasattr(event, 'type') and event.type == 'raw_response_event':
+                        if hasattr(event, 'data'):
+                            raw_event = event.data
+
+                            # Track when output items are added
+                            if isinstance(raw_event, ResponseOutputItemAddedEvent):
+                                if hasattr(raw_event, 'item') and raw_event.item.type == 'message':
+                                    current_text_item = {
+                                        "id": getattr(raw_event.item, 'id', None),
+                                        "role": getattr(raw_event.item, 'role', 'assistant'),
+                                        "type": "message",
+                                        "status": "in_progress",
+                                        "content": []
+                                    }
+
+                            # Check if this is a text delta event
+                            elif isinstance(raw_event, ResponseTextDeltaEvent):
+                                if hasattr(raw_event, 'delta') and raw_event.delta:
+                                    final_response_text += raw_event.delta
+
+                            # Track when output items are done
+                            elif isinstance(raw_event, ResponseOutputItemDoneEvent):
+                                if current_text_item and final_response_text:
+                                    current_text_item["status"] = "completed"
+                                    current_text_item["content"] = [{
+                                        "text": final_response_text,
+                                        "type": "output_text",
+                                    }]
+                                    new_items.append(current_text_item)
+                                    current_text_item = None
+
                     yield event
 
-                # Set span output
-                if span:
-                    span.output = {
-                        "event_count": event_count,
-                        "final_output": str(final_output) if final_output else None,
-                    }
+                # Set span output with structured data including tool calls and final response
+                span.output = {
+                    "new_items": new_items,
+                    "final_output": final_response_text if final_response_text else None,
+                }
+            finally:
+                # End the span after all events have been yielded
+                await trace.end_span(span)
         else:
             # No tracing, just stream normally
             # Get the stream from the original model
@@ -275,6 +368,90 @@ def get_model(self, model_name: Optional[str] = None) -> Model:
         return wrapped_model
 
 
+def _extract_response_items(response: Any) -> tuple[list[dict[str, Any]], str]:
+    """
+    Extract new_items and final_output from a response object.
+
+    Args:
+        response: Object to read from; every attribute is probed with hasattr.
+    Returns:
+        (new_items, final_output). new_items holds one dict per recognized item;
+        final_output is the first truthy of final_output/text/content, else "".
+    """
+    new_items = []
+    final_output = ""  # fallback when none of the sources below is truthy
+
+    # First truthy of final_output / text / content wins; the value is passed through unconverted
+    if hasattr(response, 'final_output') and response.final_output:
+        final_output = response.final_output
+    elif hasattr(response, 'text') and response.text:
+        final_output = response.text
+    elif hasattr(response, 'content') and response.content:
+        final_output = response.content
+
+    # NOTE(review): confirm the response really exposes new_items/final_output; if not, this returns ([], "")
+    if hasattr(response, 'new_items') and response.new_items:
+        for item in response.new_items:  # items matching no branch below are skipped
+            # Reasoning item: keep only the summary parts that expose .text
+            if hasattr(item, 'type') and item.type == 'reasoning':
+                reasoning_summary = []
+                if hasattr(item, 'summary') and item.summary:
+                    for summary_part in item.summary:
+                        if hasattr(summary_part, 'text'):
+                            reasoning_summary.append({
+                                "text": summary_part.text,
+                                "type": "summary_text"
+                            })
+
+                new_items.append({
+                    "id": getattr(item, 'id', None),
+                    "type": "reasoning",
+                    "status": getattr(item, 'status', None),
+                    "content": None,
+                    "summary": reasoning_summary if reasoning_summary else None,  # None rather than [] when no summary text was found
+                })
+
+            # Tool call: fields are copied as-is; a missing status defaults to 'completed'
+            elif hasattr(item, 'type') and item.type == 'function_call':
+                new_items.append({
+                    "id": getattr(item, 'id', None),
+                    "name": getattr(item, 'name', None),
+                    "type": "function_call",
+                    "status": getattr(item, 'status', 'completed'),
+                    "call_id": getattr(item, 'call_id', None),
+                    "arguments": getattr(item, 'arguments', None),
+                })
+
+            # Tool output: only output and call_id are recorded
+            elif hasattr(item, 'type') and item.type == 'function_call_output':
+                new_items.append({
+                    "type": "function_call_output",
+                    "output": getattr(item, 'output', None),
+                    "call_id": getattr(item, 'call_id', None),
+                })
+
+            # Message: keep the content parts that expose .text, each tagged as output_text
+            elif hasattr(item, 'type') and item.type == 'message':
+                content = []
+                if hasattr(item, 'content') and item.content:
+                    for content_part in item.content:
+                        if hasattr(content_part, 'text'):
+                            content.append({
+                                "text": content_part.text,
+                                "type": "output_text",
+                            })
+
+                new_items.append({
+                    "id": getattr(item, 'id', None),
+                    "role": getattr(item, 'role', 'assistant'),
+                    "type": "message",
+                    "status": getattr(item, 'status', 'completed'),
+                    "content": content,  # may be [] (not collapsed to None like the reasoning summary)
+                })
+
+    return new_items, final_output
+
+
 def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, Any]]:
     """
     Extract call_id, tool_name, and tool_arguments from a tool call item.