diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_ai_project_instrumentor.py b/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_ai_project_instrumentor.py index f3ac7f846943..4ee8ddaf24f6 100644 --- a/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_ai_project_instrumentor.py +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_ai_project_instrumentor.py @@ -437,9 +437,9 @@ def _add_instructions_event( event_body: Dict[str, Any] = {} if _trace_agents_content and (instructions or additional_instructions): if instructions and additional_instructions: - event_body["content"] = f"{instructions} {additional_instructions}" + event_body["text"] = f"{instructions} {additional_instructions}" else: - event_body["content"] = instructions or additional_instructions + event_body["text"] = instructions or additional_instructions attributes = self._create_event_attributes(agent_id=agent_id, thread_id=thread_id) attributes[GEN_AI_EVENT_CONTENT] = json.dumps(event_body, ensure_ascii=False) diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py b/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py index 14d2283c4634..ddbe95672144 100644 --- a/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py @@ -471,10 +471,10 @@ def _add_message_event( conversation_id: Optional[str] = None, ) -> None: """Add a message event to the span.""" - event_body: Dict[str, Any] = {"role": role} + event_body: Dict[str, Any] = {} if _trace_responses_content and content: - event_body["content"] = content + event_body["text"] = content attributes = self._create_event_attributes( conversation_id=conversation_id, @@ -486,6 +486,310 @@ def _add_message_event( event_name = f"gen_ai.{role}.message" span.span_instance.add_event(name=event_name, attributes=attributes) + def _add_tool_message_events( + self, 
+ span: "AbstractSpan", + tool_outputs: List[Any], + conversation_id: Optional[str] = None, + ) -> None: + """Add tool message events (tool call outputs) to the span.""" + event_body: Dict[str, Any] = {} + + if _trace_responses_content and tool_outputs: + tool_call_outputs = [] + for output_item in tool_outputs: + try: + tool_output: Dict[str, Any] = {} + + # Get the item type - handle both dict and object attributes + if isinstance(output_item, dict): + item_type = output_item.get("type") + else: + item_type = getattr(output_item, "type", None) + + if not item_type: + continue # Skip if no type + + # Convert function_call_output to "function" + if item_type == "function_call_output": + tool_output["type"] = "function" + else: + tool_output["type"] = item_type + + # Add call_id as "id" - handle both dict and object + if isinstance(output_item, dict): + call_id = output_item.get("call_id") or output_item.get("id") + else: + call_id = getattr(output_item, "call_id", None) or getattr(output_item, "id", None) + + if call_id: + tool_output["id"] = call_id + + # Add output field - parse JSON string if needed + if isinstance(output_item, dict): + output_value = output_item.get("output") + else: + output_value = getattr(output_item, "output", None) + + if output_value is not None: + # Try to parse JSON string into object + if isinstance(output_value, str): + try: + tool_output["output"] = json.loads(output_value) + except (json.JSONDecodeError, TypeError): + # If parsing fails, keep as string + tool_output["output"] = output_value + else: + tool_output["output"] = output_value + + tool_call_outputs.append(tool_output) + except Exception: # pylint: disable=broad-exception-caught + # Skip items that can't be processed + logger.debug("Failed to process tool output item: %s", output_item, exc_info=True) + continue + + if tool_call_outputs: + event_body["tool_call_outputs"] = tool_call_outputs + + attributes = self._create_event_attributes( + conversation_id=conversation_id, + 
message_role="tool", + ) + attributes[GEN_AI_EVENT_CONTENT] = json.dumps(event_body, ensure_ascii=False) + + # Use "tool" for the event name: gen_ai.tool.message + span.span_instance.add_event(name="gen_ai.tool.message", attributes=attributes) + + def _emit_tool_call_event( + self, + span: "AbstractSpan", + tool_call: Dict[str, Any], + conversation_id: Optional[str] = None, + ) -> None: + """Helper to emit a single tool call event.""" + event_body: Dict[str, Any] = {"tool_calls": [tool_call]} + attributes = self._create_event_attributes( + conversation_id=conversation_id, + message_role="assistant", + ) + attributes[GEN_AI_EVENT_CONTENT] = json.dumps(event_body, ensure_ascii=False) + span.span_instance.add_event(name="gen_ai.assistant.message", attributes=attributes) + + def _add_tool_call_events( # pylint: disable=too-many-branches + self, + span: "AbstractSpan", + response: Any, + conversation_id: Optional[str] = None, + ) -> None: + """Add tool call events to the span from response output.""" + if not span or not span.span_instance.is_recording: + return + + # Extract function calls and tool calls from response output + output = getattr(response, "output", None) + if not output: + return + + for output_item in output: + try: + item_type = getattr(output_item, "type", None) + if not item_type: + continue + + tool_call: Dict[str, Any] # Declare once for all branches + + # Handle function_call type + if item_type == "function_call": + tool_call = { + "type": "function", + } + + # Always include id (needed to correlate with function output) + if hasattr(output_item, "call_id"): + tool_call["id"] = output_item.call_id + + # Only include function name and arguments if content recording is enabled + if _trace_responses_content: + function_details: Dict[str, Any] = {} + if hasattr(output_item, "name"): + function_details["name"] = output_item.name + if hasattr(output_item, "arguments"): + function_details["arguments"] = output_item.arguments + if function_details: + 
tool_call["function"] = function_details + + self._emit_tool_call_event(span, tool_call, conversation_id) + + # Handle file_search_call type + elif item_type == "file_search_call": + tool_call = { + "type": "file_search", + } + + if hasattr(output_item, "id"): + tool_call["id"] = output_item.id + + # Only include search details if content recording is enabled + if _trace_responses_content: + # queries and results are directly on the item + if hasattr(output_item, "queries") and output_item.queries: + tool_call["queries"] = output_item.queries + if hasattr(output_item, "results") and output_item.results: + tool_call["results"] = [] + for result in output_item.results: + result_data = { + "file_id": getattr(result, "file_id", None), + "filename": getattr(result, "filename", None), + "score": getattr(result, "score", None), + } + tool_call["results"].append(result_data) + + self._emit_tool_call_event(span, tool_call, conversation_id) + + # Handle code_interpreter_call type + elif item_type == "code_interpreter_call": + tool_call = { + "type": "code_interpreter", + } + + if hasattr(output_item, "id"): + tool_call["id"] = output_item.id + + # Only include code interpreter details if content recording is enabled + if _trace_responses_content: + # code and outputs are directly on the item + if hasattr(output_item, "code") and output_item.code: + tool_call["code"] = output_item.code + if hasattr(output_item, "outputs") and output_item.outputs: + tool_call["outputs"] = [] + for output in output_item.outputs: + # Outputs can be logs or images + output_data = { + "type": getattr(output, "type", None), + } + if hasattr(output, "logs"): + output_data["logs"] = output.logs + elif hasattr(output, "image"): + output_data["image"] = {"file_id": getattr(output.image, "file_id", None)} + tool_call["outputs"].append(output_data) + + self._emit_tool_call_event(span, tool_call, conversation_id) + + # Handle web_search_call type + elif item_type == "web_search_call": + tool_call = { + 
"type": "web_search", + } + + if hasattr(output_item, "id"): + tool_call["id"] = output_item.id + + # Only include search action if content recording is enabled + if _trace_responses_content: + # action is directly on the item + if hasattr(output_item, "action") and output_item.action: + # WebSearchAction has type and type-specific fields + tool_call["action"] = { + "type": getattr(output_item.action, "type", None), + } + # Try to capture action-specific fields + if hasattr(output_item.action, "query"): + tool_call["action"]["query"] = output_item.action.query + if hasattr(output_item.action, "results"): + tool_call["action"]["results"] = [] + for result in output_item.action.results: + result_data = { + "title": getattr(result, "title", None), + "url": getattr(result, "url", None), + } + tool_call["action"]["results"].append(result_data) + + self._emit_tool_call_event(span, tool_call, conversation_id) + + # Handle mcp_call type (Model Context Protocol) + elif item_type == "mcp_call": + tool_call = { + "type": "mcp", + } + + if hasattr(output_item, "id"): + tool_call["id"] = output_item.id + + # Only include MCP details if content recording is enabled + if _trace_responses_content: + if hasattr(output_item, "name"): + tool_call["name"] = output_item.name + if hasattr(output_item, "arguments"): + tool_call["arguments"] = output_item.arguments + if hasattr(output_item, "server_label"): + tool_call["server_label"] = output_item.server_label + + self._emit_tool_call_event(span, tool_call, conversation_id) + + # Handle computer_call type (for computer use) + elif item_type == "computer_call": + tool_call = { + "type": "computer", + } + + if hasattr(output_item, "call_id"): + tool_call["call_id"] = output_item.call_id + + # Only include computer action details if content recording is enabled + if _trace_responses_content: + # action is directly on the item + if hasattr(output_item, "action") and output_item.action: + # ComputerAction has type and type-specific fields + 
tool_call["action"] = { + "type": getattr(output_item.action, "type", None), + } + # Try to capture common action fields + for attr in ["x", "y", "text", "key", "command", "scroll"]: + if hasattr(output_item.action, attr): + tool_call["action"][attr] = getattr(output_item.action, attr) + + self._emit_tool_call_event(span, tool_call, conversation_id) + + # Handle unknown/future tool call types with best effort + elif item_type and "_call" in item_type: + try: + tool_call = { + "type": item_type, + } + + # Always try to include common ID fields (safe, needed for correlation) + for id_field in ["id", "call_id"]: + if hasattr(output_item, id_field): + tool_call["id" if id_field == "id" else "id"] = getattr(output_item, id_field) + break # Use first available ID field + + # Only include detailed fields if content recording is enabled + if _trace_responses_content: + # Try to get the full tool details using as_dict() if available + if hasattr(output_item, "as_dict"): + tool_dict = output_item.as_dict() + # Extract the tool-specific details (exclude common fields already captured) + for key, value in tool_dict.items(): + if key not in ["type", "id", "call_id"] and value is not None: + tool_call[key] = value + else: + # Fallback: try to capture common fields manually + for field in ["name", "arguments", "input", "query", "search_query", "server_label"]: + if hasattr(output_item, field): + value = getattr(output_item, field) + if value is not None: + tool_call[field] = value + + self._emit_tool_call_event(span, tool_call, conversation_id) + + except Exception as e: + # Log but don't crash if we can't handle an unknown tool type + logger.debug(f"Failed to process unknown tool call type '{item_type}': {e}") + + except Exception as e: + # Catch-all to prevent any tool call processing errors from breaking instrumentation + logger.debug(f"Error processing tool call events: {e}") + def start_responses_span( self, server_address: Optional[str] = None, @@ -494,6 +798,7 @@ def 
start_responses_span( assistant_name: Optional[str] = None, conversation_id: Optional[str] = None, input_text: Optional[str] = None, + input_raw: Optional[Any] = None, stream: bool = False, # pylint: disable=unused-argument ) -> "Optional[AbstractSpan]": """Start a span for responses API call.""" @@ -526,8 +831,37 @@ def start_responses_span( self._set_span_attribute_safe(span, "gen_ai.conversation.id", conversation_id) self._set_span_attribute_safe(span, "gen_ai.request.assistant_name", assistant_name) - # Add user message event if content recording is enabled - if input_text: + # Process input - check if it contains tool outputs + tool_outputs = [] + has_tool_outputs = False + + # Use input_raw (or input_text if it's a list) to check for tool outputs + input_to_check = input_raw if input_raw is not None else input_text + + # Check if input is a list (structured input with potential tool outputs) + if isinstance(input_to_check, list): + for item in input_to_check: + # Check if this item has type "function_call_output" or similar + item_type = None + if hasattr(item, "type"): + item_type = getattr(item, "type", None) + elif isinstance(item, dict): + item_type = item.get("type") + + if item_type and ("output" in item_type or item_type == "function_call_output"): + has_tool_outputs = True + tool_outputs.append(item) + + # Add appropriate message events based on input type + if has_tool_outputs: + # Add tool message event for tool outputs + self._add_tool_message_events( + span, + tool_outputs=tool_outputs, + conversation_id=conversation_id, + ) + elif input_text and not isinstance(input_text, list): + # Add regular user message event (only if input_text is a string, not a list) self._add_message_event( span, role="user", @@ -758,6 +1092,7 @@ def _create_responses_span_from_parameters(self, *args, **kwargs): model = self._extract_model(kwargs) assistant_name = self._extract_assistant_name(kwargs) input_text = self._extract_input_text(kwargs) + input_raw = 
kwargs.get("input") # Get the raw input (could be string or list) stream = kwargs.get("stream", False) # Create and return the span @@ -768,6 +1103,7 @@ def _create_responses_span_from_parameters(self, *args, **kwargs): assistant_name=assistant_name, conversation_id=conversation_id, input_text=input_text, + input_raw=input_raw, stream=stream, ) @@ -852,10 +1188,13 @@ def trace_responses_create(self, function, *args, **kwargs): # Extract and set response attributes self._extract_responses_api_attributes(span, result) + # Add tool call events (if any) + conversation_id = self._extract_conversation_id(kwargs) + self._add_tool_call_events(span, result, conversation_id) + # Add assistant message event output_text = self._extract_output_text(result) if output_text: - conversation_id = self._extract_conversation_id(kwargs) self._add_message_event( span, role="assistant", @@ -981,10 +1320,13 @@ async def trace_responses_create_async(self, function, *args, **kwargs): # Extract and set response attributes self._extract_responses_api_attributes(span, result) + # Add tool call events (if any) + conversation_id = self._extract_conversation_id(kwargs) + self._add_tool_call_events(span, result, conversation_id) + # Add assistant message event output_text = self._extract_output_text(result) if output_text: - conversation_id = self._extract_conversation_id(kwargs) self._add_message_event( span, role="assistant", @@ -1077,6 +1419,10 @@ def __init__( self.input_tokens = 0 self.output_tokens = 0 + # Track all output items from streaming events (tool calls, text, etc.) 
+ self.output_items = {} # Dict[item_id, output_item] - keyed by call_id or id + self.has_output_items = False + def append_output_content(self, content): """Append content to accumulated output list.""" if content: @@ -1093,12 +1439,49 @@ def set_response_metadata(self, chunk): def process_chunk(self, chunk): """Process chunk to accumulate data and update metadata.""" - # Detect ResponseTextDeltaEvent format - if hasattr(chunk, "delta") and isinstance(chunk.delta, str): - self.append_output_content(chunk.delta) - # Detect standard Responses API format - elif hasattr(chunk, "output") and chunk.output: - self.append_output_content(chunk.output) + # Check for output item events in streaming + chunk_type = getattr(chunk, "type", None) + + # Collect all complete output items from ResponseOutputItemDoneEvent + # This includes function_call, file_search_tool_call, code_interpreter_tool_call, + # web_search, mcp_call, computer_tool_call, custom_tool_call, and any future types + if chunk_type == "response.output_item.done" and hasattr(chunk, "item"): + item = chunk.item + item_type = getattr(item, "type", None) + + # Collect any output item (not just function_call) + if item_type: + # Use call_id or id as the key + item_id = getattr(item, "call_id", None) or getattr(item, "id", None) + if item_id: + self.output_items[item_id] = item + self.has_output_items = True + + # Capture response ID from ResponseCreatedEvent or ResponseCompletedEvent + if chunk_type == "response.created" and hasattr(chunk, "response"): + if not self.response_id: + self.response_id = chunk.response.id + self.response_model = getattr(chunk.response, "model", None) + elif chunk_type == "response.completed" and hasattr(chunk, "response"): + if not self.response_id: + self.response_id = chunk.response.id + if not self.response_model: + self.response_model = getattr(chunk.response, "model", None) + + # Only append TEXT content from delta events (not function call arguments or other deltas) + # Text deltas 
can come as: + # 1. response.text.delta - has delta as string + # 2. response.output_item.delta - has delta.text attribute + # Function call arguments come via response.function_call_arguments.delta - has delta as JSON string + # We need to avoid appending function call arguments + if chunk_type and ".delta" in chunk_type and hasattr(chunk, "delta"): + # If it's function_call_arguments.delta, skip it + if "function_call_arguments" not in chunk_type: + # Check if delta is a string (text content) or has .text attribute + if isinstance(chunk.delta, str): + self.append_output_content(chunk.delta) + elif hasattr(chunk.delta, "text"): + self.append_output_content(chunk.delta.text) # Always update metadata self.set_response_metadata(chunk) @@ -1125,13 +1508,25 @@ def cleanup(self): complete_content = "".join(self.accumulated_output) if self.span.span_instance.is_recording: - # Always add assistant message event (content determined by _add_message_event) - self.instrumentor._add_message_event( - self.span, - role="assistant", - content=complete_content, - conversation_id=self.conversation_id, - ) + # Add tool call events if we detected any output items (tool calls, etc.) 
+ if self.has_output_items: + # Create mock response with output items for event generation + # The existing _add_tool_call_events method handles all tool types + mock_response = type("Response", (), {"output": list(self.output_items.values())})() + self.instrumentor._add_tool_call_events( + self.span, + mock_response, + self.conversation_id, + ) + + # Only add assistant message event if there's actual text content (not empty/whitespace) + if complete_content and complete_content.strip(): + self.instrumentor._add_message_event( + self.span, + role="assistant", + content=complete_content, + conversation_id=self.conversation_id, + ) # Set final span attributes using accumulated metadata if self.response_id: @@ -1343,6 +1738,10 @@ def __init__( self.input_tokens = 0 self.output_tokens = 0 + # Track all output items from streaming events (tool calls, text, etc.) + self.output_items = {} # Dict[item_id, output_item] - keyed by call_id or id + self.has_output_items = False + def append_output_content(self, content): """Append content to accumulated output list.""" if content: @@ -1359,12 +1758,49 @@ def set_response_metadata(self, chunk): def process_chunk(self, chunk): """Process chunk to accumulate data and update metadata.""" - # Detect ResponseTextDeltaEvent format - if hasattr(chunk, "delta") and isinstance(chunk.delta, str): - self.append_output_content(chunk.delta) - # Detect standard Responses API format - elif hasattr(chunk, "output") and chunk.output: - self.append_output_content(chunk.output) + # Check for output item events in streaming + chunk_type = getattr(chunk, "type", None) + + # Collect all complete output items from ResponseOutputItemDoneEvent + # This includes function_call, file_search_tool_call, code_interpreter_tool_call, + # web_search, mcp_call, computer_tool_call, custom_tool_call, and any future types + if chunk_type == "response.output_item.done" and hasattr(chunk, "item"): + item = chunk.item + item_type = getattr(item, "type", None) + + # 
Collect any output item (not just function_call) + if item_type: + # Use call_id or id as the key + item_id = getattr(item, "call_id", None) or getattr(item, "id", None) + if item_id: + self.output_items[item_id] = item + self.has_output_items = True + + # Capture response ID from ResponseCreatedEvent or ResponseCompletedEvent + if chunk_type == "response.created" and hasattr(chunk, "response"): + if not self.response_id: + self.response_id = chunk.response.id + self.response_model = getattr(chunk.response, "model", None) + elif chunk_type == "response.completed" and hasattr(chunk, "response"): + if not self.response_id: + self.response_id = chunk.response.id + if not self.response_model: + self.response_model = getattr(chunk.response, "model", None) + + # Only append TEXT content from delta events (not function call arguments or other deltas) + # Text deltas can come as: + # 1. response.text.delta - has delta as string + # 2. response.output_item.delta - has delta.text attribute + # Function call arguments come via response.function_call_arguments.delta - has delta as JSON string + # We need to avoid appending function call arguments + if chunk_type and ".delta" in chunk_type and hasattr(chunk, "delta"): + # If it's function_call_arguments.delta, skip it + if "function_call_arguments" not in chunk_type: + # Check if delta is a string (text content) or has .text attribute + if isinstance(chunk.delta, str): + self.append_output_content(chunk.delta) + elif hasattr(chunk.delta, "text"): + self.append_output_content(chunk.delta.text) # Always update metadata self.set_response_metadata(chunk) @@ -1391,13 +1827,25 @@ def cleanup(self): complete_content = "".join(self.accumulated_output) if self.span.span_instance.is_recording: - # Always add assistant message event (content determined by _add_message_event) - self.instrumentor._add_message_event( - self.span, - role="assistant", - content=complete_content, - conversation_id=self.conversation_id, - ) + # Add tool call 
events if we detected any output items (tool calls, etc.) + if self.has_output_items: + # Create mock response with output items for event generation + # The existing _add_tool_call_events method handles all tool types + mock_response = type("Response", (), {"output": list(self.output_items.values())})() + self.instrumentor._add_tool_call_events( + self.span, + mock_response, + self.conversation_id, + ) + + # Only add assistant message event if there's actual text content (not empty/whitespace) + if complete_content and complete_content.strip(): + self.instrumentor._add_message_event( + self.span, + role="assistant", + content=complete_content, + conversation_id=self.conversation_id, + ) # Set final span attributes using accumulated metadata if self.response_id: @@ -1787,7 +2235,7 @@ def start_list_conversation_items_span( return span - def _add_conversation_item_event( + def _add_conversation_item_event( # pylint: disable=too-many-branches self, span: "AbstractSpan", item: Any, @@ -1801,45 +2249,114 @@ def _add_conversation_item_event( item_type = getattr(item, "type", "unknown") role = getattr(item, "role", "unknown") - # Create event attributes + # Create event body - format depends on item type + event_body: Dict[str, Any] = {} + + # Handle different item types + if item_type == "function_call_output": + # Tool output - use tool_call_outputs format + role = "tool" # Override role for tool outputs + if _trace_responses_content: + tool_output: Dict[str, Any] = { + "type": "function", + } + + # Add call_id as "id" + if hasattr(item, "call_id"): + tool_output["id"] = item.call_id + elif hasattr(item, "id"): + tool_output["id"] = item.id + + # Add output field - parse JSON string if needed + if hasattr(item, "output"): + output_value = item.output + if isinstance(output_value, str): + try: + tool_output["output"] = json.loads(output_value) + except (json.JSONDecodeError, TypeError): + tool_output["output"] = output_value + else: + tool_output["output"] = 
output_value + + event_body["tool_call_outputs"] = [tool_output] + + event_name = "gen_ai.tool.message" + + elif item_type == "function_call": + # Tool call - use tool_calls format + role = "assistant" # Override role for function calls + if _trace_responses_content: + tool_call: Dict[str, Any] = { + "type": "function", + } + + # Add call_id as "id" + if hasattr(item, "call_id"): + tool_call["id"] = item.call_id + elif hasattr(item, "id"): + tool_call["id"] = item.id + + # Add function details + if hasattr(item, "name"): + function_details: Dict[str, Any] = { + "name": item.name, + } + if hasattr(item, "arguments"): + # Parse arguments if it's a JSON string + args_value = item.arguments + if isinstance(args_value, str): + try: + function_details["arguments"] = json.loads(args_value) + except (json.JSONDecodeError, TypeError): + function_details["arguments"] = args_value + else: + function_details["arguments"] = args_value + + tool_call["function"] = function_details + + event_body["tool_calls"] = [tool_call] + + event_name = "gen_ai.assistant.message" + + elif item_type == "message": + # Regular message - use text format + if _trace_responses_content and hasattr(item, "content") and item.content: + content_list = [] + for content_item in item.content: + if hasattr(content_item, "type") and content_item.type == "input_text": + if hasattr(content_item, "text"): + content_list.append(content_item.text) + elif hasattr(content_item, "type") and content_item.type == "output_text": + if hasattr(content_item, "text"): + content_list.append(content_item.text) + elif hasattr(content_item, "type") and content_item.type == "text": + if hasattr(content_item, "text"): + content_list.append(content_item.text) + + if content_list: + event_body["text"] = " ".join(content_list) + + # Determine event name based on role + if role == "assistant": + event_name = "gen_ai.assistant.message" + elif role == "user": + event_name = "gen_ai.user.message" + else: + event_name = 
"gen_ai.conversation.item" + else: + # Unknown item type - use generic event name + event_name = "gen_ai.conversation.item" + + # Create event attributes with the determined role event_attributes = { - GEN_AI_PROVIDER_NAME: AZURE_OPENAI_SYSTEM, "gen_ai.conversation.item.id": item_id, "gen_ai.conversation.item.type": item_type, - "gen_ai.conversation.item.role": role, + "gen_ai.conversation.item.role": role, # Use the overridden role } - # Create event body with the same JSON format as responses.create - event_body: Dict[str, Any] = {"role": role} - - # Add content if content recording is enabled - if _trace_responses_content and hasattr(item, "content") and item.content: - content_list = [] - for content_item in item.content: - if hasattr(content_item, "type") and content_item.type == "input_text": - if hasattr(content_item, "text"): - content_list.append(content_item.text) - elif hasattr(content_item, "type") and content_item.type == "output_text": - if hasattr(content_item, "text"): - content_list.append(content_item.text) - elif hasattr(content_item, "type") and content_item.type == "text": - if hasattr(content_item, "text"): - content_list.append(content_item.text) - - if content_list: - event_body["content"] = " ".join(content_list) - # Use JSON format for event content (consistent with responses.create) event_attributes["gen_ai.event.content"] = json.dumps(event_body, ensure_ascii=False) - # Determine event name based on role - if role == "assistant": - event_name = "gen_ai.assistant.message" - elif role == "user": - event_name = "gen_ai.user.message" - else: - event_name = "gen_ai.conversation.item" - span.span_instance.add_event(name=event_name, attributes=event_attributes) def _wrap_conversation_items_list( diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/gen_ai_trace_verifier.py index 45d927c37304..b3e68d615c21 100644 --- 
a/sdk/ai/azure-ai-projects/tests/agents/telemetry/gen_ai_trace_verifier.py +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/gen_ai_trace_verifier.py @@ -187,6 +187,7 @@ def check_span_events(self, span, expected_events): raise AssertionError("check_span_events: event not found") if len(span_events) > 0: # If there are any additional events in the span_events - raise AssertionError("check_span_events: unexpected event found") + unexpected_event_names = [event.name for event in span_events] + raise AssertionError(f"check_span_events: unexpected event(s) found: {unexpected_event_names}") return True diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor.py index b91d65ded8c7..5167971e002f 100644 --- a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor.py +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor.py @@ -237,7 +237,7 @@ def test_agent_creation_with_tracing_content_recording_enabled(self, **kwargs): "name": "gen_ai.system.instruction", "attributes": { "gen_ai.system": "az.ai.agents", - "gen_ai.event.content": '{"content": "You are a helpful AI assistant. Be polite and provide accurate information."}', + "gen_ai.event.content": '{"text": "You are a helpful AI assistant. 
Be polite and provide accurate information."}', }, } ] diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor_async.py index ccf3f03cb8ef..5f1c45ac2539 100644 --- a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor_async.py +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor_async.py @@ -114,7 +114,7 @@ async def test_create_agent_with_tracing_content_recording_enabled(self, **kwarg "name": "gen_ai.system.instruction", "attributes": { "gen_ai.system": "az.ai.agents", - "gen_ai.event.content": '{"content": "You are a helpful AI assistant. Always be polite and provide accurate information."}', + "gen_ai.event.content": '{"text": "You are a helpful AI assistant. Always be polite and provide accurate information."}', }, } ] diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py index 3570feb8ee43..445d2b53feb3 100644 --- a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py @@ -5,6 +5,7 @@ # ------------------------------------ import os +import json import pytest from typing import Optional, Tuple from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils @@ -12,6 +13,7 @@ from gen_ai_trace_verifier import GenAiTraceVerifier from openai import OpenAI from devtools_testutils import recorded_by_proxy +from azure.ai.projects.models import PromptAgentDefinition, FunctionTool from test_base import servicePreparer from test_ai_instrumentor_base import TestAiAgentsInstrumentorBase, CONTENT_TRACING_ENV_VARIABLE @@ -219,7 +221,7 @@ def test_sync_non_streaming_with_content_recording(self, **kwargs): "attributes": { "gen_ai.provider.name": "azure.openai", 
"gen_ai.message.role": "user", - "gen_ai.event.content": '{"role": "user", "content": "Write a short poem about AI"}', + "gen_ai.event.content": '{"text": "Write a short poem about AI"}', }, }, { @@ -227,7 +229,7 @@ def test_sync_non_streaming_with_content_recording(self, **kwargs): "attributes": { "gen_ai.provider.name": "azure.openai", "gen_ai.message.role": "assistant", - "gen_ai.event.content": '{"role": "assistant", "content": "*"}', + "gen_ai.event.content": '{"text": "*"}', }, }, ] @@ -294,7 +296,7 @@ def test_sync_non_streaming_without_content_recording(self, **kwargs): "attributes": { "gen_ai.provider.name": "azure.openai", "gen_ai.message.role": "user", - "gen_ai.event.content": '{"role": "user"}', + "gen_ai.event.content": "{}", }, }, { @@ -302,7 +304,7 @@ def test_sync_non_streaming_without_content_recording(self, **kwargs): "attributes": { "gen_ai.provider.name": "azure.openai", "gen_ai.message.role": "assistant", - "gen_ai.event.content": '{"role": "assistant"}', + "gen_ai.event.content": "{}", }, }, ] @@ -315,6 +317,7 @@ def test_sync_non_streaming_without_content_recording(self, **kwargs): @recorded_by_proxy def test_sync_streaming_with_content_recording(self, **kwargs): """Test synchronous streaming responses with content recording enabled.""" + from openai.types.responses.response_input_param import FunctionCallOutput self.cleanup() os.environ.update( {CONTENT_TRACING_ENV_VARIABLE: "True", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} @@ -377,7 +380,7 @@ def test_sync_streaming_with_content_recording(self, **kwargs): "attributes": { "gen_ai.provider.name": "azure.openai", "gen_ai.message.role": "user", - "gen_ai.event.content": '{"role": "user", "content": "Write a short poem about AI"}', + "gen_ai.event.content": '{"text": "Write a short poem about AI"}', }, }, { @@ -385,7 +388,7 @@ def test_sync_streaming_with_content_recording(self, **kwargs): "attributes": { "gen_ai.provider.name": "azure.openai", "gen_ai.message.role": "assistant", 
- "gen_ai.event.content": '{"role": "assistant", "content": "*"}', + "gen_ai.event.content": '{"text": "*"}', }, }, ] @@ -620,7 +623,7 @@ def test_sync_non_streaming_without_conversation(self, **kwargs): "attributes": { "gen_ai.provider.name": "azure.openai", "gen_ai.message.role": "user", - "gen_ai.event.content": '{"role": "user", "content": "Write a short poem about AI"}', + "gen_ai.event.content": '{"text": "Write a short poem about AI"}', }, }, { @@ -628,9 +631,999 @@ def test_sync_non_streaming_without_conversation(self, **kwargs): "attributes": { "gen_ai.provider.name": "azure.openai", "gen_ai.message.role": "assistant", - "gen_ai.event.content": '{"role": "assistant", "content": "*"}', + "gen_ai.event.content": '{"text": "*"}', }, }, ] events_match = GenAiTraceVerifier().check_span_events(span, expected_events) assert events_match == True + + @pytest.mark.skip(reason="recordings not working for responses API") + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy + def test_sync_function_tool_with_content_recording_non_streaming(self, **kwargs): + """Test synchronous function tool usage with content recording enabled (non-streaming).""" + from openai.types.responses.response_input_param import FunctionCallOutput + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "True", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + assert True == AIProjectInstrumentor().is_content_recording_enabled() + assert True == AIProjectInstrumentor().is_instrumented() + + with self.create_client(operation_group="tracing", **kwargs) as project_client: + # Get the OpenAI client from the project client + client = project_client.get_openai_client() + deployment_name = self.test_agents_params["model_deployment_name"] + + # Define a function tool + func_tool = FunctionTool( + name="get_weather", + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + 
"description": "The city name, e.g. San Francisco", + }, + }, + "required": ["location"], + "additionalProperties": False, + }, + description="Get the current weather for a location.", + strict=True, + ) + + # Create agent with function tool + agent = project_client.agents.create_version( + agent_name="WeatherAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can use function tools.", + tools=[func_tool], + ), + ) + + # Create a conversation + conversation = client.conversations.create() + + # First request - should trigger function call + response = client.responses.create( + conversation=conversation.id, + input="What's the weather in Seattle?", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=False, + ) + function_calls = [item for item in response.output if item.type == "function_call"] + + # Process function calls and prepare input for second request + input_list = [] + for item in function_calls: + if item.name == "get_weather": + # Mock function result + weather_result = {"temperature": "72°F", "condition": "sunny"} + input_list.append( + FunctionCallOutput( + type="function_call_output", + call_id=item.call_id, + output=json.dumps(weather_result), + ) + ) + + # Second request - provide function results + response2 = client.responses.create( + conversation=conversation.id, + input=input_list, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=False, + ) + assert hasattr(response2, "output") + assert response2.output is not None + + # Cleanup + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # Check spans - should have 2 responses spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2 + + # Validate first span (user message + tool call) + span1 = spans[0] + expected_attributes_1 = [ + ("az.namespace", 
"Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.request.model", deployment_name), + ("gen_ai.request.assistant_name", agent.name), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", ""), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + assert attributes_match == True + + # Check events for first span - user message and assistant tool call + expected_events_1 = [ + { + "name": "gen_ai.user.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "user", + "gen_ai.event.content": '{"text": "What\'s the weather in Seattle?"}', + }, + }, + { + "name": "gen_ai.assistant.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "assistant", + "gen_ai.event.content": '{"tool_calls": [{"type": "function", "id": "*", "function": {"name": "get_weather", "arguments": "*"}}]}', + }, + }, + ] + events_match = GenAiTraceVerifier().check_span_events(span1, expected_events_1) + assert events_match == True + + # Validate second span (tool output + final response) + span2 = spans[1] + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.request.model", deployment_name), + ("gen_ai.request.assistant_name", agent.name), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", ""), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + assert attributes_match == True + + # Check events for 
second span - tool output and assistant response + expected_events_2 = [ + { + "name": "gen_ai.tool.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "tool", + "gen_ai.event.content": '{"tool_call_outputs": [{"type": "function", "id": "*", "output": {"temperature": "72°F", "condition": "sunny"}}]}', + }, + }, + { + "name": "gen_ai.assistant.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "assistant", + "gen_ai.event.content": '{"text": "*"}', + }, + }, + ] + events_match = GenAiTraceVerifier().check_span_events(span2, expected_events_2) + assert events_match == True + + @pytest.mark.skip(reason="recordings not working for responses API") + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy + def test_sync_function_tool_with_content_recording_streaming(self, **kwargs): + """Test synchronous function tool usage with content recording enabled (streaming).""" + from openai.types.responses.response_input_param import FunctionCallOutput + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "True", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + assert True == AIProjectInstrumentor().is_content_recording_enabled() + assert True == AIProjectInstrumentor().is_instrumented() + + with self.create_client(operation_group="tracing", **kwargs) as project_client: + # Get the OpenAI client from the project client + client = project_client.get_openai_client() + deployment_name = self.test_agents_params["model_deployment_name"] + + # Define a function tool + func_tool = FunctionTool( + name="get_weather", + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name, e.g. 
San Francisco", + }, + }, + "required": ["location"], + "additionalProperties": False, + }, + description="Get the current weather for a location.", + strict=True, + ) + + # Create agent with function tool + agent = project_client.agents.create_version( + agent_name="WeatherAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can use function tools.", + tools=[func_tool], + ), + ) + + # Create a conversation + conversation = client.conversations.create() + + # First request - should trigger function call + stream = client.responses.create( + conversation=conversation.id, + input="What's the weather in Seattle?", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=True, + ) + # Consume the stream and collect function calls + # In streaming, we get events, not direct output items + function_calls_dict = {} + first_response_id = None + for chunk in stream: + # Capture the response ID from ResponseCreatedEvent or ResponseCompletedEvent + if chunk.type == "response.created" and hasattr(chunk, "response"): + first_response_id = chunk.response.id + elif chunk.type == "response.completed" and hasattr(chunk, "response"): + if first_response_id is None: + first_response_id = chunk.response.id + + # Collect complete function calls from ResponseOutputItemDoneEvent + if chunk.type == "response.output_item.done" and hasattr(chunk, "item"): + item = chunk.item + if hasattr(item, "type") and item.type == "function_call": + call_id = item.call_id + function_calls_dict[call_id] = item + + # Process function calls and prepare input for second request + input_list = [] + for item in function_calls_dict.values(): + # Mock function result + weather_result = {"temperature": "72°F", "condition": "sunny"} + output = FunctionCallOutput( + type="function_call_output", + call_id=item.call_id, + output=json.dumps(weather_result), + ) + input_list.append(output) + + # Second request - provide function 
results (using conversation, not previous_response_id) + stream2 = client.responses.create( + conversation=conversation.id, + input=input_list, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=True, + ) + # Consume the second stream + accumulated_content = [] + for chunk in stream2: + if hasattr(chunk, "delta") and isinstance(chunk.delta, str): + accumulated_content.append(chunk.delta) + elif hasattr(chunk, "output") and chunk.output: + accumulated_content.append(str(chunk.output)) + full_content = "".join(accumulated_content) + assert full_content is not None + assert len(full_content) > 0 + + # Cleanup + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # Check spans - should have 2 responses spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2 + + # Validate first span (user message + tool call) + span1 = spans[0] + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.request.model", deployment_name), + ("gen_ai.request.assistant_name", agent.name), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", ""), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + assert attributes_match == True + + # Check events for first span - user message and assistant tool call + expected_events_1 = [ + { + "name": "gen_ai.user.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "user", + "gen_ai.event.content": '{"text": "What\'s the weather in Seattle?"}', + }, + }, + { + "name": "gen_ai.assistant.message", + "attributes": { + "gen_ai.provider.name": 
"azure.openai", + "gen_ai.message.role": "assistant", + "gen_ai.event.content": '{"tool_calls": [{"type": "function", "id": "*", "function": {"name": "get_weather", "arguments": "*"}}]}', + }, + }, + ] + events_match = GenAiTraceVerifier().check_span_events(span1, expected_events_1) + assert events_match == True + + # Validate second span (tool output + final response) + span2 = spans[1] + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.request.model", deployment_name), + ("gen_ai.request.assistant_name", agent.name), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", ""), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + assert attributes_match == True + + # Check events for second span - tool output and assistant response + expected_events_2 = [ + { + "name": "gen_ai.tool.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "tool", + "gen_ai.event.content": '{"tool_call_outputs": [{"type": "function", "id": "*", "output": {"temperature": "72°F", "condition": "sunny"}}]}', + }, + }, + { + "name": "gen_ai.assistant.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "assistant", + "gen_ai.event.content": '{"text": "*"}', + }, + }, + ] + events_match = GenAiTraceVerifier().check_span_events(span2, expected_events_2) + assert events_match == True + + @pytest.mark.skip(reason="recordings not working for responses API") + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy + def test_sync_function_tool_without_content_recording_non_streaming(self, **kwargs): + """Test synchronous function tool usage 
without content recording (non-streaming).""" + from openai.types.responses.response_input_param import FunctionCallOutput + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "False", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + assert False == AIProjectInstrumentor().is_content_recording_enabled() + assert True == AIProjectInstrumentor().is_instrumented() + + with self.create_client(operation_group="tracing", **kwargs) as project_client: + # Get the OpenAI client from the project client + client = project_client.get_openai_client() + deployment_name = self.test_agents_params["model_deployment_name"] + + # Define a function tool + func_tool = FunctionTool( + name="get_weather", + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name, e.g. San Francisco", + }, + }, + "required": ["location"], + "additionalProperties": False, + }, + description="Get the current weather for a location.", + strict=True, + ) + + # Create agent with function tool + agent = project_client.agents.create_version( + agent_name="WeatherAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can use function tools.", + tools=[func_tool], + ), + ) + + # Create a conversation + conversation = client.conversations.create() + + # First request - should trigger function call + response = client.responses.create( + conversation=conversation.id, + input="What's the weather in Seattle?", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=False, + ) + function_calls = [item for item in response.output if item.type == "function_call"] + + # Process function calls and prepare input for second request + input_list = [] + for item in function_calls: + if item.name == "get_weather": + # Mock function result + weather_result = {"temperature": "72°F", "condition": "sunny"} + input_list.append( 
+ FunctionCallOutput( + type="function_call_output", + call_id=item.call_id, + output=json.dumps(weather_result), + ) + ) + + # Second request - provide function results + response2 = client.responses.create( + conversation=conversation.id, + input=input_list, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=False, + ) + assert hasattr(response2, "output") + + # Cleanup + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # Check spans - should have 2 responses spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2 + + # Validate first span (user message + tool call) - no content + span1 = spans[0] + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.request.model", deployment_name), + ("gen_ai.request.assistant_name", agent.name), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", ""), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + assert attributes_match == True + + # Check events for first span - tool call ID included but no function details + expected_events_1 = [ + { + "name": "gen_ai.user.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "user", + "gen_ai.event.content": "{}", + }, + }, + { + "name": "gen_ai.assistant.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "assistant", + "gen_ai.event.content": '{"tool_calls": [{"type": "function", "id": "*"}]}', + }, + }, + ] + events_match = GenAiTraceVerifier().check_span_events(span1, expected_events_1) + assert events_match == True + + 
# Validate second span (tool output + final response) - no content + span2 = spans[1] + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.request.model", deployment_name), + ("gen_ai.request.assistant_name", agent.name), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", ""), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + assert attributes_match == True + + # Check events for second span - empty content bodies + expected_events_2 = [ + { + "name": "gen_ai.tool.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "tool", + "gen_ai.event.content": "{}", + }, + }, + { + "name": "gen_ai.assistant.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "assistant", + "gen_ai.event.content": "{}", + }, + }, + ] + events_match = GenAiTraceVerifier().check_span_events(span2, expected_events_2) + assert events_match == True + + @pytest.mark.skip(reason="recordings not working for responses API") + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy + def test_sync_function_tool_without_content_recording_streaming(self, **kwargs): + """Test synchronous function tool usage without content recording (streaming).""" + from openai.types.responses.response_input_param import FunctionCallOutput + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "False", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + assert False == AIProjectInstrumentor().is_content_recording_enabled() + assert True == AIProjectInstrumentor().is_instrumented() + + with 
self.create_client(operation_group="tracing", **kwargs) as project_client: + # Get the OpenAI client from the project client + client = project_client.get_openai_client() + deployment_name = self.test_agents_params["model_deployment_name"] + + # Define a function tool + func_tool = FunctionTool( + name="get_weather", + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name, e.g. San Francisco", + }, + }, + "required": ["location"], + "additionalProperties": False, + }, + description="Get the current weather for a location.", + strict=True, + ) + + # Create agent with function tool + agent = project_client.agents.create_version( + agent_name="WeatherAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can use function tools.", + tools=[func_tool], + ), + ) + + # Create a conversation + conversation = client.conversations.create() + + # First request - should trigger function call + stream = client.responses.create( + conversation=conversation.id, + input="What's the weather in Seattle?", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=True, + ) + # Consume the stream and collect function calls + # In streaming, we get events, not direct output items + function_calls_dict = {} + first_response_id = None + for chunk in stream: + # Capture the response ID from ResponseCreatedEvent or ResponseCompletedEvent + if chunk.type == "response.created" and hasattr(chunk, "response"): + first_response_id = chunk.response.id + elif chunk.type == "response.completed" and hasattr(chunk, "response"): + if first_response_id is None: + first_response_id = chunk.response.id + + # Collect complete function calls from ResponseOutputItemDoneEvent + if chunk.type == "response.output_item.done" and hasattr(chunk, "item"): + item = chunk.item + if hasattr(item, "type") and item.type == "function_call": + call_id = item.call_id + 
function_calls_dict[call_id] = item + + # Process function calls and prepare input for second request + # Respond to ALL function calls (streaming may not populate name attribute reliably) + input_list = [] + for item in function_calls_dict.values(): + # Mock function result + weather_result = {"temperature": "72°F", "condition": "sunny"} + output = FunctionCallOutput( + type="function_call_output", + call_id=item.call_id, + output=json.dumps(weather_result), + ) + input_list.append(output) + + # Second request - provide function results (using conversation, not previous_response_id) + stream2 = client.responses.create( + conversation=conversation.id, + input=input_list, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=True, + ) + # Consume the second stream + for chunk in stream2: + pass # Just consume the stream + + # Cleanup + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # Check spans - should have 2 responses spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2 + + # Validate first span (user message + tool call) - no content + span1 = spans[0] + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.request.model", deployment_name), + ("gen_ai.request.assistant_name", agent.name), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", ""), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + assert attributes_match == True + + # Check events for first span - tool call ID included but no function details + expected_events_1 = [ + { + "name": "gen_ai.user.message", + "attributes": 
{ + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "user", + "gen_ai.event.content": "{}", + }, + }, + { + "name": "gen_ai.assistant.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "assistant", + "gen_ai.event.content": '{"tool_calls": [{"type": "function", "id": "*"}]}', + }, + }, + ] + events_match = GenAiTraceVerifier().check_span_events(span1, expected_events_1) + assert events_match == True + + # Validate second span (tool output + final response) - no content + span2 = spans[1] + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.request.model", deployment_name), + ("gen_ai.request.assistant_name", agent.name), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", ""), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + assert attributes_match == True + + # Check events for second span - empty content bodies + expected_events_2 = [ + { + "name": "gen_ai.tool.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "tool", + "gen_ai.event.content": "{}", + }, + }, + { + "name": "gen_ai.assistant.message", + "attributes": { + "gen_ai.provider.name": "azure.openai", + "gen_ai.message.role": "assistant", + "gen_ai.event.content": "{}", + }, + }, + ] + events_match = GenAiTraceVerifier().check_span_events(span2, expected_events_2) + assert events_match == True + + @pytest.mark.skip(reason="recordings not working for responses API") + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy + def test_sync_function_tool_list_conversation_items_with_content_recording(self, **kwargs): + """Test 
listing conversation items after function tool usage with content recording enabled.""" + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "True", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + assert True == AIProjectInstrumentor().is_content_recording_enabled() + assert True == AIProjectInstrumentor().is_instrumented() + + with self.create_client(operation_group="tracing", **kwargs) as project_client: + # Get the OpenAI client from the project client + client = project_client.get_openai_client() + deployment_name = self.test_agents_params["model_deployment_name"] + + # Define a function tool + func_tool = FunctionTool( + name="get_weather", + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name, e.g. San Francisco", + }, + }, + "required": ["location"], + "additionalProperties": False, + }, + description="Get the current weather for a location.", + strict=True, + ) + + # Create agent with function tool + agent = project_client.agents.create_version( + agent_name="WeatherAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can use function tools.", + tools=[func_tool], + ), + ) + + # Create a conversation + conversation = client.conversations.create() + + # First request - should trigger function call + response = client.responses.create( + conversation=conversation.id, + input="What's the weather in Seattle?", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=False, + ) + + # Process function calls + input_list = [] + for item in response.output: + if item.type == "function_call" and item.name == "get_weather": + weather_result = {"temperature": "72°F", "condition": "sunny"} + input_list.append( + FunctionCallOutput( + type="function_call_output", + call_id=item.call_id, + output=json.dumps(weather_result), + ) + ) + + # Second request - 
provide function results + response2 = client.responses.create( + conversation=conversation.id, + input=input_list, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + stream=False, + ) + + # List conversation items + items = client.conversations.items.list(conversation_id=conversation.id) + items_list = list(items) + assert len(items_list) > 0 + + # Cleanup + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # Check spans + self.exporter.force_flush() + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1 + list_span = list_spans[0] + + # Check span attributes + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "list_conversation_items"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(list_span, expected_attributes) + assert attributes_match == True + + # Check events - should include user message, function_call, function_call_output, and assistant response + # The order might vary, so we check that all expected event types are present + events = list_span.events + event_names = [event.name for event in events] + + # Should have: user message, assistant message (with tool call), tool message (output), assistant message (final) + assert "gen_ai.user.message" in event_names + assert "gen_ai.assistant.message" in event_names + assert "gen_ai.tool.message" in event_names + + # Find and validate the tool message event + tool_events = [e for e in events if e.name == "gen_ai.tool.message"] + assert len(tool_events) >= 1 + tool_event = tool_events[0] + + # Check that tool event has correct role attribute + tool_event_attrs = dict(tool_event.attributes) + assert "gen_ai.conversation.item.role" in tool_event_attrs + assert 
    @pytest.mark.skip(reason="recordings not working for responses API")
    @pytest.mark.usefixtures("instrument_without_content")
    @servicePreparer()
    @recorded_by_proxy
    def test_sync_function_tool_list_conversation_items_without_content_recording(self, **kwargs):
        """Test listing conversation items after function tool usage without content recording.

        Drives the sync responses API through a full function-tool round trip
        (request -> function_call -> function_call_output -> final answer), then
        lists the conversation items and verifies that the resulting
        ``list_conversation_items`` span carries the tool role attribute but an
        empty ``gen_ai.event.content`` body, since content recording is disabled.
        """
        self.cleanup()
        # Disable content capture but keep the responses-API instrumentation on.
        os.environ.update(
            {CONTENT_TRACING_ENV_VARIABLE: "False", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"}
        )
        self.setup_telemetry()
        assert False == AIProjectInstrumentor().is_content_recording_enabled()
        assert True == AIProjectInstrumentor().is_instrumented()

        with self.create_client(operation_group="tracing", **kwargs) as project_client:
            # Get the OpenAI client from the project client
            client = project_client.get_openai_client()
            deployment_name = self.test_agents_params["model_deployment_name"]

            # Define a function tool the agent can call during the test.
            func_tool = FunctionTool(
                name="get_weather",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city name, e.g. San Francisco",
                        },
                    },
                    "required": ["location"],
                    "additionalProperties": False,
                },
                description="Get the current weather for a location.",
                strict=True,
            )

            # Create agent with function tool
            agent = project_client.agents.create_version(
                agent_name="WeatherAgent",
                definition=PromptAgentDefinition(
                    model=deployment_name,
                    instructions="You are a helpful assistant that can use function tools.",
                    tools=[func_tool],
                ),
            )

            # Create a conversation
            conversation = client.conversations.create()

            # First request - should trigger function call
            response = client.responses.create(
                conversation=conversation.id,
                input="What's the weather in Seattle?",
                extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
                stream=False,
            )

            # Process function calls: answer each matching call with a mocked result.
            input_list = []
            for item in response.output:
                if item.type == "function_call" and item.name == "get_weather":
                    weather_result = {"temperature": "72°F", "condition": "sunny"}
                    input_list.append(
                        FunctionCallOutput(
                            type="function_call_output",
                            call_id=item.call_id,
                            output=json.dumps(weather_result),
                        )
                    )

            # Second request - provide function results
            response2 = client.responses.create(
                conversation=conversation.id,
                input=input_list,
                extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
                stream=False,
            )

            # List conversation items (this is the instrumented call under test).
            items = client.conversations.items.list(conversation_id=conversation.id)
            items_list = list(items)
            assert len(items_list) > 0

            # Cleanup
            project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)

            # Check spans
            self.exporter.force_flush()

            # Check list_conversation_items span.
            # NOTE(review): assumes self.cleanup() resets the exporter so exactly
            # one such span exists — confirm against the base test class.
            list_spans = self.exporter.get_spans_by_name("list_conversation_items")
            assert len(list_spans) == 1
            list_span = list_spans[0]

            # Check span attributes
            expected_attributes = [
                ("az.namespace", "Microsoft.CognitiveServices"),
                ("gen_ai.operation.name", "list_conversation_items"),
                ("gen_ai.provider.name", "azure.openai"),
                ("server.address", ""),
                ("gen_ai.conversation.id", conversation.id),
            ]
            attributes_match = GenAiTraceVerifier().check_span_attributes(list_span, expected_attributes)
            assert attributes_match == True

            # Check events - should have event names but empty content
            events = list_span.events
            event_names = [event.name for event in events]

            # Should have the event types present
            assert "gen_ai.user.message" in event_names
            assert "gen_ai.assistant.message" in event_names
            assert "gen_ai.tool.message" in event_names

            # Find and validate the tool message event has correct role but no content details
            tool_events = [e for e in events if e.name == "gen_ai.tool.message"]
            assert len(tool_events) >= 1
            tool_event = tool_events[0]

            # Check that tool event has correct role attribute even with content off.
            tool_event_attrs = dict(tool_event.attributes)
            assert "gen_ai.conversation.item.role" in tool_event_attrs
            assert tool_event_attrs["gen_ai.conversation.item.role"] == "tool"

            # Check that content is empty when content recording is disabled
            assert "gen_ai.event.content" in tool_event_attrs
            content = tool_event_attrs["gen_ai.event.content"]
            assert content == "{}"  # Should be empty JSON object
    @pytest.mark.skip(reason="recordings not working for responses API")
    @pytest.mark.usefixtures("instrument_with_content")
    @servicePreparer()
    @recorded_by_proxy_async
    async def test_async_function_tool_with_content_recording_streaming(self, **kwargs):
        """Test asynchronous function tool usage with content recording enabled (streaming).

        Streams two responses-API calls through a function-tool round trip and
        verifies that two ``responses`` spans are produced, with event bodies
        containing the full message text, tool-call arguments, and tool-call
        outputs (content recording is enabled).
        """
        from openai.types.responses.response_input_param import FunctionCallOutput

        self.cleanup()
        os.environ.update(
            {CONTENT_TRACING_ENV_VARIABLE: "True", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"}
        )
        self.setup_telemetry()
        assert True == AIProjectInstrumentor().is_content_recording_enabled()
        assert True == AIProjectInstrumentor().is_instrumented()

        project_client = self.create_async_client(operation_group="tracing", **kwargs)

        async with project_client:
            # Get the OpenAI client from the project client
            client = await project_client.get_openai_client()
            deployment_name = self.test_agents_params["model_deployment_name"]

            # Define a function tool
            func_tool = FunctionTool(
                name="get_weather",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city name, e.g. San Francisco",
                        },
                    },
                    "required": ["location"],
                    "additionalProperties": False,
                },
                description="Get the current weather for a location.",
                strict=True,
            )

            # Create agent with function tool
            agent = await project_client.agents.create_version(
                agent_name="WeatherAgent",
                definition=PromptAgentDefinition(
                    model=deployment_name,
                    instructions="You are a helpful assistant that can use function tools.",
                    tools=[func_tool],
                ),
            )

            # Create a conversation
            conversation = await client.conversations.create()

            # First request - should trigger function call
            stream = await client.responses.create(
                conversation=conversation.id,
                input="What's the weather in Seattle?",
                extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
                stream=True,
            )
            # Consume the stream and collect function calls
            # In streaming, we get events, not direct output items
            function_calls_dict = {}
            first_response_id = None
            async for chunk in stream:
                # Capture the response ID from ResponseCreatedEvent or ResponseCompletedEvent
                if chunk.type == "response.created" and hasattr(chunk, "response"):
                    first_response_id = chunk.response.id
                elif chunk.type == "response.completed" and hasattr(chunk, "response"):
                    if first_response_id is None:
                        first_response_id = chunk.response.id

                # Collect complete function calls from ResponseOutputItemDoneEvent
                if chunk.type == "response.output_item.done" and hasattr(chunk, "item"):
                    item = chunk.item
                    if hasattr(item, "type") and item.type == "function_call":
                        call_id = item.call_id
                        function_calls_dict[call_id] = item

            # Process function calls and prepare input for second request
            input_list = []
            for item in function_calls_dict.values():
                # Mock function result
                weather_result = {"temperature": "72°F", "condition": "sunny"}
                output = FunctionCallOutput(
                    type="function_call_output",
                    call_id=item.call_id,
                    output=json.dumps(weather_result),
                )
                input_list.append(output)

            # Second request - provide function results (using conversation, not previous_response_id)
            stream2 = await client.responses.create(
                conversation=conversation.id,
                input=input_list,
                extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
                stream=True,
            )
            # Consume the second stream, accumulating any text deltas/output.
            accumulated_content = []
            async for chunk in stream2:
                if hasattr(chunk, "delta") and isinstance(chunk.delta, str):
                    accumulated_content.append(chunk.delta)
                elif hasattr(chunk, "output") and chunk.output:
                    accumulated_content.append(str(chunk.output))
            full_content = "".join(accumulated_content)
            assert full_content is not None
            assert len(full_content) > 0

            # Cleanup
            await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)

            # Check spans - should have 2 responses spans (one per responses.create call)
            self.exporter.force_flush()
            spans = self.exporter.get_spans_by_name(f"responses {agent.name}")
            assert len(spans) == 2

            # Validate first span (user message + tool call)
            span1 = spans[0]
            expected_attributes_1 = [
                ("az.namespace", "Microsoft.CognitiveServices"),
                ("gen_ai.operation.name", "responses"),
                ("gen_ai.request.model", deployment_name),
                ("gen_ai.request.assistant_name", agent.name),
                ("gen_ai.provider.name", "azure.openai"),
                ("server.address", ""),
                ("gen_ai.conversation.id", conversation.id),
                ("gen_ai.response.model", deployment_name),
                ("gen_ai.response.id", ""),
                ("gen_ai.usage.input_tokens", "+"),
                ("gen_ai.usage.output_tokens", "+"),
            ]
            attributes_match = GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1)
            assert attributes_match == True

            # Check events for first span - user message and assistant tool call
            # ("*" is a wildcard understood by GenAiTraceVerifier)
            expected_events_1 = [
                {
                    "name": "gen_ai.user.message",
                    "attributes": {
                        "gen_ai.provider.name": "azure.openai",
                        "gen_ai.message.role": "user",
                        "gen_ai.event.content": '{"text": "What\'s the weather in Seattle?"}',
                    },
                },
                {
                    "name": "gen_ai.assistant.message",
                    "attributes": {
                        "gen_ai.provider.name": "azure.openai",
                        "gen_ai.message.role": "assistant",
                        "gen_ai.event.content": '{"tool_calls": [{"type": "function", "id": "*", "function": {"name": "get_weather", "arguments": "*"}}]}',
                    },
                },
            ]
            events_match = GenAiTraceVerifier().check_span_events(span1, expected_events_1)
            assert events_match == True

            # Validate second span (tool output + final response)
            span2 = spans[1]
            expected_attributes_2 = [
                ("az.namespace", "Microsoft.CognitiveServices"),
                ("gen_ai.operation.name", "responses"),
                ("gen_ai.request.model", deployment_name),
                ("gen_ai.request.assistant_name", agent.name),
                ("gen_ai.provider.name", "azure.openai"),
                ("server.address", ""),
                ("gen_ai.conversation.id", conversation.id),
                ("gen_ai.response.model", deployment_name),
                ("gen_ai.response.id", ""),
                ("gen_ai.usage.input_tokens", "+"),
                ("gen_ai.usage.output_tokens", "+"),
            ]
            attributes_match = GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2)
            assert attributes_match == True

            # Check events for second span - tool output and assistant response
            expected_events_2 = [
                {
                    "name": "gen_ai.tool.message",
                    "attributes": {
                        "gen_ai.provider.name": "azure.openai",
                        "gen_ai.message.role": "tool",
                        "gen_ai.event.content": '{"tool_call_outputs": [{"type": "function", "id": "*", "output": {"temperature": "72°F", "condition": "sunny"}}]}',
                    },
                },
                {
                    "name": "gen_ai.assistant.message",
                    "attributes": {
                        "gen_ai.provider.name": "azure.openai",
                        "gen_ai.message.role": "assistant",
                        "gen_ai.event.content": '{"text": "*"}',
                    },
                },
            ]
            events_match = GenAiTraceVerifier().check_span_events(span2, expected_events_2)
            assert events_match == True

    @pytest.mark.skip(reason="recordings not working for responses API")
    @pytest.mark.usefixtures("instrument_without_content")
    @servicePreparer()
    @recorded_by_proxy_async
    async def test_async_function_tool_without_content_recording_streaming(self, **kwargs):
        """Test asynchronous function tool usage without content recording (streaming).

        Same flow as the with-content variant, but verifies that with content
        recording disabled the spans still carry role attributes and tool-call
        IDs while message bodies are redacted to "{}" (no text, no function
        names/arguments, no tool outputs).
        """
        from openai.types.responses.response_input_param import FunctionCallOutput

        self.cleanup()
        os.environ.update(
            {CONTENT_TRACING_ENV_VARIABLE: "False", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"}
        )
        self.setup_telemetry()
        assert False == AIProjectInstrumentor().is_content_recording_enabled()
        assert True == AIProjectInstrumentor().is_instrumented()

        project_client = self.create_async_client(operation_group="tracing", **kwargs)

        async with project_client:
            # Get the OpenAI client from the project client
            client = await project_client.get_openai_client()
            deployment_name = self.test_agents_params["model_deployment_name"]

            # Define a function tool
            func_tool = FunctionTool(
                name="get_weather",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city name, e.g. San Francisco",
                        },
                    },
                    "required": ["location"],
                    "additionalProperties": False,
                },
                description="Get the current weather for a location.",
                strict=True,
            )

            # Create agent with function tool
            agent = await project_client.agents.create_version(
                agent_name="WeatherAgent",
                definition=PromptAgentDefinition(
                    model=deployment_name,
                    instructions="You are a helpful assistant that can use function tools.",
                    tools=[func_tool],
                ),
            )

            # Create a conversation
            conversation = await client.conversations.create()

            # First request - should trigger function call
            stream = await client.responses.create(
                conversation=conversation.id,
                input="What's the weather in Seattle?",
                extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
                stream=True,
            )
            # Consume the stream and collect function calls
            # In streaming, we get events, not direct output items
            function_calls_dict = {}
            first_response_id = None
            async for chunk in stream:
                # Capture the response ID from ResponseCreatedEvent or ResponseCompletedEvent
                if chunk.type == "response.created" and hasattr(chunk, "response"):
                    first_response_id = chunk.response.id
                elif chunk.type == "response.completed" and hasattr(chunk, "response"):
                    if first_response_id is None:
                        first_response_id = chunk.response.id

                # Collect complete function calls from ResponseOutputItemDoneEvent
                if chunk.type == "response.output_item.done" and hasattr(chunk, "item"):
                    item = chunk.item
                    if hasattr(item, "type") and item.type == "function_call":
                        call_id = item.call_id
                        function_calls_dict[call_id] = item

            # Process function calls and prepare input for second request
            # Respond to ALL function calls (streaming may not populate name attribute reliably)
            input_list = []
            for item in function_calls_dict.values():
                # Mock function result
                weather_result = {"temperature": "72°F", "condition": "sunny"}
                output = FunctionCallOutput(
                    type="function_call_output",
                    call_id=item.call_id,
                    output=json.dumps(weather_result),
                )
                input_list.append(output)

            # Second request - provide function results (using conversation, not previous_response_id)
            stream2 = await client.responses.create(
                conversation=conversation.id,
                input=input_list,
                extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
                stream=True,
            )
            # Consume the second stream
            async for chunk in stream2:
                pass  # Just consume the stream

            # Cleanup
            await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)

            # Check spans - should have 2 responses spans
            self.exporter.force_flush()
            spans = self.exporter.get_spans_by_name(f"responses {agent.name}")
            assert len(spans) == 2

            # Validate first span (user message + tool call) - no content
            span1 = spans[0]
            expected_attributes_1 = [
                ("az.namespace", "Microsoft.CognitiveServices"),
                ("gen_ai.operation.name", "responses"),
                ("gen_ai.request.model", deployment_name),
                ("gen_ai.request.assistant_name", agent.name),
                ("gen_ai.provider.name", "azure.openai"),
                ("server.address", ""),
                ("gen_ai.conversation.id", conversation.id),
                ("gen_ai.response.model", deployment_name),
                ("gen_ai.response.id", ""),
                ("gen_ai.usage.input_tokens", "+"),
                ("gen_ai.usage.output_tokens", "+"),
            ]
            attributes_match = GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1)
            assert attributes_match == True

            # Check events for first span - tool call ID included but no function details
            expected_events_1 = [
                {
                    "name": "gen_ai.user.message",
                    "attributes": {
                        "gen_ai.provider.name": "azure.openai",
                        "gen_ai.message.role": "user",
                        "gen_ai.event.content": "{}",
                    },
                },
                {
                    "name": "gen_ai.assistant.message",
                    "attributes": {
                        "gen_ai.provider.name": "azure.openai",
                        "gen_ai.message.role": "assistant",
                        "gen_ai.event.content": '{"tool_calls": [{"type": "function", "id": "*"}]}',
                    },
                },
            ]
            events_match = GenAiTraceVerifier().check_span_events(span1, expected_events_1)
            assert events_match == True

            # Validate second span (tool output + final response) - no content
            span2 = spans[1]
            expected_attributes_2 = [
                ("az.namespace", "Microsoft.CognitiveServices"),
                ("gen_ai.operation.name", "responses"),
                ("gen_ai.request.model", deployment_name),
                ("gen_ai.request.assistant_name", agent.name),
                ("gen_ai.provider.name", "azure.openai"),
                ("server.address", ""),
                ("gen_ai.conversation.id", conversation.id),
                ("gen_ai.response.model", deployment_name),
                ("gen_ai.response.id", ""),
                ("gen_ai.usage.input_tokens", "+"),
                ("gen_ai.usage.output_tokens", "+"),
            ]
            attributes_match = GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2)
            assert attributes_match == True

            # Check events for second span - empty content bodies
            expected_events_2 = [
                {
                    "name": "gen_ai.tool.message",
                    "attributes": {
                        "gen_ai.provider.name": "azure.openai",
                        "gen_ai.message.role": "tool",
                        "gen_ai.event.content": "{}",
                    },
                },
                {
                    "name": "gen_ai.assistant.message",
                    "attributes": {
                        "gen_ai.provider.name": "azure.openai",
                        "gen_ai.message.role": "assistant",
                        "gen_ai.event.content": "{}",
                    },
                },
            ]
            events_match = GenAiTraceVerifier().check_span_events(span2, expected_events_2)
            assert events_match == True