diff --git a/agentops/instrumentation/__init__.py b/agentops/instrumentation/__init__.py
index 70017743b..99b088d1f 100644
--- a/agentops/instrumentation/__init__.py
+++ b/agentops/instrumentation/__init__.py
@@ -169,7 +169,7 @@ class InstrumentorConfig(TypedDict):
     "agents": {
         "module_name": "agentops.instrumentation.openai_agents",
         "class_name": "OpenAIAgentsInstrumentor",
-        "min_version": "0.1.0",
+        "min_version": "0.0.1",
     },
 }
diff --git a/agentops/instrumentation/openai/attributes/response.py b/agentops/instrumentation/openai/attributes/response.py
index d0a821f8f..195eb5bdb 100644
--- a/agentops/instrumentation/openai/attributes/response.py
+++ b/agentops/instrumentation/openai/attributes/response.py
@@ -83,7 +83,7 @@
     SpanAttributes.LLM_RESPONSE_ID: "id",
     SpanAttributes.LLM_REQUEST_MODEL: "model",
     SpanAttributes.LLM_RESPONSE_MODEL: "model",
-    SpanAttributes.LLM_PROMPTS: "instructions",
+    SpanAttributes.LLM_OPENAI_RESPONSE_INSTRUCTIONS: "instructions",
     SpanAttributes.LLM_REQUEST_MAX_TOKENS: "max_output_tokens",
     SpanAttributes.LLM_REQUEST_TEMPERATURE: "temperature",
     SpanAttributes.LLM_REQUEST_TOP_P: "top_p",
diff --git a/agentops/instrumentation/openai_agents/attributes/common.py b/agentops/instrumentation/openai_agents/attributes/common.py
index b06691021..93e880cf3 100644
--- a/agentops/instrumentation/openai_agents/attributes/common.py
+++ b/agentops/instrumentation/openai_agents/attributes/common.py
@@ -5,15 +5,25 @@
 for extracting and formatting attributes according to OpenTelemetry semantic
 conventions.
 """
-from typing import Any
+from typing import Any, List, Dict, Optional
 
 from agentops.logging import logger
-from agentops.semconv import AgentAttributes, WorkflowAttributes, SpanAttributes, InstrumentationAttributes
+from agentops.semconv import (
+    AgentAttributes,
+    WorkflowAttributes,
+    SpanAttributes,
+    InstrumentationAttributes,
+    ToolAttributes,
+    AgentOpsSpanKindValues,
+    ToolStatus,
+)
+from agentops.helpers import safe_serialize
 from agentops.instrumentation.common import AttributeMap, _extract_attributes_from_mapping
 from agentops.instrumentation.common.attributes import get_common_attributes
 from agentops.instrumentation.common.objects import get_uploaded_object_attributes
 from agentops.instrumentation.openai.attributes.response import get_response_response_attributes
 from agentops.instrumentation.openai_agents import LIBRARY_NAME, LIBRARY_VERSION
 from agentops.instrumentation.openai_agents.attributes.model import (
     get_model_attributes,
     get_model_config_attributes,
@@ -33,9 +43,10 @@
 
 # Attribute mapping for FunctionSpanData
 FUNCTION_SPAN_ATTRIBUTES: AttributeMap = {
-    AgentAttributes.AGENT_NAME: "name",
-    WorkflowAttributes.WORKFLOW_INPUT: "input",
-    WorkflowAttributes.FINAL_OUTPUT: "output",
+    ToolAttributes.TOOL_NAME: "name",
+    ToolAttributes.TOOL_PARAMETERS: "input",
+    ToolAttributes.TOOL_RESULT: "output",
     AgentAttributes.FROM_AGENT: "from_agent",
 }
@@ -55,7 +66,9 @@
 
 # Attribute mapping for ResponseSpanData
 RESPONSE_SPAN_ATTRIBUTES: AttributeMap = {
-    WorkflowAttributes.WORKFLOW_INPUT: "input",
+    # Don't map input here: it caused double serialization.
+    # Prompts are handled manually in get_response_span_attributes.
+    SpanAttributes.LLM_RESPONSE_MODEL: "model",
 }
@@ -80,6 +93,72 @@
 }
 
 
+def _get_llm_messages_attributes(messages: Optional[List[Dict]], attribute_base: str) -> AttributeMap:
+    """Extract attributes from a list of message dictionaries (e.g., prompts or completions).
+
+    Uses the attribute_base to format the specific attribute keys.
+    """
+    attributes: AttributeMap = {}
+    if not messages:
+        return attributes
+    if not isinstance(messages, list):
+        logger.warning(
+            f"[_get_llm_messages_attributes] Expected a list of messages for base '{attribute_base}', "
+            f"got {type(messages)}. Value: {safe_serialize(messages)}. Returning empty."
+        )
+        return attributes
+
+    for i, msg_dict in enumerate(messages):
+        if isinstance(msg_dict, dict):
+            role = msg_dict.get("role")
+            content = msg_dict.get("content")
+            name = msg_dict.get("name")
+            tool_calls = msg_dict.get("tool_calls")
+            tool_call_id = msg_dict.get("tool_call_id")
+
+            # Common role and content
+            if role:
+                attributes[f"{attribute_base}.{i}.role"] = str(role)
+            if content is not None:
+                attributes[f"{attribute_base}.{i}.content"] = safe_serialize(content)
+
+            # Optional name for some roles
+            if name:
+                attributes[f"{attribute_base}.{i}.name"] = str(name)
+
+            # Tool calls (specific to assistant messages)
+            if tool_calls and isinstance(tool_calls, list):
+                for tc_idx, tc_dict in enumerate(tool_calls):
+                    if isinstance(tc_dict, dict):
+                        tc_id = tc_dict.get("id")
+                        tc_type = tc_dict.get("type")
+                        tc_function_data = tc_dict.get("function")
+
+                        # Initialize so these are defined even when function data is absent
+                        tc_func_name = None
+                        tc_func_args = None
+                        if tc_function_data and isinstance(tc_function_data, dict):
+                            tc_func_name = tc_function_data.get("name")
+                            tc_func_args = tc_function_data.get("arguments")
+
+                        base_tool_call_key = f"{attribute_base}.{i}.tool_calls.{tc_idx}"
+                        if tc_id:
+                            attributes[f"{base_tool_call_key}.id"] = str(tc_id)
+                        if tc_type:
+                            attributes[f"{base_tool_call_key}.type"] = str(tc_type)
+                        if tc_func_name:
+                            attributes[f"{base_tool_call_key}.function.name"] = str(tc_func_name)
+                        if tc_func_args is not None:
+                            attributes[f"{base_tool_call_key}.function.arguments"] = safe_serialize(tc_func_args)
+
+            # Tool call ID (specific to tool_call_output messages)
+            if tool_call_id:
+                attributes[f"{attribute_base}.{i}.tool_call_id"] = str(tool_call_id)
+        else:
+            # If a message is not a dict, serialize its representation
+            attributes[f"{attribute_base}.{i}.content"] = safe_serialize(msg_dict)
+
+    return attributes
+
+
 def get_common_instrumentation_attributes() -> AttributeMap:
     """Get common instrumentation attributes for the OpenAI Agents instrumentation.
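As a quick sanity check on the flattening added above, this is how `_get_llm_messages_attributes` is expected to lay out a chat history, assuming the module is importable in a test environment and that `safe_serialize` passes plain strings through unchanged (which the updated tests below imply; the sample messages are illustrative):

```python
from agentops.instrumentation.openai_agents.attributes.common import (
    _get_llm_messages_attributes,
)

# Each message lands at an indexed key under the given base, e.g. gen_ai.prompt.0.role
attrs = _get_llm_messages_attributes(
    [
        {"role": "system", "content": "Be terse."},
        {"role": "user", "content": "What is the capital of France?"},
    ],
    "gen_ai.prompt",
)
assert attrs["gen_ai.prompt.0.role"] == "system"
assert attrs["gen_ai.prompt.1.role"] == "user"
assert attrs["gen_ai.prompt.1.content"] == "What is the capital of France?"
```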
@@ -109,9 +188,22 @@ def get_agent_span_attributes(span_data: Any) -> AttributeMap:
 
     Returns:
         Dictionary of attributes for agent span
     """
-    attributes = _extract_attributes_from_mapping(span_data, AGENT_SPAN_ATTRIBUTES)
+    attributes = {}
     attributes.update(get_common_attributes())
+    attributes[SpanAttributes.AGENTOPS_SPAN_KIND] = AgentOpsSpanKindValues.AGENT.value
+
+    # Get the agent name directly from span_data
+    if hasattr(span_data, "name") and span_data.name:
+        attributes[AgentAttributes.AGENT_NAME] = str(span_data.name)
+
+    # Get handoffs directly from span_data
+    if hasattr(span_data, "handoffs") and span_data.handoffs:
+        attributes[AgentAttributes.HANDOFFS] = safe_serialize(span_data.handoffs)
+
+    if hasattr(span_data, "tools") and span_data.tools:
+        attributes[AgentAttributes.AGENT_TOOLS] = safe_serialize([str(getattr(t, "name", t)) for t in span_data.tools])
+
     return attributes
@@ -128,6 +220,20 @@ def get_function_span_attributes(span_data: Any) -> AttributeMap:
     """
     attributes = _extract_attributes_from_mapping(span_data, FUNCTION_SPAN_ATTRIBUTES)
     attributes.update(get_common_attributes())
+    attributes[SpanAttributes.AGENTOPS_SPAN_KIND] = AgentOpsSpanKindValues.TOOL.value
+
+    # Determine tool status from the presence of an error or output;
+    # otherwise the exporter sets status based on the span lifecycle.
+    if hasattr(span_data, "error") and span_data.error:
+        attributes[ToolAttributes.TOOL_STATUS] = ToolStatus.FAILED.value
+    elif hasattr(span_data, "output") and span_data.output is not None:
+        attributes[ToolAttributes.TOOL_STATUS] = ToolStatus.SUCCEEDED.value
+
+    if hasattr(span_data, "from_agent") and span_data.from_agent:
+        attributes[f"{AgentAttributes.AGENT}.calling_tool.name"] = str(span_data.from_agent)
 
     return attributes
@@ -149,6 +255,66 @@ def get_handoff_span_attributes(span_data: Any) -> AttributeMap:
     return attributes
 
 
+def _extract_text_from_content(content: Any) -> Optional[str]:
+    """Extract text from various content formats used in the Responses API.
+
+    Args:
+        content: Content in various formats (str, dict, list)
+
+    Returns:
+        Extracted text, or None if no text is found
+    """
+    if isinstance(content, str):
+        return content
+
+    if isinstance(content, dict):
+        # Direct text field
+        if "text" in content:
+            return content["text"]
+        # Output text type
+        if content.get("type") == "output_text":
+            return content.get("text", "")
+
+    if isinstance(content, list):
+        text_parts = []
+        for item in content:
+            extracted = _extract_text_from_content(item)
+            if extracted:
+                text_parts.append(extracted)
+        return " ".join(text_parts) if text_parts else None
+
+    return None
+
+
+def _build_prompt_messages_from_input(input_data: Any) -> List[Dict[str, Any]]:
+    """Build prompt messages from various input formats.
+
+    Args:
+        input_data: Input data from span_data.input
+
+    Returns:
+        List of message dictionaries with role and content
+    """
+    messages = []
+
+    if isinstance(input_data, str):
+        # A bare string input is assumed to be a user message
+        messages.append({"role": "user", "content": input_data})
+
+    elif isinstance(input_data, list):
+        for msg in input_data:
+            if isinstance(msg, dict):
+                role = msg.get("role")
+                content = msg.get("content")
+
+                if role and content is not None:
+                    extracted_text = _extract_text_from_content(content)
+                    if extracted_text:
+                        messages.append({"role": role, "content": extracted_text})
+
+    return messages
+
+
 def get_response_span_attributes(span_data: Any) -> AttributeMap:
     """Extract attributes from a ResponseSpanData object with full LLM response processing.
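The two helpers above normalize the Responses API's mixed input shapes before flattening. Assuming the module is importable, the intended behavior looks like this (the sample payloads are illustrative):

```python
from agentops.instrumentation.openai_agents.attributes.common import (
    _build_prompt_messages_from_input,
)

# A bare string is treated as a single user message.
assert _build_prompt_messages_from_input("Hi") == [{"role": "user", "content": "Hi"}]

# Structured content parts are reduced to their text and joined with spaces.
msgs = _build_prompt_messages_from_input(
    [{"role": "user", "content": [{"type": "output_text", "text": "Hi"}, {"text": "there"}]}]
)
assert msgs == [{"role": "user", "content": "Hi there"}]
```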
@@ -170,8 +336,43 @@ def get_response_span_attributes(span_data: Any) -> AttributeMap:
     attributes = _extract_attributes_from_mapping(span_data, RESPONSE_SPAN_ATTRIBUTES)
     attributes.update(get_common_attributes())
 
+    # Process response attributes first to get all response data, including instructions
     if span_data.response:
-        attributes.update(get_response_response_attributes(span_data.response))
+        response_attrs = get_response_response_attributes(span_data.response)
+
+        # Extract the system prompt if present
+        system_prompt = response_attrs.get(SpanAttributes.LLM_OPENAI_RESPONSE_INSTRUCTIONS)
+
+        prompt_messages = []
+        # Add the system prompt as the first message if available
+        if system_prompt:
+            prompt_messages.append({"role": "system", "content": system_prompt})
+            # Remove it from the response attrs to avoid duplication
+            response_attrs.pop(SpanAttributes.LLM_OPENAI_RESPONSE_INSTRUCTIONS, None)
+
+        # Add conversation history from the input
+        if hasattr(span_data, "input") and span_data.input:
+            prompt_messages.extend(_build_prompt_messages_from_input(span_data.input))
+
+        # Format prompts using the shared message formatter
+        if prompt_messages:
+            attributes.update(_get_llm_messages_attributes(prompt_messages, "gen_ai.prompt"))
+
+        # Drop any prompt-related attributes that response processing may have set
+        response_attrs = {
+            k: v for k, v in response_attrs.items() if not k.startswith("gen_ai.prompt") and k != "gen_ai.request.tools"
+        }
+
+        # Add the remaining response attributes
+        attributes.update(response_attrs)
+    # No response object, so just process the input as prompts
+    elif hasattr(span_data, "input") and span_data.input:
+        prompt_messages = _build_prompt_messages_from_input(span_data.input)
+        if prompt_messages:
+            attributes.update(_get_llm_messages_attributes(prompt_messages, "gen_ai.prompt"))
+
+    attributes[SpanAttributes.AGENTOPS_SPAN_KIND] = AgentOpsSpanKindValues.LLM.value
 
     return attributes
@@ -181,12 +382,6 @@ def get_generation_span_attributes(span_data: Any) -> AttributeMap:
 
     Generations are requests made to the `openai.completions` endpoint.
 
-    # TODO this has not been extensively tested yet as there is a flag that needs ot be set to use the
-    # completions API with the Agents SDK.
-    # We can enable chat.completions API by calling:
-    # `from agents import set_default_openai_api`
-    # `set_default_openai_api("chat_completions")`
-
     Args:
         span_data: The GenerationSpanData object
 
@@ -196,17 +391,42 @@
     attributes = _extract_attributes_from_mapping(span_data, GENERATION_SPAN_ATTRIBUTES)
     attributes.update(get_common_attributes())
 
+    if SpanAttributes.LLM_PROMPTS in attributes:
+        raw_prompt_input = attributes.pop(SpanAttributes.LLM_PROMPTS)
+        formatted_prompt_for_llm = []
+        if isinstance(raw_prompt_input, str):
+            formatted_prompt_for_llm.append({"role": "user", "content": raw_prompt_input})
+        elif isinstance(raw_prompt_input, list):
+            temp_formatted_list = []
+            all_strings_or_dicts = True
+            for item in raw_prompt_input:
+                if isinstance(item, str):
+                    temp_formatted_list.append({"role": "user", "content": item})
+                elif isinstance(item, dict):
+                    temp_formatted_list.append(item)
+                else:
+                    all_strings_or_dicts = False
+                    break
+            if all_strings_or_dicts:
+                formatted_prompt_for_llm = temp_formatted_list
+            else:
+                logger.warning(
+                    f"[get_generation_span_attributes] span_data.input was a list with mixed/unexpected content: {safe_serialize(raw_prompt_input)}"
+                )
+
+        if formatted_prompt_for_llm:
+            attributes.update(_get_llm_messages_attributes(formatted_prompt_for_llm, "gen_ai.prompt"))
+
     if span_data.model:
         attributes.update(get_model_attributes(span_data.model))
 
-    # Process output for GenerationSpanData if available
     if span_data.output:
         attributes.update(get_generation_output_attributes(span_data.output))
 
-    # Add model config attributes if present
     if span_data.model_config:
         attributes.update(get_model_config_attributes(span_data.model_config))
 
+    attributes[SpanAttributes.AGENTOPS_SPAN_KIND] = AgentOpsSpanKindValues.LLM.value
+
     return attributes
diff --git a/agentops/instrumentation/openai_agents/exporter.py b/agentops/instrumentation/openai_agents/exporter.py
index 598f81c18..6e6734971 100644
--- a/agentops/instrumentation/openai_agents/exporter.py
+++ b/agentops/instrumentation/openai_agents/exporter.py
@@ -196,7 +196,7 @@ def export_trace(self, trace: Any) -> None:
                 "span": span,
                 "span_type": "TraceSpan",
                 "trace_id": trace_id,
-                "parent_id": None,  # Trace spans don't have parents
+                "parent_id": None,
             }
         else:
             span.end()
@@ -308,6 +308,7 @@ def export_span(self, span: Any) -> None:
         # Unique lookup key for this span
         span_lookup_key = _get_span_lookup_key(trace_id, span_id)
+
         attributes = get_base_span_attributes(span)
         span_attributes = get_span_attributes(span_data)
         attributes.update(span_attributes)
@@ -346,7 +347,6 @@ def export_span(self, span: Any) -> None:
             # Handle any error information
             self._handle_span_error(span, otel_span)
 
-            # DO NOT end the span for start events - we want to keep it open for updates
             return
@@ -354,33 +354,39 @@ def export_span(self, span: Any) -> None:
         if span_lookup_key in self._span_map:
             existing_span = self._span_map[span_lookup_key]
 
-            # Check if span is already ended
             span_is_ended = False
             if isinstance(existing_span, Span) and hasattr(existing_span, "_end_time"):
                 span_is_ended = existing_span._end_time is not None
 
             if not span_is_ended:
-                # Update and end the existing span
+                # Update with final attributes
                 for key, value in attributes.items():
                     existing_span.set_attribute(key, value)
 
-                # Set status and handle any error information
-                existing_span.set_status(Status(StatusCode.OK if span.status == "OK" else StatusCode.ERROR))
+                existing_span.set_status(
+                    Status(StatusCode.OK if getattr(span, "status", "OK") == "OK" else StatusCode.ERROR)
+                )
                 self._handle_span_error(span, existing_span)
-
                 existing_span.end()
+            # Span already ended: create a new one (should be rare if the lifecycle logic is correct)
             else:
-                # Create a new span with the complete data (already ended state)
-                self.create_span(span, span_type, attributes)
+                logger.warning(
+                    f"[Exporter] SDK span_id: {span_id} (END event) - Attempting to end an ALREADY ENDED span: {span_lookup_key}. Creating a new one instead."
+                )
+                self.create_span(span, span_type, attributes, is_already_ended=True)
+        # No existing span found for the end event: create a new one
         else:
-            # No existing span found, create a new one with all data
-            self.create_span(span, span_type, attributes)
+            logger.warning(
+                f"[Exporter] SDK span_id: {span_id} (END event) - No active span found for end event: {span_lookup_key}. Creating a new one."
+            )
+            self.create_span(span, span_type, attributes, is_already_ended=True)
 
-        # Clean up our tracking resources
         self._active_spans.pop(span_id, None)
         self._span_map.pop(span_lookup_key, None)
 
-    def create_span(self, span: Any, span_type: str, attributes: Dict[str, Any]) -> None:
+    def create_span(
+        self, span: Any, span_type: str, attributes: Dict[str, Any], is_already_ended: bool = False
+    ) -> None:
         """Create a new span with the provided data and end it immediately.
 
         This method creates a span using the appropriate parent context, applies
@@ -392,11 +398,8 @@ def create_span(self, span: Any, span_type: str, attributes: Dict[str, Any]) ->
             span_type: The type of span being created
             attributes: The attributes to set on the span
+            is_already_ended: Whether the SDK span had already ended when this was called
         """
-        # For simplicity and backward compatibility, use None as the parent context
-        # In a real implementation, you might want to look up the parent
         parent_ctx = None
         if hasattr(span, "parent_id") and span.parent_id:
-            # Get parent context from trace_id and parent_id if available
             parent_ctx = self._get_parent_context(
                 getattr(span, "trace_id", "unknown"), getattr(span, "id", "unknown"), span.parent_id
             )
@@ -404,7 +407,6 @@
         name = get_span_name(span)
         kind = get_span_kind(span)
 
-        # Create the span with parent context and end it immediately
         self._create_span_with_parent(
             name=name, kind=kind, attributes=attributes, parent_ctx=parent_ctx, end_immediately=True
         )
diff --git a/agentops/instrumentation/openai_agents/instrumentor.py b/agentops/instrumentation/openai_agents/instrumentor.py
index 2684bb50d..2b92e2d79 100644
--- a/agentops/instrumentation/openai_agents/instrumentor.py
+++ b/agentops/instrumentation/openai_agents/instrumentor.py
@@ -5,42 +5,59 @@
 tool usage, LLM requests, and token metrics.
 
 The implementation uses a clean separation between exporters and processors. The exporter
-translates Agent spans into OpenTelemetry spans with appropriate semantic conventions.
-The processor implements the tracing interface, collects metrics, and manages timing data.
-
-We use the built-in add_trace_processor hook for all functionality. Streaming support
-would require monkey-patching the run method of `Runner`, but doesn't really get us
-more data than we already have, since the `Response` object is always passed to us
-from the `agents.tracing` module.
-
-TODO Calls to the OpenAI API are not available in this tracing context, so we may
-need to monkey-patch the `openai` from here to get that data. While we do have
-separate instrumentation for the OpenAI API, in order to get it to nest with the
-spans we create here, it's probably easier (or even required) that we incorporate
-that here as well.
+translates Agent spans into OpenTelemetry spans with appropriate semantic conventions.
+
+The processor implements the tracing interface, collects metrics, and manages timing data.
+
+We use the built-in add_trace_processor hook for all functionality. Streaming support
+would require monkey-patching the run method of `Runner`, but wouldn't really get us
+more data than we already have, since the `Response` object is always passed to us
+from the `agents.tracing` module.
+
+TODO: Calls to the OpenAI API are not visible in this tracing context, so we may
+need to monkey-patch `openai` from here to get that data. While we do have
+separate instrumentation for the OpenAI API, to get it to nest with the spans
+we create here it's probably easier (or even necessary) to incorporate it
+here as well.
 """
 
 from typing import Collection
+
+from opentelemetry import trace
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor  # type: ignore
 
+from agentops.instrumentation.openai_agents import LIBRARY_VERSION
+
 from agentops.logging import logger
 from agentops.instrumentation.openai_agents.processor import OpenAIAgentsProcessor
 from agentops.instrumentation.openai_agents.exporter import OpenAIAgentsExporter
 
 
 class OpenAIAgentsInstrumentor(BaseInstrumentor):
-    """An instrumentor for OpenAI Agents SDK that primarily uses the built-in tracing API."""
+    """An instrumentor for the OpenAI Agents SDK that uses the built-in tracing API."""
 
     _processor = None
     _exporter = None
     _default_processor = None
 
+    def __init__(self):
+        super().__init__()
+        self._tracer = None
+        self._is_instrumented_instance_flag = False
+
     def instrumentation_dependencies(self) -> Collection[str]:
         """Return packages required for instrumentation."""
         return ["openai-agents >= 0.0.1"]
 
     def _instrument(self, **kwargs):
         """Instrument the OpenAI Agents SDK."""
+        if self._is_instrumented_instance_flag:
+            logger.debug("OpenAI Agents SDK already instrumented. Skipping.")
+            return
+
         tracer_provider = kwargs.get("tracer_provider")
+        if self._tracer is None:
+            logger.debug("OpenAI Agents SDK tracer is None, creating a new tracer.")
+            self._tracer = trace.get_tracer("agentops.instrumentation.openai_agents", LIBRARY_VERSION)
 
         try:
             self._exporter = OpenAIAgentsExporter(tracer_provider=tracer_provider)
@@ -49,23 +66,27 @@ def _instrument(self, **kwargs):
             )
 
             # Replace the default processor with our processor
-            from agents import set_trace_processors  # type: ignore
-            from agents.tracing.processors import default_processor  # type: ignore
+            from agents import set_trace_processors
+            from agents.tracing.processors import default_processor
 
-            # Store reference to default processor for later restoration
-            self._default_processor = default_processor()
+            # Keep a reference to the default processor for later restoration
+            if getattr(self, "_default_processor", None) is None:
+                self._default_processor = default_processor()
+
             set_trace_processors([self._processor])
 
-            logger.debug("Replaced default processor with OpenAIAgentsProcessor in OpenAI Agents SDK")
+            self._is_instrumented_instance_flag = True
         except Exception as e:
-            logger.warning(f"Failed to instrument OpenAI Agents SDK: {e}")
+            logger.warning(f"Failed to instrument OpenAI Agents SDK: {e}", exc_info=True)
 
     def _uninstrument(self, **kwargs):
         """Remove instrumentation from OpenAI Agents SDK."""
+        if not self._is_instrumented_instance_flag:
+            logger.debug("OpenAI Agents SDK not currently instrumented. Skipping uninstrument.")
+            return
         try:
             # Clean up any active spans in the exporter
             if hasattr(self, "_exporter") and self._exporter:
-                # Call cleanup to properly handle any active spans
                 if hasattr(self._exporter, "cleanup"):
                     self._exporter.cleanup()
 
@@ -75,9 +96,12 @@ def _uninstrument(self, **kwargs):
             if hasattr(self, "_default_processor") and self._default_processor:
                 set_trace_processors([self._default_processor])
                 self._default_processor = None
+            else:
+                logger.warning("OpenAI Agents SDK has no default processor to restore.")
 
             self._processor = None
             self._exporter = None
+            self._is_instrumented_instance_flag = False
 
             logger.info("Successfully removed OpenAI Agents SDK instrumentation")
         except Exception as e:
-            logger.warning(f"Failed to uninstrument OpenAI Agents SDK: {e}")
+            logger.warning(f"Failed to uninstrument OpenAI Agents SDK: {e}", exc_info=True)
diff --git a/agentops/semconv/span_attributes.py b/agentops/semconv/span_attributes.py
index 79f0285a9..0561c3910 100644
--- a/agentops/semconv/span_attributes.py
+++ b/agentops/semconv/span_attributes.py
@@ -77,6 +77,7 @@ class SpanAttributes:
     # OpenAI specific
     LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint"
+    LLM_OPENAI_RESPONSE_INSTRUCTIONS = "gen_ai.openai.instructions"
     LLM_OPENAI_API_BASE = "gen_ai.openai.api_base"
     LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version"
     LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type"
diff --git a/tests/unit/instrumentation/openai_agents/test_openai_agents_attributes.py b/tests/unit/instrumentation/openai_agents/test_openai_agents_attributes.py
index 1173b34af..35085eef7 100644
--- a/tests/unit/instrumentation/openai_agents/test_openai_agents_attributes.py
+++ b/tests/unit/instrumentation/openai_agents/test_openai_agents_attributes.py
@@ -46,7 +46,6 @@
     SpanAttributes,
     MessageAttributes,
     AgentAttributes,
-    WorkflowAttributes,
     InstrumentationAttributes,
 )
 
@@ -171,10 +170,8 @@ def test_agent_span_attributes(self):
 
         # Verify extracted attributes
         assert attrs[AgentAttributes.AGENT_NAME] == "test_agent"
-        assert attrs[WorkflowAttributes.WORKFLOW_INPUT] == "test input"
-        assert attrs[WorkflowAttributes.FINAL_OUTPUT] == "test output"
-        assert attrs[AgentAttributes.AGENT_TOOLS] == '["tool1", "tool2"]'  # JSON-serialized string is fine.
-        # LLM_PROMPTS is handled in common.py now so we don't test for it directly
+        assert "agentops.span.kind" in attrs
+        assert attrs["agentops.span.kind"] == "agent"
 
     def test_function_span_attributes(self):
         """Test extraction of attributes from a FunctionSpanData object"""
@@ -189,11 +186,17 @@ def test_function_span_attributes(self):
         # Extract attributes
         attrs = get_function_span_attributes(mock_function_span)
 
-        # Verify extracted attributes - note that complex objects should be serialized to strings
-        assert attrs[AgentAttributes.AGENT_NAME] == "test_function"
-        assert attrs[WorkflowAttributes.WORKFLOW_INPUT] == '{"arg1": "value1"}'  # Serialized string
-        assert attrs[WorkflowAttributes.FINAL_OUTPUT] == '{"result": "success"}'  # Serialized string
-        assert attrs[AgentAttributes.FROM_AGENT] == "caller_agent"
+        # Verify extracted attributes
+        assert "tool.name" in attrs
+        assert attrs["tool.name"] == "test_function"
+        assert "tool.parameters" in attrs
+        assert '{"arg1": "value1"}' in attrs["tool.parameters"]  # Serialized string
+        assert "tool.result" in attrs
+        assert '{"result": "success"}' in attrs["tool.result"]  # Serialized string
+        assert "agentops.span.kind" in attrs
+        assert attrs["agentops.span.kind"] == "tool"
+        assert "agent.calling_tool.name" in attrs
+        assert attrs["agent.calling_tool.name"] == "caller_agent"
 
     def test_generation_span_with_chat_completion(self):
         """Test extraction of attributes from a GenerationSpanData with Chat Completion API data"""
@@ -217,7 +220,10 @@ def __init__(self):
         # Verify model and input attributes
         assert attrs[SpanAttributes.LLM_REQUEST_MODEL] == "gpt-4o-2024-08-06"
         assert attrs[SpanAttributes.LLM_RESPONSE_MODEL] == "gpt-4o-2024-08-06"
-        assert attrs[SpanAttributes.LLM_PROMPTS] == "What is the capital of France?"
+        assert "gen_ai.prompt.0.role" in attrs
+        assert attrs["gen_ai.prompt.0.role"] == "user"
+        assert "gen_ai.prompt.0.content" in attrs
+        assert attrs["gen_ai.prompt.0.content"] == "What is the capital of France?"
 
         # Verify model config attributes
         assert attrs[SpanAttributes.LLM_REQUEST_TEMPERATURE] == 0.7
@@ -248,7 +254,10 @@ def __init__(self):
         # Verify model and input attributes
         assert attrs[SpanAttributes.LLM_REQUEST_MODEL] == "gpt-4o-2024-08-06"
         assert attrs[SpanAttributes.LLM_RESPONSE_MODEL] == "gpt-4o-2024-08-06"
-        assert attrs[SpanAttributes.LLM_PROMPTS] == "What is the capital of France?"
+        assert "gen_ai.prompt.0.role" in attrs
+        assert attrs["gen_ai.prompt.0.role"] == "user"
+        assert "gen_ai.prompt.0.content" in attrs
+        assert attrs["gen_ai.prompt.0.content"] == "What is the capital of France?"
 
         # Verify token usage - this is handled through model_to_dict now
         # Since we're using a direct fixture, the serialization might differ
@@ -421,8 +430,12 @@ def __init__(self):
         attrs = get_response_span_attributes(mock_response_span)
 
         # Verify extracted attributes
-        # SpanAttributes.LLM_PROMPTS is no longer explicitly set here
-        assert attrs[WorkflowAttributes.WORKFLOW_INPUT] == "user query"
+        assert "gen_ai.prompt.0.role" in attrs
+        assert attrs["gen_ai.prompt.0.role"] == "user"
+        assert "gen_ai.prompt.0.content" in attrs
+        assert attrs["gen_ai.prompt.0.content"] == "user query"
+        assert "agentops.span.kind" in attrs
+        assert attrs["agentops.span.kind"] == "llm"
 
     def test_span_attributes_dispatcher(self):
         """Test the dispatcher function that routes to type-specific extractors"""
@@ -456,7 +469,8 @@ def __init__(self):
         assert AgentAttributes.AGENT_NAME in agent_attrs
 
         function_attrs = get_span_attributes(function_span)
-        assert AgentAttributes.AGENT_NAME in function_attrs
+        assert "tool.name" in function_attrs
+        assert function_attrs["tool.name"] == "test_function"
 
         # Unknown span type should return empty dict
         unknown_attrs = get_span_attributes(unknown_span)
diff --git a/tests/unit/instrumentation/openai_core/test_response_attributes.py b/tests/unit/instrumentation/openai_core/test_response_attributes.py
index 90391002b..660302ab0 100644
--- a/tests/unit/instrumentation/openai_core/test_response_attributes.py
+++ b/tests/unit/instrumentation/openai_core/test_response_attributes.py
@@ -340,8 +340,8 @@ def test_get_response_response_attributes(self):
         assert attributes[SpanAttributes.LLM_RESPONSE_ID] == response_data["id"]
         assert SpanAttributes.LLM_RESPONSE_MODEL in attributes
         assert attributes[SpanAttributes.LLM_RESPONSE_MODEL] == response_data["model"]
-        assert SpanAttributes.LLM_PROMPTS in attributes
-        assert attributes[SpanAttributes.LLM_PROMPTS] == response_data["instructions"]
+        assert SpanAttributes.LLM_OPENAI_RESPONSE_INSTRUCTIONS in attributes
+        assert attributes[SpanAttributes.LLM_OPENAI_RESPONSE_INSTRUCTIONS] == response_data["instructions"]
 
         # Check usage attributes
         assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS in attributes
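Net effect on a response span's prompt attributes, sketched with illustrative values (only keys asserted in the updated tests are shown; the system entry appears only when the response carries instructions):

```python
# Before: instructions were exported flat under LLM_PROMPTS.
# After: they surface as gen_ai.openai.instructions internally and are folded
# into the indexed prompt list as a leading system message, so a response
# span ends up with attributes shaped like:
expected_response_span_attrs = {
    "agentops.span.kind": "llm",
    "gen_ai.prompt.0.role": "system",        # from response.instructions, when present
    "gen_ai.prompt.0.content": "Be terse.",  # illustrative value
    "gen_ai.prompt.1.role": "user",          # from span_data.input
    "gen_ai.prompt.1.content": "user query",
}
```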