# Refactor Agents SDK instrumentation. #854
**Merged**
## Commits (73)

- `b248d24` Fix: Improve serialization of completions/responses in Agents SDK ins… (devin-ai-integration[bot])
- `30eb11e` Fix: Improve serialization of completions/responses in Agents SDK ins… (devin-ai-integration[bot])
- `d6c2f8a` Tests for completions. (tcdent)
- `9283b83` Separate OpenAI tests into `completion` and `responses` (tcdent)
- `770b37a` Refactor completions and responses unit tests. (tcdent)
- `29a115f` agents SDK test using semantic conventions. (tcdent)
- `a67deb7` semantic conventions in openai completions and responses tests (tcdent)
- `6f1e77a` Exporter refactor and generalization. standardization and simplificat… (tcdent)
- `5b4e940` Continued refactor of Agents instrumentor. Usurp third-party implemen… (tcdent)
- `0169502` Semantic conventions for messages. (tcdent)
- `960a01f` Tools for generating real test data from OpenAI Agents. (tcdent)
- `124a469` support tool calls and set of responses. missing import (tcdent)
- `ce5b122` reasoning tokens, semantic conventions, and implementation in OpenAI … (tcdent)
- `039978b` populate agents SDK tests with fixture data. Simplify fixture data ge… (tcdent)
- `1fa5fb6` Add chat completion support to openai_agents. Cleanup OpenAI agents i… (tcdent)
- `72ab339` Agents instrumentor cleanup. (tcdent)
- `d206b67` Cleanup. (tcdent)
- `4661fa5` Cleanup init. (tcdent)
- `e44a509` absolute import. (tcdent)
- `cf73879` Merge branch 'main' into serialization-fix-test (dot-agi)
- `913d18b` fix breaking error. (tcdent)
- `d5ac88d` Correct naming (tcdent)
- `734b15d` rename (tcdent)
- `9e8c845` Refactor completions to always use semantic conventions. (tcdent)
- `c6e9bff` More robust output (tcdent)
- `4c17725` use openai_agents tracing api to gather span data. (tcdent)
- `1e140cf` Agents associates spans with a parent span and exports. (tcdent)
- `6d268ec` OpenAi responses instrumentor. (tcdent)
- `cccfef8` Merge branch 'main' into serialization-fix-test (dot-agi)
- `91fea4f` Delete examples/agents-examples/basic/hello_world.py (tcdent)
- `8c9ec5c` pass strings to serialize and return them early. (tcdent)
- `11cc97d` deduplication and better hierarchy. simplification of tests. separati… (tcdent)
- `f01d6dd` Notes and working documents that should not make it into main. (tcdent)
- `1ac8077` Merge main into serialization-fix-test-drafts (tcdent)
- `59a4fc7` more descriptive debug messaging in OpenAI Agents instrumentor (tcdent)
- `1ad9fd7` pertinent testing information in claude.md. (tcdent)
- `c4cb26e` better version determination for the library. (tcdent)
- `c60e29a` Test for generation tokens as well. (tcdent)
- `1d2e4f7` Cleanup attribute formatting to use modular function format with spec… (tcdent)
- `32d7e88` Remove duplicated model export from processor. (tcdent)
- `4256384` nest all spans under the parent_trace root span and open and close th… (tcdent)
- `016172a` clean up common attributes parsing helpers. (tcdent)
- `be9448a` Simplify processor. (tcdent)
- `60392a0` Cleanup exporter. (tcdent)
- `99cd3c5` Cleanup instrumentor (tcdent)
- `62f3bf5` Cleanup attributes (tcdent)
- `8f0f44d` Update README and SPANS definition. Add example with tool usage. (tcdent)
- `cd9954d` Fix tool usage example. (tcdent)
- `d4fe0e8` Get completion data on outputs. (tcdent)
- `8bee74e` Delete notes (tcdent)
- `830f504` Fix tests for attributes. Rewmove debug statements. (tcdent)
- `9bfda9f` Implement tests for OpenAi agents. (tcdent)
- `bd30017` Merge branch 'main' into serialization-fix-test (dot-agi)
- `14c9837` Better naming for spans. (tcdent)
- `1465821` Openai Response type parsing improvements. (tcdent)
- `89d9683` Cleanup exporter imports and naming. (tcdent)
- `9f13810` Handoff agent example. (tcdent)
- `c98bbbf` Cleanup imports on common. (tcdent)
- `6afe3fc` Disable openai completions/responses tests. TODO probably delete these. (tcdent)
- `f325529` Disable openai responses intrumentor; it is handled inside openai_age… (tcdent)
- `7fb5725` Add note about enabling chat.completions api instead of responses. (tcdent)
- `80e30e8` Move exporter convention notes to README (tcdent)
- `3f1a793` Update tests. (tcdent)
- `314cb88` Disable openai responses instrumentation test. (tcdent)
- `528e5b3` Skip `parse` serialization tests. (tcdent)
- `bb71461` Cleanup openai responses instrumention and tests; will be included in… (tcdent)
- `9e3208f` Resolve type checking errors. (tcdent)
- `c91d78c` get correct library version (dot-agi)
- `7c29e96` remove debug statements and import LIBRARY_VERSION (dot-agi)
- `ef7dc3e` Merge branch 'main' into serialization-fix-test (tcdent)
- `cc14de9` Log deeplink to trace on AgentOps dashboard. (#879) (tcdent)
- `be94e41` Merge branch 'main' into serialization-fix-test (tcdent)
- `c8a6531` Merge branch 'main' into serialization-fix-test (dot-agi)
### New file: shared OpenAI instrumentation utilities, `agentops.instrumentation.openai` (+116 lines)

```python
"""
AgentOps instrumentation utilities for OpenAI

This module provides shared utilities for instrumenting various OpenAI products and APIs.
It centralizes common functions and behaviors to ensure consistent instrumentation
across all OpenAI-related components.

IMPORTANT DISTINCTION BETWEEN OPENAI API FORMATS:
1. OpenAI Completions API - The traditional API format using prompt_tokens/completion_tokens
2. OpenAI Response API - The newer format used by the Agents SDK using input_tokens/output_tokens
3. Agents SDK - The framework that uses Response API format

This module implements utilities that handle both formats consistently.
"""

import logging
from typing import Any, Dict, List, Optional, Union

# Import span attributes from semconv
from agentops.semconv import SpanAttributes

# Logger
logger = logging.getLogger(__name__)


def get_value(data: Dict[str, Any], keys: Union[str, List[str]]) -> Optional[Any]:
    """
    Get a value from a dictionary using a key or a prioritized list of keys.

    Args:
        data: Source dictionary
        keys: A single key or a list of keys in priority order

    Returns:
        The value if found, or None if not found
    """
    if isinstance(keys, str):
        return data.get(keys)

    for key in keys:
        if key in data:
            return data[key]

    return None


def process_token_usage(usage: Dict[str, Any], attributes: Dict[str, Any]) -> None:
    """
    Process token usage metrics from any OpenAI API response and add them to span attributes.

    This function maps token usage fields from various API formats to standardized
    attribute names according to OpenTelemetry semantic conventions:

    - OpenAI ChatCompletion API uses: prompt_tokens, completion_tokens, total_tokens
    - OpenAI Response API uses: input_tokens, output_tokens, total_tokens

    Both formats are mapped to the standardized OTel attributes.

    Args:
        usage: Dictionary containing token usage metrics from an OpenAI API
        attributes: The span attributes dictionary where the metrics will be added
    """
    if not usage or not isinstance(usage, dict):
        return

    # Mapping for standard usage metrics (target attribute -> source field(s))
    token_mapping = {
        SpanAttributes.LLM_USAGE_TOTAL_TOKENS: "total_tokens",
        SpanAttributes.LLM_USAGE_PROMPT_TOKENS: ["prompt_tokens", "input_tokens"],
        SpanAttributes.LLM_USAGE_COMPLETION_TOKENS: ["completion_tokens", "output_tokens"],
    }

    # Apply the mapping for all token usage fields
    for target_attr, source_keys in token_mapping.items():
        value = get_value(usage, source_keys)
        if value is not None:
            attributes[target_attr] = value

    # Process output_tokens_details if present
    if "output_tokens_details" in usage and isinstance(usage["output_tokens_details"], dict):
        process_token_details(usage["output_tokens_details"], attributes)


def process_token_details(details: Dict[str, Any], attributes: Dict[str, Any]) -> None:
    """
    Process detailed token metrics from OpenAI API responses and add them to span attributes.

    This function maps token detail fields (like reasoning_tokens) to standardized attribute
    names according to semantic conventions, ensuring consistent telemetry across the system.

    Args:
        details: Dictionary containing token detail metrics from an OpenAI API
        attributes: The span attributes dictionary where the metrics will be added
    """
    if not details or not isinstance(details, dict):
        return

    # Token details attribute mapping (target attribute -> source key)
    token_details_mapping = {
        f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}.reasoning": "reasoning_tokens",
        # Add more mappings here as OpenAI introduces new token detail types
    }

    # Process all token detail fields
    for detail_key, detail_value in details.items():
        # First check whether there is an explicit mapping for this key
        mapped = False
        for target_attr, source_key in token_details_mapping.items():
            if source_key == detail_key:
                attributes[target_attr] = detail_value
                mapped = True
                break

        # For unknown token details, fall back to a generic naming format
        if not mapped:
            attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}.{detail_key}"] = detail_value
```
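The mapping logic above can be exercised standalone. In this sketch, plain attribute-name strings stand in for the `SpanAttributes` constants from `agentops.semconv` (the real constant values may differ), and `normalize_usage` is a name invented here for illustration:

```python
from typing import Any, Dict, List, Optional, Union

# Plain-string stand-ins for agentops.semconv SpanAttributes (illustrative only)
LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
LLM_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens"


def get_value(data: Dict[str, Any], keys: Union[str, List[str]]) -> Optional[Any]:
    """Look up a single key, or the first match from a priority-ordered list."""
    if isinstance(keys, str):
        return data.get(keys)
    for key in keys:
        if key in data:
            return data[key]
    return None


def normalize_usage(usage: Dict[str, Any]) -> Dict[str, Any]:
    """Map either API format's usage fields onto standardized attribute names."""
    attributes: Dict[str, Any] = {}
    token_mapping = {
        LLM_USAGE_TOTAL_TOKENS: "total_tokens",
        LLM_USAGE_PROMPT_TOKENS: ["prompt_tokens", "input_tokens"],
        LLM_USAGE_COMPLETION_TOKENS: ["completion_tokens", "output_tokens"],
    }
    for target_attr, source_keys in token_mapping.items():
        value = get_value(usage, source_keys)
        if value is not None:
            attributes[target_attr] = value
    return attributes


# Chat Completion and Response API usage blocks normalize to identical attributes:
chat_attrs = normalize_usage({"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15})
resp_attrs = normalize_usage({"input_tokens": 10, "output_tokens": 5, "total_tokens": 15})
```

The priority-list form of `get_value` is what lets one target attribute absorb both `prompt_tokens` and `input_tokens` without format-specific branches.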
### New file: OpenAI Agents SDK instrumentation README (+126 lines)

# OpenAI Agents SDK Instrumentation

This module provides automatic instrumentation for the OpenAI Agents SDK, adding telemetry that follows OpenTelemetry semantic conventions for Generative AI systems.

## Architecture Overview

The OpenAI Agents SDK instrumentor works by:

1. Intercepting the Agents SDK's trace processor interface to capture Agent, Function, Generation, and other span types
2. Monkey-patching the Agents SDK `Runner` class to capture the full execution lifecycle, including streaming operations
3. Converting all captured data to OpenTelemetry spans and metrics following semantic conventions
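A minimal sketch of the monkey-patching step (point 2 above). The `Runner` stand-in and the `instrument` helper are hypothetical names for illustration; the real instrumentor wraps the actual Agents SDK `Runner` and emits OpenTelemetry spans and metrics rather than appending to a list:

```python
import functools
import time


class Runner:
    """Stand-in for the Agents SDK Runner class (hypothetical)."""

    @classmethod
    def run_sync(cls, agent: str, prompt: str) -> str:
        return f"{agent} answered: {prompt}"


captured = []  # the real code records OTel spans/metrics instead of list entries


def instrument(cls, method_name: str) -> None:
    """Replace cls.method_name with a wrapper that times each call."""
    original = getattr(cls, method_name)

    @functools.wraps(original)
    def wrapper(*args, **kwargs):
        start = time.monotonic()
        try:
            return original(*args, **kwargs)
        finally:
            captured.append({"method": method_name, "duration_s": time.monotonic() - start})

    setattr(cls, method_name, wrapper)


instrument(Runner, "run_sync")
result = Runner.run_sync("triage-agent", "What is 2+2?")
# captured now holds one timing record for the run
```

Wrapping at the `Runner` boundary is what gives the instrumentor visibility into the full execution lifecycle, including runs whose spans the SDK's own trace processors would only report piecemeal.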
## Span Types

The instrumentor captures the following span types:

- **Trace**: The root span representing an entire agent workflow execution
  - Implementation: `_export_trace()` method in `exporter.py`
  - Creates a span with the trace name, ID, and workflow metadata

- **Agent**: Represents an agent's execution lifecycle
  - Implementation: `_process_agent_span()` method in `exporter.py`
  - Uses `SpanKind.CONSUMER` to indicate an agent receiving a request
  - Captures agent name, input, output, tools, and other metadata

- **Function**: Represents a tool/function call
  - Implementation: `_process_function_span()` method in `exporter.py`
  - Uses `SpanKind.CLIENT` to indicate an outbound call to a function
  - Captures function name, input arguments, output results, and error information

- **Generation**: Captures details of model generation
  - Implementation: `_process_generation_span()` method in `exporter.py`
  - Uses `SpanKind.CLIENT` to indicate an outbound call to an LLM
  - Captures model name, configuration, usage statistics, and response content

- **Response**: Lightweight span for tracking model response IDs
  - Implementation: Handled within the `_process_response_api()` and `_process_completions()` methods
  - Extracts response IDs and metadata from both Chat Completion API and Response API formats

- **Handoff**: Represents control transfer between agents
  - Implementation: Captured through the `AgentAttributes.HANDOFFS` attribute
  - Maps the Agents SDK's "handoffs" field to the standardized attribute name
## Metrics

The instrumentor collects the following metrics:

- **Agent Runs**: Number of agent runs
  - Implementation: `_agent_run_counter` in `instrumentor.py`
  - Incremented at the start of each agent run with metadata about the agent and run configuration

- **Agent Turns**: Number of agent turns
  - Implementation: Inferred from raw-response processing
  - Each raw response represents one turn in the conversation

- **Agent Execution Time**: Time taken for agent execution
  - Implementation: `_agent_execution_time_histogram` in `instrumentor.py`
  - Measured from the start of an agent run to its completion

- **Token Usage**: Number of input and output tokens used
  - Implementation: `_agent_token_usage_histogram` in `instrumentor.py`
  - Records prompt and completion tokens separately with appropriate labels
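The run counter and execution-time histogram can be approximated with plain-Python stand-ins. This hedged sketch uses dicts in place of real OpenTelemetry instruments, and `record_run` is a name invented here for illustration:

```python
import time
from collections import defaultdict

agent_run_counter = defaultdict(int)  # stand-in for _agent_run_counter
execution_times = defaultdict(list)   # stand-in for _agent_execution_time_histogram


def record_run(agent_name: str, fn, *args, **kwargs):
    """Count a run and record its execution time, keyed by agent name."""
    agent_run_counter[agent_name] += 1  # incremented at the start of the run
    start = time.monotonic()
    try:
        return fn(*args, **kwargs)
    finally:
        # recorded even if the run raises, so failed runs still produce a sample
        execution_times[agent_name].append(time.monotonic() - start)


record_run("demo-agent", lambda: sum(range(1000)))
# agent_run_counter["demo-agent"] is now 1, with one duration sample recorded
```

The `try`/`finally` placement mirrors the described behavior: the counter increments at run start, while the duration sample is only complete when the run finishes.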
## Key Design Patterns

### Target → Source Mapping Pattern

We use a consistent pattern for attribute mapping: dictionary keys are the target attribute names (what we want in the final span), and values are the source field names (where the data comes from):

```python
_CONFIG_MAPPING = {
    # Target semantic convention → source field
    <SemanticConvention>: Union[str, list[str]],
    # ...
}
```

This pattern makes mappings easy to maintain and apply consistently.
### Multi-API Format Support

The instrumentor handles both OpenAI API formats:

1. **Chat Completion API**: Traditional format with a "choices" array and prompt_tokens/completion_tokens
2. **Response API**: Newer format with an "output" array and input_tokens/output_tokens

The implementation detects which format is in use and processes it accordingly.
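A hedged sketch of how such detection can work, keyed off the structural difference named above; the exact criteria used in `exporter.py` may differ, and `detect_format` is a name invented here:

```python
def detect_format(response: dict) -> str:
    """Guess which OpenAI API format a response payload uses."""
    if "choices" in response:
        return "chat_completion"  # Chat Completion API: "choices" array
    if "output" in response:
        return "response_api"     # Response API: "output" array
    return "unknown"


chat_kind = detect_format({"choices": [], "usage": {"prompt_tokens": 1}})
resp_kind = detect_format({"output": [], "usage": {"input_tokens": 1}})
```

Dispatching on payload shape rather than on caller-supplied flags keeps the exporter agnostic about which API produced the data it receives.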
### Streaming Operation Tracking

When instrumenting streaming operations, we:

1. Track active streaming operations using unique IDs
2. Flush spans properly to ensure metrics are recorded
3. Create separate spans for token usage metrics to avoid premature span closure

### Response API Content Extraction

The Response API nests content as:

```
output → message → content → [items] → text
```

Extracting the actual text requires special handling:

```python
# From _process_response_api in exporter.py
if isinstance(content_items, list):
    # Combine text from all text items
    texts = []
    for content_item in content_items:
        if content_item.get("type") == "output_text" and "text" in content_item:
            texts.append(content_item["text"])

    # Join texts (even if empty)
    attributes[f"{prefix}.content"] = " ".join(texts)
```
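The snippet above can be made self-contained. This sketch walks a hypothetical Response API payload shaped as described (`output → message → content → [items] → text`); `extract_output_text` and the sample payload are invented here for illustration:

```python
def extract_output_text(response: dict) -> str:
    """Collect and join output_text items from a Response API-style payload."""
    texts = []
    for item in response.get("output", []):
        if item.get("type") != "message":
            continue
        content_items = item.get("content")
        if isinstance(content_items, list):
            for content_item in content_items:
                if content_item.get("type") == "output_text" and "text" in content_item:
                    texts.append(content_item["text"])
    # Join texts (even if empty), mirroring the exporter's behavior
    return " ".join(texts)


sample = {  # hypothetical payload for illustration
    "output": [
        {
            "type": "message",
            "content": [
                {"type": "output_text", "text": "Hello"},
                {"type": "output_text", "text": "world"},
            ],
        }
    ]
}
```

Note that non-`output_text` items (tool calls, refusals, and so on) are skipped rather than raising, so the extractor degrades gracefully on content shapes it does not recognize.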
## TODO

- Add support for additional semantic conventions
  - `gen_ai` doesn't have conventions for response data beyond `role` and `content`
  - We're shoehorning `responses` into `completions` since the spec doesn't yet define a convention for them
### New file: OpenAI Agents SDK instrumentor package init (+39 lines)

```python
"""
AgentOps Instrumentor for OpenAI Agents SDK

This module provides automatic instrumentation for the OpenAI Agents SDK when AgentOps is
imported. It implements a clean, maintainable implementation that follows semantic conventions.

IMPORTANT DISTINCTION BETWEEN OPENAI API FORMATS:
1. OpenAI Completions API - The traditional API format using prompt_tokens/completion_tokens
2. OpenAI Response API - The newer format used by the Agents SDK using input_tokens/output_tokens
3. Agents SDK - The framework that uses Response API format

The Agents SDK uses the Response API format, which we handle using shared utilities from
agentops.instrumentation.openai.
"""
from typing import Optional
import importlib.metadata

from agentops.logging import logger


def get_version() -> Optional[str]:
    """Get the version of the Agents SDK, or None if the package is not installed."""
    try:
        return importlib.metadata.version("agents")
    except importlib.metadata.PackageNotFoundError:
        logger.debug("`agents` package not found; unable to determine installed version.")
        return None


LIBRARY_NAME = "agents-sdk"
LIBRARY_VERSION: Optional[str] = get_version()  # Actual OpenAI Agents SDK version

# Import after defining constants to avoid circular imports
from .instrumentor import AgentsInstrumentor

__all__ = [
    "LIBRARY_NAME",
    "LIBRARY_VERSION",
    "AgentsInstrumentor",
]
```

Two fixes relative to the draft: the docstring on `get_version` now matches its actual `None` return (it previously claimed `'unknown'`), and the undefined `SDK_VERSION` name has been dropped from `__all__`.
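The version-lookup fallback in `get_version` generalizes to any distribution name. A minimal sketch with the package name parameterized (`get_package_version` is a name invented here):

```python
import importlib.metadata
from typing import Optional


def get_package_version(package: str) -> Optional[str]:
    """Return the installed version of `package`, or None if it is not installed."""
    try:
        return importlib.metadata.version(package)
    except importlib.metadata.PackageNotFoundError:
        return None


# A distribution name that should not exist resolves to None rather than raising:
missing = get_package_version("definitely-not-a-real-package-xyz")
```

Catching `PackageNotFoundError` (rather than letting it propagate) is what allows the instrumentor to load even when the target SDK is absent.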