diff --git a/agentops/helpers/__init__.py b/agentops/helpers/__init__.py index c14229495..ba8c1aad7 100644 --- a/agentops/helpers/__init__.py +++ b/agentops/helpers/__init__.py @@ -45,5 +45,4 @@ "get_env_bool", "get_env_int", "get_env_list", - "get_tags_from_config", ] diff --git a/agentops/instrumentation/common/attributes.py b/agentops/instrumentation/common/attributes.py index dcf8225f5..d89f94bfb 100644 --- a/agentops/instrumentation/common/attributes.py +++ b/agentops/instrumentation/common/attributes.py @@ -1,5 +1,8 @@ """Common attribute processing utilities shared across all instrumentors. +This utility ensures consistent attribute extraction and transformation across different +instrumentation use cases. + This module provides core utilities for extracting and formatting OpenTelemetry-compatible attributes from span data. These functions are provider-agnostic and used by all instrumentors in the AgentOps @@ -19,7 +22,7 @@ These utilities ensure consistent attribute handling across different LLM service instrumentors while maintaining separation of concerns. """ -from typing import Dict, Any, Optional, List +from typing import runtime_checkable, Protocol, Any, Optional, Dict, TypedDict from agentops.logging import logger from agentops.helpers import safe_serialize, get_agentops_version from agentops.semconv import ( @@ -28,17 +31,96 @@ WorkflowAttributes, ) -# target_attribute_key: source_attribute -AttributeMap = Dict[str, Any] + +# `AttributeMap` is a dictionary that maps target attribute keys to source attribute keys. +# It is used to extract and transform attributes from a span or trace data object +# into a standardized format following OpenTelemetry semantic conventions. +# +# Key-Value Format: +# - Key (str): The target attribute key in the standardized output format +# - Value (str): The source attribute key in the input data object +# +# Example Usage: +# -------------- +# +# Create your mapping: +# attribute_mapping: AttributeMap = { +# CoreAttributes.TRACE_ID: "trace_id", +# CoreAttributes.SPAN_ID: "span_id" +# } +# +# Extract the attributes: +# span_data = { +# "trace_id": "12345", +# "span_id": "67890", +# } +# +# attributes = _extract_attributes_from_mapping(span_data, attribute_mapping) +# # >> {"trace.id": "12345", "span.id": "67890"} +AttributeMap = Dict[str, str] # target_attribute_key: source_attribute + + +# `IndexedAttributeMap` differs from `AttributeMap` in that it allows for dynamic formatting of +# target attribute keys using indices `i` and optionally `j`. This is particularly useful +# when dealing with collections of similar attributes that should be uniquely identified +# in the output. +# +# Key-Value Format: +# - Key (IndexedAttribute): An object implementing the IndexedAttribute protocol with a format method +# - Value (str): The source attribute key in the input data object +# +# Example Usage: +# -------------- +# +# Create your mapping: +# attribute_mapping: IndexedAttributeMap = { +# MessageAttributes.TOOL_CALL_ID: "id", +# MessageAttributes.TOOL_CALL_TYPE: "type" +# } +# +# Process tool calls: +# span_data = { +# "id": "tool_1", +# "type": "search", +# } +# +# attributes = _extract_attributes_from_mapping_with_index( +# span_data, attribute_mapping, i=0) +# # >> {"gen_ai.request.tools.0.id": "tool_1", "gen_ai.request.tools.0.type": "search"} + +@runtime_checkable +class IndexedAttribute(Protocol): + """ + Protocol for objects that define a method to format indexed attributes using + only the provided indices `i` and optionally `j`. 
This allows for dynamic
+    formatting of attribute keys based on the indices.
+    """
+
+    def format(self, *, i: int, j: Optional[int] = None) -> str:
+        ...
+
+
+IndexedAttributeMap = Dict[IndexedAttribute, str]  # target_attribute_key: source_attribute
+
+
+class IndexedAttributeData(TypedDict, total=False):
+    """
+    Represents a dictionary structure for indexed attribute data.
+
+    Note: TypedDict fields cannot declare default values; since the class is
+    declared with `total=False`, `j` may simply be omitted.
+
+    Attributes:
+        i (int): The primary index value. Always provided by callers.
+        j (Optional[int]): An optional secondary index value.
+    """
+    i: int
+    j: Optional[int]
 
 
 def _extract_attributes_from_mapping(span_data: Any, attribute_mapping: AttributeMap) -> AttributeMap:
     """Helper function to extract attributes based on a mapping.
-    
+
     Args:
         span_data: The span data object or dict to extract attributes from
         attribute_mapping: Dictionary mapping target attributes to source attributes
-    
+
     Returns:
         Dictionary of extracted attributes
     """
@@ -56,19 +138,48 @@ def _extract_attributes_from_mapping(span_data: Any, attribute_mapping: Attribut
         # Skip if value is None or empty
         if value is None or (isinstance(value, (list, dict, str)) and not value):
             continue
-    
+
         # Serialize complex objects
         elif isinstance(value, (dict, list, object)) and not isinstance(value, (str, int, float, bool)):
            value = safe_serialize(value)
-    
+
        attributes[target_attr] = value
-    
+
     return attributes
 
 
+def _extract_attributes_from_mapping_with_index(span_data: Any, attribute_mapping: IndexedAttributeMap, i: int, j: Optional[int] = None) -> AttributeMap:
+    """Helper function to extract attributes based on a mapping with indexed keys.
+
+    This function extends `_extract_attributes_from_mapping` by allowing for indexed keys in the attribute mapping.
+
+    The target attribute keys in the mapping are format strings that accept `i` and optionally `j`
+    (e.g. `my_attr_{i}` or `my_attr_{i}_{j}`); they are formatted with the provided indices before extraction.
+
+    Args:
+        span_data: The span data object or dict to extract attributes from
+        attribute_mapping: Dictionary mapping target attribute format strings (taking i/j) to source attributes
+        i: The primary index to use in formatting the attribute keys
+        j: An optional secondary index (default is None)
+    Returns:
+        Dictionary of extracted attributes with formatted indexed keys.
+    """
+
+    # `i` is required for formatting the attribute keys, `j` is optional
+    format_kwargs: IndexedAttributeData = {'i': i}
+    if j is not None:
+        format_kwargs['j'] = j
+
+    # Format the indexed target keys so the plain extraction helper can be reused
+    attribute_mapping_with_index: AttributeMap = {}
+    for target_attr, source_attr in attribute_mapping.items():
+        attribute_mapping_with_index[target_attr.format(**format_kwargs)] = source_attr
+
+    return _extract_attributes_from_mapping(span_data, attribute_mapping_with_index)
+
+
 def get_common_attributes() -> AttributeMap:
     """Get common instrumentation attributes used across traces and spans.
-    
+
     Returns:
         Dictionary of common instrumentation attributes
     """
@@ -80,58 +191,58 @@
 
 
 def get_base_trace_attributes(trace: Any) -> AttributeMap:
     """Create the base attributes dictionary for an OpenTelemetry trace.
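+
+    A minimal sketch of the returned shape (the values shown are illustrative
+    placeholders; the actual key strings come from the semconv constants):
+
+        {
+            WorkflowAttributes.WORKFLOW_NAME: "my_workflow",
+            CoreAttributes.TRACE_ID: "0x1234...",
+            WorkflowAttributes.WORKFLOW_STEP_TYPE: "trace",
+            # ...plus common instrumentation attributes and default tags
+        }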
- + Args: trace: The trace object to extract attributes from - + Returns: Dictionary containing base trace attributes """ - if not hasattr(trace, 'trace_id'): + if not hasattr(trace, "trace_id"): logger.warning("Cannot create trace attributes: missing trace_id") return {} - + attributes = { WorkflowAttributes.WORKFLOW_NAME: trace.name, CoreAttributes.TRACE_ID: trace.trace_id, WorkflowAttributes.WORKFLOW_STEP_TYPE: "trace", **get_common_attributes(), } - + # Add tags from the config to the trace attributes (these should only be added to the trace) from agentops import get_client - + config = get_client().config tags = [] if config.default_tags: # `default_tags` can either be a `set` or a `list` tags = list(config.default_tags) - + attributes[CoreAttributes.TAGS] = tags - + return attributes def get_base_span_attributes(span: Any) -> AttributeMap: """Create the base attributes dictionary for an OpenTelemetry span. - + Args: span: The span object to extract attributes from - + Returns: Dictionary containing base span attributes """ - span_id = getattr(span, 'span_id', 'unknown') - trace_id = getattr(span, 'trace_id', 'unknown') - parent_id = getattr(span, 'parent_id', None) - + span_id = getattr(span, "span_id", "unknown") + trace_id = getattr(span, "trace_id", "unknown") + parent_id = getattr(span, "parent_id", None) + attributes = { CoreAttributes.TRACE_ID: trace_id, CoreAttributes.SPAN_ID: span_id, **get_common_attributes(), } - + if parent_id: attributes[CoreAttributes.PARENT_ID] = parent_id - - return attributes \ No newline at end of file + + return attributes diff --git a/agentops/instrumentation/common/wrappers.py b/agentops/instrumentation/common/wrappers.py index 9c33962f4..8ef93e191 100644 --- a/agentops/instrumentation/common/wrappers.py +++ b/agentops/instrumentation/common/wrappers.py @@ -5,7 +5,6 @@ a configuration class for wrapping methods, helper functions for updating spans with attributes, and functions for creating and applying wrappers. """ - from typing import Any, Optional, Tuple, Dict, Callable from dataclasses import dataclass from wrapt import wrap_function_wrapper # type: ignore @@ -20,36 +19,42 @@ AttributeHandler = Callable[[Optional[Tuple], Optional[Dict], Optional[Any]], AttributeMap] + @dataclass class WrapConfig: """Configuration for wrapping a method with OpenTelemetry instrumentation. - + This class defines how a method should be wrapped for instrumentation, including what package, class, and method to wrap, what span attributes to set, and how to name the resulting trace spans. - + Attributes: trace_name: The name to use for the trace span package: The package containing the target class class_name: The name of the class containing the method method_name: The name of the method to wrap handler: A function that extracts attributes from args, kwargs, or return value + is_async: Whether the method is asynchronous (default: False) + We explicitly specify async methods since `asyncio.iscoroutinefunction` + is not reliable in this context. span_kind: The kind of span to create (default: CLIENT) """ + trace_name: str package: str class_name: str method_name: str handler: AttributeHandler + is_async: bool = False span_kind: SpanKind = SpanKind.CLIENT - + def __repr__(self): return f"{self.package}.{self.class_name}.{self.method_name}" def _update_span(span: Span, attributes: AttributeMap) -> None: """Update a span with the provided attributes. 
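+
+    Example (illustrative; the key string is a placeholder for a semconv constant):
+        _update_span(span, {"gen_ai.response.id": "resp_123"})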
-    
+
     Args:
         span: The OpenTelemetry span to update
         attributes: A dictionary of attributes to set on the span
@@ -60,7 +65,7 @@ def _update_span(span: Span, attributes: AttributeMap) -> None:
 
 def _finish_span_success(span: Span) -> None:
     """Mark a span as successful by setting its status to OK.
-    
+
     Args:
         span: The OpenTelemetry span to update
     """
@@ -69,7 +74,7 @@ def _finish_span_success(span: Span) -> None:
 
 def _finish_span_error(span: Span, exception: Exception) -> None:
     """Mark a span as failed by recording the exception and setting error status.
-    
+
     Args:
         span: The OpenTelemetry span to update
         exception: The exception that caused the error
@@ -78,33 +83,65 @@ def _finish_span_error(span: Span, exception: Exception) -> None:
     span.set_status(Status(StatusCode.ERROR, str(exception)))
 
 
-def _create_wrapper(wrap_config: WrapConfig, tracer: Tracer):
+def _create_wrapper(wrap_config: WrapConfig, tracer: Tracer) -> Callable:
     """Create a wrapper function for the specified configuration.
-    
+
     This function creates a wrapper that:
     1. Creates a new span for the wrapped method
     2. Sets attributes on the span based on input arguments
     3. Calls the wrapped method
     4. Sets attributes on the span based on the return value
     5. Handles exceptions by recording them on the span
-    
+
     Args:
         wrap_config: Configuration for the wrapper
         tracer: The OpenTelemetry tracer to use for creating spans
-    
+
     Returns:
         A wrapper function compatible with wrapt.wrap_function_wrapper
     """
     handler = wrap_config.handler
-    
+
+    async def awrapper(wrapped, instance, args, kwargs):
+        # Skip instrumentation if it's suppressed in the current context
+        # (OpenTelemetry sets this key while exporters run so that their own
+        # network calls are not recursively instrumented)
+        if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
+            return await wrapped(*args, **kwargs)
+
+        return_value = None
+
+        with tracer.start_as_current_span(
+            wrap_config.trace_name,
+            kind=wrap_config.span_kind,
+        ) as span:
+            try:
+                # Add the input attributes to the span before execution
+                attributes = handler(args=args, kwargs=kwargs)
+                _update_span(span, attributes)
+
+                return_value = await wrapped(*args, **kwargs)
+
+                # Add the output attributes to the span after execution
+                attributes = handler(return_value=return_value)
+                _update_span(span, attributes)
+                _finish_span_success(span)
+            except Exception as e:
+                # Add everything we have in the case of an error
+                attributes = handler(args=args, kwargs=kwargs, return_value=return_value)
+                _update_span(span, attributes)
+                _finish_span_error(span, e)
+                raise
+
+        return return_value
+
     def wrapper(wrapped, instance, args, kwargs):
         # Skip instrumentation if it's suppressed in the current context
         # TODO I don't understand what this actually does
         if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
             return wrapped(*args, **kwargs)
-    
+
         return_value = None
-    
+
         with tracer.start_as_current_span(
             wrap_config.trace_name,
             kind=wrap_config.span_kind,
@@ -113,9 +150,9 @@ def wrapper(wrapped, instance, args, kwargs):
             # Add the input attributes to the span before execution
             attributes = handler(args=args, kwargs=kwargs)
             _update_span(span, attributes)
-    
+
             return_value = wrapped(*args, **kwargs)
-    
+
             # Add the output attributes to the span after execution
             attributes = handler(return_value=return_value)
             _update_span(span, attributes)
@@ -126,22 +163,25 @@
             _update_span(span, attributes)
             _finish_span_error(span, e)
             raise
-    
+
         return return_value
-    
-    return wrapper
+
+    if wrap_config.is_async:
+        return awrapper
+    else:
+        return wrapper
 
 
tracer: Tracer): +def wrap(wrap_config: WrapConfig, tracer: Tracer) -> Callable: """Wrap a method with OpenTelemetry instrumentation. - + This function applies the wrapper created by _create_wrapper to the method specified in the wrap_config. - + Args: wrap_config: Configuration specifying what to wrap and how tracer: The OpenTelemetry tracer to use for creating spans - + Returns: The result of wrap_function_wrapper (typically None) """ @@ -154,17 +194,16 @@ def wrap(wrap_config: WrapConfig, tracer: Tracer): def unwrap(wrap_config: WrapConfig): """Remove instrumentation wrapper from a method. - + This function removes the wrapper applied by wrap(). - + Args: wrap_config: Configuration specifying what to unwrap - + Returns: The result of the unwrap operation (typically None) """ return _unwrap( - f"{wrap_config.package}.{wrap_config.class_name}", - wrap_config.method_name, + f"{wrap_config.package}.{wrap_config.class_name}", + wrap_config.method_name, ) - diff --git a/agentops/instrumentation/openai/attributes/response.py b/agentops/instrumentation/openai/attributes/response.py index 98cc9b34d..3716f37c8 100644 --- a/agentops/instrumentation/openai/attributes/response.py +++ b/agentops/instrumentation/openai/attributes/response.py @@ -8,34 +8,90 @@ ) from agentops.instrumentation.common.attributes import ( AttributeMap, + IndexedAttributeMap, _extract_attributes_from_mapping, + _extract_attributes_from_mapping_with_index, ) try: from openai.types import Reasoning - from openai.types.beta import FunctionTool # TODO beta will likely change from openai.types.responses import ( + FunctionTool, + WebSearchTool, + FileSearchTool, + ComputerTool, + Response, ResponseUsage, - ResponseOutputMessage, - ResponseOutputText, ResponseReasoningItem, - ResponseFunctionToolCall, + ResponseInputParam, - # ResponseComputerToolCall, - # ResponseFileSearchToolCall, - # ResponseFunctionWebSearch, # ResponseInputItemParam, + ResponseOutputMessage, + ResponseOutputText, + + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseFileSearchToolCall, + ResponseComputerToolCall, + # ResponseOutputItem, # ResponseOutputRefusal, # ResponseStreamEvent, ) - from openai.types.responses.response_usage import OutputTokensDetails + from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails + + ToolTypes = Union[ + FunctionTool, + WebSearchTool, + FileSearchTool, + ] + ResponseOutputTypes = Union[ + ResponseOutputMessage, + ResponseOutputText, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseComputerToolCall, + ResponseFileSearchToolCall, + ] except ImportError as e: logger.debug(f"[agentops.instrumentation.openai_agents] Could not import OpenAI Agents SDK types: {e}") RESPONSE_ATTRIBUTES: AttributeMap = { + # Response( + # id='resp_67ddd0196a4c81929f7e3783a80f18110b486458d6766f93', + # created_at=1742589977.0, + # error=None, + # incomplete_details=None, + # instructions='You are a helpful assistant...', + # metadata={}, + # model='gpt-4o-2024-08-06', + # object='response', + # output=[ + # ... + # ], + # parallel_tool_calls=True, + # temperature=1.0, + # tool_choice='auto', + # tools=[ + # ...) + # ], + # top_p=1.0, + # max_output_tokens=None, + # previous_response_id=None, + # reasoning=Reasoning( + # ... + # ), + # status='completed', + # text=ResponseTextConfig(format=ResponseFormatText(type='text')), + # truncation='disabled', + # usage=ResponseUsage( + # ... 
+ # ), + # user=None, + # store=True + # ) SpanAttributes.LLM_RESPONSE_ID: "id", SpanAttributes.LLM_REQUEST_MODEL: "model", SpanAttributes.LLM_RESPONSE_MODEL: "model", @@ -46,44 +102,206 @@ } -RESPONSE_TOOLS_ATTRIBUTES: AttributeMap = { - ToolAttributes.TOOL_NAME: "name", - ToolAttributes.TOOL_DESCRIPTION: "description", - ToolAttributes.TOOL_PARAMETERS: "parameters", - # TODO `type` & `strict` are not converted +RESPONSE_TOOL_ATTRIBUTES: IndexedAttributeMap = { + # FunctionTool( + # name='get_weather', + # parameters={'properties': {'location': {'title': 'Location', 'type': 'string'}}, 'required': ['location'], 'title': 'get_weather_args', 'type': 'object', 'additionalProperties': False}, + # strict=True, + # type='function', + # description='Get the current weather for a location.' + # ) + MessageAttributes.TOOL_CALL_TYPE: "type", + MessageAttributes.TOOL_CALL_NAME: "name", + MessageAttributes.TOOL_CALL_DESCRIPTION: "description", + MessageAttributes.TOOL_CALL_ARGUMENTS: "parameters", + # TODO `strict` is not converted } -RESPONSE_OUTPUT_ATTRIBUTES: AttributeMap = { - MessageAttributes.COMPLETION_ID: "id", +RESPONSE_TOOL_WEB_SEARCH_ATTRIBUTES: IndexedAttributeMap = { + # WebSearchTool( + # type='web_search_preview', + # search_context_size='medium', + # user_location=UserLocation( + # type='approximate', + # city=None, + # country='US', + # region=None, + # timezone=None + # ) + # ) + MessageAttributes.TOOL_CALL_NAME: "type", + # `parameters` is added by the `get_response_tool_web_search_attributes` function, + # which contains `search_context_size` and `user_location`. + MessageAttributes.TOOL_CALL_ARGUMENTS: "parameters", +} + + +RESPONSE_TOOL_FILE_SEARCH_ATTRIBUTES: IndexedAttributeMap = { + # FileSearchTool( + # type='file_search', + # vector_store_ids=['store_123', 'store_456'], + # filters=Filters( + # key='value' + # ), + # max_num_results=10, + # ranking_options=RankingOptions( + # ranker='default-2024-11-15', + # score_threshold=0.8 + # ) + # ) + MessageAttributes.TOOL_CALL_TYPE: "type", + # `parameters` is added by the `get_response_tool_file_search_attributes` function, + # which contains `vector_store_ids`, `filters`, `max_num_results`, and `ranking_options`. + MessageAttributes.TOOL_CALL_ARGUMENTS: "parameters", +} + + +RESPONSE_TOOL_COMPUTER_ATTRIBUTES: IndexedAttributeMap = { + # ComputerTool( + # display_height=1080.0, + # display_width=1920.0, + # environment='mac', + # type='computer_use_preview' + # ) + MessageAttributes.TOOL_CALL_TYPE: "type", + # `parameters` is added by the `get_response_tool_computer_attributes` function, + # which contains `display_height`, `display_width`, `environment`, etc. + MessageAttributes.TOOL_CALL_ARGUMENTS: "parameters", } -RESPONSE_OUTPUT_MESSAGE_ATTRIBUTES: AttributeMap = { +RESPONSE_OUTPUT_MESSAGE_ATTRIBUTES: IndexedAttributeMap = { + # ResponseOutputMessage( + # id='msg_67ddcad3b6008192b521035d8b71fc570db7bfce93fd916a', + # content=[ + # ... 
+ # ], + # role='assistant', + # status='completed', + # type='message' + # ) MessageAttributes.COMPLETION_ID: "id", + MessageAttributes.COMPLETION_TYPE: "type", MessageAttributes.COMPLETION_ROLE: "role", MessageAttributes.COMPLETION_FINISH_REASON: "status", - MessageAttributes.COMPLETION_TYPE: "type", } -RESPONSE_OUTPUT_TEXT_ATTRIBUTES: AttributeMap = { +RESPONSE_OUTPUT_TEXT_ATTRIBUTES: IndexedAttributeMap = { + # ResponseOutputText( + # annotations=[], + # text='Recursion is a programming technique ...', + # type='output_text' + # ) + MessageAttributes.COMPLETION_TYPE: "type", MessageAttributes.COMPLETION_CONTENT: "text", + # TODO `annotations` are not converted +} + + +RESPONSE_OUTPUT_REASONING_ATTRIBUTES: IndexedAttributeMap = { + # ResponseReasoningItem( + # id='reasoning_12345', + # summary=[ + # Summary( + # text='The model used a step-by-step approach to solve the problem.', + # type='summary_text' + # ) + # ], + # type='reasoning', + # status='completed' + # ) + MessageAttributes.COMPLETION_ID: "id", + MessageAttributes.COMPLETION_TYPE: "type", + MessageAttributes.COMPLETION_FINISH_REASON: "status", + # TODO `summary` is not converted } -RESPONSE_OUTPUT_TOOL_ATTRIBUTES: AttributeMap = { - MessageAttributes.FUNCTION_CALL_ID: "id", - MessageAttributes.FUNCTION_CALL_NAME: "name", - MessageAttributes.FUNCTION_CALL_ARGUMENTS: "arguments", - MessageAttributes.FUNCTION_CALL_TYPE: "type", +RESPONSE_OUTPUT_TOOL_ATTRIBUTES: IndexedAttributeMap = { + # ResponseFunctionToolCall( + # id='ftc_67ddcad3b6008192b521035d8b71fc570db7bfce93fd916a', + # arguments='{"location": "New York"}', + # call_id='call_12345', + # name='get_weather', + # type='function_call', + # status='completed' + # ) + MessageAttributes.COMPLETION_TOOL_CALL_ID: "id", + MessageAttributes.COMPLETION_TOOL_CALL_TYPE: "type", + MessageAttributes.COMPLETION_TOOL_CALL_NAME: "name", + MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS: "arguments", # TODO `status` & `call_id` are not converted } -RESPONSE_OUTPUT_REASONING_ATTRIBUTES: AttributeMap = { - # TODO we don't have semantic conventions for these - # TODO `id`, `summary`, `type`, `status` are not converted +RESPONSE_OUTPUT_TOOL_WEB_SEARCH_ATTRIBUTES: IndexedAttributeMap = { + # ResponseFunctionWebSearch( + # id='ws_67eda37a5f18819280bf8b64f315bfa70091ec39ac46b411', + # status='completed', + # type='web_search_call' + # ) + MessageAttributes.COMPLETION_TOOL_CALL_ID: "id", + MessageAttributes.COMPLETION_TOOL_CALL_TYPE: "type", + MessageAttributes.COMPLETION_TOOL_CALL_STATUS: "status", +} + +RESPONSE_OUTPUT_TOOL_WEB_SEARCH_URL_ANNOTATIONS: IndexedAttributeMap = { + # AnnotationURLCitation( + # end_index=747, + # start_index=553, + # title="You can now play a real-time AI-rendered Quake II in your browser", + # type='url_citation', + # url='https://www.tomshardware.com/video-games/you-can-now-play-a-real-time-ai-rendered-quake-ii-in-your-browser-microsofts-whamm-offers-generative-ai-for-games?utm_source=openai' + # ) + MessageAttributes.COMPLETION_ANNOTATION_END_INDEX: "end_index", + MessageAttributes.COMPLETION_ANNOTATION_START_INDEX: "start_index", + MessageAttributes.COMPLETION_ANNOTATION_TITLE: "title", + MessageAttributes.COMPLETION_ANNOTATION_TYPE: "type", + MessageAttributes.COMPLETION_ANNOTATION_URL: "url", +} + + +RESPONSE_OUTPUT_TOOL_COMPUTER_ATTRIBUTES: IndexedAttributeMap = { + # ResponseComputerToolCall( + # id='comp_12345', + # action=Action( + # type='click', + # target='button_submit' + # ), + # call_id='call_67890', + # pending_safety_checks=[ + # 
PendingSafetyCheck( + # type='check_type', + # status='pending' + # ) + # ], + # status='completed', + # type='computer_call' + # ) + # TODO semantic conventions for `ResponseComputerToolCall` are not defined yet +} + + +RESPONSE_OUTPUT_TOOL_FILE_SEARCH_ATTRIBUTES: IndexedAttributeMap = { + # ResponseFileSearchToolCall( + # id='fsc_12345', + # queries=['example query'], + # status='completed', + # type='file_search_call', + # results=[ + # Result( + # attributes={'key1': 'value1', 'key2': 42}, + # file_id='file_67890', + # filename='example.txt', + # score=0.95, + # text='Example text retrieved from the file.' + # ), + # ... + # ] + # ) + # TODO semantic conventions for `ResponseFileSearchToolCall` are not defined yet } @@ -102,7 +320,11 @@ RESPONSE_REASONING_ATTRIBUTES: AttributeMap = { - # TODO `effort` and `generate_summary` are not converted + # Reasoning( + # effort='medium', + # generate_summary=None, + # ) + # TODO `effort` and `generate_summary` need semantic conventions } @@ -144,6 +366,7 @@ def get_response_kwarg_attributes(kwargs: dict) -> AttributeMap: if isinstance(_input, str): attributes[MessageAttributes.PROMPT_ROLE.format(i=0)] = "user" attributes[MessageAttributes.PROMPT_CONTENT.format(i=0)] = _input + elif isinstance(_input, list): for i, prompt in enumerate(_input): # Object type is pretty diverse, so we handle common attributes, but do so @@ -154,12 +377,12 @@ def get_response_kwarg_attributes(kwargs: dict) -> AttributeMap: attributes[MessageAttributes.PROMPT_ROLE.format(i=i)] = prompt.role if hasattr(prompt, "content"): attributes[MessageAttributes.PROMPT_CONTENT.format(i=i)] = prompt.content + else: - logger.debug(f"[agentops.instrumentation.openai_agents] '{type(_input)}' is not a recognized input type.") + logger.debug(f"[agentops.instrumentation.openai.response] '{type(_input)}' is not a recognized input type.") # `model` is always `str` (`ChatModel` type is just a string literal) - _model: str = str(kwargs.get("model")) - attributes[SpanAttributes.LLM_REQUEST_MODEL] = _model + attributes[SpanAttributes.LLM_REQUEST_MODEL] = str(kwargs.get("model")) return attributes @@ -168,42 +391,8 @@ def get_response_kwarg_attributes(kwargs: dict) -> AttributeMap: # a return type from the `responses` module def get_response_response_attributes(response: 'Response') -> AttributeMap: """Handles interpretation of an openai Response object.""" - # Response( - # id='resp_67ddd0196a4c81929f7e3783a80f18110b486458d6766f93', - # created_at=1742589977.0, - # error=None, - # incomplete_details=None, - # instructions='You are a helpful assistant...', - # metadata={}, - # model='gpt-4o-2024-08-06', - # object='response', - # output=[ - # ... - # ], - # parallel_tool_calls=True, - # temperature=1.0, - # tool_choice='auto', - # tools=[ - # ...) - # ], - # top_p=1.0, - # max_output_tokens=None, - # previous_response_id=None, - # reasoning=Reasoning( - # ... - # ), - # status='completed', - # text=ResponseTextConfig(format=ResponseFormatText(type='text')), - # truncation='disabled', - # usage=ResponseUsage( - # ... 
- # ), - # user=None, - # store=True - # ) attributes = _extract_attributes_from_mapping( - response.__dict__, - RESPONSE_ATTRIBUTES) + response.__dict__, RESPONSE_ATTRIBUTES) if response.output: attributes.update(get_response_output_attributes(response.output)) @@ -212,7 +401,8 @@ def get_response_response_attributes(response: 'Response') -> AttributeMap: attributes.update(get_response_tools_attributes(response.tools)) if response.reasoning: - attributes.update(get_response_reasoning_attributes(response.reasoning)) + attributes.update(_extract_attributes_from_mapping( + response.reasoning.__dict__, RESPONSE_REASONING_ATTRIBUTES)) if response.usage: attributes.update(get_response_usage_attributes(response.usage)) @@ -220,128 +410,160 @@ def get_response_response_attributes(response: 'Response') -> AttributeMap: return attributes -def get_response_output_attributes(output: List[Any]) -> AttributeMap: +def get_response_output_attributes(output: List['ResponseOutputTypes']) -> AttributeMap: """Handles interpretation of an openai Response `output` list.""" attributes = {} for i, output_item in enumerate(output): if isinstance(output_item, ResponseOutputMessage): attributes.update(get_response_output_message_attributes(i, output_item)) + elif isinstance(output_item, ResponseReasoningItem): - attributes.update(get_response_output_reasoning_attributes(i, output_item)) + attributes.update(_extract_attributes_from_mapping_with_index( + output_item, RESPONSE_OUTPUT_REASONING_ATTRIBUTES, i)) + elif isinstance(output_item, ResponseFunctionToolCall): - attributes.update(get_response_output_tool_attributes(i, output_item)) + attributes.update(_extract_attributes_from_mapping_with_index( + output_item, RESPONSE_OUTPUT_TOOL_ATTRIBUTES, i=i, j=0)) + + elif isinstance(output_item, ResponseFunctionWebSearch): + attributes.update(_extract_attributes_from_mapping_with_index( + output_item, RESPONSE_OUTPUT_TOOL_WEB_SEARCH_ATTRIBUTES, i=i, j=0)) + + elif isinstance(output_item, ResponseComputerToolCall): + attributes.update(_extract_attributes_from_mapping_with_index( + output_item, RESPONSE_OUTPUT_TOOL_COMPUTER_ATTRIBUTES, i=i, j=0)) + + elif isinstance(output_item, ResponseFileSearchToolCall): + attributes.update(_extract_attributes_from_mapping_with_index( + output_item, RESPONSE_OUTPUT_TOOL_FILE_SEARCH_ATTRIBUTES, i=i, j=0)) + else: - logger.debug(f"[agentops.instrumentation.openai_agents] '{output_item}' is not a recognized output type.") + logger.debug(f"[agentops.instrumentation.openai.response] '{output_item}' is not a recognized output type.") + + return attributes + + +def get_response_output_text_attributes(output_text: 'ResponseOutputText', index: int) -> AttributeMap: + """Handles interpretation of an openai ResponseOutputText object.""" + # This function is a helper to handle the ResponseOutputText type specifically + attributes = _extract_attributes_from_mapping_with_index( + output_text, RESPONSE_OUTPUT_TEXT_ATTRIBUTES, index) + + if hasattr(output_text, "annotations"): + for j, output_text_annotation in enumerate(output_text.annotations): + attributes.update( + _extract_attributes_from_mapping_with_index( + output_text_annotation, RESPONSE_OUTPUT_TOOL_WEB_SEARCH_URL_ANNOTATIONS, i=index, j=j + ) + ) return attributes def get_response_output_message_attributes(index: int, message: 'ResponseOutputMessage') -> AttributeMap: """Handles interpretation of an openai ResponseOutputMessage object.""" - # ResponseOutputMessage( - # id='msg_67ddcad3b6008192b521035d8b71fc570db7bfce93fd916a', - # content=[ - 
# ... - # ], - # role='assistant', - # status='completed', - # type='message' - # ) - attributes = {} - - for attribute, lookup in RESPONSE_OUTPUT_MESSAGE_ATTRIBUTES.items(): - if hasattr(message, lookup): - attributes[attribute.format(i=index)] = safe_serialize(getattr(message, lookup)) + attributes = _extract_attributes_from_mapping_with_index( + message, RESPONSE_OUTPUT_MESSAGE_ATTRIBUTES, index) if message.content: for i, content in enumerate(message.content): if isinstance(content, ResponseOutputText): - attributes.update(get_response_output_text_attributes(i, content)) + attributes.update(get_response_output_text_attributes(content, i)) + else: - logger.debug(f"[agentops.instrumentation.openai_agents] '{content}' is not a recognized content type.") + logger.debug(f"[agentops.instrumentation.openai.response] '{content}' is not a recognized content type.") return attributes -def get_response_output_text_attributes(index: int, content: 'ResponseOutputText') -> AttributeMap: - """Handles interpretation of an openai ResponseOutputText object.""" - # ResponseOutputText( - # annotations=[], - # text='Recursion is a programming technique ...', - # type='output_text' - # ) +def get_response_tools_attributes(tools: List['ToolTypes']) -> AttributeMap: + """Handles interpretation of openai Response `tools` list.""" attributes = {} - for attribute, lookup in RESPONSE_OUTPUT_TEXT_ATTRIBUTES.items(): - if hasattr(content, lookup): - attributes[attribute.format(i=index)] = safe_serialize(getattr(content, lookup)) + for i, tool in enumerate(tools): + if isinstance(tool, FunctionTool): + attributes.update(_extract_attributes_from_mapping_with_index( + tool, RESPONSE_TOOL_ATTRIBUTES, i)) + + elif isinstance(tool, WebSearchTool): + attributes.update(get_response_tool_web_search_attributes(tool, i)) + + elif isinstance(tool, FileSearchTool): + attributes.update(get_response_tool_file_search_attributes(tool, i)) + + elif isinstance(tool, ComputerTool): + attributes.update(get_response_tool_computer_attributes(tool, i)) + + else: + logger.debug(f"[agentops.instrumentation.openai.response] '{tool}' is not a recognized tool type.") return attributes -def get_response_output_reasoning_attributes(index: int, output: 'ResponseReasoningItem') -> AttributeMap: - """Handles interpretation of an openai ResponseReasoningItem object.""" - # Reasoning( - # effort=None, - # generate_summary=None - # ) - attributes = {} +def get_response_tool_web_search_attributes(tool: 'WebSearchTool', index: int) -> AttributeMap: + """Handles interpretation of an openai WebSearchTool object.""" + parameters = {} + if hasattr(tool, 'search_context_size'): + parameters['search_context_size'] = tool.search_context_size - for attribute, lookup in RESPONSE_OUTPUT_REASONING_ATTRIBUTES.items(): - if hasattr(output, lookup): - attributes[attribute.format(i=index)] = safe_serialize(getattr(output, lookup)) + if hasattr(tool, 'user_location'): + parameters['user_location'] = tool.user_location.__dict__ - return attributes + tool_data = tool.__dict__ + if parameters: + # add parameters to the tool_data dict so we can format them with the other attributes + tool_data['parameters'] = parameters + + return _extract_attributes_from_mapping_with_index( + tool_data, RESPONSE_TOOL_WEB_SEARCH_ATTRIBUTES, index) -def get_response_output_tool_attributes(index: int, output: 'ResponseFunctionToolCall') -> AttributeMap: - """Handles interpretation of an openai ResponseFunctionToolCall object.""" - # FunctionTool( - # name='get_weather', - # 
parameters={'properties': {'location': {'title': 'Location', 'type': 'string'}}, 'required': ['location'], 'title': 'get_weather_args', 'type': 'object', 'additionalProperties': False}, - # strict=True, - # type='function', - # description='Get the current weather for a location.' - # ) - attributes = {} +def get_response_tool_file_search_attributes(tool: 'FileSearchTool', index: int) -> AttributeMap: + """Handles interpretation of an openai FileSearchTool object.""" + parameters = {} - for attribute, lookup in RESPONSE_OUTPUT_TOOL_ATTRIBUTES.items(): - if hasattr(output, lookup): - attributes[attribute.format(i=index)] = safe_serialize(getattr(output, lookup)) + if hasattr(tool, 'vector_store_ids'): + parameters['vector_store_ids'] = tool.vector_store_ids - return attributes + if hasattr(tool, 'filters'): + parameters['filters'] = tool.filters.__dict__ + + if hasattr(tool, 'max_num_results'): + parameters['max_num_results'] = tool.max_num_results + + if hasattr(tool, 'ranking_options'): + parameters['ranking_options'] = tool.ranking_options.__dict__ + + tool_data = tool.__dict__ + if parameters: + # add parameters to the tool_data dict so we can format them with the other attributes + tool_data['parameters'] = parameters + + return _extract_attributes_from_mapping_with_index( + tool_data, RESPONSE_TOOL_FILE_SEARCH_ATTRIBUTES, index) -def get_response_tools_attributes(tools: List[Any]) -> AttributeMap: - """Handles interpretation of openai Response `tools` list.""" - # FunctionTool( - # name='get_weather', - # parameters={'properties': {'location': {'title': 'Location', 'type': 'string'}}, 'required': ['location'], 'title': 'get_weather_args', 'type': 'object', 'additionalProperties': False}, - # strict=True, - # type='function', - # description='Get the current weather for a location.' - # ) - attributes = {} +def get_response_tool_computer_attributes(tool: 'ComputerTool', index: int) -> AttributeMap: + """Handles interpretation of an openai ComputerTool object.""" + parameters = {} - for i, tool in enumerate(tools): - if isinstance(tool, FunctionTool): - # FunctionTool( - # name='get_weather', - # parameters={'properties': {'location': {'title': 'Location', 'type': 'string'}}, 'required': ['location'], 'title': 'get_weather_args', 'type': 'object', 'additionalProperties': False}, - # strict=True, - # type='function', - # description='Get the current weather for a location.' 
- # ) - for attribute, lookup in RESPONSE_TOOLS_ATTRIBUTES.items(): - if not hasattr(tool, lookup): - continue - - attributes[attribute.format(i=i)] = safe_serialize(getattr(tool, lookup)) - else: - logger.debug(f"[agentops.instrumentation.openai_agents] '{tool}' is not a recognized tool type.") + if hasattr(tool, 'display_height'): + parameters['display_height'] = tool.display_height - return attributes + if hasattr(tool, 'display_width'): + parameters['display_width'] = tool.display_width + + if hasattr(tool, 'environment'): + parameters['environment'] = tool.environment + + tool_data = tool.__dict__ + if parameters: + # add parameters to the tool_data dict so we can format them with the other attributes + tool_data['parameters'] = parameters + + return _extract_attributes_from_mapping_with_index( + tool_data, RESPONSE_TOOL_COMPUTER_ATTRIBUTES, index) def get_response_usage_attributes(usage: 'ResponseUsage') -> AttributeMap: @@ -359,35 +581,34 @@ def get_response_usage_attributes(usage: 'ResponseUsage') -> AttributeMap: usage.__dict__, RESPONSE_USAGE_ATTRIBUTES)) - # input_tokens_details is a dict if it exists + # input_tokens_details is an `InputTokensDetails` object or `dict` if it exists if hasattr(usage, 'input_tokens_details'): input_details = usage.input_tokens_details - if input_details and isinstance(input_details, dict): + if input_details is None: + pass + + elif isinstance(input_details, InputTokensDetails): + attributes.update(_extract_attributes_from_mapping( + input_details.__dict__, RESPONSE_USAGE_DETAILS_ATTRIBUTES)) + + elif isinstance(input_details, dict): # openai-agents often returns a dict for some reason. attributes.update(_extract_attributes_from_mapping( - input_details, - RESPONSE_USAGE_DETAILS_ATTRIBUTES)) + input_details, RESPONSE_USAGE_DETAILS_ATTRIBUTES)) + else: - logger.debug(f"[agentops.instrumentation.openai_agents] '{input_details}' is not a recognized input details type.") + logger.debug(f"[agentops.instrumentation.openai.response] '{input_details}' is not a recognized input details type.") # output_tokens_details is an `OutputTokensDetails` object output_details = usage.output_tokens_details - if output_details and isinstance(output_details, OutputTokensDetails): + if output_details is None: + pass + + elif isinstance(output_details, OutputTokensDetails): attributes.update(_extract_attributes_from_mapping( - output_details.__dict__, - RESPONSE_USAGE_DETAILS_ATTRIBUTES)) + output_details.__dict__, RESPONSE_USAGE_DETAILS_ATTRIBUTES)) + else: - logger.debug(f"[agentops.instrumentation.openai_agents] '{output_details}' is not a recognized output details type.") + logger.debug(f"[agentops.instrumentation.openai.response] '{output_details}' is not a recognized output details type.") return attributes - -def get_response_reasoning_attributes(reasoning: 'Reasoning') -> AttributeMap: - """Handles interpretation of an openai Reasoning object.""" - # Reasoning( - # effort='medium', - # generate_summary=None, - # ) - return _extract_attributes_from_mapping( - reasoning.__dict__, - RESPONSE_REASONING_ATTRIBUTES) - diff --git a/agentops/instrumentation/openai/attributes/tools.py b/agentops/instrumentation/openai/attributes/tools.py new file mode 100644 index 000000000..e69de29bb diff --git a/agentops/instrumentation/openai/instrumentor.py b/agentops/instrumentation/openai/instrumentor.py index 9d6ea798b..da312cd3d 100644 --- a/agentops/instrumentation/openai/instrumentor.py +++ b/agentops/instrumentation/openai/instrumentor.py @@ -22,7 +22,7 @@ 2. 
Extract data from both the request parameters and response object 3. Create spans with appropriate attributes for observability """ -from typing import List, Collection +from typing import List from opentelemetry.trace import get_tracer from opentelemetry.instrumentation.openai.v1 import OpenAIV1Instrumentor as ThirdPartyOpenAIV1Instrumentor @@ -40,13 +40,14 @@ class_name="Responses", method_name="create", handler=get_response_attributes, - ), + ), WrapConfig( trace_name="openai.responses.create", package="openai.resources.responses", class_name="AsyncResponses", method_name="create", handler=get_response_attributes, + is_async=True, ), ] @@ -62,35 +63,34 @@ class OpenAIInstrumentor(ThirdPartyOpenAIV1Instrumentor): def _instrument(self, **kwargs): """Instrument the OpenAI API, extending the third-party instrumentation. - - This implementation calls the parent _instrument method to handle + + This implementation calls the parent _instrument method to handle standard OpenAI API endpoints, then adds our own instrumentation for the responses module. """ super()._instrument(**kwargs) - + tracer_provider = kwargs.get("tracer_provider") tracer = get_tracer(LIBRARY_NAME, LIBRARY_VERSION, tracer_provider) - + for wrap_config in WRAPPED_METHODS: try: wrap(wrap_config, tracer) logger.debug(f"Successfully wrapped {wrap_config}") except (AttributeError, ModuleNotFoundError) as e: logger.debug(f"Failed to wrap {wrap_config}: {e}") - + logger.debug("Successfully instrumented OpenAI API with Response extensions") def _uninstrument(self, **kwargs): """Remove instrumentation from OpenAI API.""" super()._uninstrument(**kwargs) - + for wrap_config in WRAPPED_METHODS: try: unwrap(wrap_config) logger.debug(f"Successfully unwrapped {wrap_config}") except Exception as e: logger.debug(f"Failed to unwrap {wrap_config}: {e}") - - logger.debug("Successfully removed OpenAI API instrumentation with Response extensions") + logger.debug("Successfully removed OpenAI API instrumentation with Response extensions") diff --git a/agentops/instrumentation/openai_agents/README.md b/agentops/instrumentation/openai_agents/README.md index 6f7ecbcf7..56e518429 100644 --- a/agentops/instrumentation/openai_agents/README.md +++ b/agentops/instrumentation/openai_agents/README.md @@ -132,7 +132,7 @@ AGENT_SPAN_ATTRIBUTES: AttributeMap = { - Always use MessageAttributes semantic conventions for content and tool calls - For chat completions, use MessageAttributes.COMPLETION_CONTENT.format(i=0) -- For tool calls, use MessageAttributes.TOOL_CALL_NAME.format(i=0, j=0), etc. +- For tool calls, use MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=0, j=0), etc. 
- Never try to combine or aggregate contents into a single attribute - Each message component should have its own properly formatted attribute - This ensures proper display in OpenTelemetry backends and dashboards diff --git a/agentops/instrumentation/openai_agents/attributes/common.py b/agentops/instrumentation/openai_agents/attributes/common.py index de012d164..8714f27ab 100644 --- a/agentops/instrumentation/openai_agents/attributes/common.py +++ b/agentops/instrumentation/openai_agents/attributes/common.py @@ -6,12 +6,7 @@ """ from typing import Any from agentops.logging import logger -from agentops.semconv import ( - AgentAttributes, - WorkflowAttributes, - SpanAttributes, - InstrumentationAttributes -) +from agentops.semconv import AgentAttributes, WorkflowAttributes, SpanAttributes, InstrumentationAttributes from agentops.instrumentation.common import AttributeMap, _extract_attributes_from_mapping from agentops.instrumentation.common.attributes import get_common_attributes @@ -19,13 +14,12 @@ from agentops.instrumentation.openai.attributes.response import get_response_response_attributes from agentops.instrumentation.openai_agents import LIBRARY_NAME, LIBRARY_VERSION from agentops.instrumentation.openai_agents.attributes.model import ( - get_model_attributes, - get_model_config_attributes, + get_model_attributes, + get_model_config_attributes, ) from agentops.instrumentation.openai_agents.attributes.completion import get_generation_output_attributes - # Attribute mapping for AgentSpanData AGENT_SPAN_ATTRIBUTES: AttributeMap = { AgentAttributes.AGENT_NAME: "name", @@ -87,86 +81,87 @@ def get_common_instrumentation_attributes() -> AttributeMap: """Get common instrumentation attributes for the OpenAI Agents instrumentation. - + This combines the generic AgentOps attributes with OpenAI Agents specific library attributes. - + Returns: Dictionary of common instrumentation attributes """ attributes = get_common_attributes() - attributes.update({ - InstrumentationAttributes.LIBRARY_NAME: LIBRARY_NAME, - InstrumentationAttributes.LIBRARY_VERSION: LIBRARY_VERSION, - }) + attributes.update( + { + InstrumentationAttributes.LIBRARY_NAME: LIBRARY_NAME, + InstrumentationAttributes.LIBRARY_VERSION: LIBRARY_VERSION, + } + ) return attributes def get_agent_span_attributes(span_data: Any) -> AttributeMap: """Extract attributes from an AgentSpanData object. - + Agents are requests made to the `openai.agents` endpoint. - + Args: span_data: The AgentSpanData object - + Returns: Dictionary of attributes for agent span """ attributes = _extract_attributes_from_mapping(span_data, AGENT_SPAN_ATTRIBUTES) attributes.update(get_common_attributes()) - + return attributes def get_function_span_attributes(span_data: Any) -> AttributeMap: """Extract attributes from a FunctionSpanData object. - + Functions are requests made to the `openai.functions` endpoint. - + Args: span_data: The FunctionSpanData object - + Returns: Dictionary of attributes for function span """ attributes = _extract_attributes_from_mapping(span_data, FUNCTION_SPAN_ATTRIBUTES) attributes.update(get_common_attributes()) - + return attributes def get_handoff_span_attributes(span_data: Any) -> AttributeMap: """Extract attributes from a HandoffSpanData object. - + Handoffs are requests made to the `openai.handoffs` endpoint. 
-    
+
     Args:
         span_data: The HandoffSpanData object
-    
+
     Returns:
         Dictionary of attributes for handoff span
     """
     attributes = _extract_attributes_from_mapping(span_data, HANDOFF_SPAN_ATTRIBUTES)
     attributes.update(get_common_attributes())
-    
-    return attributes
+
+    return attributes
 
 
 def get_response_span_attributes(span_data: Any) -> AttributeMap:
     """Extract attributes from a ResponseSpanData object with full LLM response processing.
-    
-    Responses are requests made to the `openai.responses` endpoint. 
-    
+
+    Responses are requests made to the `openai.responses` endpoint.
+
     This function extracts not just the basic input/response mapping but also
     processes the rich response object to extract LLM-specific attributes
     like token usage, model information, content, etc.
-    
-    TODO tool calls arrive from this span type; need to figure out why that is. 
-    
+
+    TODO tool calls arrive from this span type; need to figure out why that is.
+
     Args:
         span_data: The ResponseSpanData object
-    
+
     Returns:
         Dictionary of attributes for response span
     """
@@ -176,49 +171,49 @@
 
     if span_data.response:
         attributes.update(get_response_response_attributes(span_data.response))
-    
+
     return attributes
 
 
 def get_generation_span_attributes(span_data: Any) -> AttributeMap:
     """Extract attributes from a GenerationSpanData object.
-    
+
     Generations are requests made to the `openai.completions` endpoint.
-    
-    # TODO this has not been extensively tested yet as there is a flag that needs ot be set to use the 
-    # completions API with the Agents SDK. 
+
+    # TODO this has not been extensively tested yet as there is a flag that needs to be set to use the
+    # completions API with the Agents SDK.
     # We can enable chat.completions API by calling:
     # `from agents import set_default_openai_api`
     # `set_default_openai_api("chat_completions")`
-    
+
     Args:
         span_data: The GenerationSpanData object
-    
+
     Returns:
         Dictionary of attributes for generation span
     """
     attributes = _extract_attributes_from_mapping(span_data, GENERATION_SPAN_ATTRIBUTES)
     attributes.update(get_common_attributes())
-    
+
     if span_data.model:
         attributes.update(get_model_attributes(span_data.model))
-    
+
     # Process output for GenerationSpanData if available
     if span_data.output:
         attributes.update(get_generation_output_attributes(span_data.output))
-    
+
     # Add model config attributes if present
     if span_data.model_config:
         attributes.update(get_model_config_attributes(span_data.model_config))
-    
+
     return attributes
 
 
 def get_transcription_span_attributes(span_data: Any) -> AttributeMap:
     """Extract attributes from a TranscriptionSpanData object.
-    
+
     This represents a conversion from audio to text.
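+
+    When raw audio input is present, it is uploaded via the AgentOps object
+    upload API and referenced from the span attributes under the workflow
+    input prefix, rather than inlined into the span.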
-    
+
     Args:
         span_data: The TranscriptionSpanData object
     Returns:
@@ -226,90 +221,90 @@
     """
     from agentops import get_client
     from agentops.client.api.types import UploadedObjectResponse
-    
+
     client = get_client()
-    
+
     attributes = _extract_attributes_from_mapping(span_data, TRANSCRIPTION_SPAN_ATTRIBUTES)
     attributes.update(get_common_attributes())
-    
+
     if span_data.input:
         prefix = WorkflowAttributes.WORKFLOW_INPUT
         uploaded_object: UploadedObjectResponse = client.api.v4.upload_object(span_data.input)
         attributes.update(get_uploaded_object_attributes(uploaded_object, prefix))
-    
+
     if span_data.model:
         attributes.update(get_model_attributes(span_data.model))
-    
+
     if span_data.model_config:
         attributes.update(get_model_config_attributes(span_data.model_config))
-    
+
     return attributes
 
 
 def get_speech_span_attributes(span_data: Any) -> AttributeMap:
     """Extract attributes from a SpeechSpanData object.
-    
-    This represents a conversion from audio to text.
-    
+
+    This represents a conversion from text to audio.
+
     Args:
         span_data: The SpeechSpanData object
-    
+
     Returns:
         Dictionary of attributes for speech span
     """
     from agentops import get_client
     from agentops.client.api.types import UploadedObjectResponse
-    
+
     client = get_client()
-    
+
     attributes = _extract_attributes_from_mapping(span_data, SPEECH_SPAN_ATTRIBUTES)
     attributes.update(get_common_attributes())
-    
+
     if span_data.output:
         prefix = WorkflowAttributes.WORKFLOW_OUTPUT
         uploaded_object: UploadedObjectResponse = client.api.v4.upload_object(span_data.output)
         attributes.update(get_uploaded_object_attributes(uploaded_object, prefix))
-    
+
     if span_data.model:
         attributes.update(get_model_attributes(span_data.model))
-    
+
     if span_data.model_config:
         attributes.update(get_model_config_attributes(span_data.model_config))
-    
+
     return attributes
 
 
 def get_speech_group_span_attributes(span_data: Any) -> AttributeMap:
     """Extract attributes from a SpeechGroupSpanData object.
-    
-    This represents a conversion from audio to text.
-    
+
+    This represents a group of text-to-audio conversions.
+
     Args:
         span_data: The SpeechGroupSpanData object
-    
+
     Returns:
         Dictionary of attributes for speech group span
     """
     attributes = _extract_attributes_from_mapping(span_data, SPEECH_GROUP_SPAN_ATTRIBUTES)
     attributes.update(get_common_attributes())
-    
+
     return attributes
 
 
 def get_span_attributes(span_data: Any) -> AttributeMap:
     """Get attributes for a span based on its type.
-    
+
     This function centralizes attribute extraction by delegating to type-specific
     getter functions.
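+
+    Dispatch is keyed on the span's class name; for example (the full mapping
+    is in the function body below):
+        "AgentSpanData"    -> get_agent_span_attributes
+        "FunctionSpanData" -> get_function_span_attributes
+        "ResponseSpanData" -> get_response_span_attributes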
- + Args: span_data: The span data object - + Returns: Dictionary of attributes for the span """ span_type = span_data.__class__.__name__ - + if span_type == "AgentSpanData": attributes = get_agent_span_attributes(span_data) elif span_type == "FunctionSpanData": @@ -329,7 +324,5 @@ def get_span_attributes(span_data: Any) -> AttributeMap: else: logger.debug(f"[agentops.instrumentation.openai_agents.attributes] Unknown span type: {span_type}") attributes = {} - - return attributes - + return attributes diff --git a/agentops/instrumentation/openai_agents/attributes/completion.py b/agentops/instrumentation/openai_agents/attributes/completion.py index 01ace15a1..df5adf0e2 100644 --- a/agentops/instrumentation/openai_agents/attributes/completion.py +++ b/agentops/instrumentation/openai_agents/attributes/completion.py @@ -111,9 +111,9 @@ def get_raw_response_attributes(response: Dict[str, Any]) -> Dict[str, Any]: # Handle function format if "function" in tool_call and isinstance(tool_call["function"], dict): function = tool_call["function"] - result[MessageAttributes.TOOL_CALL_ID.format(i=j, j=k)] = tool_id - result[MessageAttributes.TOOL_CALL_NAME.format(i=j, j=k)] = function.get("name", "") - result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=j, j=k)] = function.get("arguments", "") + result[MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=j, j=k)] = tool_id + result[MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=j, j=k)] = function.get("name", "") + result[MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS.format(i=j, j=k)] = function.get("arguments", "") return result @@ -154,14 +154,14 @@ def get_chat_completions_attributes(response: Dict[str, Any]) -> Dict[str, Any]: for j, tool_call in enumerate(tool_calls): if "function" in tool_call: function = tool_call["function"] - result[MessageAttributes.TOOL_CALL_ID.format(i=i, j=j)] = tool_call.get("id") - result[MessageAttributes.TOOL_CALL_NAME.format(i=i, j=j)] = function.get("name") - result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=i, j=j)] = function.get("arguments") + result[MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=i, j=j)] = tool_call.get("id") + result[MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=i, j=j)] = function.get("name") + result[MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS.format(i=i, j=j)] = function.get("arguments") if "function_call" in message and message["function_call"] is not None: function_call = message["function_call"] - result[MessageAttributes.FUNCTION_CALL_NAME.format(i=i)] = function_call.get("name") - result[MessageAttributes.FUNCTION_CALL_ARGUMENTS.format(i=i)] = function_call.get("arguments") + result[MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=i)] = function_call.get("name") + result[MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS.format(i=i)] = function_call.get("arguments") return result diff --git a/agentops/semconv/message.py b/agentops/semconv/message.py index d31b17f3d..9cb775163 100644 --- a/agentops/semconv/message.py +++ b/agentops/semconv/message.py @@ -8,21 +8,32 @@ class MessageAttributes: PROMPT_CONTENT = "gen_ai.prompt.{i}.content" # Content of the prompt message PROMPT_TYPE = "gen_ai.prompt.{i}.type" # Type of the prompt message + # Indexed function calls (with {i} for interpolation) + TOOL_CALL_ID = "gen_ai.request.tools.{i}.id" # Unique identifier for the function call at index {i} + TOOL_CALL_TYPE = "gen_ai.request.tools.{i}.type" # Type of the function call at index {i} + TOOL_CALL_NAME = "gen_ai.request.tools.{i}.name" # Name of the function call at 
index {i}
+    TOOL_CALL_DESCRIPTION = "gen_ai.request.tools.{i}.description"  # Description of the function call at index {i}
+    TOOL_CALL_ARGUMENTS = "gen_ai.request.tools.{i}.arguments"  # Arguments for function call at index {i}
+
     # Indexed completions (with {i} for interpolation)
     COMPLETION_ID = "gen_ai.completion.{i}.id"  # Unique identifier for the completion
-    
+    COMPLETION_TYPE = "gen_ai.completion.{i}.type"  # Type of the completion at index {i}
     COMPLETION_ROLE = "gen_ai.completion.{i}.role"  # Role of the completion message at index {i}
     COMPLETION_CONTENT = "gen_ai.completion.{i}.content"  # Content of the completion message at index {i}
     COMPLETION_FINISH_REASON = "gen_ai.completion.{i}.finish_reason"  # Finish reason for completion at index {i}
-    COMPLETION_TYPE = "gen_ai.completion.{i}.type"  # Type of the completion at index {i}
-
-    # Indexed function calls (with {i} for interpolation)
-    FUNCTION_CALL_ID = "gen_ai.request.tools.{i}.id"  # Unique identifier for the function call at index {i}
-    FUNCTION_CALL_NAME = "gen_ai.request.tools.{i}.name"  # Name of the function call at index {i}
-    FUNCTION_CALL_ARGUMENTS = "gen_ai.request.tools.{i}.arguments"  # Arguments for function call at index {i}
-    FUNCTION_CALL_TYPE = "gen_ai.request.tools.{i}.type"  # Type of the function call at index {i}
 
     # Indexed tool calls (with {i}/{j} for nested interpolation)
-    TOOL_CALL_ID = "gen_ai.completion.{i}.tool_calls.{j}.id"  # ID of tool call {j} in completion {i}
-    TOOL_CALL_NAME = "gen_ai.completion.{i}.tool_calls.{j}.name"  # Name of the tool called in tool call {j} in completion {i}
-    TOOL_CALL_ARGUMENTS = "gen_ai.completion.{i}.tool_calls.{j}.arguments"  # Arguments for tool call {j} in completion {i}
\ No newline at end of file
+    COMPLETION_TOOL_CALL_ID = "gen_ai.completion.{i}.tool_calls.{j}.id"  # ID of tool call {j} in completion {i}
+    COMPLETION_TOOL_CALL_TYPE = "gen_ai.completion.{i}.tool_calls.{j}.type"  # Type of tool call {j} in completion {i}
+    COMPLETION_TOOL_CALL_STATUS = "gen_ai.completion.{i}.tool_calls.{j}.status"  # Status of tool call {j} in completion {i}
+    COMPLETION_TOOL_CALL_NAME = "gen_ai.completion.{i}.tool_calls.{j}.name"  # Name of the tool called in tool call {j} in completion {i}
+    COMPLETION_TOOL_CALL_DESCRIPTION = "gen_ai.completion.{i}.tool_calls.{j}.description"  # Description of the tool call {j} in completion {i}
+    COMPLETION_TOOL_CALL_ARGUMENTS = "gen_ai.completion.{i}.tool_calls.{j}.arguments"  # Arguments for tool call {j} in completion {i}
+
+    # Indexed annotations of the built-in tools (with {i}/{j} for nested interpolation)
+    COMPLETION_ANNOTATION_START_INDEX = "gen_ai.completion.{i}.annotations.{j}.start_index"  # Start index of the URL annotation {j} in completion {i}
+    COMPLETION_ANNOTATION_END_INDEX = "gen_ai.completion.{i}.annotations.{j}.end_index"  # End index of the URL annotation {j} in completion {i}
+    COMPLETION_ANNOTATION_TITLE = "gen_ai.completion.{i}.annotations.{j}.title"  # Title of the URL annotation {j} in completion {i}
+    COMPLETION_ANNOTATION_TYPE = "gen_ai.completion.{i}.annotations.{j}.type"  # Type of the URL annotation {j} in completion {i}
+    COMPLETION_ANNOTATION_URL = "gen_ai.completion.{i}.annotations.{j}.url"  # URL link of the URL annotation {j} in completion {i}
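+
+    # Example (illustrative): indexed keys are produced with `str.format`, e.g.
+    #   MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=0, j=1)
+    #   -> "gen_ai.completion.0.tool_calls.1.name"
\ No newline at end of file
diff --git a/examples/openai_responses/multi_tool_orchestration.ipynb b/examples/openai_responses/multi_tool_orchestration.ipynb
new file 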
diff --git a/examples/openai_responses/multi_tool_orchestration.ipynb b/examples/openai_responses/multi_tool_orchestration.ipynb new file mode 100644 index 000000000..6a36e9199 --- /dev/null +++ b/examples/openai_responses/multi_tool_orchestration.ipynb @@ -0,0 +1,614 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multi-Tool Orchestration with a RAG approach using OpenAI's Responses API" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "This cookbook guides you through building dynamic, multi-tool workflows using OpenAI's Responses API. It demonstrates how to implement a Retrieval-Augmented Generation (RAG) approach that intelligently routes user queries to the appropriate built-in or external tools. Whether a query calls for general knowledge or requires specific internal context from a vector database (like Pinecone), this guide shows you how to combine function calls, the built-in web search tool, and document retrieval to generate accurate, context-aware responses." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install required dependencies\n", + "%pip install datasets tqdm pandas pinecone openai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "import time\n", + "from tqdm.auto import tqdm\n", + "from pandas import DataFrame\n", + "from datasets import load_dataset\n", + "import random\n", + "import string\n", + "\n", + "# Import Pinecone client and related specifications.\n", + "from pinecone import Pinecone\n", + "from pinecone import ServerlessSpec" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv()\n", + "\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\")\n", + "AGENTOPS_API_ENDPOINT = os.getenv(\"AGENTOPS_API_ENDPOINT\")\n", + "AGENTOPS_EXPORTER_ENDPOINT = os.getenv(\"AGENTOPS_EXPORTER_ENDPOINT\")\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "PINECONE_API_KEY = os.getenv(\"PINECONE_API_KEY\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import AgentOps client and initialize with your API key.\n", + "import agentops\n", + "\n", + "agentops.init(\n", + " api_key=AGENTOPS_API_KEY,\n", + " endpoint=AGENTOPS_API_ENDPOINT,\n", + " exporter_endpoint=AGENTOPS_EXPORTER_ENDPOINT,\n", + " tags=[\"openai\", \"responses\", \"multi-tool\", \"rag\", \"test\"],\n", + ")\n", + "\n", + "# Import OpenAI client and initialize with your API key.\n", + "from openai import OpenAI\n", + "\n", + "client = OpenAI(api_key=OPENAI_API_KEY)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we use a sample medical reasoning dataset from Hugging Face. We convert the dataset into a Pandas DataFrame and merge the “Question” and “Response” columns into a single string. This merged text is used for embedding and later stored as metadata."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the dataset (ensure you're logged in with huggingface-cli if needed)\n", + "ds = load_dataset(\"FreedomIntelligence/medical-o1-reasoning-SFT\", \"en\", split='train[:100]', trust_remote_code=True)\n", + "ds_dataframe = DataFrame(ds)\n", + "\n", + "# Merge the Question and Response columns into a single string.\n", + "ds_dataframe['merged'] = ds_dataframe.apply(\n", + " lambda row: f\"Question: {row['Question']} Answer: {row['Response']}\", axis=1\n", + ")\n", + "print(\"Example merged text:\", ds_dataframe['merged'].iloc[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds_dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a Pinecone Index Based on the Dataset\n", + "Use the dataset itself to determine the embedding dimensionality. For example, compute one embedding from the merged column and then create the index accordingly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MODEL = \"text-embedding-3-small\" # Replace with your production embedding model if needed\n", + "# Compute an embedding for the first document to obtain the embedding dimension.\n", + "sample_embedding_resp = client.embeddings.create(\n", + " input=[ds_dataframe['merged'].iloc[0]],\n", + " model=MODEL\n", + ")\n", + "embed_dim = len(sample_embedding_resp.data[0].embedding)\n", + "print(f\"Embedding dimension: {embed_dim}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize Pinecone using your API key.\n", + "pc = Pinecone(api_key=PINECONE_API_KEY)\n", + "\n", + "# Define the Pinecone serverless specification.\n", + "AWS_REGION = \"us-east-1\"\n", + "spec = ServerlessSpec(cloud=\"aws\", region=AWS_REGION)\n", + "\n", + "# Create a random index name with lower case alphanumeric characters and '-'\n", + "index_name = 'pinecone-index-' + ''.join(random.choices(string.ascii_lowercase + string.digits, k=10))\n", + "\n", + "# Create the index if it doesn't already exist.\n", + "if index_name not in pc.list_indexes().names():\n", + " pc.create_index(\n", + " index_name,\n", + " dimension=embed_dim,\n", + " metric='dotproduct',\n", + " spec=spec\n", + " )\n", + "\n", + "# Connect to the index.\n", + "index = pc.Index(index_name)\n", + "time.sleep(1)\n", + "print(\"Index stats:\", index.describe_index_stats())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Upsert the Dataset into Pinecone index\n", + "\n", + "Process the dataset in batches, generate embeddings for each merged text, prepare metadata (including separate Question and Answer fields), and upsert each batch into the index. You may also update metadata for specific entries if needed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 32\n", + "for i in tqdm(range(0, len(ds_dataframe['merged']), batch_size), desc=\"Upserting to Pinecone\"):\n", + " i_end = min(i + batch_size, len(ds_dataframe['merged']))\n", + " lines_batch = ds_dataframe['merged'][i: i_end]\n", + " ids_batch = [str(n) for n in range(i, i_end)]\n", + " \n", + " # Create embeddings for the current batch.\n", + " res = client.embeddings.create(input=[line for line in lines_batch], model=MODEL)\n", + " embeds = [record.embedding for record in res.data]\n", + " \n", + " # Prepare metadata by extracting original Question and Answer.\n", + " meta = []\n", + " for record in ds_dataframe.iloc[i:i_end].to_dict('records'):\n", + " q_text = record['Question']\n", + " a_text = record['Response']\n", + " # Optionally update metadata for specific entries.\n", + " meta.append({\"Question\": q_text, \"Answer\": a_text})\n", + " \n", + " # Upsert the batch into Pinecone.\n", + " vectors = list(zip(ids_batch, embeds, meta))\n", + " index.upsert(vectors=vectors)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Pinecone Image](../../images/responses_pinecone_rag.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Query the Pinecone Index\n", + "\n", + "Create a natural language query, compute its embedding, and perform a similarity search on the Pinecone index. The returned results include metadata that provides context for generating answers." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def query_pinecone_index(client, index, model, query_text):\n", + " # Generate an embedding for the query.\n", + " query_embedding = client.embeddings.create(input=query_text, model=model).data[0].embedding\n", + "\n", + " # Query the index and return top 5 matches.\n", + " res = index.query(vector=[query_embedding], top_k=5, include_metadata=True)\n", + " print(\"Query Results:\")\n", + " for match in res['matches']:\n", + " print(f\"{match['score']:.2f}: {match['metadata'].get('Question', 'N/A')} - {match['metadata'].get('Answer', 'N/A')}\")\n", + " return res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example usage with a different query from the train/test set\n", + "query = (\n", + " \"A 45-year-old man with a history of alcohol use presents with symptoms including confusion, ataxia, and ophthalmoplegia. \"\n", + " \"What is the most likely diagnosis and the recommended treatment?\"\n", + ")\n", + "query_pinecone_index(client, index, MODEL, query)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate a Response Using the Retrieved Context\n", + "\n", + "Select the best matching result from your query results and use the OpenAI Responses API to generate a final answer by combining the retrieved context with the original question." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Retrieve and concatenate top 3 match contexts.\n", + "matches = index.query(\n", + " vector=[client.embeddings.create(input=query, model=MODEL).data[0].embedding],\n", + " top_k=3,\n", + " include_metadata=True\n", + ")['matches']\n", + "\n", + "context = \"\\n\\n\".join(\n", + " f\"Question: {m['metadata'].get('Question', '')}\\nAnswer: {m['metadata'].get('Answer', '')}\"\n", + " for m in matches\n", + ")\n", + "# Use the context to generate a final answer.\n", + "response = client.responses.create(\n", + " model=\"gpt-4o\",\n", + " input=f\"Provide the answer based on the context: {context} and the question: {query} as per the internal knowledge base\",\n", + ")\n", + "print(\"\\nFinal Answer:\")\n", + "print(response.output_text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Orchestrate Multi-Tool Calls\n", + "\n", + "Now we'll define the tools available to the model through the Responses API: a built-in web search tool, plus a function tool that lets the model query the external vector store (Pinecone) as an example.\n", + "\n", + "*Web Search Preview Tool*: Enables the model to perform live web searches and preview the results. This is ideal for retrieving real-time or up-to-date information from the internet.\n", + "\n", + "*Pinecone Search Tool*: Allows the model to query a vector database using semantic search. This is especially useful for retrieving relevant documents—such as medical literature or other domain-specific content—that have been stored in a vectorized format." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Tools definition: The list of tools includes:\n", + "# - A web search preview tool.\n", + "# - A Pinecone search tool for retrieving medical documents.\n", + "\n", + "# Define available tools.\n", + "tools = [ \n", + " {\"type\": \"web_search_preview\",\n", + " \"user_location\": {\n", + " \"type\": \"approximate\",\n", + " \"country\": \"US\",\n", + " \"region\": \"California\",\n", + " \"city\": \"SF\"\n", + " },\n", + " \"search_context_size\": \"medium\"},\n", + " {\n", + " \"type\": \"function\",\n", + " \"name\": \"PineconeSearchDocuments\",\n", + " \"description\": \"Semantically search the vector database for documents relevant to the medical question asked by the user.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"query\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The natural language query to search the vector database.\"\n", + " },\n", + " \"top_k\": {\n", + " \"type\": \"integer\",\n", + " \"description\": \"Number of top results to return.\",\n", + " \"default\": 3\n", + " }\n", + " },\n", + " \"required\": [\"query\"],\n", + " \"additionalProperties\": False\n", + " }\n", + " }\n", + "]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Example queries that the model should route appropriately.\n", + "queries = [\n", + " {\"query\": \"Who won the cricket world cup in 1983?\"},\n", + " {\"query\": \"What is the most common cause of death in the United States according to the internet?\"},\n", + " {\"query\": (\"A 7-year-old boy with sickle cell disease is experiencing knee and hip pain, \"\n", + " \"has been admitted for pain crises in the past, and now walks with a limp. \"\n", + " \"His exam shows a normal, cool hip with decreased range of motion and pain with ambulation. \"\n", + " \"What is the most appropriate next step in management according to the internal knowledge base?\")}\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Process each query dynamically.\n", + "for item in queries:\n", + " input_messages = [{\"role\": \"user\", \"content\": item[\"query\"]}]\n", + " print(\"\\n🌟--- Processing Query ---🌟\")\n", + " print(f\"🔍 **User Query:** {item['query']}\")\n", + " \n", + " # Call the Responses API with tools enabled and allow parallel tool calls.\n", + " response = client.responses.create(\n", + " model=\"gpt-4o\",\n", + " input=[\n", + " {\"role\": \"system\", \"content\": \"When prompted with a question, select the right tool to use based on the question.\"\n", + " },\n", + " {\"role\": \"user\", \"content\": item[\"query\"]}\n", + " ],\n", + " tools=tools,\n", + " parallel_tool_calls=True\n", + " )\n", + " \n", + " print(\"\\n✨ **Initial Response Output:**\")\n", + " print(response.output)\n", + " \n", + " # Determine if a tool call is needed and process accordingly.\n", + " if response.output:\n", + " tool_call = response.output[0]\n", + " if tool_call.type in [\"web_search_preview\", \"function_call\"]:\n", + " tool_name = tool_call.name if tool_call.type == \"function_call\" else \"web_search_preview\"\n", + " print(f\"\\n🔧 **Model triggered a tool call:** {tool_name}\")\n", + " \n", + " if tool_name == \"PineconeSearchDocuments\":\n", + " print(\"🔍 **Invoking PineconeSearchDocuments tool...**\")\n", + " res = query_pinecone_index(client, index, MODEL, item[\"query\"])\n", + " if res[\"matches\"]:\n", + " best_match = res[\"matches\"][0][\"metadata\"]\n", + " result = f\"**Question:** {best_match.get('Question', 'N/A')}\\n**Answer:** {best_match.get('Answer', 'N/A')}\"\n", + " else:\n", + " result = \"**No matching documents found in the index.**\"\n", + " print(\"✅ **PineconeSearchDocuments tool invoked successfully.**\")\n", + " else:\n", + " print(\"🔍 **Invoking simulated web search tool...**\")\n", + " result = \"**Simulated web search result.**\"\n", + " print(\"✅ **Simulated web search tool invoked successfully.**\")\n", + " \n", + " # Append the tool call and its output back into the conversation.\n", + " input_messages.append(tool_call)\n", + " input_messages.append({\n", + " \"type\": \"function_call_output\",\n", + " \"call_id\": tool_call.call_id,\n", + " \"output\": str(result)\n", + " })\n", + " \n", + " # Get the final answer incorporating the tool's result.\n", + " final_response = client.responses.create(\n", + " model=\"gpt-4o\",\n", + " input=input_messages,\n", + " tools=tools,\n", + " parallel_tool_calls=True\n", + " )\n", + " print(\"\\n💡 **Final Answer:**\")\n", + " print(final_response.output_text)\n", + " else:\n", + " # If no tool call is triggered, print the response directly.\n", + " print(\"💡 **Final Answer:**\")\n", + " print(response.output_text)" + ] + },
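For orientation before the explanation below: the loop branches on the type of the first output item. Function calls carry a `call_id` that must be echoed back in a `function_call_output` input item, while hosted tool calls such as `web_search_call` carry an `id` instead (the notebook demonstrates both later). A minimal sketch of the inspection, under those assumptions:

```python
# Sketch: classify Responses API output items (attribute names follow the
# usage in the surrounding cells; exact item classes come from the openai SDK).
def describe_output_item(item) -> str:
    if item.type == "function_call":
        # Echo call_id back via a function_call_output input item.
        return f"function_call name={item.name} call_id={item.call_id}"
    if item.type == "web_search_call":
        # Hosted tool calls expose an id rather than a call_id.
        return f"web_search_call id={item.id}"
    if item.type == "message":
        return f"message role={item.role}"
    return f"unrecognized type={item.type}"

for item in response.output:
    print(describe_output_item(item))
```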
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As shown above, depending on the query, the appropriate tool is invoked to determine the optimal response.\n", + "\n", + "For instance, looking at the third example, when the model triggers the tool named \"PineconeSearchDocuments\", the code calls `query_pinecone_index` with the current query and then extracts the best match (or an appropriate context) as the result. For non-health-related inquiries, or queries that explicitly ask for an internet search, the code calls the web search tool; for other queries, the model may choose not to call any tool and instead answer directly from the question under consideration.\n", + "\n", + "Finally, the tool call and its output are appended to the conversation, and the final answer is generated by the Responses API." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multi-tool orchestration flow\n", + "\n", + "Now let's modify the input query and the system instructions to the Responses API so that it follows a tool-calling sequence when generating the output. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Process one query as an example to understand the tool calls and function calls as part of the response output\n", + "item = \"What is the most common cause of death in the United States\"\n", + "\n", + "# Initialize input messages with the user's query.\n", + "input_messages = [{\"role\": \"user\", \"content\": item}]\n", + "print(\"\\n🌟--- Processing Query ---🌟\")\n", + "print(f\"🔍 **User Query:** {item}\")\n", + "\n", + "# Call the Responses API with tools enabled and allow parallel tool calls.\n", + "print(\"\\n🔧 **Calling Responses API with Tools Enabled**\")\n", + "print(\"\\n🕵️‍♂️ **Step 1: Web Search Call**\")\n", + "print(\" - Initiating web search to gather initial information.\")\n", + "print(\"\\n📚 **Step 2: Pinecone Search Call**\")\n", + "print(\" - Querying Pinecone to find relevant examples from the internal knowledge base.\")\n", + "\n", + "response = client.responses.create(\n", + " model=\"gpt-4o\",\n", + " input=[\n", + " {\"role\": \"system\", \"content\": \"Every time it's prompted with a question, first call the web search tool for results, then call `PineconeSearchDocuments` to find real examples in the internal knowledge base.\"},\n", + " {\"role\": \"user\", \"content\": item}\n", + " ],\n", + " tools=tools,\n", + " parallel_tool_calls=True\n", + ")\n", + "\n", + "# Print the initial response output.\n", + "print(\"input_messages\", input_messages)\n", + "\n", + "print(\"\\n✨ **Initial Response Output:**\")\n", + "print(response.output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Understand the tool calls and function calls as part of the response output\n", + "\n", + "import pandas as pd\n", + "\n", + "# Create a list to store the tool call and function call details\n", + "tool_calls = []\n", + "\n", + "# Iterate through the response output and collect the details\n", + "for i in response.output:\n", + " tool_calls.append({\n", + " \"Type\": i.type,\n", + " \"Call ID\": i.call_id if hasattr(i, 'call_id') else i.id if hasattr(i, 'id') else \"N/A\",\n", + " \"Output\": str(i.output) if hasattr(i, 'output') else \"N/A\",\n", + " \"Name\": i.name if hasattr(i, 'name') else \"N/A\"\n", + " })\n", + "\n", + "# Convert the list to a DataFrame for tabular display\n", + "df_tool_calls = pd.DataFrame(tool_calls)\n", + "\n", + "# Display the DataFrame\n", + "df_tool_calls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tool_call_1 = response.output[0]\n", + "print(tool_call_1)\n", + "print(tool_call_1.id)\n", + "\n", + "tool_call_2 = response.output[2]\n", + "print(tool_call_2)\n", + "print(tool_call_2.call_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Append the tool call and its output back into the conversation.\n", + "input_messages.append(response.output[2])\n", + "input_messages.append({\n", + " \"type\": \"function_call_output\",\n", + " \"call_id\": tool_call_2.call_id,\n", + " \"output\": str(result)\n", + "})\n", + "print(input_messages)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Get the final answer incorporating the tool's result.\n", + "print(\"\\n🔧 **Calling Responses API for Final Answer**\")\n", + "\n", + "response_2 = client.responses.create(\n", + " model=\"gpt-4o\",\n", + " input=input_messages,\n", + ")\n", + "print(response_2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Print the final answer\n", + "print(response_2.output_text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "We have seen how to use OpenAI's Responses API to implement a Retrieval-Augmented Generation (RAG) approach with multi-tool calling. The model selects the appropriate tool based on the input query: general questions may be handled by built-in tools such as web search, while specific medical inquiries related to internal knowledge are answered by retrieving context from a vector database (such as Pinecone) via function calls. Additionally, we showed how multiple tool calls can be sequentially combined to generate a final response based on the instructions provided to the Responses API. Happy coding! " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/openai_responses/sync_and_async.py b/examples/openai_responses/sync_and_async.py new file mode 100644 index 000000000..3c857e7a4 --- /dev/null +++ b/examples/openai_responses/sync_and_async.py @@ -0,0 +1,43 @@ +# To run this file from project root: AGENTOPS_LOG_LEVEL=debug uv run examples/openai_responses/sync_and_async.py +import asyncio +from dotenv import load_dotenv + +load_dotenv() + +from openai import OpenAI, AsyncOpenAI +import agentops + + +def sync_responses_request(): + client = OpenAI() + response = client.responses.create( + model="gpt-4o", + input="Explain the concept of synchronous Python in one sentence.", + ) + return response + + +async def async_responses_request(): + client = AsyncOpenAI() + response = await client.responses.create( + model="gpt-4o", + input="Explain the concept of async/await in Python in one sentence.", + stream=False, + ) + return response + + +async def main(): + agentops.init() + + # Synchronous request + sync_response = sync_responses_request() + print(f"Synchronous Response:\n {sync_response.output_text}") + + # Asynchronous request + async_response = await async_responses_request() + print(f"Asynchronous Response:\n {async_response.output_text}") + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file
diff --git a/examples/openai_responses/web_search.ipynb b/examples/openai_responses/web_search.ipynb new file mode 100644 index 000000000..3b50074c8 --- /dev/null +++ b/examples/openai_responses/web_search.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What is the Responses API\n", + "\n", + "The Responses API is a new API that focuses on greater simplicity and greater expressivity when using our APIs. It is designed for multiple tools, multiple turns, and multiple modalities — as opposed to current APIs, which either have these features bolted onto an API designed primarily for text in and out (chat completions) or need a lot of bootstrapping to perform simple actions (Assistants API).\n", + "\n", + "Here I will show you a couple of new features that the Responses API has to offer and tie it all together at the end.\n", + "`responses` solves for a number of user pain points with our current set of APIs. During our time with the completions API, we found that folks wanted:\n", + "\n", + "- the ability to easily perform multi-turn model interactions in a single API call\n", + "- to have access to our hosted tools (file_search, web_search, code_interpreter)\n", + "- granular control over the context sent to the model\n", + "\n", + "As models start to develop longer-running reasoning and thinking capabilities, users will want an async-friendly and stateful primitive. The Responses API solves for this. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basics\n", + "By design, on the surface, the Responses API is very similar to the Completions API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "import os\n", + "\n", + "load_dotenv()\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\")\n", + "\n", + "import agentops\n", + "agentops.init(api_key=AGENTOPS_API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from openai import OpenAI\n", + "client = OpenAI(api_key=OPENAI_API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.responses.create(\n", + " model=\"gpt-4o-mini\",\n", + " input=\"tell me a joke\",\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(response.output[0].content[0].text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "One key feature of the Responses API is that it is stateful: you do not have to manage the state of the conversation yourself; the API handles it for you. For example, you can retrieve the response at any time and it will include the full conversation history." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fetched_response = client.responses.retrieve(\n", + "response_id=response.id)\n", + "\n", + "print(fetched_response.output[0].content[0].text)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can continue the conversation by referring to the previous response."
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "response_two = client.responses.create(\n", + " model=\"gpt-4o-mini\",\n", + " input=\"tell me another\",\n", + " previous_response_id=response.id\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(response_two.output[0].content[0].text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can of course manage the context yourself. But one benefit of OpenAI maintaining the context for you is that you can fork the response at any point and continue the conversation from that point." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response_two_forked = client.responses.create(\n", + " model=\"gpt-4o-mini\",\n", + " input=\"I didn't like that joke, tell me another and tell me the difference between the two jokes\",\n", + " previous_response_id=response.id # Forking and continuing from the first response\n", + ")\n", + "\n", + "output_text = response_two_forked.output[0].content[0].text\n", + "print(output_text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hosted Tools\n", + "\n", + "Another benefit of the Responses API is that it adds support for hosted tools like `file_search` and `web_search`. Instead of manually calling the tools, simply pass in the tools and the API will decide which tool to use and invoke it.\n", + "\n", + "Here is an example of using the `web_search` tool to incorporate web search results into the response. You may already be familiar with how ChatGPT can search the web. You can now build similar experiences too! The web search tool uses the OpenAI Index, the one that powers the web search in ChatGPT, having been optimized for chat applications.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.responses.create(\n", + " model=\"gpt-4o\", # or another supported model\n", + " input=\"What's the latest news about AI?\",\n", + " tools=[\n", + " {\n", + " \"type\": \"web_search\"\n", + " }\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "print(json.dumps(response.output, default=lambda o: o.__dict__, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multimodal, Tool-augmented conversation\n", + "\n", + "The Responses API natively supports text, images, and audio modalities. \n", + "Tying everything together, we can build a fully multimodal, tool-augmented interaction with one API call through the Responses API."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "from IPython.display import Image, display\n", + "\n", + "# Display the image from the provided URL\n", + "url = \"https://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Cat_August_2010-4.jpg/2880px-Cat_August_2010-4.jpg\"\n", + "display(Image(url=url, width=400))\n", + "\n", + "response_multimodal = client.responses.create(\n", + " model=\"gpt-4o\",\n", + " input=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"type\": \"input_text\", \"text\": \n", + " \"Come up with keywords related to the image, and search on the web using the search tool for any news related to the keywords\"\n", + " \", summarize the findings and cite the sources.\"},\n", + " {\"type\": \"input_image\", \"image_url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Cat_August_2010-4.jpg/2880px-Cat_August_2010-4.jpg\"}\n", + " ]\n", + " }\n", + " ],\n", + " tools=[\n", + " {\"type\": \"web_search\"}\n", + " ]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "print(json.dumps(response_multimodal.__dict__, default=lambda o: o.__dict__, indent=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above example, we were able to use the `web_search` tool to search the web for news related to the image in one API call instead of the multiple round trips that would be required if we were using the Chat Completions API." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the Responses API,\n", + "🔥 a single API call can:\n", + "\n", + "✅ Analyze a given image using multimodal input\n", + "\n", + "✅ Perform a web search via the `web_search` hosted tool\n", + "\n", + "✅ Summarize the results\n", + "\n", + "In contrast, the Chat Completions API would require multiple steps, each requiring a round trip to the API:\n", + "\n", + "1️⃣ Upload image and get analysis → 1 request\n", + "\n", + "2️⃣ Extract info, call external web search → manual step + tool execution\n", + "\n", + "3️⃣ Re-submit tool results for summarization → another request\n", + "\n", + "See the following diagram for a side-by-side visual comparison!\n", + "\n", + "![Responses vs Completions](../../images/comparisons.png)\n", + "\n", + "\n", + "We are very excited for you to try out the Responses API and see how it can simplify your code and make it easier to build complex, multimodal, tool-augmented interactions!\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/unit/instrumentation/common/test_attributes.py b/tests/unit/instrumentation/common/test_attributes.py index bfc75f617..fa9d193ad 100644 --- a/tests/unit/instrumentation/common/test_attributes.py +++ b/tests/unit/instrumentation/common/test_attributes.py @@ -7,14 +7,13 @@ """ import pytest -from unittest.mock import MagicMock, patch +from unittest.mock import patch from agentops.instrumentation.common.attributes import ( _extract_attributes_from_mapping,
get_common_attributes, get_base_trace_attributes, get_base_span_attributes, - AttributeMap ) from agentops.semconv import ( CoreAttributes, @@ -28,6 +27,7 @@ class TestAttributeExtraction: def test_extract_attributes_from_object(self): """Test extracting attributes from an object.""" + # Create a test object with attributes class TestObject: def __init__(self): @@ -38,9 +38,9 @@ def __init__(self): self.attr5 = {} self.attr6 = ["list", "of", "values"] self.attr7 = {"key": "value"} - + test_obj = TestObject() - + # Define a mapping of target attributes to source attributes mapping = { "target_attr1": "attr1", @@ -52,10 +52,10 @@ def __init__(self): "target_attr7": "attr7", # Dict value, should be handled "target_attr8": "missing_attr", # Missing attribute, should be skipped } - + # Extract attributes attributes = _extract_attributes_from_mapping(test_obj, mapping) - + # Verify extracted attributes assert "target_attr1" in attributes assert attributes["target_attr1"] == "value1" @@ -82,7 +82,7 @@ def test_extract_attributes_from_dict(self): "attr6": ["list", "of", "values"], "attr7": {"key": "value"}, } - + # Define a mapping of target attributes to source attributes mapping = { "target_attr1": "attr1", @@ -94,10 +94,10 @@ def test_extract_attributes_from_dict(self): "target_attr7": "attr7", # Dict value, should be handled "target_attr8": "missing_attr", # Missing key, should be skipped } - + # Extract attributes attributes = _extract_attributes_from_mapping(test_dict, mapping) - + # Verify extracted attributes assert "target_attr1" in attributes assert attributes["target_attr1"] == "value1" @@ -122,7 +122,7 @@ def test_get_common_attributes(self): with patch("agentops.instrumentation.common.attributes.get_agentops_version", return_value="0.1.2"): # Get common attributes attributes = get_common_attributes() - + # Verify attributes assert InstrumentationAttributes.NAME in attributes assert attributes[InstrumentationAttributes.NAME] == "agentops" @@ -131,20 +131,23 @@ def test_get_common_attributes(self): def test_get_base_trace_attributes_with_valid_trace(self): """Test getting base trace attributes with a valid trace.""" + # Create a mock trace class MockTrace: def __init__(self): self.trace_id = "test_trace_id" self.name = "test_trace_name" - + mock_trace = MockTrace() - + # Mock the common attributes and tags functions - with patch("agentops.instrumentation.common.attributes.get_common_attributes", - return_value={InstrumentationAttributes.NAME: "agentops", InstrumentationAttributes.VERSION: "0.1.2"}): + with patch( + "agentops.instrumentation.common.attributes.get_common_attributes", + return_value={InstrumentationAttributes.NAME: "agentops", InstrumentationAttributes.VERSION: "0.1.2"}, + ): # Get base trace attributes attributes = get_base_trace_attributes(mock_trace) - + # Verify attributes assert CoreAttributes.TRACE_ID in attributes assert attributes[CoreAttributes.TRACE_ID] == "test_trace_id" @@ -159,40 +162,44 @@ def __init__(self): def test_get_base_trace_attributes_with_invalid_trace(self): """Test getting base trace attributes with an invalid trace (missing trace_id).""" + # Create a mock trace without trace_id class MockTrace: def __init__(self): self.name = "test_trace_name" - + mock_trace = MockTrace() - + # Mock the logger with patch("agentops.instrumentation.common.attributes.logger.warning") as mock_warning: # Get base trace attributes attributes = get_base_trace_attributes(mock_trace) - + # Verify logger was called mock_warning.assert_called_once_with("Cannot create trace 
attributes: missing trace_id") - + # Verify attributes is empty assert attributes == {} def test_get_base_span_attributes_with_basic_span(self): """Test getting base span attributes with a basic span.""" + # Create a mock span class MockSpan: def __init__(self): self.span_id = "test_span_id" self.trace_id = "test_trace_id" - + mock_span = MockSpan() - + # Mock the common attributes function - with patch("agentops.instrumentation.common.attributes.get_common_attributes", - return_value={InstrumentationAttributes.NAME: "agentops", InstrumentationAttributes.VERSION: "0.1.2"}): + with patch( + "agentops.instrumentation.common.attributes.get_common_attributes", + return_value={InstrumentationAttributes.NAME: "agentops", InstrumentationAttributes.VERSION: "0.1.2"}, + ): # Get base span attributes attributes = get_base_span_attributes(mock_span) - + # Verify attributes assert CoreAttributes.SPAN_ID in attributes assert attributes[CoreAttributes.SPAN_ID] == "test_span_id" @@ -206,21 +213,24 @@ def __init__(self): def test_get_base_span_attributes_with_parent(self): """Test getting base span attributes with a span that has a parent.""" + # Create a mock span with parent_id class MockSpan: def __init__(self): self.span_id = "test_span_id" self.trace_id = "test_trace_id" self.parent_id = "test_parent_id" - + mock_span = MockSpan() - + # Mock the common attributes function - with patch("agentops.instrumentation.common.attributes.get_common_attributes", - return_value={InstrumentationAttributes.NAME: "agentops", InstrumentationAttributes.VERSION: "0.1.2"}): + with patch( + "agentops.instrumentation.common.attributes.get_common_attributes", + return_value={InstrumentationAttributes.NAME: "agentops", InstrumentationAttributes.VERSION: "0.1.2"}, + ): # Get base span attributes attributes = get_base_span_attributes(mock_span) - + # Verify attributes assert CoreAttributes.SPAN_ID in attributes assert attributes[CoreAttributes.SPAN_ID] == "test_span_id" @@ -237,13 +247,15 @@ def test_get_base_span_attributes_with_unknown_values(self): """Test getting base span attributes with a span that has unknown values.""" # Create a mock object that doesn't have the expected attributes mock_object = object() - + # Mock the common attributes function - with patch("agentops.instrumentation.common.attributes.get_common_attributes", - return_value={InstrumentationAttributes.NAME: "agentops", InstrumentationAttributes.VERSION: "0.1.2"}): + with patch( + "agentops.instrumentation.common.attributes.get_common_attributes", + return_value={InstrumentationAttributes.NAME: "agentops", InstrumentationAttributes.VERSION: "0.1.2"}, + ): # Get base span attributes attributes = get_base_span_attributes(mock_object) - + # Verify attributes assert CoreAttributes.SPAN_ID in attributes assert attributes[CoreAttributes.SPAN_ID] == "unknown" @@ -257,4 +269,4 @@ def test_get_base_span_attributes_with_unknown_values(self): if __name__ == "__main__": - pytest.main() \ No newline at end of file + pytest.main() diff --git a/tests/unit/instrumentation/openai_agents/test_openai_agents.py b/tests/unit/instrumentation/openai_agents/test_openai_agents.py index 34db816ff..51c8b1810 100644 --- a/tests/unit/instrumentation/openai_agents/test_openai_agents.py +++ b/tests/unit/instrumentation/openai_agents/test_openai_agents.py @@ -185,9 +185,9 @@ def test_tool_calls_span_serialization(self, instrumentation): mock_response_attrs.return_value = { MessageAttributes.COMPLETION_CONTENT.format(i=0): "I'll help you find the current weather for New York City.", 
MessageAttributes.COMPLETION_ROLE.format(i=0): "assistant", - MessageAttributes.TOOL_CALL_ID.format(i=0, j=0): "call_xyz789", - MessageAttributes.TOOL_CALL_NAME.format(i=0, j=0): "get_weather", - MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0, j=0): "{\"location\":\"New York City\",\"units\":\"celsius\"}", + MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=0, j=0): "call_xyz789", + MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=0, j=0): "get_weather", + MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS.format(i=0, j=0): "{\"location\":\"New York City\",\"units\":\"celsius\"}", SpanAttributes.LLM_SYSTEM: "openai", SpanAttributes.LLM_USAGE_PROMPT_TOKENS: 48, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS: 12, @@ -225,12 +225,12 @@ def test_tool_calls_span_serialization(self, instrumentation): captured_attributes[SpanAttributes.LLM_RESPONSE_MODEL] = "gpt-4o" # Verify tool call attributes were set correctly - assert MessageAttributes.TOOL_CALL_NAME.format(i=0, j=0) in captured_attributes - assert captured_attributes[MessageAttributes.TOOL_CALL_NAME.format(i=0, j=0)] == "get_weather" - assert MessageAttributes.TOOL_CALL_ID.format(i=0, j=0) in captured_attributes - assert captured_attributes[MessageAttributes.TOOL_CALL_ID.format(i=0, j=0)] == "call_xyz789" - assert MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0, j=0) in captured_attributes - assert "{\"location\":\"New York City\",\"units\":\"celsius\"}" in captured_attributes[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0, j=0)] + assert MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=0, j=0) in captured_attributes + assert captured_attributes[MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=0, j=0)] == "get_weather" + assert MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=0, j=0) in captured_attributes + assert captured_attributes[MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=0, j=0)] == "call_xyz789" + assert MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS.format(i=0, j=0) in captured_attributes + assert "{\"location\":\"New York City\",\"units\":\"celsius\"}" in captured_attributes[MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS.format(i=0, j=0)] # Verify the text content is also captured assert MessageAttributes.COMPLETION_CONTENT.format(i=0) in captured_attributes diff --git a/tests/unit/instrumentation/openai_agents/test_openai_agents_attributes.py b/tests/unit/instrumentation/openai_agents/test_openai_agents_attributes.py index ac24d7a97..8df5662f3 100644 --- a/tests/unit/instrumentation/openai_agents/test_openai_agents_attributes.py +++ b/tests/unit/instrumentation/openai_agents/test_openai_agents_attributes.py @@ -400,10 +400,10 @@ def __init__(self): assert attrs[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 12 assert attrs[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 60 - # Verify tool call information - tool_id_key = MessageAttributes.TOOL_CALL_ID.format(i=0, j=0) - tool_name_key = MessageAttributes.TOOL_CALL_NAME.format(i=0, j=0) - tool_args_key = MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0, j=0) + # Verify tool call information - note raw_responses is in index 0, output item 0, tool_call 0 + tool_id_key = MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=0, j=0) + tool_name_key = MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=0, j=0) + tool_args_key = MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS.format(i=0, j=0) assert attrs[tool_id_key] == "call_xyz789" assert attrs[tool_name_key] == "get_weather" @@ -510,13 +510,13 @@ def test_chat_completions_with_tool_calls_from_fixture(self): attrs = 
get_chat_completions_attributes(OPENAI_CHAT_TOOL_CALLS) # Verify tool call information is extracted - assert MessageAttributes.TOOL_CALL_ID.format(i=0, j=0) in attrs - assert MessageAttributes.TOOL_CALL_NAME.format(i=0, j=0) in attrs - assert MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0, j=0) in attrs + assert MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=0, j=0) in attrs + assert MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=0, j=0) in attrs + assert MessageAttributes.COMPLETION_TOOL_CALL_ARGUMENTS.format(i=0, j=0) in attrs # Verify values match fixture data (specific values will depend on your fixture content) - tool_id = attrs[MessageAttributes.TOOL_CALL_ID.format(i=0, j=0)] - tool_name = attrs[MessageAttributes.TOOL_CALL_NAME.format(i=0, j=0)] + tool_id = attrs[MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=0, j=0)] + tool_name = attrs[MessageAttributes.COMPLETION_TOOL_CALL_NAME.format(i=0, j=0)] assert tool_id is not None and len(tool_id) > 0 assert tool_name is not None and len(tool_name) > 0 diff --git a/tests/unit/instrumentation/openai_core/test_response_attributes.py b/tests/unit/instrumentation/openai_core/test_response_attributes.py index 3a64d29d5..d98903e05 100644 --- a/tests/unit/instrumentation/openai_core/test_response_attributes.py +++ b/tests/unit/instrumentation/openai_core/test_response_attributes.py @@ -17,11 +17,11 @@ get_response_output_attributes, get_response_output_message_attributes, get_response_output_text_attributes, - get_response_output_reasoning_attributes, - get_response_output_tool_attributes, get_response_tools_attributes, get_response_usage_attributes, - get_response_reasoning_attributes + get_response_tool_web_search_attributes, + get_response_tool_file_search_attributes, + get_response_tool_computer_attributes ) from agentops.semconv import ( SpanAttributes, @@ -94,7 +94,103 @@ class MockFunctionTool: def __init__(self, data): for key, value in data.items(): setattr(self, key, value) - self.type = "function" + if not hasattr(self, "type"): + self.type = "function" + self.__dict__.update(data) + + +class MockWebSearchTool: + """Mock WebSearchTool object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + if not hasattr(self, "type"): + self.type = "web_search_preview" + self.__dict__.update(data) + + +class MockFileSearchTool: + """Mock FileSearchTool object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + if not hasattr(self, "type"): + self.type = "file_search" + self.__dict__.update(data) + + +class MockComputerTool: + """Mock ComputerTool object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + if not hasattr(self, "type"): + self.type = "computer_use_preview" + self.__dict__.update(data) + + +class MockUserLocation: + """Mock UserLocation object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + self.__dict__.update(data) + + +class MockFilters: + """Mock Filters object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + self.__dict__.update(data) + + +class MockRankingOptions: + """Mock RankingOptions object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + self.__dict__.update(data) + + +class MockFunctionWebSearch: + """Mock ResponseFunctionWebSearch object for testing""" + def __init__(self, 
data): + for key, value in data.items(): + setattr(self, key, value) + if not hasattr(self, "type"): + self.type = "web_search_call" + self.__dict__.update(data) + + +class MockFileSearchToolCall: + """Mock ResponseFileSearchToolCall object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + if not hasattr(self, "type"): + self.type = "file_search_call" + self.__dict__.update(data) + + +class MockComputerToolCall: + """Mock ResponseComputerToolCall object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + if not hasattr(self, "type"): + self.type = "computer_call" + self.__dict__.update(data) + + +class MockReasoningItem: + """Mock ResponseReasoningItem object for testing""" + def __init__(self, data): + for key, value in data.items(): + setattr(self, key, value) + if not hasattr(self, "type"): + self.type = "reasoning" + self.__dict__.update(data) class MockFunctionToolCall: @@ -295,21 +391,70 @@ def test_get_response_output_text_attributes(self): """Test extraction of attributes from output text""" # Create a mock text content text = MockOutputText({ - 'annotations': [], - 'text': 'The capital of France is Paris.', + 'annotations': [ + { + "end_index": 636, + "start_index": 538, + "title": "5 AI Agent Frameworks Compared", + "type": "url_citation", + "url": "https://www.kdnuggets.com/5-ai-agent-frameworks-compared" + } + ], + 'text': 'CrewAI is the top AI agent library.', 'type': 'output_text' }) - # Extract attributes - attributes = get_response_output_text_attributes(0, text) + # The function doesn't use the mock directly but extracts attributes from it + # Using _extract_attributes_from_mapping_with_index internally + # We'll test by using patch to simulate the extraction - # Check attributes - assert MessageAttributes.COMPLETION_CONTENT.format(i=0) in attributes - assert attributes[MessageAttributes.COMPLETION_CONTENT.format(i=0)] == 'The capital of France is Paris.' 
+ with patch('agentops.instrumentation.openai.attributes.response._extract_attributes_from_mapping_with_index') as mock_extract: + # Set up the mock to return expected attributes + expected_attributes = { + MessageAttributes.COMPLETION_ANNOTATION_END_INDEX.format(i=0, j=0): 636, + MessageAttributes.COMPLETION_ANNOTATION_START_INDEX.format(i=0, j=0): 538, + MessageAttributes.COMPLETION_ANNOTATION_TITLE.format(i=0, j=0): "5 AI Agent Frameworks Compared", + MessageAttributes.COMPLETION_ANNOTATION_TYPE.format(i=0, j=0): "url_citation", + MessageAttributes.COMPLETION_ANNOTATION_URL.format(i=0, j=0): "https://www.kdnuggets.com/5-ai-agent-frameworks-compared", + MessageAttributes.COMPLETION_CONTENT.format(i=0): 'CrewAI is the top AI agent library.', + MessageAttributes.COMPLETION_TYPE.format(i=0): 'output_text' + } + mock_extract.return_value = expected_attributes + + # Call the function + attributes = get_response_output_text_attributes(0, text) + + # Verify mock was called with correct arguments + mock_extract.assert_called_once() + + # Check that the return value matches our expected attributes + assert attributes == expected_attributes - def test_get_response_output_tool_attributes(self): - """Test extraction of attributes from output tool""" - # Create a mock tool call + def test_get_response_output_attributes(self): + """Test extraction of attributes from output items with all output types""" + # Create a mock response output list with all different output types + message = MockOutputMessage({ + 'id': 'msg_12345', + 'content': [ + MockOutputText({ + 'text': 'This is a test message', + 'type': 'output_text', + 'annotations': [ + { + "end_index": 636, + "start_index": 538, + "title": "Test title", + "type": "url_citation", + "url": "www.test.com", + } + ] + }) + ], + 'role': 'assistant', + 'status': 'completed', + 'type': 'message' + }) + tool_call = MockFunctionToolCall({ 'id': 'call_67890', 'name': 'get_weather', @@ -317,52 +462,200 @@ 'arguments': '{"location":"Paris"}', 'type': 'function' }) - # Extract attributes - attributes = get_response_output_tool_attributes(0, tool_call) + web_search = MockFunctionWebSearch({ + 'id': 'ws_12345', + 'status': 'completed', + 'type': 'web_search_call' + }) + + file_search = MockFileSearchToolCall({ + 'id': 'fsc_12345', + 'queries': ['search term'], + 'status': 'completed', + 'type': 'file_search_call' + }) + + computer_call = MockComputerToolCall({ + 'id': 'comp_12345', + 'status': 'completed', + 'type': 'computer_call' + }) + + reasoning_item = MockReasoningItem({ + 'id': 'reason_12345', + 'status': 'completed', + 'type': 'reasoning' + }) + + # Create an unrecognized output item to test error handling + unrecognized_item = MagicMock() + unrecognized_item.type = 'unknown_type' - # Check attributes - assert MessageAttributes.FUNCTION_CALL_ID.format(i=0) in attributes - assert attributes[MessageAttributes.FUNCTION_CALL_ID.format(i=0)] == 'call_67890' - assert MessageAttributes.FUNCTION_CALL_NAME.format(i=0) in attributes - assert attributes[MessageAttributes.FUNCTION_CALL_NAME.format(i=0)] == 'get_weather' - assert MessageAttributes.FUNCTION_CALL_ARGUMENTS.format(i=0) in attributes - assert attributes[MessageAttributes.FUNCTION_CALL_ARGUMENTS.format(i=0)] == '{"location":"Paris"}' - assert MessageAttributes.FUNCTION_CALL_TYPE.format(i=0) in attributes - assert attributes[MessageAttributes.FUNCTION_CALL_TYPE.format(i=0)] == 'function' + # Patch all the necessary type checks and logger + with
patch('agentops.instrumentation.openai.attributes.response.ResponseOutputMessage', MockOutputMessage), \
+             patch('agentops.instrumentation.openai.attributes.response.ResponseOutputText', MockOutputText), \
+             patch('agentops.instrumentation.openai.attributes.response.ResponseFunctionToolCall', MockFunctionToolCall), \
+             patch('agentops.instrumentation.openai.attributes.response.ResponseFunctionWebSearch', MockFunctionWebSearch), \
+             patch('agentops.instrumentation.openai.attributes.response.ResponseFileSearchToolCall', MockFileSearchToolCall), \
+             patch('agentops.instrumentation.openai.attributes.response.ResponseComputerToolCall', MockComputerToolCall), \
+             patch('agentops.instrumentation.openai.attributes.response.ResponseReasoningItem', MockReasoningItem), \
+             patch('agentops.instrumentation.openai.attributes.response.logger.debug') as mock_logger:
+
+            # Test with an output list containing all different types of output items
+            output = [message, tool_call, web_search, file_search, computer_call, reasoning_item, unrecognized_item]
+
+            # Call the function
+            attributes = get_response_output_attributes(output)
+
+            # Check that it extracted attributes from all items
+            assert isinstance(attributes, dict)
+
+            # Check message attributes were extracted (index 0)
+            assert MessageAttributes.COMPLETION_ROLE.format(i=0) in attributes
+            assert attributes[MessageAttributes.COMPLETION_ROLE.format(i=0)] == 'assistant'
+            assert MessageAttributes.COMPLETION_CONTENT.format(i=0) in attributes
+            assert attributes[MessageAttributes.COMPLETION_CONTENT.format(i=0)] == 'This is a test message'
+
+            # Check function tool call attributes were extracted (index 1)
+            tool_attr_key = MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=1, j=0)
+            assert tool_attr_key in attributes
+            assert attributes[tool_attr_key] == 'call_67890'
+
+            # Check web search attributes were extracted (index 2)
+            web_attr_key = MessageAttributes.COMPLETION_TOOL_CALL_ID.format(i=2, j=0)
+            assert web_attr_key in attributes
+            assert attributes[web_attr_key] == 'ws_12345'
+
+            # Verify that logger was called for unrecognized item
+            assert any(call.args[0].startswith('[agentops.instrumentation.openai.response]')
+                       for call in mock_logger.call_args_list)
 
     def test_get_response_tools_attributes(self):
         """Test extraction of attributes from tools list"""
-        # Simplify the test to just verify the function can be called without error
+        # Create a mock function tool
+        function_tool = MockFunctionTool({
+            'name': 'get_weather',
+            'parameters': {'properties': {'location': {'type': 'string'}}, 'required': ['location']},
+            'description': 'Get weather information for a location',
+            'type': 'function',
+            'strict': True
+        })
 
-        # Patch the FunctionTool class to make testing simpler
+        # Patch all tool types to make testing simpler
         with patch('agentops.instrumentation.openai.attributes.response.FunctionTool', MockFunctionTool):
-            # Test with empty list for simplicity
-            tools = []
+            with patch('agentops.instrumentation.openai.attributes.response.WebSearchTool', MagicMock):
+                with patch('agentops.instrumentation.openai.attributes.response.FileSearchTool', MagicMock):
+                    with patch('agentops.instrumentation.openai.attributes.response.ComputerTool', MagicMock):
+                        # Test with a function tool
+                        tools = [function_tool]
+
+                        # Call the function
+                        result = get_response_tools_attributes(tools)
+
+                        # Verify extracted attributes
+                        assert isinstance(result, dict)
+                        assert MessageAttributes.TOOL_CALL_TYPE.format(i=0) in result
+                        assert result[MessageAttributes.TOOL_CALL_TYPE.format(i=0)] == 'function'
+                        assert MessageAttributes.TOOL_CALL_NAME.format(i=0) in result
+                        assert result[MessageAttributes.TOOL_CALL_NAME.format(i=0)] == 'get_weather'
+                        assert MessageAttributes.TOOL_CALL_DESCRIPTION.format(i=0) in result
+                        assert result[MessageAttributes.TOOL_CALL_DESCRIPTION.format(i=0)] == 'Get weather information for a location'
+
+    def test_get_response_tool_web_search_attributes(self):
+        """Test extraction of attributes from web search tool"""
+        # Create a mock web search tool
+        user_location = MockUserLocation({
+            'type': 'approximate',
+            'country': 'US'
+        })
+
+        web_search_tool = MockWebSearchTool({
+            'type': 'web_search_preview',
+            'search_context_size': 'medium',
+            'user_location': user_location
+        })
+
+        # Call the function directly
+        with patch('agentops.instrumentation.openai.attributes.response.WebSearchTool', MockWebSearchTool):
+            result = get_response_tool_web_search_attributes(web_search_tool, 0)
 
-        # Call the function
-        result = get_response_tools_attributes(tools)
+        # Verify attributes
+        assert isinstance(result, dict)
+        assert MessageAttributes.TOOL_CALL_NAME.format(i=0) in result
+        assert result[MessageAttributes.TOOL_CALL_NAME.format(i=0)] == 'web_search_preview'
+        assert MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0) in result
+        # Parameters should be serialized
+        assert 'search_context_size' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+        assert 'user_location' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+
+    def test_get_response_tool_file_search_attributes(self):
+        """Test extraction of attributes from file search tool"""
+        # Create a mock file search tool
+        filters = MockFilters({
+            'key': 'value'
+        })
+
+        ranking_options = MockRankingOptions({
+            'ranker': 'default-2024-11-15',
+            'score_threshold': 0.8
+        })
+
+        file_search_tool = MockFileSearchTool({
+            'type': 'file_search',
+            'vector_store_ids': ['store_123', 'store_456'],
+            'filters': filters,
+            'max_num_results': 10,
+            'ranking_options': ranking_options
+        })
+
+        # Call the function directly
+        with patch('agentops.instrumentation.openai.attributes.response.FileSearchTool', MockFileSearchTool):
+            result = get_response_tool_file_search_attributes(file_search_tool, 0)
 
-        # Verify basic expected attributes
+        # Verify attributes
         assert isinstance(result, dict)
-
+        assert MessageAttributes.TOOL_CALL_TYPE.format(i=0) in result
+        assert result[MessageAttributes.TOOL_CALL_TYPE.format(i=0)] == 'file_search'
+        assert MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0) in result
+        # Parameters should be serialized
+        assert 'vector_store_ids' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+        assert 'filters' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+        assert 'max_num_results' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+        assert 'ranking_options' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+
+    def test_get_response_tool_computer_attributes(self):
+        """Test extraction of attributes from computer tool"""
+        # Create a mock computer tool
+        computer_tool = MockComputerTool({
+            'type': 'computer_use_preview',
+            'display_height': 1080.0,
+            'display_width': 1920.0,
+            'environment': 'mac'
+        })
+
+        # Call the function directly
+        with patch('agentops.instrumentation.openai.attributes.response.ComputerTool', MockComputerTool):
+            result = get_response_tool_computer_attributes(computer_tool, 0)
+
+        # Verify attributes
+        assert isinstance(result, dict)
+        assert MessageAttributes.TOOL_CALL_TYPE.format(i=0) in result
+        assert result[MessageAttributes.TOOL_CALL_TYPE.format(i=0)] == 'computer_use_preview'
+        assert MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0) in result
+        # Parameters should be serialized
+        assert 'display_height' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+        assert 'display_width' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+        assert 'environment' in result[MessageAttributes.TOOL_CALL_ARGUMENTS.format(i=0)]
+
     def test_get_response_usage_attributes(self):
         """Test extraction of attributes from usage data"""
-        # Simplify test to verify function can be called without error
+        # Create a more comprehensive test for usage attributes
 
         # Patch the OutputTokensDetails class to make testing simpler
         with patch('agentops.instrumentation.openai.attributes.response.OutputTokensDetails', MockOutputTokensDetails):
-            # Create a minimal mock usage object with all necessary attributes
-            usage = MockResponseUsage({
-                'input_tokens': 50,
-                'output_tokens': 20,
-                'total_tokens': 70,
-                'output_tokens_details': MockOutputTokensDetails({
-                    'reasoning_tokens': 5
-                }),
-                'input_tokens_details': {
-                    'cached_tokens': 10
-                },
-                '__dict__': {
+            with patch('agentops.instrumentation.openai.attributes.response.InputTokensDetails', MagicMock):
+                # Test with all fields
+                usage = MockResponseUsage({
                     'input_tokens': 50,
                     'output_tokens': 20,
                     'total_tokens': 70,
@@ -371,34 +664,64 @@ def test_get_response_usage_attributes(self):
                     }),
                     'input_tokens_details': {
                         'cached_tokens': 10
+                    },
+                    '__dict__': {
+                        'input_tokens': 50,
+                        'output_tokens': 20,
+                        'total_tokens': 70,
+                        'output_tokens_details': MockOutputTokensDetails({
+                            'reasoning_tokens': 5
+                        }),
+                        'input_tokens_details': {
+                            'cached_tokens': 10
+                        }
+                    }
-                }
-            })
-
-            # Call the function
-            result = get_response_usage_attributes(usage)
-
-            # Verify it returns a dictionary with at least these basic attributes
-            assert isinstance(result, dict)
-            assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS in result
-            assert result[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 50
-            assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS in result
-            assert result[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 20
-            assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS in result
-            assert result[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 70
-
-    def test_get_response_reasoning_attributes(self):
-        """Test extraction of attributes from reasoning data"""
-        # Create mock reasoning object
-        reasoning = MockReasoning({
-            'effort': 'medium',
-            'generate_summary': True
-        })
-
-        # Extract attributes - currently no attributes are mapped for reasoning
-        attributes = get_response_reasoning_attributes(reasoning)
-
-        # The current implementation returns an empty dictionary because
-        # there are no defined attributes in RESPONSE_REASONING_ATTRIBUTES
-        assert isinstance(attributes, dict)
-        assert len(attributes) == 0  # Currently no attributes are mapped
\ No newline at end of file
+                })
+
+                # Test without token details (edge cases)
+                usage_without_details = MockResponseUsage({
+                    'input_tokens': 30,
+                    'output_tokens': 15,
+                    'total_tokens': 45,
+                    'output_tokens_details': None,
+                    'input_tokens_details': None,
+                    '__dict__': {
+                        'input_tokens': 30,
+                        'output_tokens': 15,
+                        'total_tokens': 45,
+                        'output_tokens_details': None,
+                        'input_tokens_details': None
+                    }
+                })
+
+                # Call the function for complete usage
+                result = get_response_usage_attributes(usage)
+
+                # Verify it returns a dictionary with all attributes
+                assert isinstance(result, dict)
+                assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS in result
+                assert result[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 50
+                assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS in result
+                assert result[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 20
+                assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS in result
+                assert result[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 70
+                assert SpanAttributes.LLM_USAGE_REASONING_TOKENS in result
+                assert result[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 5
+                assert SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS in result
+                assert result[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 10
+
+                # Call the function for usage without details
+                result_without_details = get_response_usage_attributes(usage_without_details)
+
+                # Verify basic attributes are still present
+                assert isinstance(result_without_details, dict)
+                assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS in result_without_details
+                assert result_without_details[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 30
+                assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS in result_without_details
+                assert result_without_details[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 15
+                assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS in result_without_details
+                assert result_without_details[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 45
+                # Detailed attributes shouldn't be present
+                assert SpanAttributes.LLM_USAGE_REASONING_TOKENS not in result_without_details
+                assert SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS not in result_without_details
+
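Note on the fixtures above: the Mock* helpers (MockFunctionTool, MockWebSearchTool, MockResponseUsage, and friends) are referenced but not defined in this hunk. They appear to follow a simple attribute-bag pattern, where each mock is built from a dict and exposes the entries as instance attributes, which is all the get_response_*_attributes helpers read. A minimal sketch under that assumption; the AttributeBag name is hypothetical and not part of the test module:

    class AttributeBag:
        """Hypothetical stand-in for the Mock* fixtures (illustrative only)."""

        def __init__(self, data: dict):
            # Expose each dict entry as an instance attribute so code that
            # reads e.g. `usage.input_tokens` works against plain test data.
            for key, value in data.items():
                setattr(self, key, value)

    # Usage mirroring the fixtures above: build a usage-like object and read
    # the same fields the extraction helpers consume.
    usage = AttributeBag({'input_tokens': 50, 'output_tokens': 20, 'total_tokens': 70})
    assert usage.input_tokens == 50
    assert usage.total_tokens == 70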