diff --git a/.github/workflows/test-integrations-ai.yml b/.github/workflows/test-integrations-ai.yml index 4aa0f36b77..e81d507d27 100644 --- a/.github/workflows/test-integrations-ai.yml +++ b/.github/workflows/test-integrations-ai.yml @@ -66,6 +66,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh "py${{ matrix.python-version }}-openai-latest" + - name: Test openai_agents latest + run: | + set -x # print commands that are executed + ./scripts/runtox.sh "py${{ matrix.python-version }}-openai_agents-latest" - name: Test huggingface_hub latest run: | set -x # print commands that are executed @@ -141,6 +145,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai" + - name: Test openai_agents pinned + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai_agents" - name: Test huggingface_hub pinned run: | set -x # print commands that are executed diff --git a/pyproject.toml b/pyproject.toml index 5e16b30793..e5eae2c21f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,6 +183,10 @@ ignore_missing_imports = true module = "grpc.*" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "agents.*" +ignore_missing_imports = true + # # Tool: Flake8 # diff --git a/scripts/populate_tox/config.py b/scripts/populate_tox/config.py index 4664845c7b..411d7fe666 100644 --- a/scripts/populate_tox/config.py +++ b/scripts/populate_tox/config.py @@ -139,6 +139,12 @@ "loguru": { "package": "loguru", }, + "openai_agents": { + "package": "openai-agents", + "deps": { + "*": ["pytest-asyncio"], + }, + }, "openfeature": { "package": "openfeature-sdk", }, diff --git a/scripts/populate_tox/tox.jinja b/scripts/populate_tox/tox.jinja index f95a913fd9..ac14bdb02a 100644 --- a/scripts/populate_tox/tox.jinja +++ b/scripts/populate_tox/tox.jinja @@ -400,6 +400,7 @@ setenv = litestar: TESTPATH=tests/integrations/litestar loguru: TESTPATH=tests/integrations/loguru openai: TESTPATH=tests/integrations/openai + openai_agents: TESTPATH=tests/integrations/openai_agents openfeature: TESTPATH=tests/integrations/openfeature opentelemetry: TESTPATH=tests/integrations/opentelemetry potel: TESTPATH=tests/integrations/opentelemetry diff --git a/scripts/split_tox_gh_actions/split_tox_gh_actions.py b/scripts/split_tox_gh_actions/split_tox_gh_actions.py index 3fbc0ec1c5..af1ff84cd6 100755 --- a/scripts/split_tox_gh_actions/split_tox_gh_actions.py +++ b/scripts/split_tox_gh_actions/split_tox_gh_actions.py @@ -63,6 +63,7 @@ "cohere", "langchain", "openai", + "openai_agents", "huggingface_hub", ], "Cloud": [ diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 34ae5bdfd8..53148a36df 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -108,16 +108,39 @@ class SPANDATA: See: https://develop.sentry.dev/sdk/performance/span-data-conventions/ """ + AI_CITATIONS = "ai.citations" + """ + References or sources cited by the AI model in its response. + Example: ["Smith et al. 2020", "Jones 2019"] + """ + + AI_DOCUMENTS = "ai.documents" + """ + Documents or content chunks used as context for the AI model. + Example: ["doc1.txt", "doc2.pdf"] + """ + + AI_FINISH_REASON = "ai.finish_reason" + """ + The reason why the model stopped generating. + Example: "length" + """ + AI_FREQUENCY_PENALTY = "ai.frequency_penalty" """ Used to reduce repetitiveness of generated tokens. 
Example: 0.5 """ - AI_PRESENCE_PENALTY = "ai.presence_penalty" + AI_FUNCTION_CALL = "ai.function_call" """ - Used to reduce repetitiveness of generated tokens. - Example: 0.5 + For an AI model call, the function that was called. This is deprecated for OpenAI, and replaced by tool_calls + """ + + AI_GENERATION_ID = "ai.generation_id" + """ + Unique identifier for the completion. + Example: "gen_123abc" """ AI_INPUT_MESSAGES = "ai.input_messages" @@ -126,10 +149,9 @@ class SPANDATA: Example: [{"role": "user", "message": "hello"}] """ - AI_MODEL_ID = "ai.model_id" + AI_LOGIT_BIAS = "ai.logit_bias" """ - The unique descriptor of the model being execugted - Example: gpt-4 + For an AI model call, the logit bias """ AI_METADATA = "ai.metadata" @@ -138,28 +160,94 @@ class SPANDATA: Example: {"executed_function": "add_integers"} """ - AI_TAGS = "ai.tags" + AI_MODEL_ID = "ai.model_id" """ - Tags that describe an AI pipeline step. - Example: {"executed_function": "add_integers"} + The unique descriptor of the model being execugted + Example: gpt-4 + """ + + AI_PIPELINE_NAME = "ai.pipeline.name" + """ + Name of the AI pipeline or chain being executed. + Example: "qa-pipeline" + """ + + AI_PREAMBLE = "ai.preamble" + """ + For an AI model call, the preamble parameter. + Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style. + Example: "You are now a clown." + """ + + AI_PRESENCE_PENALTY = "ai.presence_penalty" + """ + Used to reduce repetitiveness of generated tokens. + Example: 0.5 + """ + + AI_RAW_PROMPTING = "ai.raw_prompting" + """ + Minimize pre-processing done to the prompt sent to the LLM. + Example: true + """ + + AI_RESPONSE_FORMAT = "ai.response_format" + """ + For an AI model call, the format of the response + """ + + AI_RESPONSES = "ai.responses" + """ + The responses to an AI model call. Always as a list. + Example: ["hello", "world"] + """ + + AI_SEARCH_QUERIES = "ai.search_queries" + """ + Queries used to search for relevant context or documents. + Example: ["climate change effects", "renewable energy"] + """ + + AI_SEARCH_REQUIRED = "ai.is_search_required" + """ + Boolean indicating if the model needs to perform a search. + Example: true + """ + + AI_SEARCH_RESULTS = "ai.search_results" + """ + Results returned from search queries for context. + Example: ["Result 1", "Result 2"] + """ + + AI_SEED = "ai.seed" + """ + The seed, ideally models given the same seed and same other parameters will produce the exact same output. + Example: 123.45 """ AI_STREAMING = "ai.streaming" """ - Whether or not the AI model call's repsonse was streamed back asynchronously + Whether or not the AI model call's response was streamed back asynchronously Example: true """ + AI_TAGS = "ai.tags" + """ + Tags that describe an AI pipeline step. + Example: {"executed_function": "add_integers"} + """ + AI_TEMPERATURE = "ai.temperature" """ For an AI model call, the temperature parameter. Temperature essentially means how random the output will be. Example: 0.5 """ - AI_TOP_P = "ai.top_p" + AI_TEXTS = "ai.texts" """ - For an AI model call, the top_p parameter. Top_p essentially controls how random the output will be. - Example: 0.5 + Raw text inputs provided to the model. + Example: ["What is machine learning?"] """ AI_TOP_K = "ai.top_k" @@ -168,9 +256,10 @@ class SPANDATA: Example: 35 """ - AI_FUNCTION_CALL = "ai.function_call" + AI_TOP_P = "ai.top_p" """ - For an AI model call, the function that was called. 
This is deprecated for OpenAI, and replaced by tool_calls + For an AI model call, the top_p parameter. Top_p essentially controls how random the output will be. + Example: 0.5 """ AI_TOOL_CALLS = "ai.tool_calls" @@ -183,168 +272,236 @@ class SPANDATA: For an AI model call, the functions that are available """ - AI_RESPONSE_FORMAT = "ai.response_format" + AI_WARNINGS = "ai.warnings" """ - For an AI model call, the format of the response + Warning messages generated during model execution. + Example: ["Token limit exceeded"] """ - AI_LOGIT_BIAS = "ai.logit_bias" + CACHE_HIT = "cache.hit" """ - For an AI model call, the logit bias + A boolean indicating whether the requested data was found in the cache. + Example: true """ - AI_PREAMBLE = "ai.preamble" + CACHE_ITEM_SIZE = "cache.item_size" """ - For an AI model call, the preamble parameter. - Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style. - Example: "You are now a clown." + The size of the requested data in bytes. + Example: 58 """ - AI_RAW_PROMPTING = "ai.raw_prompting" + CACHE_KEY = "cache.key" """ - Minimize pre-processing done to the prompt sent to the LLM. - Example: true + The key of the requested data. + Example: template.cache.some_item.867da7e2af8e6b2f3aa7213a4080edb3 """ - AI_RESPONSES = "ai.responses" + + CODE_FILEPATH = "code.filepath" """ - The responses to an AI model call. Always as a list. - Example: ["hello", "world"] + The source code file name that identifies the code unit as uniquely as possible (preferably an absolute file path). + Example: "/app/myapplication/http/handler/server.py" """ - AI_SEED = "ai.seed" + CODE_FUNCTION = "code.function" """ - The seed, ideally models given the same seed and same other parameters will produce the exact same output. - Example: 123.45 + The method or function name, or equivalent (usually rightmost part of the code unit's name). + Example: "server_request" """ - AI_CITATIONS = "ai.citations" + CODE_LINENO = "code.lineno" """ - References or sources cited by the AI model in its response. - Example: ["Smith et al. 2020", "Jones 2019"] + The line number in `code.filepath` best representing the operation. It SHOULD point within the code unit named in `code.function`. + Example: 42 """ - AI_DOCUMENTS = "ai.documents" + CODE_NAMESPACE = "code.namespace" """ - Documents or content chunks used as context for the AI model. - Example: ["doc1.txt", "doc2.pdf"] + The "namespace" within which `code.function` is defined. Usually the qualified class or module name, such that `code.namespace` + some separator + `code.function` form a unique identifier for the code unit. + Example: "http.handler" """ - AI_SEARCH_QUERIES = "ai.search_queries" + DB_MONGODB_COLLECTION = "db.mongodb.collection" """ - Queries used to search for relevant context or documents. - Example: ["climate change effects", "renewable energy"] + The MongoDB collection being accessed within the database. + See: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/database/mongodb.md#attributes + Example: public.users; customers """ - AI_SEARCH_RESULTS = "ai.search_results" + DB_NAME = "db.name" """ - Results returned from search queries for context. - Example: ["Result 1", "Result 2"] + The name of the database being accessed. For commands that switch the database, this should be set to the target database (even if the command fails). 
+ Example: myDatabase """ - AI_GENERATION_ID = "ai.generation_id" + DB_OPERATION = "db.operation" """ - Unique identifier for the completion. - Example: "gen_123abc" + The name of the operation being executed, e.g. the MongoDB command name such as findAndModify, or the SQL keyword. + See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md + Example: findAndModify, HMSET, SELECT """ - AI_SEARCH_REQUIRED = "ai.is_search_required" + DB_SYSTEM = "db.system" """ - Boolean indicating if the model needs to perform a search. - Example: true + An identifier for the database management system (DBMS) product being used. + See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md + Example: postgresql """ - AI_FINISH_REASON = "ai.finish_reason" + DB_USER = "db.user" """ - The reason why the model stopped generating. - Example: "length" + The name of the database user used for connecting to the database. + See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md + Example: my_user """ - AI_PIPELINE_NAME = "ai.pipeline.name" + GEN_AI_AGENT_NAME = "gen_ai.agent.name" """ - Name of the AI pipeline or chain being executed. - Example: "qa-pipeline" + The name of the agent being used. + Example: "ResearchAssistant" """ - AI_TEXTS = "ai.texts" + GEN_AI_CHOICE = "gen_ai.choice" """ - Raw text inputs provided to the model. - Example: ["What is machine learning?"] + The model's response message. + Example: "The weather in Paris is rainy and overcast, with temperatures around 57°F" """ - AI_WARNINGS = "ai.warnings" + GEN_AI_OPERATION_NAME = "gen_ai.operation.name" """ - Warning messages generated during model execution. - Example: ["Token limit exceeded"] + The name of the operation being performed. + Example: "chat" """ - DB_NAME = "db.name" + GEN_AI_RESPONSE_TEXT = "gen_ai.response.text" """ - The name of the database being accessed. For commands that switch the database, this should be set to the target database (even if the command fails). - Example: myDatabase + The model's response text messages. + Example: ["The weather in Paris is rainy and overcast, with temperatures around 57°F", "The weather in London is sunny and warm, with temperatures around 65°F"] """ - DB_USER = "db.user" + GEN_AI_RESPONSE_TOOL_CALLS = "gen_ai.response.tool_calls" """ - The name of the database user used for connecting to the database. - See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md - Example: my_user + The tool calls in the model's response. + Example: [{"name": "get_weather", "arguments": {"location": "Paris"}}] """ - DB_OPERATION = "db.operation" + GEN_AI_REQUEST_AVAILABLE_TOOLS = "gen_ai.request.available_tools" """ - The name of the operation being executed, e.g. the MongoDB command name such as findAndModify, or the SQL keyword. - See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md - Example: findAndModify, HMSET, SELECT + The available tools for the model. 
+ Example: [{"name": "get_weather", "description": "Get the weather for a given location"}, {"name": "get_news", "description": "Get the news for a given topic"}] """ - DB_SYSTEM = "db.system" + GEN_AI_REQUEST_FREQUENCY_PENALTY = "gen_ai.request.frequency_penalty" """ - An identifier for the database management system (DBMS) product being used. - See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md - Example: postgresql + The frequency penalty parameter used to reduce repetitiveness of generated tokens. + Example: 0.1 """ - DB_MONGODB_COLLECTION = "db.mongodb.collection" + GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens" """ - The MongoDB collection being accessed within the database. - See: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/database/mongodb.md#attributes - Example: public.users; customers + The maximum number of tokens to generate in the response. + Example: 2048 """ - CACHE_HIT = "cache.hit" + GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages" """ - A boolean indicating whether the requested data was found in the cache. - Example: true + The messages passed to the model. The "content" can be a string or an array of objects. + Example: [{role: "system", "content: "Generate a random number."}, {"role": "user", "content": [{"text": "Generate a random number between 0 and 10.", "type": "text"}]}] """ - CACHE_ITEM_SIZE = "cache.item_size" + GEN_AI_REQUEST_MODEL = "gen_ai.request.model" """ - The size of the requested data in bytes. - Example: 58 + The model identifier being used for the request. + Example: "gpt-4-turbo-preview" """ - CACHE_KEY = "cache.key" + GEN_AI_REQUEST_PRESENCE_PENALTY = "gen_ai.request.presence_penalty" """ - The key of the requested data. - Example: template.cache.some_item.867da7e2af8e6b2f3aa7213a4080edb3 + The presence penalty parameter used to reduce repetitiveness of generated tokens. + Example: 0.1 """ - NETWORK_PEER_ADDRESS = "network.peer.address" + GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature" """ - Peer address of the network connection - IP address or Unix domain socket name. - Example: 10.1.2.80, /tmp/my.sock, localhost + The temperature parameter used to control randomness in the output. + Example: 0.7 """ - NETWORK_PEER_PORT = "network.peer.port" + GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p" """ - Peer port number of the network connection. - Example: 6379 + The top_p parameter used to control diversity via nucleus sampling. + Example: 1.0 """ - HTTP_QUERY = "http.query" + GEN_AI_SYSTEM = "gen_ai.system" """ - The Query string present in the URL. - Example: ?foo=bar&bar=baz + The name of the AI system being used. + Example: "openai" + """ + + GEN_AI_TOOL_DESCRIPTION = "gen_ai.tool.description" + """ + The description of the tool being used. + Example: "Searches the web for current information about a topic" + """ + + GEN_AI_TOOL_INPUT = "gen_ai.tool.input" + """ + The input of the tool being used. + Example: {"location": "Paris"} + """ + + GEN_AI_TOOL_NAME = "gen_ai.tool.name" + """ + The name of the tool being used. + Example: "web_search" + """ + + GEN_AI_TOOL_OUTPUT = "gen_ai.tool.output" + """ + The output of the tool being used. + Example: "rainy, 57°F" + """ + + GEN_AI_TOOL_TYPE = "gen_ai.tool.type" + """ + The type of tool being used. + Example: "function" + """ + + GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens" + """ + The number of tokens in the input. 
+ Example: 150 + """ + + GEN_AI_USAGE_INPUT_TOKENS_CACHED = "gen_ai.usage.input_tokens.cached" + """ + The number of cached tokens in the input. + Example: 50 + """ + + GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens" + """ + The number of tokens in the output. + Example: 250 + """ + + GEN_AI_USAGE_OUTPUT_TOKENS_REASONING = "gen_ai.usage.output_tokens.reasoning" + """ + The number of tokens used for reasoning in the output. + Example: 75 + """ + + GEN_AI_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens" + """ + The total number of tokens used (input + output). + Example: 400 + """ + + GEN_AI_USER_MESSAGE = "gen_ai.user.message" + """ + The user message passed to the model. + Example: "What's the weather in Paris?" """ HTTP_FRAGMENT = "http.fragment" @@ -359,6 +516,12 @@ class SPANDATA: Example: GET """ + HTTP_QUERY = "http.query" + """ + The Query string present in the URL. + Example: ?foo=bar&bar=baz + """ + HTTP_STATUS_CODE = "http.response.status_code" """ The HTTP status code as an integer. @@ -376,14 +539,14 @@ class SPANDATA: The message's identifier. """ - MESSAGING_MESSAGE_RETRY_COUNT = "messaging.message.retry.count" + MESSAGING_MESSAGE_RECEIVE_LATENCY = "messaging.message.receive.latency" """ - Number of retries/attempts to process a message. + The latency between when the task was enqueued and when it was started to be processed. """ - MESSAGING_MESSAGE_RECEIVE_LATENCY = "messaging.message.receive.latency" + MESSAGING_MESSAGE_RETRY_COUNT = "messaging.message.retry.count" """ - The latency between when the task was enqueued and when it was started to be processed. + Number of retries/attempts to process a message. """ MESSAGING_SYSTEM = "messaging.system" @@ -391,6 +554,24 @@ class SPANDATA: The messaging system's name, e.g. `kafka`, `aws_sqs` """ + NETWORK_PEER_ADDRESS = "network.peer.address" + """ + Peer address of the network connection - IP address or Unix domain socket name. + Example: 10.1.2.80, /tmp/my.sock, localhost + """ + + NETWORK_PEER_PORT = "network.peer.port" + """ + Peer port number of the network connection. + Example: 6379 + """ + + PROFILER_ID = "profiler_id" + """ + Label identifying the profiler id that the span occurred in. This should be a string. + Example: "5249fbada8d5416482c2f6e47e337372" + """ + SERVER_ADDRESS = "server.address" """ Name of the database host. @@ -416,30 +597,6 @@ class SPANDATA: Example: 16456 """ - CODE_FILEPATH = "code.filepath" - """ - The source code file name that identifies the code unit as uniquely as possible (preferably an absolute file path). - Example: "/app/myapplication/http/handler/server.py" - """ - - CODE_LINENO = "code.lineno" - """ - The line number in `code.filepath` best representing the operation. It SHOULD point within the code unit named in `code.function`. - Example: 42 - """ - - CODE_FUNCTION = "code.function" - """ - The method or function name, or equivalent (usually rightmost part of the code unit's name). - Example: "server_request" - """ - - CODE_NAMESPACE = "code.namespace" - """ - The "namespace" within which `code.function` is defined. Usually the qualified class or module name, such that `code.namespace` + some separator + `code.function` form a unique identifier for the code unit. - Example: "http.handler" - """ - THREAD_ID = "thread.id" """ Identifier of a thread from where the span originated. This should be a string. @@ -452,12 +609,6 @@ class SPANDATA: Example: "MainThread" """ - PROFILER_ID = "profiler_id" - """ - Label identifying the profiler id that the span occurred in. 
This should be a string. - Example: "5249fbada8d5416482c2f6e47e337372" - """ - class SPANSTATUS: """ @@ -497,6 +648,10 @@ class OP: FUNCTION = "function" FUNCTION_AWS = "function.aws" FUNCTION_GCP = "function.gcp" + GEN_AI_CHAT = "gen_ai.chat" + GEN_AI_EXECUTE_TOOL = "gen_ai.execute_tool" + GEN_AI_HANDOFF = "gen_ai.handoff" + GEN_AI_INVOKE_AGENT = "gen_ai.invoke_agent" GRAPHQL_EXECUTE = "graphql.execute" GRAPHQL_MUTATION = "graphql.mutation" GRAPHQL_PARSE = "graphql.parse" diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 118289950c..e2eadd523d 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -145,6 +145,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "launchdarkly": (9, 8, 0), "loguru": (0, 7, 0), "openai": (1, 0, 0), + "openai_agents": (0, 0, 19), "openfeature": (0, 7, 1), "quart": (0, 16, 0), "ray": (2, 7, 0), diff --git a/sentry_sdk/integrations/openai_agents/__init__.py b/sentry_sdk/integrations/openai_agents/__init__.py new file mode 100644 index 0000000000..06b6459441 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/__init__.py @@ -0,0 +1,53 @@ +from sentry_sdk.integrations import DidNotEnable, Integration + +from .patches import ( + _create_get_model_wrapper, + _create_get_all_tools_wrapper, + _create_run_wrapper, + _patch_agent_run, +) + +try: + import agents + +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _patch_runner(): + # type: () -> None + # Create the root span for one full agent run (including eventual handoffs) + # Note agents.run.DEFAULT_AGENT_RUNNER.run_sync is a wrapper around + # agents.run.DEFAULT_AGENT_RUNNER.run. It does not need to be wrapped separately. + # TODO-anton: Also patch streaming runner: agents.Runner.run_streamed + agents.run.DEFAULT_AGENT_RUNNER.run = _create_run_wrapper( + agents.run.DEFAULT_AGENT_RUNNER.run + ) + + # Creating the actual spans for each agent run. 
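+    # Rough sketch of the span tree one agent run produces (illustrative only;
+    # the exact children depend on the agent's tools and any handoffs):
+    #
+    #   "<agent name> workflow"          transaction (or child span, see _get_start_span_function)
+    #     "invoke_agent <agent name>"    op: gen_ai.invoke_agent
+    #       "chat <model>"               op: gen_ai.chat
+    #       "execute_tool <tool name>"   op: gen_ai.execute_tool
+    #       "handoff from <a> to <b>"    op: gen_ai.handoff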
+ _patch_agent_run() + + +def _patch_model(): + # type: () -> None + agents.run.AgentRunner._get_model = classmethod( + _create_get_model_wrapper(agents.run.AgentRunner._get_model), + ) + + +def _patch_tools(): + # type: () -> None + agents.run.AgentRunner._get_all_tools = classmethod( + _create_get_all_tools_wrapper(agents.run.AgentRunner._get_all_tools), + ) + + +class OpenAIAgentsIntegration(Integration): + identifier = "openai_agents" + + @staticmethod + def setup_once(): + # type: () -> None + _patch_tools() + _patch_model() + _patch_runner() diff --git a/sentry_sdk/integrations/openai_agents/consts.py b/sentry_sdk/integrations/openai_agents/consts.py new file mode 100644 index 0000000000..f5de978be0 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/consts.py @@ -0,0 +1 @@ +SPAN_ORIGIN = "auto.ai.openai_agents" diff --git a/sentry_sdk/integrations/openai_agents/patches/__init__.py b/sentry_sdk/integrations/openai_agents/patches/__init__.py new file mode 100644 index 0000000000..06bb1711f8 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/__init__.py @@ -0,0 +1,4 @@ +from .models import _create_get_model_wrapper # noqa: F401 +from .tools import _create_get_all_tools_wrapper # noqa: F401 +from .runner import _create_run_wrapper # noqa: F401 +from .agent_run import _patch_agent_run # noqa: F401 diff --git a/sentry_sdk/integrations/openai_agents/patches/agent_run.py b/sentry_sdk/integrations/openai_agents/patches/agent_run.py new file mode 100644 index 0000000000..084100878c --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/agent_run.py @@ -0,0 +1,143 @@ +from functools import wraps + +from sentry_sdk.integrations import DidNotEnable + +from ..spans import invoke_agent_span, update_invoke_agent_span, handoff_span + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Optional + + +try: + import agents +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _patch_agent_run(): + # type: () -> None + """ + Patches AgentRunner methods to create agent invocation spans. + This directly patches the execution flow to track when agents start and stop. 
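+
+    In rough terms: an "invoke agent" span is started in the patched
+    _run_single_turn and closed in the patched execute_handoffs /
+    execute_final_output; the currently active agent is remembered on the
+    RunContextWrapper via a _sentry_current_agent attribute.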
+ """ + + # Store original methods + original_run_single_turn = agents.run.AgentRunner._run_single_turn + original_execute_handoffs = agents._run_impl.RunImpl.execute_handoffs + original_execute_final_output = agents._run_impl.RunImpl.execute_final_output + + def _start_invoke_agent_span(context_wrapper, agent): + # type: (agents.RunContextWrapper, agents.Agent) -> None + """Start an agent invocation span""" + # Store the agent on the context wrapper so we can access it later + context_wrapper._sentry_current_agent = agent + invoke_agent_span(context_wrapper, agent) + + def _end_invoke_agent_span(context_wrapper, agent, output=None): + # type: (agents.RunContextWrapper, agents.Agent, Optional[Any]) -> None + """End the agent invocation span""" + # Clear the stored agent + if hasattr(context_wrapper, "_sentry_current_agent"): + delattr(context_wrapper, "_sentry_current_agent") + + update_invoke_agent_span(context_wrapper, agent, output) + + def _has_active_agent_span(context_wrapper): + # type: (agents.RunContextWrapper) -> bool + """Check if there's an active agent span for this context""" + return getattr(context_wrapper, "_sentry_current_agent", None) is not None + + def _get_current_agent(context_wrapper): + # type: (agents.RunContextWrapper) -> Optional[agents.Agent] + """Get the current agent from context wrapper""" + return getattr(context_wrapper, "_sentry_current_agent", None) + + @wraps( + original_run_single_turn.__func__ + if hasattr(original_run_single_turn, "__func__") + else original_run_single_turn + ) + async def patched_run_single_turn(cls, *args, **kwargs): + # type: (agents.Runner, *Any, **Any) -> Any + """Patched _run_single_turn that creates agent invocation spans""" + + agent = kwargs.get("agent") + context_wrapper = kwargs.get("context_wrapper") + should_run_agent_start_hooks = kwargs.get("should_run_agent_start_hooks") + + # Start agent span when agent starts (but only once per agent) + if should_run_agent_start_hooks and agent and context_wrapper: + # End any existing span for a different agent + if _has_active_agent_span(context_wrapper): + current_agent = _get_current_agent(context_wrapper) + if current_agent and current_agent != agent: + _end_invoke_agent_span(context_wrapper, current_agent) + + _start_invoke_agent_span(context_wrapper, agent) + + # Call original method with all the correct parameters + result = await original_run_single_turn(*args, **kwargs) + + return result + + @wraps( + original_execute_handoffs.__func__ + if hasattr(original_execute_handoffs, "__func__") + else original_execute_handoffs + ) + async def patched_execute_handoffs(cls, *args, **kwargs): + # type: (agents.Runner, *Any, **Any) -> Any + """Patched execute_handoffs that creates handoff spans and ends agent span for handoffs""" + + context_wrapper = kwargs.get("context_wrapper") + run_handoffs = kwargs.get("run_handoffs") + agent = kwargs.get("agent") + + # Create Sentry handoff span for the first handoff (agents library only processes the first one) + if run_handoffs: + first_handoff = run_handoffs[0] + handoff_agent_name = first_handoff.handoff.agent_name + handoff_span(context_wrapper, agent, handoff_agent_name) + + # Call original method with all parameters + try: + result = await original_execute_handoffs(*args, **kwargs) + + finally: + # End span for current agent after handoff processing is complete + if agent and context_wrapper and _has_active_agent_span(context_wrapper): + _end_invoke_agent_span(context_wrapper, agent) + + return result + + @wraps( + 
original_execute_final_output.__func__ + if hasattr(original_execute_final_output, "__func__") + else original_execute_final_output + ) + async def patched_execute_final_output(cls, *args, **kwargs): + # type: (agents.Runner, *Any, **Any) -> Any + """Patched execute_final_output that ends agent span for final outputs""" + + agent = kwargs.get("agent") + context_wrapper = kwargs.get("context_wrapper") + final_output = kwargs.get("final_output") + + # Call original method with all parameters + try: + result = await original_execute_final_output(*args, **kwargs) + finally: + # End span for current agent after final output processing is complete + if agent and context_wrapper and _has_active_agent_span(context_wrapper): + _end_invoke_agent_span(context_wrapper, agent, final_output) + + return result + + # Apply patches + agents.run.AgentRunner._run_single_turn = classmethod(patched_run_single_turn) + agents._run_impl.RunImpl.execute_handoffs = classmethod(patched_execute_handoffs) + agents._run_impl.RunImpl.execute_final_output = classmethod( + patched_execute_final_output + ) diff --git a/sentry_sdk/integrations/openai_agents/patches/models.py b/sentry_sdk/integrations/openai_agents/patches/models.py new file mode 100644 index 0000000000..e6f24da6a1 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/models.py @@ -0,0 +1,50 @@ +from functools import wraps + +from sentry_sdk.integrations import DidNotEnable + +from ..spans import ai_client_span, update_ai_client_span + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable + + +try: + import agents +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _create_get_model_wrapper(original_get_model): + # type: (Callable[..., Any]) -> Callable[..., Any] + """ + Wraps the agents.Runner._get_model method to wrap the get_response method of the model to create a AI client span. + """ + + @wraps( + original_get_model.__func__ + if hasattr(original_get_model, "__func__") + else original_get_model + ) + def wrapped_get_model(cls, agent, run_config): + # type: (agents.Runner, agents.Agent, agents.RunConfig) -> agents.Model + + model = original_get_model(agent, run_config) + original_get_response = model.get_response + + @wraps(original_get_response) + async def wrapped_get_response(*args, **kwargs): + # type: (*Any, **Any) -> Any + with ai_client_span(agent, kwargs) as span: + result = await original_get_response(*args, **kwargs) + + update_ai_client_span(span, agent, kwargs, result) + + return result + + model.get_response = wrapped_get_response + + return model + + return wrapped_get_model diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py new file mode 100644 index 0000000000..e1e9a3b50c --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -0,0 +1,42 @@ +from functools import wraps + +import sentry_sdk + +from ..spans import agent_workflow_span +from ..utils import _capture_exception + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable + + +def _create_run_wrapper(original_func): + # type: (Callable[..., Any]) -> Callable[..., Any] + """ + Wraps the agents.Runner.run methods to create a root span for the agent workflow runs. + + Note agents.Runner.run_sync() is a wrapper around agents.Runner.run(), + so it does not need to be wrapped separately. 
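+
+    If the wrapped run raises, the exception is captured and any still-open
+    "invoke agent" span is closed before the exception is re-raised.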
+ """ + + @wraps(original_func) + async def wrapper(*args, **kwargs): + # type: (*Any, **Any) -> Any + agent = args[0] + with agent_workflow_span(agent): + result = None + try: + result = await original_func(*args, **kwargs) + return result + except Exception as exc: + _capture_exception(exc) + + # It could be that there is a "invoke agent" span still open + current_span = sentry_sdk.get_current_span() + if current_span is not None and current_span.timestamp is None: + current_span.__exit__(None, None, None) + + raise exc from None + + return wrapper diff --git a/sentry_sdk/integrations/openai_agents/patches/tools.py b/sentry_sdk/integrations/openai_agents/patches/tools.py new file mode 100644 index 0000000000..b359d32678 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/tools.py @@ -0,0 +1,77 @@ +from functools import wraps + +from sentry_sdk.integrations import DidNotEnable + +from ..spans import execute_tool_span, update_execute_tool_span + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable + +try: + import agents +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _create_get_all_tools_wrapper(original_get_all_tools): + # type: (Callable[..., Any]) -> Callable[..., Any] + """ + Wraps the agents.Runner._get_all_tools method of the Runner class to wrap all function tools with Sentry instrumentation. + """ + + @wraps( + original_get_all_tools.__func__ + if hasattr(original_get_all_tools, "__func__") + else original_get_all_tools + ) + async def wrapped_get_all_tools(cls, agent, context_wrapper): + # type: (agents.Runner, agents.Agent, agents.RunContextWrapper) -> list[agents.Tool] + + # Get the original tools + tools = await original_get_all_tools(agent, context_wrapper) + + wrapped_tools = [] + for tool in tools: + # Wrap only the function tools (for now) + if tool.__class__.__name__ != "FunctionTool": + wrapped_tools.append(tool) + continue + + # Create a new FunctionTool with our wrapped invoke method + original_on_invoke = tool.on_invoke_tool + + def create_wrapped_invoke(current_tool, current_on_invoke): + # type: (agents.Tool, Callable[..., Any]) -> Callable[..., Any] + @wraps(current_on_invoke) + async def sentry_wrapped_on_invoke_tool(*args, **kwargs): + # type: (*Any, **Any) -> Any + with execute_tool_span(current_tool, *args, **kwargs) as span: + # We can not capture exceptions in tool execution here because + # `_on_invoke_tool` is swallowing the exception here: + # https://github.com/openai/openai-agents-python/blob/main/src/agents/tool.py#L409-L422 + # And because function_tool is a decorator with `default_tool_error_function` set as a default parameter + # I was unable to monkey patch it because those are evaluated at module import time + # and the SDK is too late to patch it. I was also unable to patch `_on_invoke_tool_impl` + # because it is nested inside this import time code. As if they made it hard to patch on purpose... 
+ result = await current_on_invoke(*args, **kwargs) + update_execute_tool_span(span, agent, current_tool, result) + + return result + + return sentry_wrapped_on_invoke_tool + + wrapped_tool = agents.FunctionTool( + name=tool.name, + description=tool.description, + params_json_schema=tool.params_json_schema, + on_invoke_tool=create_wrapped_invoke(tool, original_on_invoke), + strict_json_schema=tool.strict_json_schema, + is_enabled=tool.is_enabled, + ) + wrapped_tools.append(wrapped_tool) + + return wrapped_tools + + return wrapped_get_all_tools diff --git a/sentry_sdk/integrations/openai_agents/spans/__init__.py b/sentry_sdk/integrations/openai_agents/spans/__init__.py new file mode 100644 index 0000000000..3bc453cafa --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/__init__.py @@ -0,0 +1,5 @@ +from .agent_workflow import agent_workflow_span # noqa: F401 +from .ai_client import ai_client_span, update_ai_client_span # noqa: F401 +from .execute_tool import execute_tool_span, update_execute_tool_span # noqa: F401 +from .handoff import handoff_span # noqa: F401 +from .invoke_agent import invoke_agent_span, update_invoke_agent_span # noqa: F401 diff --git a/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py new file mode 100644 index 0000000000..de2f28d41e --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py @@ -0,0 +1,21 @@ +import sentry_sdk + +from ..consts import SPAN_ORIGIN +from ..utils import _get_start_span_function + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import agents + + +def agent_workflow_span(agent): + # type: (agents.Agent) -> sentry_sdk.tracing.Span + + # Create a transaction or a span if an transaction is already active + span = _get_start_span_function()( + name=f"{agent.name} workflow", + origin=SPAN_ORIGIN, + ) + + return span diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py new file mode 100644 index 0000000000..30c5fd1dac --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py @@ -0,0 +1,38 @@ +import sentry_sdk +from sentry_sdk.consts import OP, SPANDATA + +from ..consts import SPAN_ORIGIN +from ..utils import ( + _set_agent_data, + _set_input_data, + _set_output_data, + _set_usage_data, +) + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from agents import Agent + from typing import Any + + +def ai_client_span(agent, get_response_kwargs): + # type: (Agent, dict[str, Any]) -> sentry_sdk.tracing.Span + # TODO-anton: implement other types of operations. Now "chat" is hardcoded. 
+ span = sentry_sdk.start_span( + op=OP.GEN_AI_CHAT, + description=f"chat {agent.model}", + origin=SPAN_ORIGIN, + ) + # TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat") + + return span + + +def update_ai_client_span(span, agent, get_response_kwargs, result): + # type: (sentry_sdk.tracing.Span, Agent, dict[str, Any], Any) -> None + _set_agent_data(span, agent) + _set_usage_data(span, result.usage) + _set_input_data(span, get_response_kwargs) + _set_output_data(span, result) diff --git a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py new file mode 100644 index 0000000000..e6e880b64c --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py @@ -0,0 +1,43 @@ +import sentry_sdk +from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.scope import should_send_default_pii + +from ..consts import SPAN_ORIGIN +from ..utils import _set_agent_data + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import agents + from typing import Any + + +def execute_tool_span(tool, *args, **kwargs): + # type: (agents.Tool, *Any, **Any) -> sentry_sdk.tracing.Span + span = sentry_sdk.start_span( + op=OP.GEN_AI_EXECUTE_TOOL, + name=f"execute_tool {tool.name}", + origin=SPAN_ORIGIN, + ) + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "execute_tool") + + if tool.__class__.__name__ == "FunctionTool": + span.set_data(SPANDATA.GEN_AI_TOOL_TYPE, "function") + + span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool.name) + span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool.description) + + if should_send_default_pii(): + input = args[1] + span.set_data(SPANDATA.GEN_AI_TOOL_INPUT, input) + + return span + + +def update_execute_tool_span(span, agent, tool, result): + # type: (sentry_sdk.tracing.Span, agents.Agent, agents.Tool, Any) -> None + _set_agent_data(span, agent) + + if should_send_default_pii(): + span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, result) diff --git a/sentry_sdk/integrations/openai_agents/spans/handoff.py b/sentry_sdk/integrations/openai_agents/spans/handoff.py new file mode 100644 index 0000000000..78e6788c7d --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/handoff.py @@ -0,0 +1,19 @@ +import sentry_sdk +from sentry_sdk.consts import OP, SPANDATA + +from ..consts import SPAN_ORIGIN + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import agents + + +def handoff_span(context, from_agent, to_agent_name): + # type: (agents.RunContextWrapper, agents.Agent, str) -> None + with sentry_sdk.start_span( + op=OP.GEN_AI_HANDOFF, + name=f"handoff from {from_agent.name} to {to_agent_name}", + origin=SPAN_ORIGIN, + ) as span: + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "handoff") diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py new file mode 100644 index 0000000000..549ade1246 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -0,0 +1,34 @@ +import sentry_sdk +from sentry_sdk.consts import OP, SPANDATA + +from ..consts import SPAN_ORIGIN +from ..utils import _set_agent_data + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import agents + from typing import Any + + +def invoke_agent_span(context, agent): + # type: (agents.RunContextWrapper, agents.Agent) -> sentry_sdk.tracing.Span + span = sentry_sdk.start_span( + op=OP.GEN_AI_INVOKE_AGENT, + name=f"invoke_agent 
{agent.name}", + origin=SPAN_ORIGIN, + ) + span.__enter__() + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") + + _set_agent_data(span, agent) + + return span + + +def update_invoke_agent_span(context, agent, output): + # type: (agents.RunContextWrapper, agents.Agent, Any) -> None + current_span = sentry_sdk.get_current_span() + if current_span: + current_span.__exit__(None, None, None) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py new file mode 100644 index 0000000000..28dbd6bb75 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -0,0 +1,209 @@ +import json +import sentry_sdk +from sentry_sdk.consts import SPANDATA +from sentry_sdk.integrations import DidNotEnable +from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.utils import event_from_exception + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + from typing import Callable + from typing import Union + from agents import Usage + +try: + import agents + +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _capture_exception(exc): + # type: (Any) -> None + event, hint = event_from_exception( + exc, + client_options=sentry_sdk.get_client().options, + mechanism={"type": "openai_agents", "handled": False}, + ) + sentry_sdk.capture_event(event, hint=hint) + + +def _get_start_span_function(): + # type: () -> Callable[..., Any] + current_span = sentry_sdk.get_current_span() + transaction_exists = ( + current_span is not None and current_span.containing_transaction == current_span + ) + return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction + + +def _set_agent_data(span, agent): + # type: (sentry_sdk.tracing.Span, agents.Agent) -> None + span.set_data( + SPANDATA.GEN_AI_SYSTEM, "openai" + ) # See footnote for https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/#gen-ai-system for explanation why. 
+ + span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent.name) + + if agent.model_settings.max_tokens: + span.set_data( + SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, agent.model_settings.max_tokens + ) + + if agent.model: + span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, agent.model) + + if agent.model_settings.presence_penalty: + span.set_data( + SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, + agent.model_settings.presence_penalty, + ) + + if agent.model_settings.temperature: + span.set_data( + SPANDATA.GEN_AI_REQUEST_TEMPERATURE, agent.model_settings.temperature + ) + + if agent.model_settings.top_p: + span.set_data(SPANDATA.GEN_AI_REQUEST_TOP_P, agent.model_settings.top_p) + + if agent.model_settings.frequency_penalty: + span.set_data( + SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, + agent.model_settings.frequency_penalty, + ) + + if len(agent.tools) > 0: + span.set_data( + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, + safe_serialize([vars(tool) for tool in agent.tools]), + ) + + +def _set_usage_data(span, usage): + # type: (sentry_sdk.tracing.Span, Usage) -> None + span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens) + span.set_data( + SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED, + usage.input_tokens_details.cached_tokens, + ) + span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens) + span.set_data( + SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING, + usage.output_tokens_details.reasoning_tokens, + ) + span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens) + + +def _set_input_data(span, get_response_kwargs): + # type: (sentry_sdk.tracing.Span, dict[str, Any]) -> None + if not should_send_default_pii(): + return + + messages_by_role = { + "system": [], + "user": [], + "assistant": [], + "tool": [], + } # type: (dict[str, list[Any]]) + system_instructions = get_response_kwargs.get("system_instructions") + if system_instructions: + messages_by_role["system"].append({"type": "text", "text": system_instructions}) + + for message in get_response_kwargs.get("input", []): + if "role" in message: + messages_by_role[message.get("role")].append( + {"type": "text", "text": message.get("content")} + ) + else: + if message.get("type") == "function_call": + messages_by_role["assistant"].append(message) + elif message.get("type") == "function_call_output": + messages_by_role["tool"].append(message) + + request_messages = [] + for role, messages in messages_by_role.items(): + if len(messages) > 0: + request_messages.append({"role": role, "content": messages}) + + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, safe_serialize(request_messages)) + + +def _set_output_data(span, result): + # type: (sentry_sdk.tracing.Span, Any) -> None + if not should_send_default_pii(): + return + + output_messages = { + "response": [], + "tool": [], + } # type: (dict[str, list[Any]]) + + for output in result.output: + if output.type == "function_call": + output_messages["tool"].append(output.dict()) + elif output.type == "message": + for output_message in output.content: + try: + output_messages["response"].append(output_message.text) + except AttributeError: + # Unknown output message type, just return the json + output_messages["response"].append(output_message.dict()) + + if len(output_messages["tool"]) > 0: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(output_messages["tool"]) + ) + + if len(output_messages["response"]) > 0: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(output_messages["response"]) + ) + + +def safe_serialize(data): + # type: (Any) -> 
str + """Safely serialize to a readable string.""" + + def serialize_item(item): + # type: (Any) -> Union[str, dict[Any, Any], list[Any], tuple[Any, ...]] + if callable(item): + try: + module = getattr(item, "__module__", None) + qualname = getattr(item, "__qualname__", None) + name = getattr(item, "__name__", "anonymous") + + if module and qualname: + full_path = f"{module}.{qualname}" + elif module and name: + full_path = f"{module}.{name}" + else: + full_path = name + + return f"" + except Exception: + return f"" + elif isinstance(item, dict): + return {k: serialize_item(v) for k, v in item.items()} + elif isinstance(item, (list, tuple)): + return [serialize_item(x) for x in item] + elif hasattr(item, "__dict__"): + try: + attrs = { + k: serialize_item(v) + for k, v in vars(item).items() + if not k.startswith("_") + } + return f"<{type(item).__name__} {attrs}>" + except Exception: + return repr(item) + else: + return item + + try: + serialized = serialize_item(data) + return json.dumps(serialized, default=str) + except Exception: + return str(data) diff --git a/tests/integrations/openai_agents/__init__.py b/tests/integrations/openai_agents/__init__.py new file mode 100644 index 0000000000..6940e2bbbe --- /dev/null +++ b/tests/integrations/openai_agents/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("agents") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py new file mode 100644 index 0000000000..ec606c8806 --- /dev/null +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -0,0 +1,580 @@ +import re +import pytest +from unittest.mock import MagicMock, patch +import os + +from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration +from sentry_sdk.integrations.openai_agents.utils import safe_serialize + +import agents +from agents import ( + Agent, + ModelResponse, + Usage, + ModelSettings, +) +from agents.items import ( + ResponseOutputMessage, + ResponseOutputText, + ResponseFunctionToolCall, +) + +test_run_config = agents.RunConfig(tracing_disabled=True) + + +@pytest.fixture +def mock_usage(): + return Usage( + requests=1, + input_tokens=10, + output_tokens=20, + total_tokens=30, + input_tokens_details=MagicMock(cached_tokens=0), + output_tokens_details=MagicMock(reasoning_tokens=5), + ) + + +@pytest.fixture +def mock_model_response(mock_usage): + return ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_123", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Hello, how can I help you?", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=mock_usage, + response_id="resp_123", + ) + + +@pytest.fixture +def test_agent(): + """Create a real Agent instance for testing.""" + return Agent( + name="test_agent", + instructions="You are a helpful test assistant.", + model="gpt-4", + model_settings=ModelSettings( + max_tokens=100, + temperature=0.7, + top_p=1.0, + presence_penalty=0.0, + frequency_penalty=0.0, + ), + ) + + +@pytest.mark.asyncio +async def test_agent_invocation_span( + sentry_init, capture_events, test_agent, mock_model_response +): + """ + Test that the integration creates spans for agent invocations. 
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + result = await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + +def test_agent_invocation_span_sync( + sentry_init, capture_events, test_agent, mock_model_response +): + """ + Test that the integration creates spans for agent invocations. + """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + test_agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + +@pytest.mark.asyncio +async def test_handoff_span(sentry_init, capture_events, mock_usage): + """ + Test that handoff spans are created when agents hand off to other agents. + """ + # Create two simple agents with a handoff relationship + secondary_agent = agents.Agent( + name="secondary_agent", + instructions="You are a secondary agent.", + model="gpt-4o-mini", + ) + + primary_agent = agents.Agent( + name="primary_agent", + instructions="You are a primary agent that hands off to secondary agent.", + model="gpt-4o-mini", + handoffs=[secondary_agent], + ) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Mock two responses: + # 1. Primary agent calls handoff tool + # 2. 
Secondary agent provides final response + handoff_response = ModelResponse( + output=[ + ResponseFunctionToolCall( + id="call_handoff_123", + call_id="call_handoff_123", + name="transfer_to_secondary_agent", + type="function_call", + arguments="{}", + function=MagicMock( + name="transfer_to_secondary_agent", arguments="{}" + ), + ) + ], + usage=mock_usage, + response_id="resp_handoff_123", + ) + + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="I'm the specialist and I can help with that!", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=mock_usage, + response_id="resp_final_123", + ) + + mock_get_response.side_effect = [handoff_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + result = await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + handoff_span = spans[2] + + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + + +@pytest.mark.asyncio +async def test_tool_execution_span(sentry_init, capture_events, test_agent): + """ + Test tool execution span creation. + """ + + @agents.function_tool + def simple_test_tool(message: str) -> str: + """A simple tool""" + return f"Tool executed with: {message}" + + # Create agent with the tool + agent_with_tool = test_agent.clone(tools=[simple_test_tool]) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="simple_test_tool", + type="function_call", + arguments='{"message": "hello"}', + function=MagicMock( + name="simple_test_tool", arguments='{"message": "hello"}' + ), + ) + + # First response with tool call + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) + + # Second response with final answer + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed using the tool", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, input_tokens=15, output_tokens=10, total_tokens=25 + ), + response_id="resp_final_123", + ) + + # Return different responses on successive calls + mock_get_response.side_effect = [tool_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + ( + agent_span, + ai_client_span1, + tool_span, + ai_client_span2, + ) = spans + + available_tools = safe_serialize( + [ + { + "name": "simple_test_tool", + 
"description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": "._create_function_tool.._on_invoke_tool>", + "strict_json_schema": True, + "is_enabled": True, + } + ] + ) + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert agent_span["data"]["gen_ai.request.available_tools"] == available_tools + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span1["data"]["gen_ai.request.available_tools"] == available_tools + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful test assistant."} + ], + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + assert re.sub( + r"SerializationIterator\(.*\)", + "NOT_CHECKED", + ai_client_span1["data"]["gen_ai.response.tool_calls"], + ) == safe_serialize( + [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + "function": "NOT_CHECKED", + } + ] + ) + + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert ( + re.sub( + "<.*>(,)", + r"'NOT_CHECKED'\1", + agent_span["data"]["gen_ai.request.available_tools"], + ) + == available_tools + ) + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert 
tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert tool_span["data"]["gen_ai.tool.type"] == "function" + + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + assert ( + re.sub( + "<.*>(,)", + r"'NOT_CHECKED'\1", + agent_span["data"]["gen_ai.request.available_tools"], + ) + == available_tools + ) + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert re.sub( + r"SerializationIterator\(.*\)", + "NOT_CHECKED", + ai_client_span2["data"]["gen_ai.request.messages"], + ) == safe_serialize( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful test assistant."} + ], + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + { + "role": "assistant", + "content": [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "function": "NOT_CHECKED", + } + ], + }, + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span2["data"]["gen_ai.response.text"] == safe_serialize( + ["Task completed using the tool"] + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + + +@pytest.mark.asyncio +async def test_error_handling(sentry_init, capture_events, test_agent): + """ + Test error handling in agent execution. 
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = Exception("Model Error") + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + ( + error_event, + transaction, + ) = events + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" + + spans = transaction["spans"] + (invoke_agent_span, ai_client_span) = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["origin"] == "auto.ai.openai_agents" + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["origin"] == "auto.ai.openai_agents" + assert ai_client_span["tags"]["status"] == "internal_error" diff --git a/tox.ini b/tox.ini index f4aee13d02..5c993718d7 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". # -# Last generated: 2025-06-24T07:19:36.122984+00:00 +# Last generated: 2025-06-24T12:35:34.437673+00:00 [tox] requires = @@ -145,6 +145,8 @@ envlist = {py3.9,py3.11,py3.12}-cohere-v5.11.4 {py3.9,py3.11,py3.12}-cohere-v5.15.0 + {py3.9,py3.11,py3.12}-openai_agents-v0.0.19 + {py3.8,py3.10,py3.11}-huggingface_hub-v0.22.2 {py3.8,py3.11,py3.12}-huggingface_hub-v0.26.5 {py3.8,py3.12,py3.13}-huggingface_hub-v0.30.2 @@ -515,6 +517,9 @@ deps = cohere-v5.11.4: cohere==5.11.4 cohere-v5.15.0: cohere==5.15.0 + openai_agents-v0.0.19: openai-agents==0.0.19 + openai_agents: pytest-asyncio + huggingface_hub-v0.22.2: huggingface_hub==0.22.2 huggingface_hub-v0.26.5: huggingface_hub==0.26.5 huggingface_hub-v0.30.2: huggingface_hub==0.30.2 @@ -809,6 +814,7 @@ setenv = litestar: TESTPATH=tests/integrations/litestar loguru: TESTPATH=tests/integrations/loguru openai: TESTPATH=tests/integrations/openai + openai_agents: TESTPATH=tests/integrations/openai_agents openfeature: TESTPATH=tests/integrations/openfeature opentelemetry: TESTPATH=tests/integrations/opentelemetry potel: TESTPATH=tests/integrations/opentelemetry
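
For context on how the new integration is enabled outside of the test suite, here is a minimal setup sketch. It assumes the integration is importable from sentry_sdk.integrations.openai_agents (consistent with the OpenAIAgentsIntegration class exercised in the tests and the tests/integrations/openai_agents TESTPATH added above); the DSN is a placeholder, and send_default_pii mirrors the tool-execution test, which enables it before asserting message and tool payloads.

    import sentry_sdk
    # Assumed import path, matching the integration exercised in the tests above.
    from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration

    sentry_sdk.init(
        dsn="https://<public_key>@o0.ingest.sentry.io/0",  # placeholder DSN
        traces_sample_rate=1.0,   # sample all traces so agent runs are recorded
        send_default_pii=True,    # the tool-execution test sets this before checking message/tool content
        integrations=[OpenAIAgentsIntegration()],
    )

    # With this in place, an agents.Runner.run(...) call is recorded as an
    # "<agent name> workflow" transaction with invoke_agent, chat, execute_tool,
    # and handoff child spans, as asserted in the tests above.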