diff --git a/.github/workflows/test-integrations-ai.yml b/.github/workflows/test-integrations-ai.yml index 4aa0f36b77..e81d507d27 100644 --- a/.github/workflows/test-integrations-ai.yml +++ b/.github/workflows/test-integrations-ai.yml @@ -66,6 +66,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh "py${{ matrix.python-version }}-openai-latest" + - name: Test openai_agents latest + run: | + set -x # print commands that are executed + ./scripts/runtox.sh "py${{ matrix.python-version }}-openai_agents-latest" - name: Test huggingface_hub latest run: | set -x # print commands that are executed @@ -141,6 +145,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai" + - name: Test openai_agents pinned + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai_agents" - name: Test huggingface_hub pinned run: | set -x # print commands that are executed diff --git a/pyproject.toml b/pyproject.toml index 5e16b30793..e5eae2c21f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,6 +183,10 @@ ignore_missing_imports = true module = "grpc.*" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "agents.*" +ignore_missing_imports = true + # # Tool: Flake8 # diff --git a/scripts/populate_tox/config.py b/scripts/populate_tox/config.py index 4664845c7b..411d7fe666 100644 --- a/scripts/populate_tox/config.py +++ b/scripts/populate_tox/config.py @@ -139,6 +139,12 @@ "loguru": { "package": "loguru", }, + "openai_agents": { + "package": "openai-agents", + "deps": { + "*": ["pytest-asyncio"], + }, + }, "openfeature": { "package": "openfeature-sdk", }, diff --git a/scripts/populate_tox/tox.jinja b/scripts/populate_tox/tox.jinja index f95a913fd9..ac14bdb02a 100644 --- a/scripts/populate_tox/tox.jinja +++ b/scripts/populate_tox/tox.jinja @@ -400,6 +400,7 @@ setenv = litestar: TESTPATH=tests/integrations/litestar loguru: TESTPATH=tests/integrations/loguru openai: TESTPATH=tests/integrations/openai + openai_agents: TESTPATH=tests/integrations/openai_agents openfeature: TESTPATH=tests/integrations/openfeature opentelemetry: TESTPATH=tests/integrations/opentelemetry potel: TESTPATH=tests/integrations/opentelemetry diff --git a/scripts/split_tox_gh_actions/split_tox_gh_actions.py b/scripts/split_tox_gh_actions/split_tox_gh_actions.py index 3fbc0ec1c5..af1ff84cd6 100755 --- a/scripts/split_tox_gh_actions/split_tox_gh_actions.py +++ b/scripts/split_tox_gh_actions/split_tox_gh_actions.py @@ -63,6 +63,7 @@ "cohere", "langchain", "openai", + "openai_agents", "huggingface_hub", ], "Cloud": [ diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 34ae5bdfd8..53148a36df 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -108,16 +108,39 @@ class SPANDATA: See: https://develop.sentry.dev/sdk/performance/span-data-conventions/ """ + AI_CITATIONS = "ai.citations" + """ + References or sources cited by the AI model in its response. + Example: ["Smith et al. 2020", "Jones 2019"] + """ + + AI_DOCUMENTS = "ai.documents" + """ + Documents or content chunks used as context for the AI model. + Example: ["doc1.txt", "doc2.pdf"] + """ + + AI_FINISH_REASON = "ai.finish_reason" + """ + The reason why the model stopped generating. + Example: "length" + """ + AI_FREQUENCY_PENALTY = "ai.frequency_penalty" """ Used to reduce repetitiveness of generated tokens. 
Example: 0.5 """ - AI_PRESENCE_PENALTY = "ai.presence_penalty" + AI_FUNCTION_CALL = "ai.function_call" """ - Used to reduce repetitiveness of generated tokens. - Example: 0.5 + For an AI model call, the function that was called. This is deprecated for OpenAI, and replaced by tool_calls + """ + + AI_GENERATION_ID = "ai.generation_id" + """ + Unique identifier for the completion. + Example: "gen_123abc" """ AI_INPUT_MESSAGES = "ai.input_messages" @@ -126,10 +149,9 @@ class SPANDATA: Example: [{"role": "user", "message": "hello"}] """ - AI_MODEL_ID = "ai.model_id" + AI_LOGIT_BIAS = "ai.logit_bias" """ - The unique descriptor of the model being execugted - Example: gpt-4 + For an AI model call, the logit bias """ AI_METADATA = "ai.metadata" @@ -138,28 +160,94 @@ class SPANDATA: Example: {"executed_function": "add_integers"} """ - AI_TAGS = "ai.tags" + AI_MODEL_ID = "ai.model_id" """ - Tags that describe an AI pipeline step. - Example: {"executed_function": "add_integers"} + The unique descriptor of the model being execugted + Example: gpt-4 + """ + + AI_PIPELINE_NAME = "ai.pipeline.name" + """ + Name of the AI pipeline or chain being executed. + Example: "qa-pipeline" + """ + + AI_PREAMBLE = "ai.preamble" + """ + For an AI model call, the preamble parameter. + Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style. + Example: "You are now a clown." + """ + + AI_PRESENCE_PENALTY = "ai.presence_penalty" + """ + Used to reduce repetitiveness of generated tokens. + Example: 0.5 + """ + + AI_RAW_PROMPTING = "ai.raw_prompting" + """ + Minimize pre-processing done to the prompt sent to the LLM. + Example: true + """ + + AI_RESPONSE_FORMAT = "ai.response_format" + """ + For an AI model call, the format of the response + """ + + AI_RESPONSES = "ai.responses" + """ + The responses to an AI model call. Always as a list. + Example: ["hello", "world"] + """ + + AI_SEARCH_QUERIES = "ai.search_queries" + """ + Queries used to search for relevant context or documents. + Example: ["climate change effects", "renewable energy"] + """ + + AI_SEARCH_REQUIRED = "ai.is_search_required" + """ + Boolean indicating if the model needs to perform a search. + Example: true + """ + + AI_SEARCH_RESULTS = "ai.search_results" + """ + Results returned from search queries for context. + Example: ["Result 1", "Result 2"] + """ + + AI_SEED = "ai.seed" + """ + The seed, ideally models given the same seed and same other parameters will produce the exact same output. + Example: 123.45 """ AI_STREAMING = "ai.streaming" """ - Whether or not the AI model call's repsonse was streamed back asynchronously + Whether or not the AI model call's response was streamed back asynchronously Example: true """ + AI_TAGS = "ai.tags" + """ + Tags that describe an AI pipeline step. + Example: {"executed_function": "add_integers"} + """ + AI_TEMPERATURE = "ai.temperature" """ For an AI model call, the temperature parameter. Temperature essentially means how random the output will be. Example: 0.5 """ - AI_TOP_P = "ai.top_p" + AI_TEXTS = "ai.texts" """ - For an AI model call, the top_p parameter. Top_p essentially controls how random the output will be. - Example: 0.5 + Raw text inputs provided to the model. + Example: ["What is machine learning?"] """ AI_TOP_K = "ai.top_k" @@ -168,9 +256,10 @@ class SPANDATA: Example: 35 """ - AI_FUNCTION_CALL = "ai.function_call" + AI_TOP_P = "ai.top_p" """ - For an AI model call, the function that was called. 
This is deprecated for OpenAI, and replaced by tool_calls + For an AI model call, the top_p parameter. Top_p essentially controls how random the output will be. + Example: 0.5 """ AI_TOOL_CALLS = "ai.tool_calls" @@ -183,168 +272,236 @@ class SPANDATA: For an AI model call, the functions that are available """ - AI_RESPONSE_FORMAT = "ai.response_format" + AI_WARNINGS = "ai.warnings" """ - For an AI model call, the format of the response + Warning messages generated during model execution. + Example: ["Token limit exceeded"] """ - AI_LOGIT_BIAS = "ai.logit_bias" + CACHE_HIT = "cache.hit" """ - For an AI model call, the logit bias + A boolean indicating whether the requested data was found in the cache. + Example: true """ - AI_PREAMBLE = "ai.preamble" + CACHE_ITEM_SIZE = "cache.item_size" """ - For an AI model call, the preamble parameter. - Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style. - Example: "You are now a clown." + The size of the requested data in bytes. + Example: 58 """ - AI_RAW_PROMPTING = "ai.raw_prompting" + CACHE_KEY = "cache.key" """ - Minimize pre-processing done to the prompt sent to the LLM. - Example: true + The key of the requested data. + Example: template.cache.some_item.867da7e2af8e6b2f3aa7213a4080edb3 """ - AI_RESPONSES = "ai.responses" + + CODE_FILEPATH = "code.filepath" """ - The responses to an AI model call. Always as a list. - Example: ["hello", "world"] + The source code file name that identifies the code unit as uniquely as possible (preferably an absolute file path). + Example: "/app/myapplication/http/handler/server.py" """ - AI_SEED = "ai.seed" + CODE_FUNCTION = "code.function" """ - The seed, ideally models given the same seed and same other parameters will produce the exact same output. - Example: 123.45 + The method or function name, or equivalent (usually rightmost part of the code unit's name). + Example: "server_request" """ - AI_CITATIONS = "ai.citations" + CODE_LINENO = "code.lineno" """ - References or sources cited by the AI model in its response. - Example: ["Smith et al. 2020", "Jones 2019"] + The line number in `code.filepath` best representing the operation. It SHOULD point within the code unit named in `code.function`. + Example: 42 """ - AI_DOCUMENTS = "ai.documents" + CODE_NAMESPACE = "code.namespace" """ - Documents or content chunks used as context for the AI model. - Example: ["doc1.txt", "doc2.pdf"] + The "namespace" within which `code.function` is defined. Usually the qualified class or module name, such that `code.namespace` + some separator + `code.function` form a unique identifier for the code unit. + Example: "http.handler" """ - AI_SEARCH_QUERIES = "ai.search_queries" + DB_MONGODB_COLLECTION = "db.mongodb.collection" """ - Queries used to search for relevant context or documents. - Example: ["climate change effects", "renewable energy"] + The MongoDB collection being accessed within the database. + See: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/database/mongodb.md#attributes + Example: public.users; customers """ - AI_SEARCH_RESULTS = "ai.search_results" + DB_NAME = "db.name" """ - Results returned from search queries for context. - Example: ["Result 1", "Result 2"] + The name of the database being accessed. For commands that switch the database, this should be set to the target database (even if the command fails). 
+ Example: myDatabase """ - AI_GENERATION_ID = "ai.generation_id" + DB_OPERATION = "db.operation" """ - Unique identifier for the completion. - Example: "gen_123abc" + The name of the operation being executed, e.g. the MongoDB command name such as findAndModify, or the SQL keyword. + See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md + Example: findAndModify, HMSET, SELECT """ - AI_SEARCH_REQUIRED = "ai.is_search_required" + DB_SYSTEM = "db.system" """ - Boolean indicating if the model needs to perform a search. - Example: true + An identifier for the database management system (DBMS) product being used. + See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md + Example: postgresql """ - AI_FINISH_REASON = "ai.finish_reason" + DB_USER = "db.user" """ - The reason why the model stopped generating. - Example: "length" + The name of the database user used for connecting to the database. + See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md + Example: my_user """ - AI_PIPELINE_NAME = "ai.pipeline.name" + GEN_AI_AGENT_NAME = "gen_ai.agent.name" """ - Name of the AI pipeline or chain being executed. - Example: "qa-pipeline" + The name of the agent being used. + Example: "ResearchAssistant" """ - AI_TEXTS = "ai.texts" + GEN_AI_CHOICE = "gen_ai.choice" """ - Raw text inputs provided to the model. - Example: ["What is machine learning?"] + The model's response message. + Example: "The weather in Paris is rainy and overcast, with temperatures around 57°F" """ - AI_WARNINGS = "ai.warnings" + GEN_AI_OPERATION_NAME = "gen_ai.operation.name" """ - Warning messages generated during model execution. - Example: ["Token limit exceeded"] + The name of the operation being performed. + Example: "chat" """ - DB_NAME = "db.name" + GEN_AI_RESPONSE_TEXT = "gen_ai.response.text" """ - The name of the database being accessed. For commands that switch the database, this should be set to the target database (even if the command fails). - Example: myDatabase + The model's response text messages. + Example: ["The weather in Paris is rainy and overcast, with temperatures around 57°F", "The weather in London is sunny and warm, with temperatures around 65°F"] """ - DB_USER = "db.user" + GEN_AI_RESPONSE_TOOL_CALLS = "gen_ai.response.tool_calls" """ - The name of the database user used for connecting to the database. - See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md - Example: my_user + The tool calls in the model's response. + Example: [{"name": "get_weather", "arguments": {"location": "Paris"}}] """ - DB_OPERATION = "db.operation" + GEN_AI_REQUEST_AVAILABLE_TOOLS = "gen_ai.request.available_tools" """ - The name of the operation being executed, e.g. the MongoDB command name such as findAndModify, or the SQL keyword. - See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md - Example: findAndModify, HMSET, SELECT + The available tools for the model. 
+ Example: [{"name": "get_weather", "description": "Get the weather for a given location"}, {"name": "get_news", "description": "Get the news for a given topic"}] """ - DB_SYSTEM = "db.system" + GEN_AI_REQUEST_FREQUENCY_PENALTY = "gen_ai.request.frequency_penalty" """ - An identifier for the database management system (DBMS) product being used. - See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md - Example: postgresql + The frequency penalty parameter used to reduce repetitiveness of generated tokens. + Example: 0.1 """ - DB_MONGODB_COLLECTION = "db.mongodb.collection" + GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens" """ - The MongoDB collection being accessed within the database. - See: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/database/mongodb.md#attributes - Example: public.users; customers + The maximum number of tokens to generate in the response. + Example: 2048 """ - CACHE_HIT = "cache.hit" + GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages" """ - A boolean indicating whether the requested data was found in the cache. - Example: true + The messages passed to the model. The "content" can be a string or an array of objects. + Example: [{role: "system", "content: "Generate a random number."}, {"role": "user", "content": [{"text": "Generate a random number between 0 and 10.", "type": "text"}]}] """ - CACHE_ITEM_SIZE = "cache.item_size" + GEN_AI_REQUEST_MODEL = "gen_ai.request.model" """ - The size of the requested data in bytes. - Example: 58 + The model identifier being used for the request. + Example: "gpt-4-turbo-preview" """ - CACHE_KEY = "cache.key" + GEN_AI_REQUEST_PRESENCE_PENALTY = "gen_ai.request.presence_penalty" """ - The key of the requested data. - Example: template.cache.some_item.867da7e2af8e6b2f3aa7213a4080edb3 + The presence penalty parameter used to reduce repetitiveness of generated tokens. + Example: 0.1 """ - NETWORK_PEER_ADDRESS = "network.peer.address" + GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature" """ - Peer address of the network connection - IP address or Unix domain socket name. - Example: 10.1.2.80, /tmp/my.sock, localhost + The temperature parameter used to control randomness in the output. + Example: 0.7 """ - NETWORK_PEER_PORT = "network.peer.port" + GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p" """ - Peer port number of the network connection. - Example: 6379 + The top_p parameter used to control diversity via nucleus sampling. + Example: 1.0 """ - HTTP_QUERY = "http.query" + GEN_AI_SYSTEM = "gen_ai.system" """ - The Query string present in the URL. - Example: ?foo=bar&bar=baz + The name of the AI system being used. + Example: "openai" + """ + + GEN_AI_TOOL_DESCRIPTION = "gen_ai.tool.description" + """ + The description of the tool being used. + Example: "Searches the web for current information about a topic" + """ + + GEN_AI_TOOL_INPUT = "gen_ai.tool.input" + """ + The input of the tool being used. + Example: {"location": "Paris"} + """ + + GEN_AI_TOOL_NAME = "gen_ai.tool.name" + """ + The name of the tool being used. + Example: "web_search" + """ + + GEN_AI_TOOL_OUTPUT = "gen_ai.tool.output" + """ + The output of the tool being used. + Example: "rainy, 57°F" + """ + + GEN_AI_TOOL_TYPE = "gen_ai.tool.type" + """ + The type of tool being used. + Example: "function" + """ + + GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens" + """ + The number of tokens in the input. 
+ Example: 150 + """ + + GEN_AI_USAGE_INPUT_TOKENS_CACHED = "gen_ai.usage.input_tokens.cached" + """ + The number of cached tokens in the input. + Example: 50 + """ + + GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens" + """ + The number of tokens in the output. + Example: 250 + """ + + GEN_AI_USAGE_OUTPUT_TOKENS_REASONING = "gen_ai.usage.output_tokens.reasoning" + """ + The number of tokens used for reasoning in the output. + Example: 75 + """ + + GEN_AI_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens" + """ + The total number of tokens used (input + output). + Example: 400 + """ + + GEN_AI_USER_MESSAGE = "gen_ai.user.message" + """ + The user message passed to the model. + Example: "What's the weather in Paris?" """ HTTP_FRAGMENT = "http.fragment" @@ -359,6 +516,12 @@ class SPANDATA: Example: GET """ + HTTP_QUERY = "http.query" + """ + The Query string present in the URL. + Example: ?foo=bar&bar=baz + """ + HTTP_STATUS_CODE = "http.response.status_code" """ The HTTP status code as an integer. @@ -376,14 +539,14 @@ class SPANDATA: The message's identifier. """ - MESSAGING_MESSAGE_RETRY_COUNT = "messaging.message.retry.count" + MESSAGING_MESSAGE_RECEIVE_LATENCY = "messaging.message.receive.latency" """ - Number of retries/attempts to process a message. + The latency between when the task was enqueued and when it was started to be processed. """ - MESSAGING_MESSAGE_RECEIVE_LATENCY = "messaging.message.receive.latency" + MESSAGING_MESSAGE_RETRY_COUNT = "messaging.message.retry.count" """ - The latency between when the task was enqueued and when it was started to be processed. + Number of retries/attempts to process a message. """ MESSAGING_SYSTEM = "messaging.system" @@ -391,6 +554,24 @@ class SPANDATA: The messaging system's name, e.g. `kafka`, `aws_sqs` """ + NETWORK_PEER_ADDRESS = "network.peer.address" + """ + Peer address of the network connection - IP address or Unix domain socket name. + Example: 10.1.2.80, /tmp/my.sock, localhost + """ + + NETWORK_PEER_PORT = "network.peer.port" + """ + Peer port number of the network connection. + Example: 6379 + """ + + PROFILER_ID = "profiler_id" + """ + Label identifying the profiler id that the span occurred in. This should be a string. + Example: "5249fbada8d5416482c2f6e47e337372" + """ + SERVER_ADDRESS = "server.address" """ Name of the database host. @@ -416,30 +597,6 @@ class SPANDATA: Example: 16456 """ - CODE_FILEPATH = "code.filepath" - """ - The source code file name that identifies the code unit as uniquely as possible (preferably an absolute file path). - Example: "/app/myapplication/http/handler/server.py" - """ - - CODE_LINENO = "code.lineno" - """ - The line number in `code.filepath` best representing the operation. It SHOULD point within the code unit named in `code.function`. - Example: 42 - """ - - CODE_FUNCTION = "code.function" - """ - The method or function name, or equivalent (usually rightmost part of the code unit's name). - Example: "server_request" - """ - - CODE_NAMESPACE = "code.namespace" - """ - The "namespace" within which `code.function` is defined. Usually the qualified class or module name, such that `code.namespace` + some separator + `code.function` form a unique identifier for the code unit. - Example: "http.handler" - """ - THREAD_ID = "thread.id" """ Identifier of a thread from where the span originated. This should be a string. @@ -452,12 +609,6 @@ class SPANDATA: Example: "MainThread" """ - PROFILER_ID = "profiler_id" - """ - Label identifying the profiler id that the span occurred in. 
This should be a string. - Example: "5249fbada8d5416482c2f6e47e337372" - """ - class SPANSTATUS: """ @@ -497,6 +648,10 @@ class OP: FUNCTION = "function" FUNCTION_AWS = "function.aws" FUNCTION_GCP = "function.gcp" + GEN_AI_CHAT = "gen_ai.chat" + GEN_AI_EXECUTE_TOOL = "gen_ai.execute_tool" + GEN_AI_HANDOFF = "gen_ai.handoff" + GEN_AI_INVOKE_AGENT = "gen_ai.invoke_agent" GRAPHQL_EXECUTE = "graphql.execute" GRAPHQL_MUTATION = "graphql.mutation" GRAPHQL_PARSE = "graphql.parse" diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 118289950c..e2eadd523d 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -145,6 +145,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "launchdarkly": (9, 8, 0), "loguru": (0, 7, 0), "openai": (1, 0, 0), + "openai_agents": (0, 0, 19), "openfeature": (0, 7, 1), "quart": (0, 16, 0), "ray": (2, 7, 0), diff --git a/sentry_sdk/integrations/openai_agents/__init__.py b/sentry_sdk/integrations/openai_agents/__init__.py new file mode 100644 index 0000000000..06b6459441 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/__init__.py @@ -0,0 +1,53 @@ +from sentry_sdk.integrations import DidNotEnable, Integration + +from .patches import ( + _create_get_model_wrapper, + _create_get_all_tools_wrapper, + _create_run_wrapper, + _patch_agent_run, +) + +try: + import agents + +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _patch_runner(): + # type: () -> None + # Create the root span for one full agent run (including eventual handoffs) + # Note agents.run.DEFAULT_AGENT_RUNNER.run_sync is a wrapper around + # agents.run.DEFAULT_AGENT_RUNNER.run. It does not need to be wrapped separately. + # TODO-anton: Also patch streaming runner: agents.Runner.run_streamed + agents.run.DEFAULT_AGENT_RUNNER.run = _create_run_wrapper( + agents.run.DEFAULT_AGENT_RUNNER.run + ) + + # Creating the actual spans for each agent run. 
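+    # Rough sketch of the span tree one agent run produces (illustrative only;
+    # the exact children depend on the agent's tools and any handoffs):
+    #
+    #   "<agent name> workflow"          transaction (or child span, see _get_start_span_function)
+    #     "invoke_agent <agent name>"    op: gen_ai.invoke_agent
+    #       "chat <model>"               op: gen_ai.chat
+    #       "execute_tool <tool name>"   op: gen_ai.execute_tool
+    #       "handoff from <a> to <b>"    op: gen_ai.handoff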
+ _patch_agent_run() + + +def _patch_model(): + # type: () -> None + agents.run.AgentRunner._get_model = classmethod( + _create_get_model_wrapper(agents.run.AgentRunner._get_model), + ) + + +def _patch_tools(): + # type: () -> None + agents.run.AgentRunner._get_all_tools = classmethod( + _create_get_all_tools_wrapper(agents.run.AgentRunner._get_all_tools), + ) + + +class OpenAIAgentsIntegration(Integration): + identifier = "openai_agents" + + @staticmethod + def setup_once(): + # type: () -> None + _patch_tools() + _patch_model() + _patch_runner() diff --git a/sentry_sdk/integrations/openai_agents/consts.py b/sentry_sdk/integrations/openai_agents/consts.py new file mode 100644 index 0000000000..f5de978be0 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/consts.py @@ -0,0 +1 @@ +SPAN_ORIGIN = "auto.ai.openai_agents" diff --git a/sentry_sdk/integrations/openai_agents/patches/__init__.py b/sentry_sdk/integrations/openai_agents/patches/__init__.py new file mode 100644 index 0000000000..06bb1711f8 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/__init__.py @@ -0,0 +1,4 @@ +from .models import _create_get_model_wrapper # noqa: F401 +from .tools import _create_get_all_tools_wrapper # noqa: F401 +from .runner import _create_run_wrapper # noqa: F401 +from .agent_run import _patch_agent_run # noqa: F401 diff --git a/sentry_sdk/integrations/openai_agents/patches/agent_run.py b/sentry_sdk/integrations/openai_agents/patches/agent_run.py new file mode 100644 index 0000000000..084100878c --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/agent_run.py @@ -0,0 +1,143 @@ +from functools import wraps + +from sentry_sdk.integrations import DidNotEnable + +from ..spans import invoke_agent_span, update_invoke_agent_span, handoff_span + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Optional + + +try: + import agents +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _patch_agent_run(): + # type: () -> None + """ + Patches AgentRunner methods to create agent invocation spans. + This directly patches the execution flow to track when agents start and stop. 
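+
+    In rough terms: an "invoke agent" span is started in the patched
+    _run_single_turn and closed in the patched execute_handoffs /
+    execute_final_output; the currently active agent is remembered on the
+    RunContextWrapper via a _sentry_current_agent attribute.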
+ """ + + # Store original methods + original_run_single_turn = agents.run.AgentRunner._run_single_turn + original_execute_handoffs = agents._run_impl.RunImpl.execute_handoffs + original_execute_final_output = agents._run_impl.RunImpl.execute_final_output + + def _start_invoke_agent_span(context_wrapper, agent): + # type: (agents.RunContextWrapper, agents.Agent) -> None + """Start an agent invocation span""" + # Store the agent on the context wrapper so we can access it later + context_wrapper._sentry_current_agent = agent + invoke_agent_span(context_wrapper, agent) + + def _end_invoke_agent_span(context_wrapper, agent, output=None): + # type: (agents.RunContextWrapper, agents.Agent, Optional[Any]) -> None + """End the agent invocation span""" + # Clear the stored agent + if hasattr(context_wrapper, "_sentry_current_agent"): + delattr(context_wrapper, "_sentry_current_agent") + + update_invoke_agent_span(context_wrapper, agent, output) + + def _has_active_agent_span(context_wrapper): + # type: (agents.RunContextWrapper) -> bool + """Check if there's an active agent span for this context""" + return getattr(context_wrapper, "_sentry_current_agent", None) is not None + + def _get_current_agent(context_wrapper): + # type: (agents.RunContextWrapper) -> Optional[agents.Agent] + """Get the current agent from context wrapper""" + return getattr(context_wrapper, "_sentry_current_agent", None) + + @wraps( + original_run_single_turn.__func__ + if hasattr(original_run_single_turn, "__func__") + else original_run_single_turn + ) + async def patched_run_single_turn(cls, *args, **kwargs): + # type: (agents.Runner, *Any, **Any) -> Any + """Patched _run_single_turn that creates agent invocation spans""" + + agent = kwargs.get("agent") + context_wrapper = kwargs.get("context_wrapper") + should_run_agent_start_hooks = kwargs.get("should_run_agent_start_hooks") + + # Start agent span when agent starts (but only once per agent) + if should_run_agent_start_hooks and agent and context_wrapper: + # End any existing span for a different agent + if _has_active_agent_span(context_wrapper): + current_agent = _get_current_agent(context_wrapper) + if current_agent and current_agent != agent: + _end_invoke_agent_span(context_wrapper, current_agent) + + _start_invoke_agent_span(context_wrapper, agent) + + # Call original method with all the correct parameters + result = await original_run_single_turn(*args, **kwargs) + + return result + + @wraps( + original_execute_handoffs.__func__ + if hasattr(original_execute_handoffs, "__func__") + else original_execute_handoffs + ) + async def patched_execute_handoffs(cls, *args, **kwargs): + # type: (agents.Runner, *Any, **Any) -> Any + """Patched execute_handoffs that creates handoff spans and ends agent span for handoffs""" + + context_wrapper = kwargs.get("context_wrapper") + run_handoffs = kwargs.get("run_handoffs") + agent = kwargs.get("agent") + + # Create Sentry handoff span for the first handoff (agents library only processes the first one) + if run_handoffs: + first_handoff = run_handoffs[0] + handoff_agent_name = first_handoff.handoff.agent_name + handoff_span(context_wrapper, agent, handoff_agent_name) + + # Call original method with all parameters + try: + result = await original_execute_handoffs(*args, **kwargs) + + finally: + # End span for current agent after handoff processing is complete + if agent and context_wrapper and _has_active_agent_span(context_wrapper): + _end_invoke_agent_span(context_wrapper, agent) + + return result + + @wraps( + 
original_execute_final_output.__func__ + if hasattr(original_execute_final_output, "__func__") + else original_execute_final_output + ) + async def patched_execute_final_output(cls, *args, **kwargs): + # type: (agents.Runner, *Any, **Any) -> Any + """Patched execute_final_output that ends agent span for final outputs""" + + agent = kwargs.get("agent") + context_wrapper = kwargs.get("context_wrapper") + final_output = kwargs.get("final_output") + + # Call original method with all parameters + try: + result = await original_execute_final_output(*args, **kwargs) + finally: + # End span for current agent after final output processing is complete + if agent and context_wrapper and _has_active_agent_span(context_wrapper): + _end_invoke_agent_span(context_wrapper, agent, final_output) + + return result + + # Apply patches + agents.run.AgentRunner._run_single_turn = classmethod(patched_run_single_turn) + agents._run_impl.RunImpl.execute_handoffs = classmethod(patched_execute_handoffs) + agents._run_impl.RunImpl.execute_final_output = classmethod( + patched_execute_final_output + ) diff --git a/sentry_sdk/integrations/openai_agents/patches/models.py b/sentry_sdk/integrations/openai_agents/patches/models.py new file mode 100644 index 0000000000..e6f24da6a1 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/models.py @@ -0,0 +1,50 @@ +from functools import wraps + +from sentry_sdk.integrations import DidNotEnable + +from ..spans import ai_client_span, update_ai_client_span + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable + + +try: + import agents +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _create_get_model_wrapper(original_get_model): + # type: (Callable[..., Any]) -> Callable[..., Any] + """ + Wraps the agents.Runner._get_model method to wrap the get_response method of the model to create a AI client span. + """ + + @wraps( + original_get_model.__func__ + if hasattr(original_get_model, "__func__") + else original_get_model + ) + def wrapped_get_model(cls, agent, run_config): + # type: (agents.Runner, agents.Agent, agents.RunConfig) -> agents.Model + + model = original_get_model(agent, run_config) + original_get_response = model.get_response + + @wraps(original_get_response) + async def wrapped_get_response(*args, **kwargs): + # type: (*Any, **Any) -> Any + with ai_client_span(agent, kwargs) as span: + result = await original_get_response(*args, **kwargs) + + update_ai_client_span(span, agent, kwargs, result) + + return result + + model.get_response = wrapped_get_response + + return model + + return wrapped_get_model diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py new file mode 100644 index 0000000000..e1e9a3b50c --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -0,0 +1,42 @@ +from functools import wraps + +import sentry_sdk + +from ..spans import agent_workflow_span +from ..utils import _capture_exception + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable + + +def _create_run_wrapper(original_func): + # type: (Callable[..., Any]) -> Callable[..., Any] + """ + Wraps the agents.Runner.run methods to create a root span for the agent workflow runs. + + Note agents.Runner.run_sync() is a wrapper around agents.Runner.run(), + so it does not need to be wrapped separately. 
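+
+    If the wrapped run raises, the exception is captured and any still-open
+    "invoke agent" span is closed before the exception is re-raised.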
+ """ + + @wraps(original_func) + async def wrapper(*args, **kwargs): + # type: (*Any, **Any) -> Any + agent = args[0] + with agent_workflow_span(agent): + result = None + try: + result = await original_func(*args, **kwargs) + return result + except Exception as exc: + _capture_exception(exc) + + # It could be that there is a "invoke agent" span still open + current_span = sentry_sdk.get_current_span() + if current_span is not None and current_span.timestamp is None: + current_span.__exit__(None, None, None) + + raise exc from None + + return wrapper diff --git a/sentry_sdk/integrations/openai_agents/patches/tools.py b/sentry_sdk/integrations/openai_agents/patches/tools.py new file mode 100644 index 0000000000..b359d32678 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/patches/tools.py @@ -0,0 +1,77 @@ +from functools import wraps + +from sentry_sdk.integrations import DidNotEnable + +from ..spans import execute_tool_span, update_execute_tool_span + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable + +try: + import agents +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _create_get_all_tools_wrapper(original_get_all_tools): + # type: (Callable[..., Any]) -> Callable[..., Any] + """ + Wraps the agents.Runner._get_all_tools method of the Runner class to wrap all function tools with Sentry instrumentation. + """ + + @wraps( + original_get_all_tools.__func__ + if hasattr(original_get_all_tools, "__func__") + else original_get_all_tools + ) + async def wrapped_get_all_tools(cls, agent, context_wrapper): + # type: (agents.Runner, agents.Agent, agents.RunContextWrapper) -> list[agents.Tool] + + # Get the original tools + tools = await original_get_all_tools(agent, context_wrapper) + + wrapped_tools = [] + for tool in tools: + # Wrap only the function tools (for now) + if tool.__class__.__name__ != "FunctionTool": + wrapped_tools.append(tool) + continue + + # Create a new FunctionTool with our wrapped invoke method + original_on_invoke = tool.on_invoke_tool + + def create_wrapped_invoke(current_tool, current_on_invoke): + # type: (agents.Tool, Callable[..., Any]) -> Callable[..., Any] + @wraps(current_on_invoke) + async def sentry_wrapped_on_invoke_tool(*args, **kwargs): + # type: (*Any, **Any) -> Any + with execute_tool_span(current_tool, *args, **kwargs) as span: + # We can not capture exceptions in tool execution here because + # `_on_invoke_tool` is swallowing the exception here: + # https://github.com/openai/openai-agents-python/blob/main/src/agents/tool.py#L409-L422 + # And because function_tool is a decorator with `default_tool_error_function` set as a default parameter + # I was unable to monkey patch it because those are evaluated at module import time + # and the SDK is too late to patch it. I was also unable to patch `_on_invoke_tool_impl` + # because it is nested inside this import time code. As if they made it hard to patch on purpose... 
+ result = await current_on_invoke(*args, **kwargs) + update_execute_tool_span(span, agent, current_tool, result) + + return result + + return sentry_wrapped_on_invoke_tool + + wrapped_tool = agents.FunctionTool( + name=tool.name, + description=tool.description, + params_json_schema=tool.params_json_schema, + on_invoke_tool=create_wrapped_invoke(tool, original_on_invoke), + strict_json_schema=tool.strict_json_schema, + is_enabled=tool.is_enabled, + ) + wrapped_tools.append(wrapped_tool) + + return wrapped_tools + + return wrapped_get_all_tools diff --git a/sentry_sdk/integrations/openai_agents/spans/__init__.py b/sentry_sdk/integrations/openai_agents/spans/__init__.py new file mode 100644 index 0000000000..3bc453cafa --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/__init__.py @@ -0,0 +1,5 @@ +from .agent_workflow import agent_workflow_span # noqa: F401 +from .ai_client import ai_client_span, update_ai_client_span # noqa: F401 +from .execute_tool import execute_tool_span, update_execute_tool_span # noqa: F401 +from .handoff import handoff_span # noqa: F401 +from .invoke_agent import invoke_agent_span, update_invoke_agent_span # noqa: F401 diff --git a/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py new file mode 100644 index 0000000000..de2f28d41e --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py @@ -0,0 +1,21 @@ +import sentry_sdk + +from ..consts import SPAN_ORIGIN +from ..utils import _get_start_span_function + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import agents + + +def agent_workflow_span(agent): + # type: (agents.Agent) -> sentry_sdk.tracing.Span + + # Create a transaction or a span if an transaction is already active + span = _get_start_span_function()( + name=f"{agent.name} workflow", + origin=SPAN_ORIGIN, + ) + + return span diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py new file mode 100644 index 0000000000..30c5fd1dac --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py @@ -0,0 +1,38 @@ +import sentry_sdk +from sentry_sdk.consts import OP, SPANDATA + +from ..consts import SPAN_ORIGIN +from ..utils import ( + _set_agent_data, + _set_input_data, + _set_output_data, + _set_usage_data, +) + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from agents import Agent + from typing import Any + + +def ai_client_span(agent, get_response_kwargs): + # type: (Agent, dict[str, Any]) -> sentry_sdk.tracing.Span + # TODO-anton: implement other types of operations. Now "chat" is hardcoded. 
+ span = sentry_sdk.start_span( + op=OP.GEN_AI_CHAT, + description=f"chat {agent.model}", + origin=SPAN_ORIGIN, + ) + # TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat") + + return span + + +def update_ai_client_span(span, agent, get_response_kwargs, result): + # type: (sentry_sdk.tracing.Span, Agent, dict[str, Any], Any) -> None + _set_agent_data(span, agent) + _set_usage_data(span, result.usage) + _set_input_data(span, get_response_kwargs) + _set_output_data(span, result) diff --git a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py new file mode 100644 index 0000000000..e6e880b64c --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py @@ -0,0 +1,43 @@ +import sentry_sdk +from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.scope import should_send_default_pii + +from ..consts import SPAN_ORIGIN +from ..utils import _set_agent_data + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import agents + from typing import Any + + +def execute_tool_span(tool, *args, **kwargs): + # type: (agents.Tool, *Any, **Any) -> sentry_sdk.tracing.Span + span = sentry_sdk.start_span( + op=OP.GEN_AI_EXECUTE_TOOL, + name=f"execute_tool {tool.name}", + origin=SPAN_ORIGIN, + ) + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "execute_tool") + + if tool.__class__.__name__ == "FunctionTool": + span.set_data(SPANDATA.GEN_AI_TOOL_TYPE, "function") + + span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool.name) + span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool.description) + + if should_send_default_pii(): + input = args[1] + span.set_data(SPANDATA.GEN_AI_TOOL_INPUT, input) + + return span + + +def update_execute_tool_span(span, agent, tool, result): + # type: (sentry_sdk.tracing.Span, agents.Agent, agents.Tool, Any) -> None + _set_agent_data(span, agent) + + if should_send_default_pii(): + span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, result) diff --git a/sentry_sdk/integrations/openai_agents/spans/handoff.py b/sentry_sdk/integrations/openai_agents/spans/handoff.py new file mode 100644 index 0000000000..78e6788c7d --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/handoff.py @@ -0,0 +1,19 @@ +import sentry_sdk +from sentry_sdk.consts import OP, SPANDATA + +from ..consts import SPAN_ORIGIN + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import agents + + +def handoff_span(context, from_agent, to_agent_name): + # type: (agents.RunContextWrapper, agents.Agent, str) -> None + with sentry_sdk.start_span( + op=OP.GEN_AI_HANDOFF, + name=f"handoff from {from_agent.name} to {to_agent_name}", + origin=SPAN_ORIGIN, + ) as span: + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "handoff") diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py new file mode 100644 index 0000000000..549ade1246 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -0,0 +1,34 @@ +import sentry_sdk +from sentry_sdk.consts import OP, SPANDATA + +from ..consts import SPAN_ORIGIN +from ..utils import _set_agent_data + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import agents + from typing import Any + + +def invoke_agent_span(context, agent): + # type: (agents.RunContextWrapper, agents.Agent) -> sentry_sdk.tracing.Span + span = sentry_sdk.start_span( + op=OP.GEN_AI_INVOKE_AGENT, + name=f"invoke_agent 
{agent.name}", + origin=SPAN_ORIGIN, + ) + span.__enter__() + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") + + _set_agent_data(span, agent) + + return span + + +def update_invoke_agent_span(context, agent, output): + # type: (agents.RunContextWrapper, agents.Agent, Any) -> None + current_span = sentry_sdk.get_current_span() + if current_span: + current_span.__exit__(None, None, None) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py new file mode 100644 index 0000000000..28dbd6bb75 --- /dev/null +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -0,0 +1,209 @@ +import json +import sentry_sdk +from sentry_sdk.consts import SPANDATA +from sentry_sdk.integrations import DidNotEnable +from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.utils import event_from_exception + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + from typing import Callable + from typing import Union + from agents import Usage + +try: + import agents + +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") + + +def _capture_exception(exc): + # type: (Any) -> None + event, hint = event_from_exception( + exc, + client_options=sentry_sdk.get_client().options, + mechanism={"type": "openai_agents", "handled": False}, + ) + sentry_sdk.capture_event(event, hint=hint) + + +def _get_start_span_function(): + # type: () -> Callable[..., Any] + current_span = sentry_sdk.get_current_span() + transaction_exists = ( + current_span is not None and current_span.containing_transaction == current_span + ) + return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction + + +def _set_agent_data(span, agent): + # type: (sentry_sdk.tracing.Span, agents.Agent) -> None + span.set_data( + SPANDATA.GEN_AI_SYSTEM, "openai" + ) # See footnote for https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/#gen-ai-system for explanation why. 
+ + span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent.name) + + if agent.model_settings.max_tokens: + span.set_data( + SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, agent.model_settings.max_tokens + ) + + if agent.model: + span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, agent.model) + + if agent.model_settings.presence_penalty: + span.set_data( + SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, + agent.model_settings.presence_penalty, + ) + + if agent.model_settings.temperature: + span.set_data( + SPANDATA.GEN_AI_REQUEST_TEMPERATURE, agent.model_settings.temperature + ) + + if agent.model_settings.top_p: + span.set_data(SPANDATA.GEN_AI_REQUEST_TOP_P, agent.model_settings.top_p) + + if agent.model_settings.frequency_penalty: + span.set_data( + SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, + agent.model_settings.frequency_penalty, + ) + + if len(agent.tools) > 0: + span.set_data( + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, + safe_serialize([vars(tool) for tool in agent.tools]), + ) + + +def _set_usage_data(span, usage): + # type: (sentry_sdk.tracing.Span, Usage) -> None + span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens) + span.set_data( + SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED, + usage.input_tokens_details.cached_tokens, + ) + span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens) + span.set_data( + SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING, + usage.output_tokens_details.reasoning_tokens, + ) + span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens) + + +def _set_input_data(span, get_response_kwargs): + # type: (sentry_sdk.tracing.Span, dict[str, Any]) -> None + if not should_send_default_pii(): + return + + messages_by_role = { + "system": [], + "user": [], + "assistant": [], + "tool": [], + } # type: (dict[str, list[Any]]) + system_instructions = get_response_kwargs.get("system_instructions") + if system_instructions: + messages_by_role["system"].append({"type": "text", "text": system_instructions}) + + for message in get_response_kwargs.get("input", []): + if "role" in message: + messages_by_role[message.get("role")].append( + {"type": "text", "text": message.get("content")} + ) + else: + if message.get("type") == "function_call": + messages_by_role["assistant"].append(message) + elif message.get("type") == "function_call_output": + messages_by_role["tool"].append(message) + + request_messages = [] + for role, messages in messages_by_role.items(): + if len(messages) > 0: + request_messages.append({"role": role, "content": messages}) + + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, safe_serialize(request_messages)) + + +def _set_output_data(span, result): + # type: (sentry_sdk.tracing.Span, Any) -> None + if not should_send_default_pii(): + return + + output_messages = { + "response": [], + "tool": [], + } # type: (dict[str, list[Any]]) + + for output in result.output: + if output.type == "function_call": + output_messages["tool"].append(output.dict()) + elif output.type == "message": + for output_message in output.content: + try: + output_messages["response"].append(output_message.text) + except AttributeError: + # Unknown output message type, just return the json + output_messages["response"].append(output_message.dict()) + + if len(output_messages["tool"]) > 0: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(output_messages["tool"]) + ) + + if len(output_messages["response"]) > 0: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(output_messages["response"]) + ) + + +def safe_serialize(data): + # type: (Any) -> 
str + """Safely serialize to a readable string.""" + + def serialize_item(item): + # type: (Any) -> Union[str, dict[Any, Any], list[Any], tuple[Any, ...]] + if callable(item): + try: + module = getattr(item, "__module__", None) + qualname = getattr(item, "__qualname__", None) + name = getattr(item, "__name__", "anonymous") + + if module and qualname: + full_path = f"{module}.{qualname}" + elif module and name: + full_path = f"{module}.{name}" + else: + full_path = name + + return f"" + except Exception: + return f"" + elif isinstance(item, dict): + return {k: serialize_item(v) for k, v in item.items()} + elif isinstance(item, (list, tuple)): + return [serialize_item(x) for x in item] + elif hasattr(item, "__dict__"): + try: + attrs = { + k: serialize_item(v) + for k, v in vars(item).items() + if not k.startswith("_") + } + return f"<{type(item).__name__} {attrs}>" + except Exception: + return repr(item) + else: + return item + + try: + serialized = serialize_item(data) + return json.dumps(serialized, default=str) + except Exception: + return str(data) diff --git a/tests/integrations/openai_agents/__init__.py b/tests/integrations/openai_agents/__init__.py new file mode 100644 index 0000000000..6940e2bbbe --- /dev/null +++ b/tests/integrations/openai_agents/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("agents") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py new file mode 100644 index 0000000000..ec606c8806 --- /dev/null +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -0,0 +1,580 @@ +import re +import pytest +from unittest.mock import MagicMock, patch +import os + +from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration +from sentry_sdk.integrations.openai_agents.utils import safe_serialize + +import agents +from agents import ( + Agent, + ModelResponse, + Usage, + ModelSettings, +) +from agents.items import ( + ResponseOutputMessage, + ResponseOutputText, + ResponseFunctionToolCall, +) + +test_run_config = agents.RunConfig(tracing_disabled=True) + + +@pytest.fixture +def mock_usage(): + return Usage( + requests=1, + input_tokens=10, + output_tokens=20, + total_tokens=30, + input_tokens_details=MagicMock(cached_tokens=0), + output_tokens_details=MagicMock(reasoning_tokens=5), + ) + + +@pytest.fixture +def mock_model_response(mock_usage): + return ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_123", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Hello, how can I help you?", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=mock_usage, + response_id="resp_123", + ) + + +@pytest.fixture +def test_agent(): + """Create a real Agent instance for testing.""" + return Agent( + name="test_agent", + instructions="You are a helpful test assistant.", + model="gpt-4", + model_settings=ModelSettings( + max_tokens=100, + temperature=0.7, + top_p=1.0, + presence_penalty=0.0, + frequency_penalty=0.0, + ), + ) + + +@pytest.mark.asyncio +async def test_agent_invocation_span( + sentry_init, capture_events, test_agent, mock_model_response +): + """ + Test that the integration creates spans for agent invocations. 
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + result = await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + +def test_agent_invocation_span_sync( + sentry_init, capture_events, test_agent, mock_model_response +): + """ + Test that the integration creates spans for agent invocations. + """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + test_agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + +@pytest.mark.asyncio +async def test_handoff_span(sentry_init, capture_events, mock_usage): + """ + Test that handoff spans are created when agents hand off to other agents. + """ + # Create two simple agents with a handoff relationship + secondary_agent = agents.Agent( + name="secondary_agent", + instructions="You are a secondary agent.", + model="gpt-4o-mini", + ) + + primary_agent = agents.Agent( + name="primary_agent", + instructions="You are a primary agent that hands off to secondary agent.", + model="gpt-4o-mini", + handoffs=[secondary_agent], + ) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Mock two responses: + # 1. Primary agent calls handoff tool + # 2. 
Secondary agent provides final response + handoff_response = ModelResponse( + output=[ + ResponseFunctionToolCall( + id="call_handoff_123", + call_id="call_handoff_123", + name="transfer_to_secondary_agent", + type="function_call", + arguments="{}", + function=MagicMock( + name="transfer_to_secondary_agent", arguments="{}" + ), + ) + ], + usage=mock_usage, + response_id="resp_handoff_123", + ) + + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="I'm the specialist and I can help with that!", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=mock_usage, + response_id="resp_final_123", + ) + + mock_get_response.side_effect = [handoff_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + result = await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + handoff_span = spans[2] + + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + + +@pytest.mark.asyncio +async def test_tool_execution_span(sentry_init, capture_events, test_agent): + """ + Test tool execution span creation. + """ + + @agents.function_tool + def simple_test_tool(message: str) -> str: + """A simple tool""" + return f"Tool executed with: {message}" + + # Create agent with the tool + agent_with_tool = test_agent.clone(tools=[simple_test_tool]) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="simple_test_tool", + type="function_call", + arguments='{"message": "hello"}', + function=MagicMock( + name="simple_test_tool", arguments='{"message": "hello"}' + ), + ) + + # First response with tool call + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) + + # Second response with final answer + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed using the tool", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, input_tokens=15, output_tokens=10, total_tokens=25 + ), + response_id="resp_final_123", + ) + + # Return different responses on successive calls + mock_get_response.side_effect = [tool_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + ( + agent_span, + ai_client_span1, + tool_span, + ai_client_span2, + ) = spans + + available_tools = safe_serialize( + [ + { + "name": "simple_test_tool", + 
"description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": "._create_function_tool.._on_invoke_tool>", + "strict_json_schema": True, + "is_enabled": True, + } + ] + ) + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + assert agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert agent_span["data"]["gen_ai.request.available_tools"] == available_tools + assert agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["data"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["data"]["gen_ai.system"] == "openai" + + assert ai_client_span1["description"] == "chat gpt-4" + assert ai_client_span1["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["data"]["gen_ai.system"] == "openai" + assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span1["data"]["gen_ai.request.available_tools"] == available_tools + assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful test assistant."} + ], + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5 + assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15 + assert re.sub( + r"SerializationIterator\(.*\)", + "NOT_CHECKED", + ai_client_span1["data"]["gen_ai.response.tool_calls"], + ) == safe_serialize( + [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + "function": "NOT_CHECKED", + } + ] + ) + + assert tool_span["description"] == "execute_tool simple_test_tool" + assert tool_span["data"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool" + assert ( + re.sub( + "<.*>(,)", + r"'NOT_CHECKED'\1", + agent_span["data"]["gen_ai.request.available_tools"], + ) + == available_tools + ) + assert tool_span["data"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["data"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["data"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["data"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["data"]["gen_ai.system"] == "openai" + assert tool_span["data"]["gen_ai.tool.description"] == "A simple tool" + assert 
tool_span["data"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["data"]["gen_ai.tool.name"] == "simple_test_tool" + assert tool_span["data"]["gen_ai.tool.output"] == "Tool executed with: hello" + assert tool_span["data"]["gen_ai.tool.type"] == "function" + + assert ai_client_span2["description"] == "chat gpt-4" + assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat" + assert ( + re.sub( + "<.*>(,)", + r"'NOT_CHECKED'\1", + agent_span["data"]["gen_ai.request.available_tools"], + ) + == available_tools + ) + assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100 + assert re.sub( + r"SerializationIterator\(.*\)", + "NOT_CHECKED", + ai_client_span2["data"]["gen_ai.request.messages"], + ) == safe_serialize( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful test assistant."} + ], + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + { + "role": "assistant", + "content": [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "function": "NOT_CHECKED", + } + ], + }, + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span2["data"]["gen_ai.response.text"] == safe_serialize( + ["Task completed using the tool"] + ) + assert ai_client_span2["data"]["gen_ai.system"] == "openai" + assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.input_tokens"] == 15 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens.reasoning"] == 0 + assert ai_client_span2["data"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 + + +@pytest.mark.asyncio +async def test_error_handling(sentry_init, capture_events, test_agent): + """ + Test error handling in agent execution. 
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = Exception("Model Error") + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + ( + error_event, + transaction, + ) = events + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert error_event["exception"]["values"][0]["mechanism"]["type"] == "openai_agents" + + spans = transaction["spans"] + (invoke_agent_span, ai_client_span) = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + assert invoke_agent_span["origin"] == "auto.ai.openai_agents" + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["origin"] == "auto.ai.openai_agents" + assert ai_client_span["tags"]["status"] == "internal_error" diff --git a/tox.ini b/tox.ini index f4aee13d02..5c993718d7 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". # -# Last generated: 2025-06-24T07:19:36.122984+00:00 +# Last generated: 2025-06-24T12:35:34.437673+00:00 [tox] requires = @@ -145,6 +145,8 @@ envlist = {py3.9,py3.11,py3.12}-cohere-v5.11.4 {py3.9,py3.11,py3.12}-cohere-v5.15.0 + {py3.9,py3.11,py3.12}-openai_agents-v0.0.19 + {py3.8,py3.10,py3.11}-huggingface_hub-v0.22.2 {py3.8,py3.11,py3.12}-huggingface_hub-v0.26.5 {py3.8,py3.12,py3.13}-huggingface_hub-v0.30.2 @@ -515,6 +517,9 @@ deps = cohere-v5.11.4: cohere==5.11.4 cohere-v5.15.0: cohere==5.15.0 + openai_agents-v0.0.19: openai-agents==0.0.19 + openai_agents: pytest-asyncio + huggingface_hub-v0.22.2: huggingface_hub==0.22.2 huggingface_hub-v0.26.5: huggingface_hub==0.26.5 huggingface_hub-v0.30.2: huggingface_hub==0.30.2 @@ -809,6 +814,7 @@ setenv = litestar: TESTPATH=tests/integrations/litestar loguru: TESTPATH=tests/integrations/loguru openai: TESTPATH=tests/integrations/openai + openai_agents: TESTPATH=tests/integrations/openai_agents openfeature: TESTPATH=tests/integrations/openfeature opentelemetry: TESTPATH=tests/integrations/opentelemetry potel: TESTPATH=tests/integrations/opentelemetry
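
For context on how the new integration is enabled outside of the test suite, here is a minimal setup sketch. It assumes the integration is importable from sentry_sdk.integrations.openai_agents (consistent with the OpenAIAgentsIntegration class exercised in the tests and the tests/integrations/openai_agents TESTPATH added above); the DSN is a placeholder, and send_default_pii mirrors the tool-execution test, which enables it before asserting message and tool payloads.

    import sentry_sdk
    # Assumed import path, matching the integration exercised in the tests above.
    from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration

    sentry_sdk.init(
        dsn="https://<public_key>@o0.ingest.sentry.io/0",  # placeholder DSN
        traces_sample_rate=1.0,   # sample all traces so agent runs are recorded
        send_default_pii=True,    # the tool-execution test sets this before checking message/tool content
        integrations=[OpenAIAgentsIntegration()],
    )

    # With this in place, an agents.Runner.run(...) call is recorded as an
    # "<agent name> workflow" transaction with invoke_agent, chat, execute_tool,
    # and handoff child spans, as asserted in the tests above.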