Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
get_start_span_function,
set_data_normalized,
normalize_message_roles,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -61,12 +62,17 @@ def invoke_agent_span(context, agent, kwargs):

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(
normalized_messages, span, scope
)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)

_set_agent_data(span, agent)

Expand Down
43 changes: 30 additions & 13 deletions sentry_sdk/integrations/openai_agents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
normalize_message_roles,
set_data_normalized,
normalize_message_role,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
from sentry_sdk.integrations import DidNotEnable
Expand Down Expand Up @@ -111,36 +112,52 @@ def _set_input_data(span, get_response_kwargs):
)

for message in get_response_kwargs.get("input", []):
if "role" in message:
normalized_role = normalize_message_role(message.get("role"))
# Serialize the entire message first to ensure no non-JSON-serializable objects
import json

serialized_str = safe_serialize(message)
try:
serialized_message = json.loads(serialized_str)
except (json.JSONDecodeError, TypeError):
# If it can't be parsed, skip this message
continue

if "role" in serialized_message:
normalized_role = normalize_message_role(serialized_message.get("role"))
request_messages.append(
{
"role": normalized_role,
"content": [{"type": "text", "text": message.get("content")}],
"content": [
{"type": "text", "text": serialized_message.get("content")}
],
}
)
else:
if message.get("type") == "function_call":
if serialized_message.get("type") == "function_call":
request_messages.append(
{
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.ASSISTANT,
"content": [message],
"content": [serialized_message],
}
)
elif message.get("type") == "function_call_output":
elif serialized_message.get("type") == "function_call_output":
request_messages.append(
{
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL,
"content": [message],
"content": [serialized_message],
}
)

set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalize_message_roles(request_messages),
unpack=False,
)
normalized_messages = normalize_message_roles(request_messages)
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)


def _set_output_data(span, result):
Expand Down
102 changes: 90 additions & 12 deletions tests/integrations/openai_agents/test_openai_agents.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
import asyncio
import json
import os
import re
import pytest
from unittest.mock import MagicMock, patch
import os

from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
from sentry_sdk.integrations.openai_agents.utils import safe_serialize
from sentry_sdk.utils import parse_version

import agents
import pytest
from agents import (
Agent,
ModelResponse,
Usage,
ModelSettings,
Usage,
)
from agents.items import (
McpCall,
ResponseFunctionToolCall,
ResponseOutputMessage,
ResponseOutputText,
ResponseFunctionToolCall,
)
from agents.version import __version__ as OPENAI_AGENTS_VERSION

from openai.types.responses.response_usage import (
InputTokensDetails,
OutputTokensDetails,
)

from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
from sentry_sdk.integrations.openai_agents.utils import safe_serialize
from sentry_sdk.utils import parse_version

test_run_config = agents.RunConfig(tracing_disabled=True)


Expand Down Expand Up @@ -1051,8 +1051,8 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):

get_response_kwargs = {"input": test_input}

from sentry_sdk.integrations.openai_agents.utils import _set_input_data
from sentry_sdk import start_span
from sentry_sdk.integrations.openai_agents.utils import _set_input_data

with start_span(op="test") as span:
_set_input_data(span, get_response_kwargs)
Expand All @@ -1061,8 +1061,6 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):
from sentry_sdk.consts import SPANDATA

if SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data:
import json

stored_messages = json.loads(span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES])

# Verify roles were properly mapped
Expand All @@ -1077,3 +1075,83 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):
# Verify no "ai" roles remain in any message
for message in stored_messages:
assert message["role"] != "ai"


@pytest.mark.asyncio
async def test_openai_agents_message_truncation(
    sentry_init, capture_events, test_agent, mock_usage
):
    """Test that large messages are truncated properly in OpenAI Agents integration.

    Builds a ModelResponse whose single output message carries ~63 KB of text,
    runs an agent against a mocked ``get_response``, and then checks that the
    resulting ``gen_ai.chat`` span still serializes its
    ``gen_ai.request.messages`` payload as a JSON string (i.e. the
    truncate-and-annotate path produced well-formed output).

    Fixtures (assumed from conftest — confirm there):
        sentry_init: initializes the SDK with the given integrations/options.
        capture_events: returns a list that collects emitted envelopes/events.
        test_agent: a preconfigured ``agents.Agent`` instance.
        mock_usage: a ``Usage`` object for the fabricated ModelResponse.
    """
    # Fake API key so the agents SDK does not reject the client setup.
    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # ~63 KB of text — large enough to trip the SDK's message
            # size limits and force truncation/annotation.
            large_content = (
                "This is a very long message that will exceed our size limits. " * 1000
            )

            # Minimal but structurally valid Responses-API model output
            # containing the oversized text block.
            large_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_large",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text=large_content,
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=mock_usage,
                response_id="resp_large",
            )

            # NOTE(review): this return_value is never exercised — the inner
            # patch below replaces get_response (with side_effect) before
            # Runner.run is called. Looks like leftover setup; confirm and
            # consider removing the outer mock configuration.
            mock_get_response.return_value = large_response

            # send_default_pii=True is required for message content to be
            # recorded on spans at all.
            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=True,
            )

            events = capture_events()

            # Create messages with mixed large/small content by patching get_response
            with patch(
                "agents.models.openai_responses.OpenAIResponsesModel.get_response"
            ) as mock_inner:
                # Enough canned responses for several model round-trips.
                mock_inner.side_effect = [large_response] * 5

                # We'll test with the agent itself, not the messages
                # since OpenAI agents tracks messages internally
                result = await agents.Runner.run(
                    test_agent, "Test input", run_config=test_run_config
                )

                assert result is not None

            # The run must have produced at least one transaction event.
            assert len(events) > 0
            tx = events[0]
            assert tx["type"] == "transaction"

            # Check ai_client spans (these have the truncation)
            ai_client_spans = [
                span for span in tx.get("spans", []) if span.get("op") == "gen_ai.chat"
            ]
            assert len(ai_client_spans) > 0

            # Just verify that messages are being set and truncation is applied
            # The actual truncation behavior is tested in the ai_monitoring tests
            ai_client_span = ai_client_spans[0]
            if "gen_ai.request.messages" in ai_client_span["data"]:
                messages_data = ai_client_span["data"]["gen_ai.request.messages"]
                # Messages must be stored serialized (a JSON string), not as
                # a raw Python structure.
                assert isinstance(messages_data, str)

                parsed_messages = json.loads(messages_data)
                assert isinstance(parsed_messages, list)
                # Verify messages were processed
                assert len(parsed_messages) >= 1