diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 2a9c5ebe66..18217daa6f 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -3,6 +3,7 @@ get_start_span_function, set_data_normalized, normalize_message_roles, + truncate_and_annotate_messages, ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.scope import should_send_default_pii @@ -61,12 +62,17 @@ def invoke_agent_span(context, agent, kwargs): if len(messages) > 0: normalized_messages = normalize_message_roles(messages) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalized_messages, - unpack=False, + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages( + normalized_messages, span, scope ) + if messages_data is not None: + set_data_normalized( + span, + SPANDATA.GEN_AI_REQUEST_MESSAGES, + messages_data, + unpack=False, + ) _set_agent_data(span, agent) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 125ff1175b..52ff50db70 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -4,6 +4,7 @@ normalize_message_roles, set_data_normalized, normalize_message_role, + truncate_and_annotate_messages, ) from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP from sentry_sdk.integrations import DidNotEnable @@ -111,36 +112,52 @@ def _set_input_data(span, get_response_kwargs): ) for message in get_response_kwargs.get("input", []): - if "role" in message: - normalized_role = normalize_message_role(message.get("role")) + # Serialize the entire message first to ensure no non-JSON-serializable objects + import json + + serialized_str = safe_serialize(message) + try: + serialized_message = json.loads(serialized_str) + except (json.JSONDecodeError, TypeError): + # If it can't be parsed, skip this message + continue + + if "role" in serialized_message: + normalized_role = normalize_message_role(serialized_message.get("role")) request_messages.append( { "role": normalized_role, - "content": [{"type": "text", "text": message.get("content")}], + "content": [ + {"type": "text", "text": serialized_message.get("content")} + ], } ) else: - if message.get("type") == "function_call": + if serialized_message.get("type") == "function_call": request_messages.append( { "role": GEN_AI_ALLOWED_MESSAGE_ROLES.ASSISTANT, - "content": [message], + "content": [serialized_message], } ) - elif message.get("type") == "function_call_output": + elif serialized_message.get("type") == "function_call_output": request_messages.append( { "role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL, - "content": [message], + "content": [serialized_message], } ) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalize_message_roles(request_messages), - unpack=False, - ) + normalized_messages = normalize_message_roles(request_messages) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, + SPANDATA.GEN_AI_REQUEST_MESSAGES, + messages_data, + unpack=False, + ) def _set_output_data(span, result): diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index e647ce9fad..d6c3a4bfdf 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1,33 +1,33 @@ import asyncio +import json +import os import re -import pytest from unittest.mock import MagicMock, patch -import os - -from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration -from sentry_sdk.integrations.openai_agents.utils import safe_serialize -from sentry_sdk.utils import parse_version import agents +import pytest from agents import ( Agent, ModelResponse, - Usage, ModelSettings, + Usage, ) from agents.items import ( McpCall, + ResponseFunctionToolCall, ResponseOutputMessage, ResponseOutputText, - ResponseFunctionToolCall, ) from agents.version import __version__ as OPENAI_AGENTS_VERSION - from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, ) +from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration +from sentry_sdk.integrations.openai_agents.utils import safe_serialize +from sentry_sdk.utils import parse_version + test_run_config = agents.RunConfig(tracing_disabled=True) @@ -1051,8 +1051,8 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): get_response_kwargs = {"input": test_input} - from sentry_sdk.integrations.openai_agents.utils import _set_input_data from sentry_sdk import start_span + from sentry_sdk.integrations.openai_agents.utils import _set_input_data with start_span(op="test") as span: _set_input_data(span, get_response_kwargs) @@ -1061,8 +1061,6 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): from sentry_sdk.consts import SPANDATA if SPANDATA.GEN_AI_REQUEST_MESSAGES in span._data: - import json - stored_messages = json.loads(span._data[SPANDATA.GEN_AI_REQUEST_MESSAGES]) # Verify roles were properly mapped @@ -1077,3 +1075,83 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): # Verify no "ai" roles remain in any message for message in stored_messages: assert message["role"] != "ai" + + +@pytest.mark.asyncio +async def test_openai_agents_message_truncation( + sentry_init, capture_events, test_agent, mock_usage +): + """Test that large messages are truncated properly in OpenAI Agents integration.""" + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) + + large_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_large", + type="message", + status="completed", + content=[ + ResponseOutputText( + text=large_content, + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=mock_usage, + response_id="resp_large", + ) + + mock_get_response.return_value = large_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + # Create messages with mixed large/small content by patching get_response + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_inner: + mock_inner.side_effect = [large_response] * 5 + + # We'll test with the agent itself, not the messages + # since OpenAI agents tracks messages internally + result = await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + # Check ai_client spans (these have the truncation) + ai_client_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.chat" + ] + assert len(ai_client_spans) > 0 + + # Just verify that messages are being set and truncation is applied + # The actual truncation behavior is tested in the ai_monitoring tests + ai_client_span = ai_client_spans[0] + if "gen_ai.request.messages" in ai_client_span["data"]: + messages_data = ai_client_span["data"]["gen_ai.request.messages"] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + # Verify messages were processed + assert len(parsed_messages) >= 1