diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 0c0b937006..564e52d3e9 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -1,13 +1,17 @@
 import json
-
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from typing import Any, Callable
+    from typing import Any, Callable, Dict, List, Optional
+    from sentry_sdk.tracing import Span
 
 import sentry_sdk
+from sentry_sdk._types import AnnotatedValue
+from sentry_sdk.serializer import serialize
 from sentry_sdk.utils import logger
 
+MAX_GEN_AI_MESSAGE_BYTES = 20_000  # 20KB
+
 
 class GEN_AI_ALLOWED_MESSAGE_ROLES:
     SYSTEM = "system"
@@ -95,3 +99,130 @@ def get_start_span_function():
         current_span is not None and current_span.containing_transaction is not None
     )
     return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction
+
+
+def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
+    # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]]
+    """
+    Truncate messages by removing the oldest ones until the serialized size is within limits.
+    If the last remaining message is still too large, truncate its content instead of
+    removing it entirely.
+
+    This function prioritizes keeping the most recent messages while ensuring the total
+    serialized size stays under the specified byte limit. It uses the Sentry serializer
+    to get size estimates that match what will actually be sent.
+
+    Always preserves at least one message, even if its content needs to be truncated.
+
+    :param messages: List of message objects (typically with 'role' and 'content' keys)
+    :param max_bytes: Maximum allowed size in bytes for the serialized messages
+    :returns: Truncated list of messages that fits within the size limit
+    """
+    if not messages:
+        return messages
+
+    truncated_messages = list(messages)
+
+    # First, remove older messages until we're under the limit or only one message is left
+    while len(truncated_messages) > 1:
+        serialized = serialize(
+            truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
+        )
+        serialized_json = json.dumps(serialized, separators=(",", ":"))
+        current_size = len(serialized_json.encode("utf-8"))
+
+        if current_size <= max_bytes:
+            break
+
+        truncated_messages.pop(0)  # Remove the oldest message
+
+    # If the single remaining message is still too large, truncate its content.
+    # This ensures we always preserve at least one message.
+    if len(truncated_messages) == 1:
+        serialized = serialize(
+            truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
+        )
+        serialized_json = json.dumps(serialized, separators=(",", ":"))
+        current_size = len(serialized_json.encode("utf-8"))
+
+        if current_size > max_bytes:
+            last_message = truncated_messages[0].copy()
+            content = last_message.get("content", "")
+
+            if content and isinstance(content, str):
+                last_message["content"] = content[: int(max_bytes * 0.8)] + "..."
+                truncated_messages[0] = last_message
+
+    return truncated_messages
+
+
+def serialize_gen_ai_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
+    # type: (Optional[Any], int) -> Optional[str]
+    """
+    Serialize and truncate gen_ai messages for storage in spans.
+
+    This function handles the complete workflow of:
+    1. Truncating messages to fit within size limits (if not already done)
+    2. Serializing them using Sentry's serializer (which processes AnnotatedValue for _meta)
+    3. Converting the result to a JSON string for storage
+
+    :param messages: List of message objects, AnnotatedValue, or None
+    :param max_bytes: Maximum allowed size in bytes for the serialized messages
+    :returns: JSON string of the serialized messages, or None if the input was None/empty
+    """
+    if not messages:
+        return None
+
+    if isinstance(messages, AnnotatedValue):
+        serialized_messages = serialize(
+            messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
+        )
+        return json.dumps(serialized_messages, separators=(",", ":"))
+
+    truncated_messages = truncate_messages_by_size(messages, max_bytes)
+    serialized_messages = serialize(
+        truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
+    )
+
+    # Use the same compact separators as the size checks in truncate_messages_by_size,
+    # so the emitted payload cannot exceed the size that was measured there.
+    return json.dumps(serialized_messages, separators=(",", ":"))
+
+
+def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
+    # type: (Optional[List[Dict[str, Any]]], int) -> Any
+    """
+    Truncate messages and return a JSON string, or an AnnotatedValue for automatic _meta creation.
+
+    This function handles truncation and always returns serialized JSON strings. When whole
+    messages had to be dropped, it wraps the serialized string in an AnnotatedValue so that
+    Sentry's serializer can automatically create the appropriate _meta structure.
+
+    :param messages: List of message objects or None
+    :param max_bytes: Maximum allowed size in bytes for the serialized messages
+    :returns: JSON string, AnnotatedValue containing a JSON string (if messages were dropped), or None
+    """
+    if not messages:
+        return None
+
+    truncated_messages = truncate_messages_by_size(messages, max_bytes)
+    if not truncated_messages:
+        return None
+
+    # Always serialize to a JSON string
+    serialized_json = serialize_gen_ai_messages(truncated_messages, max_bytes)
+    if not serialized_json:
+        return None
+
+    original_count = len(messages)
+    truncated_count = len(truncated_messages)
+
+    # If whole messages were dropped, wrap the serialized string in an AnnotatedValue for _meta
+    if original_count != truncated_count:
+        return AnnotatedValue(
+            value=serialized_json,
+            metadata={"len": original_count},
+        )
+
+    # No messages were dropped; return the plain serialized string
+    return serialized_json
diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py
index 46c6b2a766..0d383ffd16 100644
--- a/sentry_sdk/integrations/anthropic.py
+++ b/sentry_sdk/integrations/anthropic.py
@@ -6,6 +6,7 @@
 from sentry_sdk.ai.utils import (
     set_data_normalized,
     normalize_message_roles,
+    truncate_and_serialize_messages,
     get_start_span_function,
 )
 from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS
@@ -145,12 +146,9 @@ def _set_input_data(span, kwargs, integration):
         normalized_messages.append(message)
 
     role_normalized_messages = normalize_message_roles(normalized_messages)
-    set_data_normalized(
-        span,
-        SPANDATA.GEN_AI_REQUEST_MESSAGES,
-        role_normalized_messages,
-        unpack=False,
-    )
+    serialized_messages = truncate_and_serialize_messages(role_normalized_messages)
+    if serialized_messages is not None:
+        span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, serialized_messages)
 
     set_data_normalized(
         span, SPANDATA.GEN_AI_RESPONSE_STREAMING, kwargs.get("stream", False)
diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 724d908665..8535848be9 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -9,6 +9,7 @@
     normalize_message_roles,
     set_data_normalized,
     get_start_span_function,
+    truncate_and_serialize_messages,
 )
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
@@ -221,12 +222,9 @@ def on_llm_start(
                 }
                 for prompt in prompts
             ]
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
-            )
+            messages_data = truncate_and_serialize_messages(normalized_messages)
+            if messages_data is not None:
+                span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
     def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
         # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any
@@ -278,13 +276,9 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
                     self._normalize_langchain_message(message)
                 )
         normalized_messages = normalize_message_roles(normalized_messages)
-
-        set_data_normalized(
-            span,
-            SPANDATA.GEN_AI_REQUEST_MESSAGES,
-            normalized_messages,
-            unpack=False,
-        )
+        messages_data = truncate_and_serialize_messages(normalized_messages)
+        if messages_data is not None:
+            span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
     def on_chat_model_end(self, response, *, run_id, **kwargs):
         # type: (SentryLangchainCallback, LLMResult, UUID, Any) -> Any
@@ -758,12 +752,9 @@ def new_invoke(self, *args, **kwargs):
             and integration.include_prompts
         ):
             normalized_messages = normalize_message_roles([input])
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
-            )
+            messages_data = truncate_and_serialize_messages(normalized_messages)
+            if messages_data is not None:
+                span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
             output = result.get("output")
             if (
@@ -813,12 +804,9 @@ def new_stream(self, *args, **kwargs):
             and integration.include_prompts
         ):
             normalized_messages = normalize_message_roles([input])
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
-            )
+            messages_data = truncate_and_serialize_messages(normalized_messages)
+            if messages_data is not None:
+                span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
         # Run the agent
         result = f(self, *args, **kwargs)
diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py
index 11aa1facf4..468c2f1d88 100644
--- a/sentry_sdk/integrations/langgraph.py
+++ b/sentry_sdk/integrations/langgraph.py
@@ -2,7 +2,11 @@
 from typing import Any, Callable, List, Optional
 
 import sentry_sdk
-from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
+from sentry_sdk.ai.utils import (
+    set_data_normalized,
+    normalize_message_roles,
+    truncate_and_serialize_messages,
+)
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
@@ -181,12 +185,11 @@ def new_invoke(self, *args, **kwargs):
             input_messages = _parse_langgraph_messages(args[0])
             if input_messages:
                 normalized_input_messages = normalize_message_roles(input_messages)
-                set_data_normalized(
-                    span,
-                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                    normalized_input_messages,
-                    unpack=False,
+                messages_data = truncate_and_serialize_messages(
+                    normalized_input_messages
                 )
+                if messages_data is not None:
+                    span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
         result = f(self, *args, **kwargs)
@@ -232,12 +235,11 @@ async def new_ainvoke(self, *args, **kwargs):
             input_messages = _parse_langgraph_messages(args[0])
             if input_messages:
                 normalized_input_messages = normalize_message_roles(input_messages)
-                set_data_normalized(
-                    span,
-                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                    normalized_input_messages,
-                    unpack=False,
+                messages_data = truncate_and_serialize_messages(
+                    normalized_input_messages
                 )
+                if messages_data is not None:
+                    span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
         result = await f(self, *args, **kwargs)
diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py
index 2582c2bc05..7bf80d2f54 100644
--- a/sentry_sdk/integrations/litellm.py
+++ b/sentry_sdk/integrations/litellm.py
@@ -3,7 +3,11 @@
 import sentry_sdk
 from sentry_sdk import consts
 from sentry_sdk.ai.monitoring import record_token_usage
-from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
+from sentry_sdk.ai.utils import (
+    get_start_span_function,
+    set_data_normalized,
+    truncate_and_serialize_messages,
+)
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
@@ -72,9 +76,9 @@ def _input_callback(kwargs):
 
     # Record messages if allowed
     if messages and should_send_default_pii() and integration.include_prompts:
-        set_data_normalized(
-            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
-        )
+        messages_data = truncate_and_serialize_messages(messages)
+        if messages_data is not None:
+            span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
     # Record other parameters
     params = {
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index e9bd2efa23..f503ce2f96 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -3,7 +3,11 @@
 import sentry_sdk
 from sentry_sdk import consts
 from sentry_sdk.ai.monitoring import record_token_usage
-from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
+from sentry_sdk.ai.utils import (
+    set_data_normalized,
+    normalize_message_roles,
+    truncate_and_serialize_messages,
+)
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
@@ -183,9 +187,9 @@ def _set_input_data(span, kwargs, operation, integration):
         and integration.include_prompts
     ):
         normalized_messages = normalize_message_roles(messages)
-        set_data_normalized(
-            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False
-        )
+        messages_data = truncate_and_serialize_messages(normalized_messages)
+        if messages_data is not None:
+            span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
     # Input attributes: Common
     set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai")
diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
index 2a9c5ebe66..ac2596f73c 100644
--- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
+++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
@@ -3,6 +3,7 @@
     get_start_span_function,
     set_data_normalized,
     normalize_message_roles,
+    truncate_and_serialize_messages,
 )
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.scope import should_send_default_pii
@@ -61,12 +62,9 @@ def invoke_agent_span(context, agent, kwargs):
 
         if len(messages) > 0:
             normalized_messages = normalize_message_roles(messages)
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
-            )
+            messages_data = truncate_and_serialize_messages(normalized_messages)
+            if messages_data is not None:
+                span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
 
     _set_agent_data(span, agent)
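Every integration touched by this patch records request messages through the same three steps: normalize roles, truncate and serialize, then set the span attribute only if something survived. A minimal sketch of that shared flow (`record_request_messages` is an illustrative name, not a helper that exists in the SDK):

from sentry_sdk.ai.utils import (
    normalize_message_roles,
    truncate_and_serialize_messages,
)
from sentry_sdk.consts import SPANDATA


def record_request_messages(span, messages):
    # Illustrative only: this mirrors the hunks above, it is not SDK API.
    # 1. Map non-standard roles (e.g. "ai") onto the allowed role set.
    normalized = normalize_message_roles(messages)
    # 2. Drop the oldest messages (or truncate a lone message's content)
    #    until the serialized payload fits under MAX_GEN_AI_MESSAGE_BYTES.
    messages_data = truncate_and_serialize_messages(normalized)
    # 3. messages_data is a JSON string, or an AnnotatedValue wrapping one
    #    when whole messages were dropped; skip the attribute if nothing is left.
    if messages_data is not None:
        span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)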
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 125ff1175b..92cebdf925 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -4,6 +4,7 @@ normalize_message_roles, set_data_normalized, normalize_message_role, + truncate_and_serialize_messages, ) from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP from sentry_sdk.integrations import DidNotEnable @@ -135,12 +136,10 @@ def _set_input_data(span, get_response_kwargs): } ) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalize_message_roles(request_messages), - unpack=False, - ) + role_normalized_messages = normalize_message_roles(request_messages) + messages_data = truncate_and_serialize_messages(role_normalized_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) def _set_output_data(span, result): diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e9065e2d32..06a9827cea 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -1,6 +1,7 @@ -import pytest -from unittest import mock import json +from unittest import mock + +import pytest try: from unittest.mock import AsyncMock @@ -41,16 +42,18 @@ async def __call__(self, *args, **kwargs): except ImportError: from anthropic.types.content_block import ContentBlock as TextBlock -from sentry_sdk import start_transaction, start_span +from sentry_sdk import start_span, start_transaction +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.anthropic import ( AnthropicIntegration, - _set_output_data, _collect_ai_data, + _set_output_data, ) +from sentry_sdk.serializer import serialize from sentry_sdk.utils import package_version - ANTHROPIC_VERSION = package_version("anthropic") EXAMPLE_MESSAGE = Message( @@ -891,9 +894,8 @@ def test_anthropic_message_role_mapping(sentry_init, capture_events): events = capture_events() client = Anthropic(api_key="z") - - def mock_messages_create(*args, **kwargs): - return Message( + client.messages._post = mock.Mock( + return_value=Message( id="msg_1", content=[TextBlock(text="Hi there!", type="text")], model="claude-3-opus", @@ -903,15 +905,13 @@ def mock_messages_create(*args, **kwargs): type="message", usage=Usage(input_tokens=10, output_tokens=5), ) + ) - client.messages._post = mock.Mock(return_value=mock_messages_create()) - - # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ {"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" + {"role": "ai", "content": "Hi there!"}, + {"role": "assistant", "content": "How can I help?"}, ] with start_transaction(name="anthropic tx"): @@ -920,28 +920,120 @@ def mock_messages_create(*args, **kwargs): ) (event,) = events - span = event["spans"][0] + (span,) = event["spans"] - # Verify that the span was created correctly assert span["op"] == "gen_ai.chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - # Parse the stored messages stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - - # Verify that "ai" role 
was mapped to "assistant" assert len(stored_messages) == 4 assert stored_messages[0]["role"] == "system" assert stored_messages[1]["role"] == "user" - assert ( - stored_messages[2]["role"] == "assistant" - ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" + assert stored_messages[2]["role"] == "assistant" + assert stored_messages[3]["role"] == "assistant" - # Verify content is preserved assert stored_messages[2]["content"] == "Hi there!" assert stored_messages[3]["content"] == "How can I help?" - # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] assert "ai" not in roles + + +def test_anthropic_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in Anthropic integration.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = Anthropic(api_key="test-api-key") + client.messages._post = mock.Mock( + return_value=Message( + id="test", + content=[TextBlock(text="Hello", type="text")], + model="claude-3", + role="assistant", + type="message", + usage=Usage(input_tokens=10, output_tokens=20), + ) + ) + + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) + large_messages = [ + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-sonnet-20240229", + messages=large_messages, + max_tokens=100, + ) + + (event,) = events + (span,) = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES + + +def test_anthropic_single_large_message_preservation(sentry_init, capture_events): + """Test that a single very large message gets preserved with truncated content.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = Anthropic(api_key="test-api-key") + client.messages._post = mock.Mock( + return_value=Message( + id="test", + content=[TextBlock(text="Hello", type="text")], + model="claude-3", + role="assistant", + type="message", + usage=Usage(input_tokens=100, output_tokens=50), + ) + ) + + huge_content = ( + "This is an extremely long message that will definitely exceed size limits. 
" + * 2000 + ) + messages = [{"role": "user", "content": huge_content}] + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-sonnet-20240229", + messages=messages, + max_tokens=100, + ) + + (event,) = events + (span,) = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert parsed_messages[0]["role"] == "user" + assert len(parsed_messages[0]["content"]) < len(huge_content) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b600c32905..e2572198a5 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -1,3 +1,4 @@ +import json import pytest from unittest import mock from datetime import datetime @@ -24,6 +25,9 @@ async def __call__(self, *args, **kwargs): _success_callback, _failure_callback, ) +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from sentry_sdk.utils import package_version @@ -545,3 +549,98 @@ def dict(self): # Should have extracted the response message assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + + +def test_litellm_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in LiteLLM integration.""" + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) + large_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": large_messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + (event,) = events + (span,) = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES + + +def test_litellm_single_large_message_preservation(sentry_init, capture_events): + """Test that a single very large message gets preserved with truncated content.""" + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + huge_content = ( + "This is an extremely long message that will definitely exceed size limits. 
" + * 2000 + ) + messages = [{"role": "user", "content": huge_content}] + + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + (event,) = events + (span,) = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert parsed_messages[0]["role"] == "user" + assert len(parsed_messages[0]["content"]) < len(huge_content) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 06e0a09fcf..440e74c395 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,3 +1,4 @@ +import json import pytest from sentry_sdk.utils import package_version @@ -39,6 +40,9 @@ OpenAIIntegration, _calculate_token_usage, ) +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from unittest import mock # python 3.3 and above @@ -1451,6 +1455,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): def test_openai_message_role_mapping(sentry_init, capture_events): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" + sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1460,7 +1465,6 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ {"role": "system", "content": "You are helpful."}, @@ -1471,11 +1475,9 @@ def test_openai_message_role_mapping(sentry_init, capture_events): with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) - + # Verify that the span was created correctly (event,) = events span = event["spans"][0] - - # Verify that the span was created correctly assert span["op"] == "gen_ai.chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] @@ -1500,3 +1502,96 @@ def test_openai_message_role_mapping(sentry_init, capture_events): # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] assert "ai" not in roles + + +def test_openai_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in OpenAI integration.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + large_content = ( + "This is a very long message that will exceed our size limits. 
" * 1000 + ) + large_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) + + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + if "_meta" in event and len(parsed_messages) < len(large_messages): + meta_path = event["_meta"] + if ( + "spans" in meta_path + and "0" in meta_path["spans"] + and "data" in meta_path["spans"]["0"] + ): + span_meta = meta_path["spans"]["0"]["data"] + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "len" in messages_meta.get("", {}) + + +def test_openai_single_large_message_content_truncation(sentry_init, capture_events): + """Test that a single very large message gets content truncated, not removed entirely.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + huge_content = ( + "This is an extremely long message that will definitely exceed size limits. " + * 2000 + ) + messages = [{"role": "user", "content": huge_content}] + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=messages, + ) + + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + assert parsed_messages[0]["role"] == "user" + assert len(parsed_messages[0]["content"]) < len(huge_content) + + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index e647ce9fad..de4344311b 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1,11 +1,16 @@ import asyncio +import json import re import pytest from unittest.mock import MagicMock, patch import os +import sentry_sdk from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration from sentry_sdk.integrations.openai_agents.utils import safe_serialize +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from sentry_sdk.utils import parse_version import agents @@ -1077,3 +1082,115 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): # Verify no "ai" roles remain in any message for message in stored_messages: assert message["role"] != "ai" + + +def test_openai_agents_message_truncation( + sentry_init, capture_events, mock_model_response +): + """Test that 
large messages are truncated properly in OpenAI Agents integration.""" + # Create messages that will definitely exceed size limits + large_system_prompt = ( + "This is a very long system prompt that will exceed our size limits. " * 1000 + ) # ~64KB + large_user_message = ( + "This is a very long user message that will exceed our size limits. " * 1000 + ) # ~64KB + + agent = Agent( + name="test_agent", + model="gpt-4", + instructions=large_system_prompt, + ) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + agent, large_user_message, run_config=test_run_config + ) + + assert result is not None + + (event,) = events + spans = event["spans"] + invoke_agent_span, ai_client_span = spans + assert "gen_ai.request.messages" in invoke_agent_span["data"] + + messages_data = invoke_agent_span["data"]["gen_ai.request.messages"] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) >= 1 + + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES + + total_original_size = len(large_system_prompt) + len(large_user_message) + total_parsed_size = sum(len(str(msg)) for msg in parsed_messages) + assert total_parsed_size < total_original_size + + +def test_openai_agents_single_large_message_preservation( + sentry_init, capture_events, mock_model_response +): + """Test that a single very large message gets preserved with truncated content.""" + huge_content = ( + "This is an extremely long message that will definitely exceed size limits. 
" + * 2000 + ) + + agent = Agent( + name="test_agent", + model="gpt-4", + instructions="You are helpful.", + ) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + agent, huge_content, run_config=test_run_config + ) + + assert result is not None + + (event,) = events + spans = event["spans"] + invoke_agent_span, ai_client_span = spans + assert "gen_ai.request.messages" in invoke_agent_span["data"] + + messages_data = invoke_agent_span["data"]["gen_ai.request.messages"] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) >= 1 + + user_message = next( + (msg for msg in parsed_messages if msg.get("role") == "user"), None + ) + if user_message and "content" in user_message: + assert len(user_message["content"]) < len(huge_content) diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py new file mode 100644 index 0000000000..9dec987116 --- /dev/null +++ b/tests/test_ai_message_utils.py @@ -0,0 +1,378 @@ +import json +import pytest + +from sentry_sdk.ai.utils import ( + MAX_GEN_AI_MESSAGE_BYTES, + truncate_messages_by_size, + serialize_gen_ai_messages, + truncate_and_serialize_messages, +) +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize + + +@pytest.fixture +def sample_messages(): + """Sample messages similar to what gen_ai integrations would use""" + return [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "What is the difference between a list and a tuple in Python?", + }, + { + "role": "assistant", + "content": "Lists are mutable and use [], tuples are immutable and use ().", + }, + {"role": "user", "content": "Can you give me some examples?"}, + { + "role": "assistant", + "content": "Sure! Here are examples:\n\n```python\n# List\nmy_list = [1, 2, 3]\nmy_list.append(4)\n\n# Tuple\nmy_tuple = (1, 2, 3)\n# my_tuple.append(4) would error\n```", + }, + ] + + +@pytest.fixture +def large_messages(): + """Messages that will definitely exceed size limits""" + large_content = "This is a very long message. 
" * 1000 + return [ + {"role": "system", "content": large_content}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + +class TestTruncateMessagesBySize: + def test_no_truncation_needed(self, sample_messages): + """Test that messages under the limit are not truncated""" + result = truncate_messages_by_size( + sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES + ) + assert len(result) == len(sample_messages) + assert result == sample_messages + + def test_truncation_removes_oldest_first(self, large_messages): + """Test that oldest messages are removed first during truncation""" + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 + result = truncate_messages_by_size(large_messages, max_bytes=small_limit) + assert len(result) < len(large_messages) + + if result: + assert result[-1] == large_messages[-1] + + def test_empty_messages_list(self): + """Test handling of empty messages list""" + result = truncate_messages_by_size( + [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 + ) + assert result == [] + + def test_single_message_under_limit(self): + """Test single message under size limit""" + messages = [{"role": "user", "content": "Hello!"}] + result = truncate_messages_by_size( + messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 + ) + assert result == messages + + def test_single_message_over_limit(self): + """Test single message that exceeds size limit""" + large_content = "x" * 10000 + messages = [{"role": "user", "content": large_content}] + + result = truncate_messages_by_size(messages, max_bytes=100) + assert len(result) == 1 + assert result[0]["role"] == "user" + assert len(result[0]["content"]) < len(large_content) + + def test_progressive_truncation(self, large_messages): + """Test that truncation works progressively with different limits""" + limits = [ + MAX_GEN_AI_MESSAGE_BYTES // 5, + MAX_GEN_AI_MESSAGE_BYTES // 10, + MAX_GEN_AI_MESSAGE_BYTES // 25, + MAX_GEN_AI_MESSAGE_BYTES // 100, + MAX_GEN_AI_MESSAGE_BYTES // 500, + ] + prev_count = len(large_messages) + + for limit in limits: + result = truncate_messages_by_size(large_messages, max_bytes=limit) + current_count = len(result) + + assert current_count <= prev_count + assert current_count >= 1 + prev_count = current_count + + def test_exact_size_boundary(self): + """Test behavior at exact size boundaries""" + messages = [{"role": "user", "content": "test"}] + + serialized = serialize(messages, is_vars=False) + json_str = json.dumps(serialized, separators=(",", ":")) + exact_size = len(json_str.encode("utf-8")) + + result = truncate_messages_by_size(messages, max_bytes=exact_size) + assert len(result) == 1 + + result = truncate_messages_by_size(messages, max_bytes=exact_size - 1) + assert len(result) == 1 + + +class TestSerializeGenAiMessages: + def test_serialize_normal_messages(self, sample_messages): + """Test serialization of normal messages""" + result = serialize_gen_ai_messages(sample_messages) + + assert result is not None + assert isinstance(result, str) + + parsed = json.loads(result) + assert isinstance(parsed, list) + assert len(parsed) <= len(sample_messages) + + def test_serialize_none_messages(self): + """Test serialization of None input""" + result = serialize_gen_ai_messages(None) + assert result is None + + def test_serialize_empty_messages(self): + """Test serialization of empty list""" + result = serialize_gen_ai_messages([]) + assert result is None + + def test_serialize_with_truncation(self, large_messages): + 
"""Test serialization with size-based truncation""" + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 + result = serialize_gen_ai_messages(large_messages, max_bytes=small_limit) + + if result: + assert isinstance(result, str) + + result_size = len(result.encode("utf-8")) + assert result_size <= small_limit + + parsed = json.loads(result) + assert isinstance(parsed, list) + + def test_serialize_preserves_message_structure(self): + """Test that serialization preserves message structure""" + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + + result = serialize_gen_ai_messages(messages) + parsed = json.loads(result) + + assert len(parsed) == 2 + assert parsed[0]["role"] == "user" + assert parsed[0]["content"] == "Hello" + assert parsed[1]["role"] == "assistant" + assert parsed[1]["content"] == "Hi there!" + + +class TestTruncateAndSerializeMessages: + def test_main_function_with_normal_messages(self, sample_messages): + """Test the main function with normal messages""" + result = truncate_and_serialize_messages(sample_messages) + assert isinstance(result, str) + + parsed = json.loads(result) + assert isinstance(parsed, list) + assert len(parsed) == len(sample_messages) + + def test_main_function_with_large_messages(self, large_messages): + """Test the main function with messages requiring truncation""" + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation + result = truncate_and_serialize_messages(large_messages, max_bytes=small_limit) + assert isinstance(result, AnnotatedValue) + assert result.metadata["len"] == len(large_messages) + assert isinstance(result.value, str) + + parsed = json.loads(result.value) + assert isinstance(parsed, list) + assert len(parsed) <= len(large_messages) + + result_size = len(result.value.encode("utf-8")) + assert result_size <= small_limit + + def test_main_function_with_none_input(self): + """Test the main function with None input""" + result = truncate_and_serialize_messages(None) + assert result is None + + def test_main_function_with_empty_input(self): + """Test the main function with empty input""" + result = truncate_and_serialize_messages([]) + assert result is None + + def test_main_function_serialization_format(self, sample_messages): + """Test that the function always returns proper JSON strings""" + result = truncate_and_serialize_messages(sample_messages) + assert isinstance(result, str) + + parsed = json.loads(result) + assert isinstance(parsed, list) + + for i, msg in enumerate(parsed): + assert "role" in msg + assert "content" in msg + + def test_main_function_default_limit(self, sample_messages): + """Test that the main function uses the default limit correctly""" + result = truncate_and_serialize_messages(sample_messages) + assert isinstance(result, str) + + parsed = json.loads(result) + assert isinstance(parsed, list) + + +class TestEdgeCases: + def test_messages_with_special_characters(self): + """Test messages containing special characters""" + messages = [ + {"role": "user", "content": "Hello 🌍! How are you? 中文测试"}, + { + "role": "assistant", + "content": "I'm doing well! 
Unicode: ñáéíóú àèìòù äöü", + }, + ] + + result = truncate_and_serialize_messages(messages) + assert result is not None + + parsed = json.loads(result) + assert len(parsed) == 2 + assert "🌍" in parsed[0]["content"] + + def test_messages_with_nested_structures(self): + """Test messages with complex nested structures""" + messages = [ + { + "role": "user", + "content": "Hello", + "metadata": {"timestamp": "2023-01-01", "user_id": 123}, + }, + { + "role": "assistant", + "content": "Hi!", + "tool_calls": [{"name": "search", "args": {"query": "test"}}], + }, + ] + + result = truncate_and_serialize_messages(messages) + assert result is not None + + if isinstance(result, AnnotatedValue): + parsed = json.loads(result.value) + else: + parsed = json.loads(result) + assert "metadata" in parsed[0] + assert "tool_calls" in parsed[1] + + def test_messages_with_none_values(self): + """Test messages containing None values""" + messages = [ + {"role": "user", "content": None}, + {"role": "assistant", "content": "Hello", "extra": None}, + ] + + result = truncate_and_serialize_messages(messages) + assert result is not None + + if isinstance(result, AnnotatedValue): + parsed = json.loads(result.value) + else: + parsed = json.loads(result) + assert len(parsed) == 2 + + def test_truncation_keeps_most_recent(self): + """Test that truncation prioritizes keeping the most recent messages""" + messages = [] + for i in range(10): + messages.append( + { + "role": "user" if i % 2 == 0 else "assistant", + "content": f"Message {i} with unique content that makes it identifiable", + } + ) + + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 500 + result = truncate_and_serialize_messages(messages, max_bytes=small_limit) + + if result: + assert isinstance(result, AnnotatedValue) + parsed = json.loads(result.value) + if parsed: + last_kept_content = parsed[-1]["content"] + assert ( + "Message 9" in last_kept_content or "Message 8" in last_kept_content + ) + + +class TestMetaSupport: + """Test that _meta entries are created correctly when truncation occurs""" + + def test_annotated_value_returned_on_truncation(self, large_messages): + """Test that truncate_and_serialize_messages returns AnnotatedValue when truncation occurs""" + small_limit = 50_000 + result = truncate_and_serialize_messages(large_messages, max_bytes=small_limit) + assert isinstance(result, AnnotatedValue) + assert result.metadata == {"len": len(large_messages)} + assert isinstance(result.value, str) + + parsed = json.loads(result.value) + assert len(parsed) <= len(large_messages) + + def test_no_annotated_value_when_no_truncation(self, sample_messages): + """Test that truncate_and_serialize_messages returns plain list when no truncation occurs""" + result = truncate_and_serialize_messages(sample_messages) + assert not isinstance(result, AnnotatedValue) + assert isinstance(result, str) + + parsed = json.loads(result) + assert len(parsed) == len(sample_messages) + + def test_meta_structure_in_serialized_output(self, large_messages): + """Test that _meta structure is created correctly in serialized output""" + small_limit = 50_000 + annotated_messages = truncate_and_serialize_messages( + large_messages, max_bytes=small_limit + ) + test_data = {"gen_ai": {"request": {"messages": annotated_messages}}} + serialized = serialize(test_data, is_vars=False) + assert "_meta" in serialized + assert "gen_ai" in serialized["_meta"] + assert "request" in serialized["_meta"]["gen_ai"] + assert "messages" in serialized["_meta"]["gen_ai"]["request"] + assert 
serialized["_meta"]["gen_ai"]["request"]["messages"][""] == { + "len": len(large_messages) + } + assert "gen_ai" in serialized + assert "request" in serialized["gen_ai"] + assert "messages" in serialized["gen_ai"]["request"] + assert isinstance(serialized["gen_ai"]["request"]["messages"], str) + + def test_serialize_gen_ai_messages_handles_annotated_value(self, large_messages): + """Test that serialize_gen_ai_messages handles AnnotatedValue input correctly""" + truncated = large_messages[:2] + annotated = AnnotatedValue( + value=truncated, metadata={"len": len(large_messages)} + ) + result = serialize_gen_ai_messages(annotated) + + assert result is not None + parsed = json.loads(result) + assert isinstance(parsed, list) + assert len(parsed) == 2 + + def test_empty_messages_no_annotated_value(self): + """Test that empty messages don't create AnnotatedValue""" + result = truncate_and_serialize_messages([]) + assert result is None + + result = truncate_and_serialize_messages(None) + assert result is None