From 40f1ecbf5b9331977f310b9935593aff46cc1a03 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 3 Oct 2025 11:20:06 +0200 Subject: [PATCH 01/14] fix(ai): truncate long message histories --- sentry_sdk/ai/message_utils.py | 137 ++++++++++++ tests/test_ai_message_utils.py | 392 +++++++++++++++++++++++++++++++++ 2 files changed, 529 insertions(+) create mode 100644 sentry_sdk/ai/message_utils.py create mode 100644 tests/test_ai_message_utils.py diff --git a/sentry_sdk/ai/message_utils.py b/sentry_sdk/ai/message_utils.py new file mode 100644 index 0000000000..2567d31710 --- /dev/null +++ b/sentry_sdk/ai/message_utils.py @@ -0,0 +1,137 @@ +import json +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Dict, List, Optional + +try: + from sentry_sdk.serializer import serialize +except ImportError: + # Fallback for cases where sentry_sdk isn't fully importable + def serialize(obj, **kwargs): + # type: (Any, **Any) -> Any + return obj + + +MAX_GEN_AI_MESSAGE_BYTES = 30_000 # 30KB + + +def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): + # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]] + """ + Truncate messages by removing the oldest ones until the serialized size is within limits. + + This function prioritizes keeping the most recent messages while ensuring the total + serialized size stays under the specified byte limit. It uses the Sentry serializer + to get accurate size estimates that match what will actually be sent. + + :param messages: List of message objects (typically with 'role' and 'content' keys) + :param max_bytes: Maximum allowed size in bytes for the serialized messages + :returns: Truncated list of messages that fits within the size limit + """ + if not messages: + return messages + + truncated_messages = list(messages) + + while truncated_messages: + serialized = serialize(truncated_messages, is_vars=False) + serialized_json = json.dumps(serialized, separators=(",", ":")) + current_size = len(serialized_json.encode("utf-8")) + + if current_size <= max_bytes: + break + + truncated_messages.pop(0) + + return truncated_messages + + +def serialize_gen_ai_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): + # type: (Optional[List[Dict[str, Any]]], int) -> Optional[str] + """ + Serialize and truncate gen_ai messages for storage in spans. + + This function handles the complete workflow of: + 1. Truncating messages to fit within size limits + 2. Serializing them using Sentry's serializer + 3. Converting to JSON string for storage + + :param messages: List of message objects or None + :param max_bytes: Maximum allowed size in bytes for the serialized messages + :returns: JSON string of serialized messages or None if input was None/empty + """ + if not messages: + return None + truncated_messages = truncate_messages_by_size(messages, max_bytes) + if not truncated_messages: + return None + serialized_messages = serialize(truncated_messages, is_vars=False) + + return json.dumps(serialized_messages, separators=(",", ":")) + + +def get_messages_metadata(original_messages, truncated_messages): + # type: (List[Dict[str, Any]], List[Dict[str, Any]]) -> Dict[str, Any] + """ + Generate metadata about message truncation for debugging/monitoring. 
+ + :param original_messages: The original list of messages + :param truncated_messages: The truncated list of messages + :returns: Dictionary with metadata about the truncation + """ + original_count = len(original_messages) if original_messages else 0 + truncated_count = len(truncated_messages) if truncated_messages else 0 + + metadata = { + "original_count": original_count, + "truncated_count": truncated_count, + "messages_removed": original_count - truncated_count, + "was_truncated": original_count != truncated_count, + } + + return metadata + + +def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): + # type: (Optional[List[Dict[str, Any]]], int) -> Dict[str, Any] + """ + One-stop function for gen_ai integrations to truncate and serialize messages. + + This is the main function that gen_ai integrations should use. It handles the + complete workflow and returns both the serialized data and metadata. + + Example usage: + from sentry_sdk.ai.message_utils import truncate_and_serialize_messages + + result = truncate_and_serialize_messages(messages) + if result['serialized_data']: + span.set_data('gen_ai.request.messages', result['serialized_data']) + if result['metadata']['was_truncated']: + # Log warning about truncation if desired + pass + + :param messages: List of message objects or None + :param max_bytes: Maximum allowed size in bytes for the serialized messages + :returns: Dictionary containing 'serialized_data', 'metadata', and 'original_size' + """ + if not messages: + return { + "serialized_data": None, + "metadata": get_messages_metadata([], []), + "original_size": 0, + } + + original_serialized = serialize(messages, is_vars=False) + original_json = json.dumps(original_serialized, separators=(",", ":")) + original_size = len(original_json.encode("utf-8")) + + truncated_messages = truncate_messages_by_size(messages, max_bytes) + serialized_data = serialize_gen_ai_messages(truncated_messages, max_bytes) + metadata = get_messages_metadata(messages, truncated_messages) + + return { + "serialized_data": serialized_data, + "metadata": metadata, + "original_size": original_size, + } diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py new file mode 100644 index 0000000000..4b32c02e91 --- /dev/null +++ b/tests/test_ai_message_utils.py @@ -0,0 +1,392 @@ +import json +import pytest + +from sentry_sdk.ai.message_utils import ( + MAX_GEN_AI_MESSAGE_BYTES, + truncate_messages_by_size, + serialize_gen_ai_messages, + get_messages_metadata, + truncate_and_serialize_messages, +) + + +@pytest.fixture +def sample_messages(): + """Sample messages similar to what gen_ai integrations would use""" + return [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "What is the difference between a list and a tuple in Python?", + }, + { + "role": "assistant", + "content": "Lists are mutable and use [], tuples are immutable and use ().", + }, + {"role": "user", "content": "Can you give me some examples?"}, + { + "role": "assistant", + "content": "Sure! Here are examples:\n\n```python\n# List\nmy_list = [1, 2, 3]\nmy_list.append(4)\n\n# Tuple\nmy_tuple = (1, 2, 3)\n# my_tuple.append(4) would error\n```", + }, + ] + + +@pytest.fixture +def large_messages(): + """Messages that will definitely exceed size limits""" + large_content = "This is a very long message. 
" * 1000 # ~30KB per message + return [ + {"role": "system", "content": large_content}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + +class TestTruncateMessagesBySize: + def test_no_truncation_needed(self, sample_messages): + """Test that messages under the limit are not truncated""" + result = truncate_messages_by_size(sample_messages, max_bytes=50000) + assert len(result) == len(sample_messages) + assert result == sample_messages + + def test_truncation_removes_oldest_first(self, large_messages): + """Test that oldest messages are removed first during truncation""" + result = truncate_messages_by_size(large_messages, max_bytes=5000) + + # Should have fewer messages + assert len(result) < len(large_messages) + + # Should keep the most recent messages + # The last message should always be preserved if possible + if result: + assert result[-1] == large_messages[-1] + + def test_empty_messages_list(self): + """Test handling of empty messages list""" + result = truncate_messages_by_size([], max_bytes=1000) + assert result == [] + + def test_single_message_under_limit(self): + """Test single message under size limit""" + messages = [{"role": "user", "content": "Hello!"}] + result = truncate_messages_by_size(messages, max_bytes=1000) + assert result == messages + + def test_single_message_over_limit(self): + """Test single message that exceeds size limit""" + large_content = "x" * 10000 + messages = [{"role": "user", "content": large_content}] + result = truncate_messages_by_size(messages, max_bytes=100) + + # Should return empty list if even single message is too large + assert result == [] + + def test_progressive_truncation(self, large_messages): + """Test that truncation works progressively with different limits""" + # Test different size limits + limits = [100000, 50000, 20000, 5000, 1000] + prev_count = len(large_messages) + + for limit in limits: + result = truncate_messages_by_size(large_messages, max_bytes=limit) + current_count = len(result) + + # As limit decreases, message count should not increase + assert current_count <= prev_count + prev_count = current_count + + def test_exact_size_boundary(self): + """Test behavior at exact size boundaries""" + # Create a message that serializes to a known size + messages = [{"role": "user", "content": "test"}] + + # Get the exact serialized size + from sentry_sdk.ai.message_utils import serialize + + serialized = serialize(messages, is_vars=False) + json_str = json.dumps(serialized, separators=(",", ":")) + exact_size = len(json_str.encode("utf-8")) + + # Should keep the message at exact size + result = truncate_messages_by_size(messages, max_bytes=exact_size) + assert len(result) == 1 + + # Should remove the message if limit is one byte smaller + result = truncate_messages_by_size(messages, max_bytes=exact_size - 1) + assert len(result) == 0 + + +class TestSerializeGenAiMessages: + def test_serialize_normal_messages(self, sample_messages): + """Test serialization of normal messages""" + result = serialize_gen_ai_messages(sample_messages) + + assert result is not None + assert isinstance(result, str) + + # Should be valid JSON + parsed = json.loads(result) + assert isinstance(parsed, list) + assert len(parsed) <= len(sample_messages) # Could be truncated + + def test_serialize_none_messages(self): + """Test serialization of None input""" + result = serialize_gen_ai_messages(None) + assert result is None + + def 
test_serialize_empty_messages(self): + """Test serialization of empty list""" + result = serialize_gen_ai_messages([]) + assert result is None + + def test_serialize_with_truncation(self, large_messages): + """Test serialization with size-based truncation""" + result = serialize_gen_ai_messages(large_messages, max_bytes=5000) + + if result: # Might be None if all messages are too large + assert isinstance(result, str) + + # Verify the result is under the size limit + result_size = len(result.encode("utf-8")) + assert result_size <= 5000 + + # Should be valid JSON + parsed = json.loads(result) + assert isinstance(parsed, list) + + def test_serialize_preserves_message_structure(self): + """Test that serialization preserves message structure""" + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + + result = serialize_gen_ai_messages(messages) + parsed = json.loads(result) + + assert len(parsed) == 2 + assert parsed[0]["role"] == "user" + assert parsed[0]["content"] == "Hello" + assert parsed[1]["role"] == "assistant" + assert parsed[1]["content"] == "Hi there!" + + +class TestGetMessagesMetadata: + def test_no_truncation_metadata(self, sample_messages): + """Test metadata when no truncation occurs""" + metadata = get_messages_metadata(sample_messages, sample_messages) + + assert metadata["original_count"] == len(sample_messages) + assert metadata["truncated_count"] == len(sample_messages) + assert metadata["messages_removed"] == 0 + assert metadata["was_truncated"] is False + + def test_truncation_metadata(self, sample_messages): + """Test metadata when truncation occurs""" + truncated = sample_messages[2:] # Remove first 2 messages + metadata = get_messages_metadata(sample_messages, truncated) + + assert metadata["original_count"] == len(sample_messages) + assert metadata["truncated_count"] == len(truncated) + assert metadata["messages_removed"] == 2 + assert metadata["was_truncated"] is True + + def test_empty_lists_metadata(self): + """Test metadata with empty lists""" + metadata = get_messages_metadata([], []) + + assert metadata["original_count"] == 0 + assert metadata["truncated_count"] == 0 + assert metadata["messages_removed"] == 0 + assert metadata["was_truncated"] is False + + def test_none_input_metadata(self): + """Test metadata with None inputs""" + metadata = get_messages_metadata(None, None) + + assert metadata["original_count"] == 0 + assert metadata["truncated_count"] == 0 + assert metadata["messages_removed"] == 0 + assert metadata["was_truncated"] is False + + def test_complete_truncation_metadata(self, sample_messages): + """Test metadata when all messages are removed""" + metadata = get_messages_metadata(sample_messages, []) + + assert metadata["original_count"] == len(sample_messages) + assert metadata["truncated_count"] == 0 + assert metadata["messages_removed"] == len(sample_messages) + assert metadata["was_truncated"] is True + + +class TestTruncateAndSerializeMessages: + def test_main_function_with_normal_messages(self, sample_messages): + """Test the main function with normal messages""" + result = truncate_and_serialize_messages(sample_messages) + + assert "serialized_data" in result + assert "metadata" in result + assert "original_size" in result + + assert result["serialized_data"] is not None + assert isinstance(result["serialized_data"], str) + assert result["original_size"] > 0 + assert result["metadata"]["was_truncated"] is False + + def test_main_function_with_large_messages(self, large_messages): + 
"""Test the main function with messages requiring truncation""" + result = truncate_and_serialize_messages(large_messages, max_bytes=5000) + + assert "serialized_data" in result + assert "metadata" in result + assert "original_size" in result + + # Original size should be large + assert result["original_size"] > 5000 + + # May or may not be truncated depending on how large the messages are + if result["serialized_data"]: + serialized_size = len(result["serialized_data"].encode("utf-8")) + assert serialized_size <= 5000 + + def test_main_function_with_none_input(self): + """Test the main function with None input""" + result = truncate_and_serialize_messages(None) + + assert result["serialized_data"] is None + assert result["original_size"] == 0 + assert result["metadata"]["was_truncated"] is False + + def test_main_function_with_empty_input(self): + """Test the main function with empty input""" + result = truncate_and_serialize_messages([]) + + assert result["serialized_data"] is None + assert result["original_size"] == 0 + assert result["metadata"]["was_truncated"] is False + + def test_main_function_size_comparison(self, sample_messages): + """Test that serialized data is smaller than or equal to original""" + result = truncate_and_serialize_messages(sample_messages) + + if result["serialized_data"]: + serialized_size = len(result["serialized_data"].encode("utf-8")) + # Serialized size should be <= original size (could be equal if no truncation) + assert serialized_size <= result["original_size"] + + def test_main_function_respects_custom_limit(self, large_messages): + """Test that the main function respects custom byte limits""" + custom_limit = 2000 + result = truncate_and_serialize_messages(large_messages, max_bytes=custom_limit) + + if result["serialized_data"]: + serialized_size = len(result["serialized_data"].encode("utf-8")) + assert serialized_size <= custom_limit + + def test_main_function_default_limit(self, sample_messages): + """Test that the main function uses the default limit correctly""" + result = truncate_and_serialize_messages(sample_messages) + + # With normal sample messages, should not need truncation + assert result["metadata"]["was_truncated"] is False + assert result["serialized_data"] is not None + + +class TestConstants: + def test_default_limit_is_reasonable(self): + """Test that the default limit is reasonable""" + assert MAX_GEN_AI_MESSAGE_BYTES > 0 + assert MAX_GEN_AI_MESSAGE_BYTES < 10**6 # Should be less than MAX_EVENT_BYTES + + +class TestEdgeCases: + def test_messages_with_special_characters(self): + """Test messages containing special characters""" + messages = [ + {"role": "user", "content": "Hello 🌍! How are you? 中文测试"}, + { + "role": "assistant", + "content": "I'm doing well! 
Unicode: ñáéíóú àèìòù äöü", + }, + ] + + result = truncate_and_serialize_messages(messages) + assert result["serialized_data"] is not None + + # Should be valid JSON + parsed = json.loads(result["serialized_data"]) + assert len(parsed) == 2 + assert "🌍" in parsed[0]["content"] + + def test_messages_with_nested_structures(self): + """Test messages with complex nested structures""" + messages = [ + { + "role": "user", + "content": "Hello", + "metadata": {"timestamp": "2023-01-01", "user_id": 123}, + }, + { + "role": "assistant", + "content": "Hi!", + "tool_calls": [{"name": "search", "args": {"query": "test"}}], + }, + ] + + result = truncate_and_serialize_messages(messages) + assert result["serialized_data"] is not None + + # Should preserve the structure + parsed = json.loads(result["serialized_data"]) + assert "metadata" in parsed[0] + assert "tool_calls" in parsed[1] + + def test_very_small_limit(self, sample_messages): + """Test behavior with extremely small size limit""" + result = truncate_and_serialize_messages(sample_messages, max_bytes=10) + + # With such a small limit, likely all messages will be removed + if result["serialized_data"] is None: + assert result["metadata"]["truncated_count"] == 0 + else: + # If any data remains, it should be under the limit + size = len(result["serialized_data"].encode("utf-8")) + assert size <= 10 + + def test_messages_with_none_values(self): + """Test messages containing None values""" + messages = [ + {"role": "user", "content": None}, + {"role": "assistant", "content": "Hello", "extra": None}, + ] + + result = truncate_and_serialize_messages(messages) + assert result["serialized_data"] is not None + + # Should handle None values gracefully + parsed = json.loads(result["serialized_data"]) + assert len(parsed) == 2 + + def test_truncation_keeps_most_recent(self): + """Test that truncation prioritizes keeping the most recent messages""" + messages = [] + for i in range(10): + messages.append( + { + "role": "user" if i % 2 == 0 else "assistant", + "content": f"Message {i} with unique content that makes it identifiable", + } + ) + + # Truncate to a small size that should remove several messages + result = truncate_and_serialize_messages(messages, max_bytes=1000) + + if result["serialized_data"]: + parsed = json.loads(result["serialized_data"]) + if parsed: + # The last remaining message should be from the end of the original list + last_kept_content = parsed[-1]["content"] + assert ( + "Message 9" in last_kept_content or "Message 8" in last_kept_content + ) From bf386cd36ac0634d848d4b38b2b3938d472f5752 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 3 Oct 2025 11:26:34 +0200 Subject: [PATCH 02/14] wip --- sentry_sdk/ai/message_utils.py | 4 ++- tests/test_ai_message_utils.py | 49 +++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/sentry_sdk/ai/message_utils.py b/sentry_sdk/ai/message_utils.py index 2567d31710..08aa0e4a8e 100644 --- a/sentry_sdk/ai/message_utils.py +++ b/sentry_sdk/ai/message_utils.py @@ -13,7 +13,9 @@ def serialize(obj, **kwargs): return obj -MAX_GEN_AI_MESSAGE_BYTES = 30_000 # 30KB +# Custom limit for gen_ai message serialization - 50% of MAX_EVENT_BYTES +# to leave room for other event data while still being generous for messages +MAX_GEN_AI_MESSAGE_BYTES = 500_000 # 500KB def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py index 4b32c02e91..5029d27760 100644 --- 
a/tests/test_ai_message_utils.py +++ b/tests/test_ai_message_utils.py @@ -46,13 +46,16 @@ def large_messages(): class TestTruncateMessagesBySize: def test_no_truncation_needed(self, sample_messages): """Test that messages under the limit are not truncated""" - result = truncate_messages_by_size(sample_messages, max_bytes=50000) + result = truncate_messages_by_size( + sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES + ) assert len(result) == len(sample_messages) assert result == sample_messages def test_truncation_removes_oldest_first(self, large_messages): """Test that oldest messages are removed first during truncation""" - result = truncate_messages_by_size(large_messages, max_bytes=5000) + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation + result = truncate_messages_by_size(large_messages, max_bytes=small_limit) # Should have fewer messages assert len(result) < len(large_messages) @@ -64,28 +67,38 @@ def test_truncation_removes_oldest_first(self, large_messages): def test_empty_messages_list(self): """Test handling of empty messages list""" - result = truncate_messages_by_size([], max_bytes=1000) + result = truncate_messages_by_size( + [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 + ) assert result == [] def test_single_message_under_limit(self): """Test single message under size limit""" messages = [{"role": "user", "content": "Hello!"}] - result = truncate_messages_by_size(messages, max_bytes=1000) + result = truncate_messages_by_size( + messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 + ) assert result == messages def test_single_message_over_limit(self): """Test single message that exceeds size limit""" large_content = "x" * 10000 messages = [{"role": "user", "content": large_content}] - result = truncate_messages_by_size(messages, max_bytes=100) + result = truncate_messages_by_size(messages, max_bytes=100) # Very small limit # Should return empty list if even single message is too large assert result == [] def test_progressive_truncation(self, large_messages): """Test that truncation works progressively with different limits""" - # Test different size limits - limits = [100000, 50000, 20000, 5000, 1000] + # Test different size limits based on the constant + limits = [ + MAX_GEN_AI_MESSAGE_BYTES // 5, # 100KB + MAX_GEN_AI_MESSAGE_BYTES // 10, # 50KB + MAX_GEN_AI_MESSAGE_BYTES // 25, # 20KB + MAX_GEN_AI_MESSAGE_BYTES // 100, # 5KB + MAX_GEN_AI_MESSAGE_BYTES // 500, # 1KB + ] prev_count = len(large_messages) for limit in limits: @@ -142,14 +155,15 @@ def test_serialize_empty_messages(self): def test_serialize_with_truncation(self, large_messages): """Test serialization with size-based truncation""" - result = serialize_gen_ai_messages(large_messages, max_bytes=5000) + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation + result = serialize_gen_ai_messages(large_messages, max_bytes=small_limit) if result: # Might be None if all messages are too large assert isinstance(result, str) # Verify the result is under the size limit result_size = len(result.encode("utf-8")) - assert result_size <= 5000 + assert result_size <= small_limit # Should be valid JSON parsed = json.loads(result) @@ -236,19 +250,20 @@ def test_main_function_with_normal_messages(self, sample_messages): def test_main_function_with_large_messages(self, large_messages): """Test the main function with messages requiring truncation""" - result = truncate_and_serialize_messages(large_messages, max_bytes=5000) + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB 
limit to force truncation + result = truncate_and_serialize_messages(large_messages, max_bytes=small_limit) assert "serialized_data" in result assert "metadata" in result assert "original_size" in result # Original size should be large - assert result["original_size"] > 5000 + assert result["original_size"] > small_limit # May or may not be truncated depending on how large the messages are if result["serialized_data"]: serialized_size = len(result["serialized_data"].encode("utf-8")) - assert serialized_size <= 5000 + assert serialized_size <= small_limit def test_main_function_with_none_input(self): """Test the main function with None input""" result = truncate_and_serialize_messages(None) @@ -277,7 +292,7 @@ def test_main_function_size_comparison(self, sample_messages): def test_main_function_respects_custom_limit(self, large_messages): """Test that the main function respects custom byte limits""" - custom_limit = 2000 + custom_limit = MAX_GEN_AI_MESSAGE_BYTES // 250 # 2KB limit result = truncate_and_serialize_messages(large_messages, max_bytes=custom_limit) if result["serialized_data"]: @@ -344,7 +359,8 @@ def test_messages_with_nested_structures(self): def test_very_small_limit(self, sample_messages): """Test behavior with extremely small size limit""" - result = truncate_and_serialize_messages(sample_messages, max_bytes=10) + tiny_limit = 10 # 10 bytes - extremely small limit + result = truncate_and_serialize_messages(sample_messages, max_bytes=tiny_limit) # With such a small limit, likely all messages will be removed if result["serialized_data"] is None: @@ -352,7 +368,7 @@ def test_very_small_limit(self, sample_messages): else: # If any data remains, it should be under the limit size = len(result["serialized_data"].encode("utf-8")) - assert size <= 10 + assert size <= tiny_limit def test_messages_with_none_values(self): """Test messages containing None values""" @@ -380,7 +396,8 @@ def test_truncation_keeps_most_recent(self): ) # Truncate to a small size that should remove several messages - result = truncate_and_serialize_messages(messages, max_bytes=1000) + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 500 # 1KB limit to force truncation + result = truncate_and_serialize_messages(messages, max_bytes=small_limit) if result["serialized_data"]: parsed = json.loads(result["serialized_data"]) From d4bfa31d589837bff0245cfceb421f3e1ae82f52 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 3 Oct 2025 14:10:01 +0200 Subject: [PATCH 03/14] integrate message truncation in langchain & langgraph --- sentry_sdk/integrations/langchain.py | 46 ++++++++++++---------------- sentry_sdk/integrations/langgraph.py | 23 +++++++------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 724d908665..bd3a0c6d65 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -10,6 +10,7 @@ set_data_normalized, get_start_span_function, ) +from sentry_sdk.ai.message_utils import truncate_and_serialize_messages from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -221,12 +223,11 @@ def on_llm_start( } for prompt in prompts ] - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalized_messages, - unpack=False, - ) + result = truncate_and_serialize_messages(prompts) + if result["serialized_data"]: + span.set_data( 
SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] + ) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any @@ -278,13 +279,11 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): self._normalize_langchain_message(message) ) normalized_messages = normalize_message_roles(normalized_messages) - - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalized_messages, - unpack=False, - ) + result = truncate_and_serialize_messages(normalized_messages) + if result["serialized_data"]: + span.set_data( + SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] + ) def on_chat_model_end(self, response, *, run_id, **kwargs): # type: (SentryLangchainCallback, LLMResult, UUID, Any) -> Any @@ -758,12 +757,11 @@ def new_invoke(self, *args, **kwargs): and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalized_messages, - unpack=False, - ) + result = truncate_and_serialize_messages(normalized_messages) + if result["serialized_data"]: + span.set_data( + SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] + ) output = result.get("output") if ( @@ -813,12 +811,11 @@ def new_stream(self, *args, **kwargs): and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalized_messages, - unpack=False, - ) + result = truncate_and_serialize_messages(normalized_messages) + if result["serialized_data"]: + span.set_data( + SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] + ) # Run the agent result = f(self, *args, **kwargs) diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py index 11aa1facf4..b3000244fa 100644 --- a/sentry_sdk/integrations/langgraph.py +++ b/sentry_sdk/integrations/langgraph.py @@ -3,6 +3,7 @@ import sentry_sdk from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles +from sentry_sdk.ai.message_utils import truncate_and_serialize_messages from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -181,12 +182,11 @@ def new_invoke(self, *args, **kwargs): input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalized_input_messages, - unpack=False, - ) + result = truncate_and_serialize_messages(normalized_input_messages) + if result["serialized_data"]: + span.set_data( + SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] + ) result = f(self, *args, **kwargs) @@ -232,12 +232,11 @@ async def new_ainvoke(self, *args, **kwargs): input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalized_input_messages, - unpack=False, - ) + result = truncate_and_serialize_messages(normalized_input_messages) + if result["serialized_data"]: + span.set_data( + SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] + ) result = await f(self, *args, **kwargs) From e95be82d80ff7fca9677702b86c248506f2ee83d Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: 
Tue, 7 Oct 2025 13:36:56 +0200 Subject: [PATCH 04/14] update limit --- sentry_sdk/ai/message_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/ai/message_utils.py b/sentry_sdk/ai/message_utils.py index 08aa0e4a8e..f241eaad18 100644 --- a/sentry_sdk/ai/message_utils.py +++ b/sentry_sdk/ai/message_utils.py @@ -15,7 +15,7 @@ def serialize(obj, **kwargs): # Custom limit for gen_ai message serialization - 50% of MAX_EVENT_BYTES # to leave room for other event data while still being generous for messages -MAX_GEN_AI_MESSAGE_BYTES = 500_000 # 500KB +MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): From a137be568f2e84dc404e76e067a727faf11e5d3a Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Wed, 8 Oct 2025 14:07:42 +0200 Subject: [PATCH 05/14] add meta entries --- sentry_sdk/ai/message_utils.py | 74 ++++++++++-------------- sentry_sdk/integrations/langchain.py | 32 ++++------- sentry_sdk/integrations/langgraph.py | 16 ++---- tests/test_ai_message_utils.py | 85 ++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 75 deletions(-) diff --git a/sentry_sdk/ai/message_utils.py b/sentry_sdk/ai/message_utils.py index f241eaad18..89f018d39d 100644 --- a/sentry_sdk/ai/message_utils.py +++ b/sentry_sdk/ai/message_utils.py @@ -4,17 +4,9 @@ if TYPE_CHECKING: from typing import Any, Dict, List, Optional -try: - from sentry_sdk.serializer import serialize -except ImportError: - # Fallback for cases where sentry_sdk isn't fully importable - def serialize(obj, **kwargs): - # type: (Any, **Any) -> Any - return obj +from sentry_sdk.serializer import serialize +from sentry_sdk._types import AnnotatedValue - -# Custom limit for gen_ai message serialization - 50% of MAX_EVENT_BYTES -# to leave room for other event data while still being generous for messages MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB @@ -50,21 +42,26 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): def serialize_gen_ai_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): - # type: (Optional[List[Dict[str, Any]]], int) -> Optional[str] + # type: (Optional[Any], int) -> Optional[str] """ Serialize and truncate gen_ai messages for storage in spans. This function handles the complete workflow of: - 1. Truncating messages to fit within size limits - 2. Serializing them using Sentry's serializer + 1. Truncating messages to fit within size limits (if not already done) + 2. Serializing them using Sentry's serializer (which processes AnnotatedValue for _meta) 3. 
Converting to JSON string for storage - :param messages: List of message objects or None + :param messages: List of message objects, AnnotatedValue, or None :param max_bytes: Maximum allowed size in bytes for the serialized messages :returns: JSON string of serialized messages or None if input was None/empty """ if not messages: return None + + if isinstance(messages, AnnotatedValue): + serialized_messages = serialize(messages, is_vars=False) + return json.dumps(serialized_messages, separators=(",", ":")) + truncated_messages = truncate_messages_by_size(messages, max_bytes) if not truncated_messages: return None @@ -96,44 +93,31 @@ def get_messages_metadata(original_messages, truncated_messages): def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): - # type: (Optional[List[Dict[str, Any]]], int) -> Dict[str, Any] + # type: (Optional[List[Dict[str, Any]]], int) -> Any """ - One-stop function for gen_ai integrations to truncate and serialize messages. + Truncate messages and return AnnotatedValue for automatic _meta creation. - This is the main function that gen_ai integrations should use. It handles the - complete workflow and returns both the serialized data and metadata. - - Example usage: - from sentry_sdk.ai.message_utils import truncate_and_serialize_messages - - result = truncate_and_serialize_messages(messages) - if result['serialized_data']: - span.set_data('gen_ai.request.messages', result['serialized_data']) - if result['metadata']['was_truncated']: - # Log warning about truncation if desired - pass + This function handles truncation and returns the truncated messages wrapped in an + AnnotatedValue (when truncation occurs) so that Sentry's serializer can automatically + create the appropriate _meta structure. :param messages: List of message objects or None :param max_bytes: Maximum allowed size in bytes for the serialized messages - :returns: Dictionary containing 'serialized_data', 'metadata', and 'original_size' + :returns: List of messages, AnnotatedValue (if truncated), or None """ if not messages: - return { - "serialized_data": None, - "metadata": get_messages_metadata([], []), - "original_size": 0, - } - - original_serialized = serialize(messages, is_vars=False) - original_json = json.dumps(original_serialized, separators=(",", ":")) - original_size = len(original_json.encode("utf-8")) + return None truncated_messages = truncate_messages_by_size(messages, max_bytes) - serialized_data = serialize_gen_ai_messages(truncated_messages, max_bytes) - metadata = get_messages_metadata(messages, truncated_messages) + if not truncated_messages: + return None - return { - "serialized_data": serialized_data, - "metadata": metadata, - "original_size": original_size, - } + original_count = len(messages) + truncated_count = len(truncated_messages) + + if original_count != truncated_count: + return AnnotatedValue( + value=serialize_gen_ai_messages(truncated_messages), + metadata={"len": original_count}, + ) + return truncated_messages diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index bd3a0c6d65..90682c4407 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -223,11 +223,9 @@ def on_llm_start( } for prompt in prompts ] - result = truncate_and_serialize_messages(prompts) - if result["serialized_data"]: - span.set_data( - SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] - ) + messages_data = truncate_and_serialize_messages(normalized_messages) + if messages_data is 
not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any @@ -279,11 +277,9 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): self._normalize_langchain_message(message) ) normalized_messages = normalize_message_roles(normalized_messages) - result = truncate_and_serialize_messages(normalized_messages) - if result["serialized_data"]: - span.set_data( - SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] - ) + messages_data = truncate_and_serialize_messages(normalized_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) def on_chat_model_end(self, response, *, run_id, **kwargs): # type: (SentryLangchainCallback, LLMResult, UUID, Any) -> Any @@ -757,11 +753,9 @@ def new_invoke(self, *args, **kwargs): and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) - result = truncate_and_serialize_messages(normalized_messages) - if result["serialized_data"]: - span.set_data( - SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] - ) + messages_data = truncate_and_serialize_messages(normalized_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) output = result.get("output") if ( @@ -811,11 +805,9 @@ def new_stream(self, *args, **kwargs): and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) - result = truncate_and_serialize_messages(normalized_messages) - if result["serialized_data"]: - span.set_data( - SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] - ) + messages_data = truncate_and_serialize_messages(normalized_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) # Run the agent result = f(self, *args, **kwargs) diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py index b3000244fa..b9137f19af 100644 --- a/sentry_sdk/integrations/langgraph.py +++ b/sentry_sdk/integrations/langgraph.py @@ -182,11 +182,9 @@ def new_invoke(self, *args, **kwargs): input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) - result = truncate_and_serialize_messages(normalized_input_messages) - if result["serialized_data"]: - span.set_data( - SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] - ) + messages_data = truncate_and_serialize_messages(normalized_input_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) result = f(self, *args, **kwargs) @@ -232,11 +230,9 @@ async def new_ainvoke(self, *args, **kwargs): input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) - result = truncate_and_serialize_messages(normalized_input_messages) - if result["serialized_data"]: - span.set_data( - SPANDATA.GEN_AI_REQUEST_MESSAGES, result["serialized_data"] - ) + messages_data = truncate_and_serialize_messages(normalized_input_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) result = await f(self, *args, **kwargs) diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py index 5029d27760..ed1cb8e7c1 100644 --- 
a/tests/test_ai_message_utils.py +++ b/tests/test_ai_message_utils.py @@ -8,6 +8,8 @@ get_messages_metadata, truncate_and_serialize_messages, ) +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize @pytest.fixture @@ -407,3 +409,86 @@ def test_truncation_keeps_most_recent(self): assert ( "Message 9" in last_kept_content or "Message 8" in last_kept_content ) + + +class TestMetaSupport: + """Test that _meta entries are created correctly when truncation occurs""" + + def test_annotated_value_returned_on_truncation(self, large_messages): + """Test that truncate_and_serialize_messages returns AnnotatedValue when truncation occurs""" + # Force truncation with a limit that will keep at least one message + # Each large message is ~30KB, so 50KB should keep 1-2 messages but force truncation + small_limit = 50_000 # 50KB to force truncation but keep some messages + result = truncate_and_serialize_messages(large_messages, max_bytes=small_limit) + + # Should return an AnnotatedValue when truncation occurs + assert isinstance(result, AnnotatedValue) + assert result.metadata == {"len": len(large_messages)} + + # The value should be the truncated messages + assert isinstance(result.value, list) + assert len(result.value) < len(large_messages) + + def test_no_annotated_value_when_no_truncation(self, sample_messages): + """Test that truncate_and_serialize_messages returns plain list when no truncation occurs""" + result = truncate_and_serialize_messages(sample_messages) + + # Should return plain list when no truncation occurs + assert not isinstance(result, AnnotatedValue) + assert isinstance(result, list) + assert len(result) == len(sample_messages) + assert result == sample_messages + + def test_meta_structure_in_serialized_output(self, large_messages): + """Test that _meta structure is created correctly in serialized output""" + # Force truncation with a limit that will keep at least one message + small_limit = 50_000 # 50KB to force truncation but keep some messages + annotated_messages = truncate_and_serialize_messages( + large_messages, max_bytes=small_limit + ) + + # Simulate how the serializer would process this (like it does in actual span data) + test_data = {"gen_ai": {"request": {"messages": annotated_messages}}} + + # Serialize using Sentry's serializer (which processes AnnotatedValue) + serialized = serialize(test_data, is_vars=False) + + # Check that _meta structure was created + assert "_meta" in serialized + assert "gen_ai" in serialized["_meta"] + assert "request" in serialized["_meta"]["gen_ai"] + assert "messages" in serialized["_meta"]["gen_ai"]["request"] + assert serialized["_meta"]["gen_ai"]["request"]["messages"][""] == { + "len": len(large_messages) + } + + # Check that the actual data is still there + assert "gen_ai" in serialized + assert "request" in serialized["gen_ai"] + assert "messages" in serialized["gen_ai"]["request"] + assert isinstance(serialized["gen_ai"]["request"]["messages"], list) + assert len(serialized["gen_ai"]["request"]["messages"]) < len(large_messages) + + def test_serialize_gen_ai_messages_handles_annotated_value(self, large_messages): + """Test that serialize_gen_ai_messages handles AnnotatedValue input correctly""" + # Create an AnnotatedValue manually + truncated = large_messages[:2] # Keep only first 2 messages + annotated = AnnotatedValue( + value=truncated, metadata={"len": len(large_messages)} + ) + + # serialize_gen_ai_messages should handle it + result = serialize_gen_ai_messages(annotated) + + assert result is 
not None + parsed = json.loads(result) + assert isinstance(parsed, list) + assert len(parsed) == 2 # Only 2 messages kept + + def test_empty_messages_no_annotated_value(self): + """Test that empty messages don't create AnnotatedValue""" + result = truncate_and_serialize_messages([]) + assert result is None + + result = truncate_and_serialize_messages(None) + assert result is None From e54151ca9e8a559f395678c70691deba39be5ffd Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Wed, 8 Oct 2025 15:26:40 +0200 Subject: [PATCH 06/14] truncate message lengths without meta so far --- sentry_sdk/ai/message_utils.py | 110 +++++++++++++++------- tests/test_ai_message_utils.py | 166 ++++++++++++++------------------- 2 files changed, 144 insertions(+), 132 deletions(-) diff --git a/sentry_sdk/ai/message_utils.py b/sentry_sdk/ai/message_utils.py index 89f018d39d..9068530de5 100644 --- a/sentry_sdk/ai/message_utils.py +++ b/sentry_sdk/ai/message_utils.py @@ -14,11 +14,14 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]] """ Truncate messages by removing the oldest ones until the serialized size is within limits. + If the last message is still too large, truncate its content instead of removing it entirely. This function prioritizes keeping the most recent messages while ensuring the total serialized size stays under the specified byte limit. It uses the Sentry serializer to get accurate size estimates that match what will actually be sent. + Always preserves at least one message, even if content needs to be truncated. + :param messages: List of message objects (typically with 'role' and 'content' keys) :param max_bytes: Maximum allowed size in bytes for the serialized messages :returns: Truncated list of messages that fits within the size limit @@ -28,15 +31,64 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): truncated_messages = list(messages) - while truncated_messages: - serialized = serialize(truncated_messages, is_vars=False) + # First, remove older messages until we're under the limit or have only one message left + while len(truncated_messages) > 1: + serialized = serialize( + truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8) + ) serialized_json = json.dumps(serialized, separators=(",", ":")) current_size = len(serialized_json.encode("utf-8")) if current_size <= max_bytes: break - truncated_messages.pop(0) + truncated_messages.pop(0) # Remove oldest message + + # If we still have one message but it's too large, truncate its content + # This ensures we always preserve at least one message + if len(truncated_messages) == 1: + serialized = serialize( + truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8) + ) + serialized_json = json.dumps(serialized, separators=(",", ":")) + current_size = len(serialized_json.encode("utf-8")) + + if current_size > max_bytes: + # Truncate the content of the last message + last_message = truncated_messages[0].copy() + content = last_message.get("content", "") + + if content and isinstance(content, str): + # Binary search to find the optimal content length + left, right = 0, len(content) + best_length = 0 + + while left <= right: + mid = (left + right) // 2 + test_message = last_message.copy() + test_message["content"] = content[:mid] + ( + "..." 
if mid < len(content) else "" + ) + + test_serialized = serialize( + [test_message], + is_vars=False, + max_value_length=round(max_bytes * 0.8), + ) + test_json = json.dumps(test_serialized, separators=(",", ":")) + test_size = len(test_json.encode("utf-8")) + + if test_size <= max_bytes: + best_length = mid + left = mid + 1 + else: + right = mid - 1 + + # Apply the truncation + if best_length < len(content): + last_message["content"] = content[:best_length] + "..." + + truncated_messages[0] = last_message return truncated_messages @@ -59,51 +111,33 @@ def serialize_gen_ai_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): return None if isinstance(messages, AnnotatedValue): - serialized_messages = serialize(messages, is_vars=False) + serialized_messages = serialize( + messages, is_vars=False, max_value_length=round(max_bytes * 0.8) + ) return json.dumps(serialized_messages, separators=(",", ":")) truncated_messages = truncate_messages_by_size(messages, max_bytes) if not truncated_messages: return None - serialized_messages = serialize(truncated_messages, is_vars=False) + serialized_messages = serialize( + truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8) + ) return json.dumps(serialized_messages, separators=(",", ":")) -def get_messages_metadata(original_messages, truncated_messages): - # type: (List[Dict[str, Any]], List[Dict[str, Any]]) -> Dict[str, Any] - """ - Generate metadata about message truncation for debugging/monitoring. - - :param original_messages: The original list of messages - :param truncated_messages: The truncated list of messages - :returns: Dictionary with metadata about the truncation - """ - original_count = len(original_messages) if original_messages else 0 - truncated_count = len(truncated_messages) if truncated_messages else 0 - - metadata = { - "original_count": original_count, - "truncated_count": truncated_count, - "messages_removed": original_count - truncated_count, - "was_truncated": original_count != truncated_count, - } - - return metadata - - def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): # type: (Optional[List[Dict[str, Any]]], int) -> Any """ - Truncate messages and return AnnotatedValue for automatic _meta creation. + Truncate messages and return serialized string or AnnotatedValue for automatic _meta creation. - This function handles truncation and returns the truncated messages wrapped in an - AnnotatedValue (when truncation occurs) so that Sentry's serializer can automatically - create the appropriate _meta structure. + This function handles truncation and always returns serialized JSON strings. When truncation + occurs, it wraps the serialized string in an AnnotatedValue so that Sentry's serializer can + automatically create the appropriate _meta structure. 
:param messages: List of message objects or None :param max_bytes: Maximum allowed size in bytes for the serialized messages - :returns: List of messages, AnnotatedValue (if truncated), or None + :returns: JSON string, AnnotatedValue containing JSON string (if truncated), or None """ if not messages: return None @@ -112,12 +146,20 @@ def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES if not truncated_messages: return None + # Always serialize to JSON string + serialized_json = serialize_gen_ai_messages(truncated_messages, max_bytes) + if not serialized_json: + return None + original_count = len(messages) truncated_count = len(truncated_messages) + # If truncation occurred, wrap the serialized string in AnnotatedValue for _meta if original_count != truncated_count: return AnnotatedValue( - value=serialize_gen_ai_messages(truncated_messages), + value=serialized_json, metadata={"len": original_count}, ) - return truncated_messages + + # No truncation, return plain serialized string + return serialized_json diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py index ed1cb8e7c1..92a9348bce 100644 --- a/tests/test_ai_message_utils.py +++ b/tests/test_ai_message_utils.py @@ -5,7 +5,6 @@ MAX_GEN_AI_MESSAGE_BYTES, truncate_messages_by_size, serialize_gen_ai_messages, - get_messages_metadata, truncate_and_serialize_messages, ) from sentry_sdk._types import AnnotatedValue @@ -88,8 +87,11 @@ def test_single_message_over_limit(self): messages = [{"role": "user", "content": large_content}] result = truncate_messages_by_size(messages, max_bytes=100) # Very small limit - # Should return empty list if even single message is too large - assert result == [] + # Should return truncated content, not empty list + assert len(result) == 1 + assert result[0]["role"] == "user" + # Content should be truncated by either our manual truncation or serializer max_value_length + assert len(result[0]["content"]) < len(large_content) def test_progressive_truncation(self, large_messages): """Test that truncation works progressively with different limits""" @@ -109,6 +111,8 @@ def test_progressive_truncation(self, large_messages): # As limit decreases, message count should not increase assert current_count <= prev_count + # Should always preserve at least one message + assert current_count >= 1 prev_count = current_count def test_exact_size_boundary(self): @@ -127,9 +131,10 @@ def test_exact_size_boundary(self): result = truncate_messages_by_size(messages, max_bytes=exact_size) assert len(result) == 1 - # Should remove the message if limit is one byte smaller + # Should truncate the message content if limit is one byte smaller result = truncate_messages_by_size(messages, max_bytes=exact_size - 1) - assert len(result) == 0 + assert len(result) == 1 + # Content should be truncated by either our manual truncation or serializer class TestSerializeGenAiMessages: @@ -188,126 +193,89 @@ def test_serialize_preserves_message_structure(self): assert parsed[1]["content"] == "Hi there!" 
-class TestGetMessagesMetadata: - def test_no_truncation_metadata(self, sample_messages): - """Test metadata when no truncation occurs""" - metadata = get_messages_metadata(sample_messages, sample_messages) - - assert metadata["original_count"] == len(sample_messages) - assert metadata["truncated_count"] == len(sample_messages) - assert metadata["messages_removed"] == 0 - assert metadata["was_truncated"] is False - - def test_truncation_metadata(self, sample_messages): - """Test metadata when truncation occurs""" - truncated = sample_messages[2:] # Remove first 2 messages - metadata = get_messages_metadata(sample_messages, truncated) - - assert metadata["original_count"] == len(sample_messages) - assert metadata["truncated_count"] == len(truncated) - assert metadata["messages_removed"] == 2 - assert metadata["was_truncated"] is True - - def test_empty_lists_metadata(self): - """Test metadata with empty lists""" - metadata = get_messages_metadata([], []) - - assert metadata["original_count"] == 0 - assert metadata["truncated_count"] == 0 - assert metadata["messages_removed"] == 0 - assert metadata["was_truncated"] is False - - def test_none_input_metadata(self): - """Test metadata with None inputs""" - metadata = get_messages_metadata(None, None) - - assert metadata["original_count"] == 0 - assert metadata["truncated_count"] == 0 - assert metadata["messages_removed"] == 0 - assert metadata["was_truncated"] is False - - def test_complete_truncation_metadata(self, sample_messages): - """Test metadata when all messages are removed""" - metadata = get_messages_metadata(sample_messages, []) - - assert metadata["original_count"] == len(sample_messages) - assert metadata["truncated_count"] == 0 - assert metadata["messages_removed"] == len(sample_messages) - assert metadata["was_truncated"] is True - - class TestTruncateAndSerializeMessages: def test_main_function_with_normal_messages(self, sample_messages): """Test the main function with normal messages""" result = truncate_and_serialize_messages(sample_messages) - assert "serialized_data" in result - assert "metadata" in result - assert "original_size" in result + # Should return a JSON string when no truncation occurs + assert isinstance(result, str) - assert result["serialized_data"] is not None - assert isinstance(result["serialized_data"], str) - assert result["original_size"] > 0 - assert result["metadata"]["was_truncated"] is False + # Should be valid JSON + parsed = json.loads(result) + assert isinstance(parsed, list) + assert len(parsed) == len(sample_messages) def test_main_function_with_large_messages(self, large_messages): """Test the main function with messages requiring truncation""" small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation result = truncate_and_serialize_messages(large_messages, max_bytes=small_limit) - assert "serialized_data" in result - assert "metadata" in result - assert "original_size" in result + # Should return AnnotatedValue when truncation occurs + assert isinstance(result, AnnotatedValue) + assert result.metadata["len"] == len(large_messages) - # Original size should be large - assert result["original_size"] > small_limit + # The value should be a JSON string + assert isinstance(result.value, str) - # May or may not be truncated depending on how large the messages are - if result["serialized_data"]: - serialized_size = len(result["serialized_data"].encode("utf-8")) - assert serialized_size <= small_limit + # Should be valid JSON and under size limit + parsed = json.loads(result.value) 
+ assert isinstance(parsed, list) + assert len(parsed) <= len(large_messages) + + # Size should be under limit + result_size = len(result.value.encode("utf-8")) + assert result_size <= small_limit def test_main_function_with_none_input(self): """Test the main function with None input""" result = truncate_and_serialize_messages(None) - - assert result["serialized_data"] is None - assert result["original_size"] == 0 - assert result["metadata"]["was_truncated"] is False + assert result is None def test_main_function_with_empty_input(self): """Test the main function with empty input""" result = truncate_and_serialize_messages([]) + assert result is None - assert result["serialized_data"] is None - assert result["original_size"] == 0 - assert result["metadata"]["was_truncated"] is False - - def test_main_function_size_comparison(self, sample_messages): - """Test that serialized data is smaller than or equal to original""" + def test_main_function_serialization_format(self, sample_messages): + """Test that the function always returns proper JSON strings""" result = truncate_and_serialize_messages(sample_messages) - if result["serialized_data"]: - serialized_size = len(result["serialized_data"].encode("utf-8")) - # Serialized size should be <= original size (could be equal if no truncation) - assert serialized_size <= result["original_size"] + # Should be JSON string + assert isinstance(result, str) + + # Should be valid, parseable JSON + parsed = json.loads(result) + assert isinstance(parsed, list) + + # Content should match original structure + for i, msg in enumerate(parsed): + assert "role" in msg + assert "content" in msg def test_main_function_respects_custom_limit(self, large_messages): """Test that the main function respects custom byte limits""" custom_limit = MAX_GEN_AI_MESSAGE_BYTES // 250 # 2KB limit result = truncate_and_serialize_messages(large_messages, max_bytes=custom_limit) - if result["serialized_data"]: - serialized_size = len(result["serialized_data"].encode("utf-8")) - assert serialized_size <= custom_limit + # Should return AnnotatedValue due to truncation + assert isinstance(result, AnnotatedValue) + + # Should respect the custom limit + result_size = len(result.value.encode("utf-8")) + assert result_size <= custom_limit def test_main_function_default_limit(self, sample_messages): """Test that the main function uses the default limit correctly""" result = truncate_and_serialize_messages(sample_messages) # With normal sample messages, should not need truncation - assert result["metadata"]["was_truncated"] is False - assert result["serialized_data"] is not None + # Should return plain JSON string (not AnnotatedValue) + assert isinstance(result, str) + + # Should be valid JSON + parsed = json.loads(result) + assert isinstance(parsed, list) class TestConstants: @@ -425,19 +393,22 @@ def test_annotated_value_returned_on_truncation(self, large_messages): assert isinstance(result, AnnotatedValue) assert result.metadata == {"len": len(large_messages)} - # The value should be the truncated messages - assert isinstance(result.value, list) - assert len(result.value) < len(large_messages) + # The value should be a JSON string + assert isinstance(result.value, str) + parsed = json.loads(result.value) + assert len(parsed) <= len(large_messages) def test_no_annotated_value_when_no_truncation(self, sample_messages): """Test that truncate_and_serialize_messages returns plain list when no truncation occurs""" result = truncate_and_serialize_messages(sample_messages) - # Should return plain 
list when no truncation occurs + # Should return plain JSON string when no truncation occurs assert not isinstance(result, AnnotatedValue) - assert isinstance(result, list) - assert len(result) == len(sample_messages) - assert result == sample_messages + assert isinstance(result, str) + + # Should be valid JSON with same length + parsed = json.loads(result) + assert len(parsed) == len(sample_messages) def test_meta_structure_in_serialized_output(self, large_messages): """Test that _meta structure is created correctly in serialized output""" @@ -462,12 +433,11 @@ def test_meta_structure_in_serialized_output(self, large_messages): "len": len(large_messages) } - # Check that the actual data is still there + # Check that the actual data is still there and is a string assert "gen_ai" in serialized assert "request" in serialized["gen_ai"] assert "messages" in serialized["gen_ai"]["request"] - assert isinstance(serialized["gen_ai"]["request"]["messages"], list) - assert len(serialized["gen_ai"]["request"]["messages"]) < len(large_messages) + assert isinstance(serialized["gen_ai"]["request"]["messages"], str) def test_serialize_gen_ai_messages_handles_annotated_value(self, large_messages): """Test that serialize_gen_ai_messages handles AnnotatedValue input correctly""" From 5b378fd4080a1db20430496530cd06d93720d506 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Wed, 8 Oct 2025 16:00:55 +0200 Subject: [PATCH 07/14] truncate simply --- sentry_sdk/ai/message_utils.py | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/sentry_sdk/ai/message_utils.py b/sentry_sdk/ai/message_utils.py index 9068530de5..ad2d34f8c1 100644 --- a/sentry_sdk/ai/message_utils.py +++ b/sentry_sdk/ai/message_utils.py @@ -59,35 +59,7 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): content = last_message.get("content", "") if content and isinstance(content, str): - # Binary search to find the optimal content length - left, right = 0, len(content) - best_length = 0 - - while left <= right: - mid = (left + right) // 2 - test_message = last_message.copy() - test_message["content"] = content[:mid] + ( - "..." if mid < len(content) else "" - ) - - test_serialized = serialize( - [test_message], - is_vars=False, - max_value_length=round(max_bytes * 0.8), - ) - test_json = json.dumps(test_serialized, separators=(",", ":")) - test_size = len(test_json.encode("utf-8")) - - if test_size <= max_bytes: - best_length = mid - left = mid + 1 - else: - right = mid - 1 - - # Apply the truncation - if best_length < len(content): - last_message["content"] = content[:best_length] + "..." - + last_message["content"] = content[: max_bytes * 0.8] + "..." 
truncated_messages[0] = last_message return truncated_messages From 294c66c0490b2c66532676b663af92e544cabbda Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 9 Oct 2025 11:14:20 +0200 Subject: [PATCH 08/14] remove very small limit tests -- not applicable constraint --- sentry_sdk/ai/message_utils.py | 2 +- tests/test_ai_message_utils.py | 50 ++++++++++++---------------------- 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/sentry_sdk/ai/message_utils.py b/sentry_sdk/ai/message_utils.py index ad2d34f8c1..ba3fa184ba 100644 --- a/sentry_sdk/ai/message_utils.py +++ b/sentry_sdk/ai/message_utils.py @@ -59,7 +59,7 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): content = last_message.get("content", "") if content and isinstance(content, str): - last_message["content"] = content[: max_bytes * 0.8] + "..." + last_message["content"] = content[: int(max_bytes * 0.8)] + "..." truncated_messages[0] = last_message return truncated_messages diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py index 92a9348bce..f307e88d6d 100644 --- a/tests/test_ai_message_utils.py +++ b/tests/test_ai_message_utils.py @@ -253,18 +253,6 @@ def test_main_function_serialization_format(self, sample_messages): assert "role" in msg assert "content" in msg - def test_main_function_respects_custom_limit(self, large_messages): - """Test that the main function respects custom byte limits""" - custom_limit = MAX_GEN_AI_MESSAGE_BYTES // 250 # 2KB limit - result = truncate_and_serialize_messages(large_messages, max_bytes=custom_limit) - - # Should return AnnotatedValue due to truncation - assert isinstance(result, AnnotatedValue) - - # Should respect the custom limit - result_size = len(result.value.encode("utf-8")) - assert result_size <= custom_limit - def test_main_function_default_limit(self, sample_messages): """Test that the main function uses the default limit correctly""" result = truncate_and_serialize_messages(sample_messages) @@ -297,10 +285,10 @@ def test_messages_with_special_characters(self): ] result = truncate_and_serialize_messages(messages) - assert result["serialized_data"] is not None + assert result is not None # Should be valid JSON - parsed = json.loads(result["serialized_data"]) + parsed = json.loads(result) assert len(parsed) == 2 assert "🌍" in parsed[0]["content"] @@ -320,26 +308,17 @@ def test_messages_with_nested_structures(self): ] result = truncate_and_serialize_messages(messages) - assert result["serialized_data"] is not None + assert result is not None # Should preserve the structure - parsed = json.loads(result["serialized_data"]) + # Handle both string and AnnotatedValue return types + if isinstance(result, AnnotatedValue): + parsed = json.loads(result.value) + else: + parsed = json.loads(result) assert "metadata" in parsed[0] assert "tool_calls" in parsed[1] - def test_very_small_limit(self, sample_messages): - """Test behavior with extremely small size limit""" - tiny_limit = 10 # 10 bytes - extremely small limit - result = truncate_and_serialize_messages(sample_messages, max_bytes=tiny_limit) - - # With such a small limit, likely all messages will be removed - if result["serialized_data"] is None: - assert result["metadata"]["truncated_count"] == 0 - else: - # If any data remains, it should be under the limit - size = len(result["serialized_data"].encode("utf-8")) - assert size <= tiny_limit - def test_messages_with_none_values(self): """Test messages containing None values""" messages = [ @@ -348,10 +327,14 @@ def 
test_messages_with_none_values(self): ] result = truncate_and_serialize_messages(messages) - assert result["serialized_data"] is not None + assert result is not None # Should handle None values gracefully - parsed = json.loads(result["serialized_data"]) + # Handle both string and AnnotatedValue return types + if isinstance(result, AnnotatedValue): + parsed = json.loads(result.value) + else: + parsed = json.loads(result) assert len(parsed) == 2 def test_truncation_keeps_most_recent(self): @@ -369,8 +352,9 @@ def test_truncation_keeps_most_recent(self): small_limit = MAX_GEN_AI_MESSAGE_BYTES // 500 # 1KB limit to force truncation result = truncate_and_serialize_messages(messages, max_bytes=small_limit) - if result["serialized_data"]: - parsed = json.loads(result["serialized_data"]) + if result: + assert isinstance(result, AnnotatedValue) + parsed = json.loads(result.value) if parsed: # The last remaining message should be from the end of the original list last_kept_content = parsed[-1]["content"] From 630a472650d1aa1e5cd5b00edc2260f25af7c76d Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 9 Oct 2025 12:47:07 +0200 Subject: [PATCH 09/14] move message utils to utils, add to all integrations --- sentry_sdk/ai/message_utils.py | 137 ------------------ sentry_sdk/ai/utils.py | 137 +++++++++++++++++- sentry_sdk/integrations/anthropic.py | 10 +- sentry_sdk/integrations/huggingface_hub.py | 10 +- sentry_sdk/integrations/langchain.py | 3 +- sentry_sdk/integrations/langgraph.py | 15 +- sentry_sdk/integrations/litellm.py | 12 +- sentry_sdk/integrations/openai.py | 12 +- .../openai_agents/spans/invoke_agent.py | 10 +- .../integrations/openai_agents/utils.py | 11 +- .../integrations/anthropic/test_anthropic.py | 119 ++++++++++++++- .../huggingface_hub/test_huggingface_hub.py | 14 +- tests/integrations/litellm/test_litellm.py | 115 +++++++++++++++ tests/integrations/openai/test_openai.py | 119 ++++++++++++++- .../openai_agents/test_openai_agents.py | 130 +++++++++++++++++ tests/test_ai_message_utils.py | 2 +- 16 files changed, 666 insertions(+), 190 deletions(-) delete mode 100644 sentry_sdk/ai/message_utils.py diff --git a/sentry_sdk/ai/message_utils.py b/sentry_sdk/ai/message_utils.py deleted file mode 100644 index ba3fa184ba..0000000000 --- a/sentry_sdk/ai/message_utils.py +++ /dev/null @@ -1,137 +0,0 @@ -import json -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from typing import Any, Dict, List, Optional - -from sentry_sdk.serializer import serialize -from sentry_sdk._types import AnnotatedValue - -MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB - - -def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): - # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]] - """ - Truncate messages by removing the oldest ones until the serialized size is within limits. - If the last message is still too large, truncate its content instead of removing it entirely. - - This function prioritizes keeping the most recent messages while ensuring the total - serialized size stays under the specified byte limit. It uses the Sentry serializer - to get accurate size estimates that match what will actually be sent. - - Always preserves at least one message, even if content needs to be truncated. 
- - :param messages: List of message objects (typically with 'role' and 'content' keys) - :param max_bytes: Maximum allowed size in bytes for the serialized messages - :returns: Truncated list of messages that fits within the size limit - """ - if not messages: - return messages - - truncated_messages = list(messages) - - # First, remove older messages until we're under the limit or have only one message left - while len(truncated_messages) > 1: - serialized = serialize( - truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8) - ) - serialized_json = json.dumps(serialized, separators=(",", ":")) - current_size = len(serialized_json.encode("utf-8")) - - if current_size <= max_bytes: - break - - truncated_messages.pop(0) # Remove oldest message - - # If we still have one message but it's too large, truncate its content - # This ensures we always preserve at least one message - if len(truncated_messages) == 1: - serialized = serialize( - truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8) - ) - serialized_json = json.dumps(serialized, separators=(",", ":")) - current_size = len(serialized_json.encode("utf-8")) - - if current_size > max_bytes: - # Truncate the content of the last message - last_message = truncated_messages[0].copy() - content = last_message.get("content", "") - - if content and isinstance(content, str): - last_message["content"] = content[: int(max_bytes * 0.8)] + "..." - truncated_messages[0] = last_message - - return truncated_messages - - -def serialize_gen_ai_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): - # type: (Optional[Any], int) -> Optional[str] - """ - Serialize and truncate gen_ai messages for storage in spans. - - This function handles the complete workflow of: - 1. Truncating messages to fit within size limits (if not already done) - 2. Serializing them using Sentry's serializer (which processes AnnotatedValue for _meta) - 3. Converting to JSON string for storage - - :param messages: List of message objects, AnnotatedValue, or None - :param max_bytes: Maximum allowed size in bytes for the serialized messages - :returns: JSON string of serialized messages or None if input was None/empty - """ - if not messages: - return None - - if isinstance(messages, AnnotatedValue): - serialized_messages = serialize( - messages, is_vars=False, max_value_length=round(max_bytes * 0.8) - ) - return json.dumps(serialized_messages, separators=(",", ":")) - - truncated_messages = truncate_messages_by_size(messages, max_bytes) - if not truncated_messages: - return None - serialized_messages = serialize( - truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8) - ) - - return json.dumps(serialized_messages, separators=(",", ":")) - - -def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): - # type: (Optional[List[Dict[str, Any]]], int) -> Any - """ - Truncate messages and return serialized string or AnnotatedValue for automatic _meta creation. - - This function handles truncation and always returns serialized JSON strings. When truncation - occurs, it wraps the serialized string in an AnnotatedValue so that Sentry's serializer can - automatically create the appropriate _meta structure. 
- - :param messages: List of message objects or None - :param max_bytes: Maximum allowed size in bytes for the serialized messages - :returns: JSON string, AnnotatedValue containing JSON string (if truncated), or None - """ - if not messages: - return None - - truncated_messages = truncate_messages_by_size(messages, max_bytes) - if not truncated_messages: - return None - - # Always serialize to JSON string - serialized_json = serialize_gen_ai_messages(truncated_messages, max_bytes) - if not serialized_json: - return None - - original_count = len(messages) - truncated_count = len(truncated_messages) - - # If truncation occurred, wrap the serialized string in AnnotatedValue for _meta - if original_count != truncated_count: - return AnnotatedValue( - value=serialized_json, - metadata={"len": original_count}, - ) - - # No truncation, return plain serialized string - return serialized_json diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 0c0b937006..564e52d3e9 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,14 +1,24 @@ import json - from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import Any, Callable + from sentry_sdk.tracing import Span +from typing import TYPE_CHECKING + import sentry_sdk from sentry_sdk.utils import logger +if TYPE_CHECKING: + from typing import Any, Dict, List, Optional + +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize + +MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB + class GEN_AI_ALLOWED_MESSAGE_ROLES: SYSTEM = "system" @@ -95,3 +105,128 @@ def get_start_span_function(): current_span is not None and current_span.containing_transaction is not None ) return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction + + +def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): + # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]] + """ + Truncate messages by removing the oldest ones until the serialized size is within limits. + If the last message is still too large, truncate its content instead of removing it entirely. + + This function prioritizes keeping the most recent messages while ensuring the total + serialized size stays under the specified byte limit. It uses the Sentry serializer + to get accurate size estimates that match what will actually be sent. + + Always preserves at least one message, even if content needs to be truncated. 
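+
+    Illustrative example (hypothetical values; assumes plain role/content dicts):
+
+        messages = [{"role": "user", "content": "x" * 50_000}]
+        kept = truncate_messages_by_size(messages, max_bytes=20_000)
+        # One message is still kept, with its content shortened to fit the budget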
+
+    :param messages: List of message objects (typically with 'role' and 'content' keys)
+    :param max_bytes: Maximum allowed size in bytes for the serialized messages
+    :returns: Truncated list of messages that fits within the size limit
+    """
+    if not messages:
+        return messages
+
+    truncated_messages = list(messages)
+
+    # First, remove older messages until we're under the limit or have only one message left
+    while len(truncated_messages) > 1:
+        serialized = serialize(
+            truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
+        )
+        serialized_json = json.dumps(serialized, separators=(",", ":"))
+        current_size = len(serialized_json.encode("utf-8"))
+
+        if current_size <= max_bytes:
+            break
+
+        truncated_messages.pop(0)  # Remove oldest message
+
+    # If we still have one message but it's too large, truncate its content
+    # This ensures we always preserve at least one message
+    if len(truncated_messages) == 1:
+        serialized = serialize(
+            truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
+        )
+        serialized_json = json.dumps(serialized, separators=(",", ":"))
+        current_size = len(serialized_json.encode("utf-8"))
+
+        if current_size > max_bytes:
+            # Truncate the content of the last message
+            last_message = truncated_messages[0].copy()
+            content = last_message.get("content", "")
+
+            if content and isinstance(content, str):
+                last_message["content"] = content[: int(max_bytes * 0.8)] + "..."
+                truncated_messages[0] = last_message
+
+    return truncated_messages
+
+
+def serialize_gen_ai_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
+    # type: (Optional[Any], int) -> Optional[str]
+    """
+    Serialize and truncate gen_ai messages for storage in spans.
+
+    This function handles the complete workflow of:
+    1. Truncating messages to fit within size limits (if not already done)
+    2. Serializing them using Sentry's serializer (which processes AnnotatedValue for _meta)
+    3. Converting to JSON string for storage
+
+    :param messages: List of message objects, AnnotatedValue, or None
+    :param max_bytes: Maximum allowed size in bytes for the serialized messages
+    :returns: JSON string of serialized messages or None if input was None/empty
+    """
+    if not messages:
+        return None
+
+    if isinstance(messages, AnnotatedValue):
+        serialized_messages = serialize(
+            messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
+        )
+        return json.dumps(serialized_messages, separators=(",", ":"))
+
+    truncated_messages = truncate_messages_by_size(messages, max_bytes)
+    serialized_messages = serialize(
+        truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
+    )
+
+    return json.dumps(serialized_messages, separators=(",", ":"))
+
+
+def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
+    # type: (Optional[List[Dict[str, Any]]], int) -> Any
+    """
+    Truncate messages and return serialized string or AnnotatedValue for automatic _meta creation.
+
+    This function handles truncation and always returns serialized JSON strings. When truncation
+    occurs, it wraps the serialized string in an AnnotatedValue so that Sentry's serializer can
+    automatically create the appropriate _meta structure. 
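+
+    Illustrative usage (mirrors what the integrations do with the result; ``span``
+    and the SPANDATA constant are assumed to come from the calling code):
+
+        messages_data = truncate_and_serialize_messages(messages)
+        if messages_data is not None:
+            span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)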
+ + :param messages: List of message objects or None + :param max_bytes: Maximum allowed size in bytes for the serialized messages + :returns: JSON string, AnnotatedValue containing JSON string (if truncated), or None + """ + if not messages: + return None + + truncated_messages = truncate_messages_by_size(messages, max_bytes) + if not truncated_messages: + return None + + # Always serialize to JSON string + serialized_json = serialize_gen_ai_messages(truncated_messages, max_bytes) + if not serialized_json: + return None + + original_count = len(messages) + truncated_count = len(truncated_messages) + + # If truncation occurred, wrap the serialized string in AnnotatedValue for _meta + if original_count != truncated_count: + return AnnotatedValue( + value=serialized_json, + metadata={"len": original_count}, + ) + + # No truncation, return plain serialized string + return serialized_json diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 46c6b2a766..0d383ffd16 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -6,6 +6,7 @@ from sentry_sdk.ai.utils import ( set_data_normalized, normalize_message_roles, + truncate_and_serialize_messages, get_start_span_function, ) from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS @@ -145,12 +146,9 @@ def _set_input_data(span, kwargs, integration): normalized_messages.append(message) role_normalized_messages = normalize_message_roles(normalized_messages) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - role_normalized_messages, - unpack=False, - ) + serialized_messages = truncate_and_serialize_messages(role_normalized_messages) + if serialized_messages is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, serialized_messages) set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_STREAMING, kwargs.get("stream", False) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 2e2b382abd..6e62763fc6 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -3,7 +3,7 @@ import sentry_sdk from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import set_data_normalized +from sentry_sdk.ai.utils import set_data_normalized, truncate_and_serialize_messages from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -103,9 +103,11 @@ def new_huggingface_task(*args, **kwargs): # Input attributes if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False - ) + # Convert prompt to message format if it's a string + messages = [prompt] if isinstance(prompt, str) else prompt + messages_data = truncate_and_serialize_messages(messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) attribute_mapping = { "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 90682c4407..8535848be9 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -9,9 +9,8 @@ normalize_message_roles, set_data_normalized, get_start_span_function, + truncate_and_serialize_messages, ) -from sentry_sdk.ai.utils import set_data_normalized, get_start_span_function -from sentry_sdk.ai.message_utils import 
truncate_and_serialize_messages from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py index b9137f19af..468c2f1d88 100644 --- a/sentry_sdk/integrations/langgraph.py +++ b/sentry_sdk/integrations/langgraph.py @@ -2,8 +2,11 @@ from typing import Any, Callable, List, Optional import sentry_sdk -from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles -from sentry_sdk.ai.message_utils import truncate_and_serialize_messages +from sentry_sdk.ai.utils import ( + set_data_normalized, + normalize_message_roles, + truncate_and_serialize_messages, +) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -182,7 +185,9 @@ def new_invoke(self, *args, **kwargs): input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) - messages_data = truncate_and_serialize_messages(normalized_input_messages) + messages_data = truncate_and_serialize_messages( + normalized_input_messages + ) if messages_data is not None: span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) @@ -230,7 +235,9 @@ async def new_ainvoke(self, *args, **kwargs): input_messages = _parse_langgraph_messages(args[0]) if input_messages: normalized_input_messages = normalize_message_roles(input_messages) - messages_data = truncate_and_serialize_messages(normalized_input_messages) + messages_data = truncate_and_serialize_messages( + normalized_input_messages + ) if messages_data is not None: span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 2582c2bc05..7bf80d2f54 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -3,7 +3,11 @@ import sentry_sdk from sentry_sdk import consts from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized +from sentry_sdk.ai.utils import ( + get_start_span_function, + set_data_normalized, + truncate_and_serialize_messages, +) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -72,9 +76,9 @@ def _input_callback(kwargs): # Record messages if allowed if messages and should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False - ) + messages_data = truncate_and_serialize_messages(messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) # Record other parameters params = { diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index e9bd2efa23..f503ce2f96 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -3,7 +3,11 @@ import sentry_sdk from sentry_sdk import consts from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles +from sentry_sdk.ai.utils import ( + set_data_normalized, + normalize_message_roles, + truncate_and_serialize_messages, +) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, 
Integration from sentry_sdk.scope import should_send_default_pii @@ -183,9 +187,9 @@ def _set_input_data(span, kwargs, operation, integration): and integration.include_prompts ): normalized_messages = normalize_message_roles(messages) - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False - ) + messages_data = truncate_and_serialize_messages(normalized_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) # Input attributes: Common set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 2a9c5ebe66..ac2596f73c 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -3,6 +3,7 @@ get_start_span_function, set_data_normalized, normalize_message_roles, + truncate_and_serialize_messages, ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.scope import should_send_default_pii @@ -61,12 +62,9 @@ def invoke_agent_span(context, agent, kwargs): if len(messages) > 0: normalized_messages = normalize_message_roles(messages) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalized_messages, - unpack=False, - ) + messages_data = truncate_and_serialize_messages(normalized_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) _set_agent_data(span, agent) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 125ff1175b..92cebdf925 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -4,6 +4,7 @@ normalize_message_roles, set_data_normalized, normalize_message_role, + truncate_and_serialize_messages, ) from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP from sentry_sdk.integrations import DidNotEnable @@ -135,12 +136,10 @@ def _set_input_data(span, get_response_kwargs): } ) - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - normalize_message_roles(request_messages), - unpack=False, - ) + role_normalized_messages = normalize_message_roles(request_messages) + messages_data = truncate_and_serialize_messages(role_normalized_messages) + if messages_data is not None: + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) def _set_output_data(span, result): diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e9065e2d32..3372780463 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -1,6 +1,7 @@ -import pytest -from unittest import mock import json +from unittest import mock + +import pytest try: from unittest.mock import AsyncMock @@ -41,16 +42,18 @@ async def __call__(self, *args, **kwargs): except ImportError: from anthropic.types.content_block import ContentBlock as TextBlock -from sentry_sdk import start_transaction, start_span +from sentry_sdk import start_span, start_transaction +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.anthropic import ( AnthropicIntegration, - _set_output_data, _collect_ai_data, + _set_output_data, ) +from sentry_sdk.serializer import serialize from sentry_sdk.utils import 
package_version - ANTHROPIC_VERSION = package_version("anthropic") EXAMPLE_MESSAGE = Message( @@ -883,6 +886,10 @@ def test_set_output_data_with_input_json_delta(sentry_init): def test_anthropic_message_role_mapping(sentry_init, capture_events): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" + + +def test_anthropic_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -945,3 +952,105 @@ def mock_messages_create(*args, **kwargs): # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] assert "ai" not in roles + client = Anthropic(api_key="test-api-key") + + # Create messages that will definitely exceed size limits + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) # ~64KB + large_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + with mock.patch.object(client.messages, "create") as mock_create: + mock_create.return_value = Message( + id="test", + content=[TextBlock(text="Hello", type="text")], + model="claude-3", + role="assistant", + type="message", + usage=Usage(input_tokens=10, output_tokens=20), + ) + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-sonnet-20240229", + messages=large_messages, + max_tokens=100, + ) + + (event,) = events + span = event["spans"][0] + + # Should have gen_ai request messages (as string) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + + # Should have fewer or equal messages than original (due to truncation) + assert len(parsed_messages) <= len(large_messages) + + # Size should be under the limit + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES + + +def test_anthropic_single_large_message_preservation(sentry_init, capture_events): + """Test that a single very large message gets preserved with truncated content.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = Anthropic(api_key="test-api-key") + + # Create one huge message that exceeds the limit + huge_content = ( + "This is an extremely long message that will definitely exceed size limits. 
" + * 2000 + ) # ~150KB + messages = [{"role": "user", "content": huge_content}] + + with mock.patch.object(client.messages, "create") as mock_create: + mock_create.return_value = Message( + id="test", + content=[TextBlock(text="Hello", type="text")], + model="claude-3", + role="assistant", + type="message", + usage=Usage(input_tokens=100, output_tokens=50), + ) + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-sonnet-20240229", + messages=messages, + max_tokens=100, + ) + + (event,) = events + span = event["spans"][0] + + # Should still have the message (not removed entirely) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + # Should be valid JSON with exactly one message + import json + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + + # The message should have truncated content + assert parsed_messages[0]["role"] == "user" + assert len(parsed_messages[0]["content"]) < len(huge_content) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index b9ab4df5bf..fa2eb6c40a 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -1,16 +1,18 @@ +import re +from typing import TYPE_CHECKING from unittest import mock + +import httpx import pytest -import re import responses -import httpx - from huggingface_hub import InferenceClient import sentry_sdk -from sentry_sdk.utils import package_version +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration - -from typing import TYPE_CHECKING +from sentry_sdk.serializer import serialize +from sentry_sdk.utils import package_version try: from huggingface_hub.utils._errors import HfHubHTTPError diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b600c32905..3d05bf150c 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -24,6 +24,9 @@ async def __call__(self, *args, **kwargs): _success_callback, _failure_callback, ) +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from sentry_sdk.utils import package_version @@ -545,3 +548,115 @@ def dict(self): # Should have extracted the response message assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + + +def test_litellm_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in LiteLLM integration.""" + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Create messages that will definitely exceed size limits + large_content = ( + "This is a very long message that will exceed our size limits. 
" * 1000 + ) # ~64KB + large_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + kwargs = { + "model": "gpt-3.5-turbo", + "messages": large_messages, + } + + # Mock the response for success callback + mock_response = MockCompletionResponse() + + # Simulate the integration flow + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + (event,) = events + (span,) = event["spans"] + + # Should have gen_ai request messages (as string) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + # Should be valid JSON + import json + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + + # Should have fewer or equal messages than original (due to truncation) + assert len(parsed_messages) <= len(large_messages) + + # Size should be under the limit + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES + + +def test_litellm_single_large_message_preservation(sentry_init, capture_events): + """Test that a single very large message gets preserved with truncated content.""" + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + # Create one huge message that exceeds the limit + huge_content = ( + "This is an extremely long message that will definitely exceed size limits. " + * 2000 + ) # ~150KB + messages = [{"role": "user", "content": huge_content}] + + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + # Mock the response + mock_response = MockCompletionResponse() + + # Simulate the integration flow + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + (event,) = events + (span,) = event["spans"] + + # Should still have the message (not removed entirely) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + # Should be valid JSON with exactly one message + import json + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + + # The message should have truncated content + assert parsed_messages[0]["role"] == "user" + assert len(parsed_messages[0]["content"]) < len(huge_content) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 06e0a09fcf..51368c6271 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -39,6 +39,9 @@ OpenAIIntegration, _calculate_token_usage, ) +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from unittest import mock # python 3.3 and above @@ -1451,6 +1454,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): def test_openai_message_role_mapping(sentry_init, capture_events): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" + sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1460,7 
+1464,6 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ {"role": "system", "content": "You are helpful."}, @@ -1471,11 +1474,9 @@ def test_openai_message_role_mapping(sentry_init, capture_events): with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) - + # Verify that the span was created correctly (event,) = events span = event["spans"][0] - - # Verify that the span was created correctly assert span["op"] == "gen_ai.chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] @@ -1500,3 +1501,113 @@ def test_openai_message_role_mapping(sentry_init, capture_events): # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] assert "ai" not in roles + + +def test_openai_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in OpenAI integration.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + # Create messages that will definitely exceed size limits + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) # ~64KB + large_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) + + (event,) = events + span = event["spans"][0] + + # Should have gen_ai request messages (as string) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + # Should be valid JSON + import json + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + + # Should have fewer messages than original (due to truncation) + assert len(parsed_messages) <= len(large_messages) + + # Should have _meta entry indicating truncation + if "_meta" in event and len(parsed_messages) < len(large_messages): + meta_path = event["_meta"] + # Navigate through the meta structure to find the messages metadata + if ( + "spans" in meta_path + and "0" in meta_path["spans"] + and "data" in meta_path["spans"]["0"] + ): + span_meta = meta_path["spans"]["0"]["data"] + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "len" in messages_meta.get("", {}) + + +def test_openai_single_large_message_content_truncation(sentry_init, capture_events): + """Test that a single very large message gets content truncated, not removed entirely.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + # Create one huge message that exceeds the limit + huge_content = ( + "This is an extremely long message that will 
definitely exceed size limits. " + * 2000 + ) # ~150KB + messages = [{"role": "user", "content": huge_content}] + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=messages, + ) + + (event,) = events + span = event["spans"][0] + + # Should still have the message (not removed entirely) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + # Should be valid JSON with exactly one message + import json + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 1 + + # The message should have truncated content + assert parsed_messages[0]["role"] == "user" + assert len(parsed_messages[0]["content"]) < len(huge_content) + + # Size should be under the limit + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index e647ce9fad..fbeb639901 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -4,8 +4,12 @@ from unittest.mock import MagicMock, patch import os +import sentry_sdk from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration from sentry_sdk.integrations.openai_agents.utils import safe_serialize +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from sentry_sdk.utils import parse_version import agents @@ -1077,3 +1081,129 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events): # Verify no "ai" roles remain in any message for message in stored_messages: assert message["role"] != "ai" + + +def test_openai_agents_message_truncation( + sentry_init, capture_events, mock_model_response +): + """Test that large messages are truncated properly in OpenAI Agents integration.""" + # Create messages that will definitely exceed size limits + large_system_prompt = ( + "This is a very long system prompt that will exceed our size limits. " * 1000 + ) # ~64KB + large_user_message = ( + "This is a very long user message that will exceed our size limits. 
" * 1000 + ) # ~64KB + + agent = Agent( + name="test_agent", + model="gpt-4", + instructions=large_system_prompt, + ) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + agent, large_user_message, run_config=test_run_config + ) + + assert result is not None + + (event,) = events + spans = event["spans"] + invoke_agent_span, ai_client_span = spans + + # Should have gen_ai request messages (as string) + assert "gen_ai.request.messages" in invoke_agent_span["data"] + messages_data = invoke_agent_span["data"]["gen_ai.request.messages"] + assert isinstance(messages_data, str) + + # Should be valid JSON + import json + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + + # Should have some messages (system + user) + assert len(parsed_messages) >= 1 + + # Size should be under the limit + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES + + # Messages should be truncated from original large content + total_original_size = len(large_system_prompt) + len(large_user_message) + total_parsed_size = sum(len(str(msg)) for msg in parsed_messages) + assert total_parsed_size < total_original_size + + +def test_openai_agents_single_large_message_preservation( + sentry_init, capture_events, mock_model_response +): + """Test that a single very large message gets preserved with truncated content.""" + # Create one huge message that exceeds the limit + huge_content = ( + "This is an extremely long message that will definitely exceed size limits. 
" + * 2000 + ) # ~150KB + + agent = Agent( + name="test_agent", + model="gpt-4", + instructions="You are helpful.", # Keep this small + ) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + agent, huge_content, run_config=test_run_config + ) + + assert result is not None + + (event,) = events + spans = event["spans"] + invoke_agent_span, ai_client_span = spans + + # Should still have the messages (not removed entirely) + assert "gen_ai.request.messages" in invoke_agent_span["data"] + messages_data = invoke_agent_span["data"]["gen_ai.request.messages"] + assert isinstance(messages_data, str) + + # Should be valid JSON with at least one message + import json + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) >= 1 + + # The user message content should be truncated + user_message = next( + (msg for msg in parsed_messages if msg.get("role") == "user"), None + ) + if user_message and "content" in user_message: + assert len(user_message["content"]) < len(huge_content) diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py index f307e88d6d..b06ae60578 100644 --- a/tests/test_ai_message_utils.py +++ b/tests/test_ai_message_utils.py @@ -121,7 +121,7 @@ def test_exact_size_boundary(self): messages = [{"role": "user", "content": "test"}] # Get the exact serialized size - from sentry_sdk.ai.message_utils import serialize + from sentry_sdk.serializer import serialize serialized = serialize(messages, is_vars=False) json_str = json.dumps(serialized, separators=(",", ":")) From 6dcc3441ee2f7036d4dd9a47dc425e1b334b5d3c Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 9 Oct 2025 12:58:21 +0200 Subject: [PATCH 10/14] clean up tests --- .../integrations/anthropic/test_anthropic.py | 34 +----- tests/integrations/litellm/test_litellm.py | 31 +----- tests/integrations/openai/test_openai.py | 26 +---- .../openai_agents/test_openai_agents.py | 23 +--- tests/test_ai_message_utils.py | 104 +++--------------- 5 files changed, 38 insertions(+), 180 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 3372780463..6b2120013d 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -913,12 +913,11 @@ def mock_messages_create(*args, **kwargs): client.messages._post = mock.Mock(return_value=mock_messages_create()) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ {"role": "system", "content": "You are helpful."}, {"role": "user", "content": "Hello"}, - {"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant" - {"role": "assistant", "content": "How can I help?"}, # Should stay "assistant" + {"role": "ai", "content": "Hi there!"}, + {"role": "assistant", "content": "How can I help?"}, ] with start_transaction(name="anthropic tx"): @@ -928,33 +927,23 @@ def mock_messages_create(*args, **kwargs): (event,) = events span = event["spans"][0] - - # Verify that the span was created correctly assert span["op"] == "gen_ai.chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in 
span["data"] - # Parse the stored messages stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - - # Verify that "ai" role was mapped to "assistant" assert len(stored_messages) == 4 assert stored_messages[0]["role"] == "system" assert stored_messages[1]["role"] == "user" - assert ( - stored_messages[2]["role"] == "assistant" - ) # "ai" should be mapped to "assistant" - assert stored_messages[3]["role"] == "assistant" # should stay "assistant" + assert stored_messages[2]["role"] == "assistant" + assert stored_messages[3]["role"] == "assistant" - # Verify content is preserved assert stored_messages[2]["content"] == "Hi there!" assert stored_messages[3]["content"] == "How can I help?" - # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] assert "ai" not in roles client = Anthropic(api_key="test-api-key") - # Create messages that will definitely exceed size limits large_content = ( "This is a very long message that will exceed our size limits. " * 1000 ) # ~64KB @@ -984,19 +973,15 @@ def mock_messages_create(*args, **kwargs): (event,) = events span = event["spans"][0] - - # Should have gen_ai request messages (as string) assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - - # Should have fewer or equal messages than original (due to truncation) assert len(parsed_messages) <= len(large_messages) - # Size should be under the limit result_size = len(messages_data.encode("utf-8")) assert result_size <= MAX_GEN_AI_MESSAGE_BYTES @@ -1012,11 +997,10 @@ def test_anthropic_single_large_message_preservation(sentry_init, capture_events client = Anthropic(api_key="test-api-key") - # Create one huge message that exceeds the limit huge_content = ( "This is an extremely long message that will definitely exceed size limits. " * 2000 - ) # ~150KB + ) messages = [{"role": "user", "content": huge_content}] with mock.patch.object(client.messages, "create") as mock_create: @@ -1039,18 +1023,12 @@ def test_anthropic_single_large_message_preservation(sentry_init, capture_events (event,) = events span = event["spans"][0] - # Should still have the message (not removed entirely) assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) - # Should be valid JSON with exactly one message - import json - parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 - - # The message should have truncated content assert parsed_messages[0]["role"] == "user" assert len(parsed_messages[0]["content"]) < len(huge_content) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 3d05bf150c..0b765fb45f 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -1,3 +1,4 @@ +import json import pytest from unittest import mock from datetime import datetime @@ -558,11 +559,9 @@ def test_litellm_message_truncation(sentry_init, capture_events): send_default_pii=True, ) events = capture_events() - - # Create messages that will definitely exceed size limits large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 - ) # ~64KB + ) large_messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": large_content}, @@ -575,10 +574,7 @@ def test_litellm_message_truncation(sentry_init, capture_events): "messages": large_messages, } - # Mock the response for success callback mock_response = MockCompletionResponse() - - # Simulate the integration flow _input_callback(kwargs) _success_callback( kwargs, @@ -589,22 +585,15 @@ def test_litellm_message_truncation(sentry_init, capture_events): (event,) = events (span,) = event["spans"] - - # Should have gen_ai request messages (as string) assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) - # Should be valid JSON - import json - parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - - # Should have fewer or equal messages than original (due to truncation) assert len(parsed_messages) <= len(large_messages) - # Size should be under the limit result_size = len(messages_data.encode("utf-8")) assert result_size <= MAX_GEN_AI_MESSAGE_BYTES @@ -618,11 +607,10 @@ def test_litellm_single_large_message_preservation(sentry_init, capture_events): ) events = capture_events() - # Create one huge message that exceeds the limit huge_content = ( "This is an extremely long message that will definitely exceed size limits. " * 2000 - ) # ~150KB + ) messages = [{"role": "user", "content": huge_content}] kwargs = { @@ -630,10 +618,7 @@ def test_litellm_single_large_message_preservation(sentry_init, capture_events): "messages": messages, } - # Mock the response mock_response = MockCompletionResponse() - - # Simulate the integration flow _input_callback(kwargs) _success_callback( kwargs, @@ -644,19 +629,13 @@ def test_litellm_single_large_message_preservation(sentry_init, capture_events): (event,) = events (span,) = event["spans"] - - # Should still have the message (not removed entirely) assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) - # Should be valid JSON with exactly one message - import json - parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 - - # The message should have truncated content assert parsed_messages[0]["role"] == "user" assert len(parsed_messages[0]["content"]) < len(huge_content) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 51368c6271..440e74c395 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,3 +1,4 @@ +import json import pytest from sentry_sdk.utils import package_version @@ -1515,10 +1516,9 @@ def test_openai_message_truncation(sentry_init, capture_events): client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Create messages that will definitely exceed size limits large_content = ( "This is a very long message that will exceed our size limits. 
" * 1000 - ) # ~64KB + ) large_messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": large_content}, @@ -1534,25 +1534,17 @@ def test_openai_message_truncation(sentry_init, capture_events): (event,) = events span = event["spans"][0] - - # Should have gen_ai request messages (as string) assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) - # Should be valid JSON - import json - parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - - # Should have fewer messages than original (due to truncation) assert len(parsed_messages) <= len(large_messages) - # Should have _meta entry indicating truncation if "_meta" in event and len(parsed_messages) < len(large_messages): meta_path = event["_meta"] - # Navigate through the meta structure to find the messages metadata if ( "spans" in meta_path and "0" in meta_path["spans"] @@ -1576,11 +1568,10 @@ def test_openai_single_large_message_content_truncation(sentry_init, capture_eve client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Create one huge message that exceeds the limit huge_content = ( "This is an extremely long message that will definitely exceed size limits. " * 2000 - ) # ~150KB + ) messages = [{"role": "user", "content": huge_content}] with start_transaction(name="openai tx"): @@ -1591,23 +1582,16 @@ def test_openai_single_large_message_content_truncation(sentry_init, capture_eve (event,) = events span = event["spans"][0] - - # Should still have the message (not removed entirely) assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) - # Should be valid JSON with exactly one message - import json - parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 - - # The message should have truncated content assert parsed_messages[0]["role"] == "user" assert len(parsed_messages[0]["content"]) < len(huge_content) - # Size should be under the limit result_size = len(messages_data.encode("utf-8")) assert result_size <= MAX_GEN_AI_MESSAGE_BYTES diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index fbeb639901..de4344311b 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1,4 +1,5 @@ import asyncio +import json import re import pytest from unittest.mock import MagicMock, patch @@ -1124,26 +1125,18 @@ def test_openai_agents_message_truncation( (event,) = events spans = event["spans"] invoke_agent_span, ai_client_span = spans - - # Should have gen_ai request messages (as string) assert "gen_ai.request.messages" in invoke_agent_span["data"] + messages_data = invoke_agent_span["data"]["gen_ai.request.messages"] assert isinstance(messages_data, str) - # Should be valid JSON - import json - parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) - - # Should have some messages (system + user) assert len(parsed_messages) >= 1 - # Size should be under the limit result_size = len(messages_data.encode("utf-8")) assert result_size <= MAX_GEN_AI_MESSAGE_BYTES - # Messages should be truncated from original large content total_original_size = len(large_system_prompt) + 
len(large_user_message) total_parsed_size = sum(len(str(msg)) for msg in parsed_messages) assert total_parsed_size < total_original_size @@ -1153,16 +1146,15 @@ def test_openai_agents_single_large_message_preservation( sentry_init, capture_events, mock_model_response ): """Test that a single very large message gets preserved with truncated content.""" - # Create one huge message that exceeds the limit huge_content = ( "This is an extremely long message that will definitely exceed size limits. " * 2000 - ) # ~150KB + ) agent = Agent( name="test_agent", model="gpt-4", - instructions="You are helpful.", # Keep this small + instructions="You are helpful.", ) with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): @@ -1188,20 +1180,15 @@ def test_openai_agents_single_large_message_preservation( (event,) = events spans = event["spans"] invoke_agent_span, ai_client_span = spans - - # Should still have the messages (not removed entirely) assert "gen_ai.request.messages" in invoke_agent_span["data"] + messages_data = invoke_agent_span["data"]["gen_ai.request.messages"] assert isinstance(messages_data, str) - # Should be valid JSON with at least one message - import json - parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) >= 1 - # The user message content should be truncated user_message = next( (msg for msg in parsed_messages if msg.get("role") == "user"), None ) diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py index b06ae60578..5428723ee8 100644 --- a/tests/test_ai_message_utils.py +++ b/tests/test_ai_message_utils.py @@ -35,7 +35,7 @@ def sample_messages(): @pytest.fixture def large_messages(): """Messages that will definitely exceed size limits""" - large_content = "This is a very long message. " * 1000 # ~30KB per message + large_content = "This is a very long message. 
" * 1000 return [ {"role": "system", "content": large_content}, {"role": "user", "content": large_content}, @@ -55,14 +55,10 @@ def test_no_truncation_needed(self, sample_messages): def test_truncation_removes_oldest_first(self, large_messages): """Test that oldest messages are removed first during truncation""" - small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 result = truncate_messages_by_size(large_messages, max_bytes=small_limit) - - # Should have fewer messages assert len(result) < len(large_messages) - # Should keep the most recent messages - # The last message should always be preserved if possible if result: assert result[-1] == large_messages[-1] @@ -85,23 +81,20 @@ def test_single_message_over_limit(self): """Test single message that exceeds size limit""" large_content = "x" * 10000 messages = [{"role": "user", "content": large_content}] - result = truncate_messages_by_size(messages, max_bytes=100) # Very small limit - # Should return truncated content, not empty list + result = truncate_messages_by_size(messages, max_bytes=100) assert len(result) == 1 assert result[0]["role"] == "user" - # Content should be truncated by either our manual truncation or serializer max_value_length assert len(result[0]["content"]) < len(large_content) def test_progressive_truncation(self, large_messages): """Test that truncation works progressively with different limits""" - # Test different size limits based on the constant limits = [ - MAX_GEN_AI_MESSAGE_BYTES // 5, # 100KB - MAX_GEN_AI_MESSAGE_BYTES // 10, # 50KB - MAX_GEN_AI_MESSAGE_BYTES // 25, # 20KB - MAX_GEN_AI_MESSAGE_BYTES // 100, # 5KB - MAX_GEN_AI_MESSAGE_BYTES // 500, # 1KB + MAX_GEN_AI_MESSAGE_BYTES // 5, + MAX_GEN_AI_MESSAGE_BYTES // 10, + MAX_GEN_AI_MESSAGE_BYTES // 25, + MAX_GEN_AI_MESSAGE_BYTES // 100, + MAX_GEN_AI_MESSAGE_BYTES // 500, ] prev_count = len(large_messages) @@ -109,32 +102,23 @@ def test_progressive_truncation(self, large_messages): result = truncate_messages_by_size(large_messages, max_bytes=limit) current_count = len(result) - # As limit decreases, message count should not increase assert current_count <= prev_count - # Should always preserve at least one message assert current_count >= 1 prev_count = current_count def test_exact_size_boundary(self): """Test behavior at exact size boundaries""" - # Create a message that serializes to a known size messages = [{"role": "user", "content": "test"}] - # Get the exact serialized size - from sentry_sdk.serializer import serialize - serialized = serialize(messages, is_vars=False) json_str = json.dumps(serialized, separators=(",", ":")) exact_size = len(json_str.encode("utf-8")) - # Should keep the message at exact size result = truncate_messages_by_size(messages, max_bytes=exact_size) assert len(result) == 1 - # Should truncate the message content if limit is one byte smaller result = truncate_messages_by_size(messages, max_bytes=exact_size - 1) assert len(result) == 1 - # Content should be truncated by either our manual truncation or serializer class TestSerializeGenAiMessages: @@ -145,10 +129,9 @@ def test_serialize_normal_messages(self, sample_messages): assert result is not None assert isinstance(result, str) - # Should be valid JSON parsed = json.loads(result) assert isinstance(parsed, list) - assert len(parsed) <= len(sample_messages) # Could be truncated + assert len(parsed) <= len(sample_messages) def test_serialize_none_messages(self): """Test serialization of None input""" @@ -162,17 
+145,15 @@ def test_serialize_empty_messages(self): def test_serialize_with_truncation(self, large_messages): """Test serialization with size-based truncation""" - small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 result = serialize_gen_ai_messages(large_messages, max_bytes=small_limit) - if result: # Might be None if all messages are too large + if result: assert isinstance(result, str) - # Verify the result is under the size limit result_size = len(result.encode("utf-8")) assert result_size <= small_limit - # Should be valid JSON parsed = json.loads(result) assert isinstance(parsed, list) @@ -197,11 +178,8 @@ class TestTruncateAndSerializeMessages: def test_main_function_with_normal_messages(self, sample_messages): """Test the main function with normal messages""" result = truncate_and_serialize_messages(sample_messages) - - # Should return a JSON string when no truncation occurs assert isinstance(result, str) - # Should be valid JSON parsed = json.loads(result) assert isinstance(parsed, list) assert len(parsed) == len(sample_messages) @@ -210,20 +188,14 @@ def test_main_function_with_large_messages(self, large_messages): """Test the main function with messages requiring truncation""" small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation result = truncate_and_serialize_messages(large_messages, max_bytes=small_limit) - - # Should return AnnotatedValue when truncation occurs assert isinstance(result, AnnotatedValue) assert result.metadata["len"] == len(large_messages) - - # The value should be a JSON string assert isinstance(result.value, str) - # Should be valid JSON and under size limit parsed = json.loads(result.value) assert isinstance(parsed, list) assert len(parsed) <= len(large_messages) - # Size should be under limit result_size = len(result.value.encode("utf-8")) assert result_size <= small_limit @@ -240,15 +212,11 @@ def test_main_function_with_empty_input(self): def test_main_function_serialization_format(self, sample_messages): """Test that the function always returns proper JSON strings""" result = truncate_and_serialize_messages(sample_messages) - - # Should be JSON string assert isinstance(result, str) - # Should be valid, parseable JSON parsed = json.loads(result) assert isinstance(parsed, list) - # Content should match original structure for i, msg in enumerate(parsed): assert "role" in msg assert "content" in msg @@ -256,23 +224,12 @@ def test_main_function_serialization_format(self, sample_messages): def test_main_function_default_limit(self, sample_messages): """Test that the main function uses the default limit correctly""" result = truncate_and_serialize_messages(sample_messages) - - # With normal sample messages, should not need truncation - # Should return plain JSON string (not AnnotatedValue) assert isinstance(result, str) - # Should be valid JSON parsed = json.loads(result) assert isinstance(parsed, list) -class TestConstants: - def test_default_limit_is_reasonable(self): - """Test that the default limit is reasonable""" - assert MAX_GEN_AI_MESSAGE_BYTES > 0 - assert MAX_GEN_AI_MESSAGE_BYTES < 10**6 # Should be less than MAX_EVENT_BYTES - - class TestEdgeCases: def test_messages_with_special_characters(self): """Test messages containing special characters""" @@ -287,7 +244,6 @@ def test_messages_with_special_characters(self): result = truncate_and_serialize_messages(messages) assert result is not None - # Should be valid JSON parsed = json.loads(result) assert 
len(parsed) == 2 assert "🌍" in parsed[0]["content"] @@ -310,8 +266,6 @@ def test_messages_with_nested_structures(self): result = truncate_and_serialize_messages(messages) assert result is not None - # Should preserve the structure - # Handle both string and AnnotatedValue return types if isinstance(result, AnnotatedValue): parsed = json.loads(result.value) else: @@ -329,8 +283,6 @@ def test_messages_with_none_values(self): result = truncate_and_serialize_messages(messages) assert result is not None - # Should handle None values gracefully - # Handle both string and AnnotatedValue return types if isinstance(result, AnnotatedValue): parsed = json.loads(result.value) else: @@ -348,15 +300,13 @@ def test_truncation_keeps_most_recent(self): } ) - # Truncate to a small size that should remove several messages - small_limit = MAX_GEN_AI_MESSAGE_BYTES // 500 # 1KB limit to force truncation + small_limit = MAX_GEN_AI_MESSAGE_BYTES // 500 result = truncate_and_serialize_messages(messages, max_bytes=small_limit) if result: assert isinstance(result, AnnotatedValue) parsed = json.loads(result.value) if parsed: - # The last remaining message should be from the end of the original list last_kept_content = parsed[-1]["content"] assert ( "Message 9" in last_kept_content or "Message 8" in last_kept_content @@ -368,47 +318,32 @@ class TestMetaSupport: def test_annotated_value_returned_on_truncation(self, large_messages): """Test that truncate_and_serialize_messages returns AnnotatedValue when truncation occurs""" - # Force truncation with a limit that will keep at least one message - # Each large message is ~30KB, so 50KB should keep 1-2 messages but force truncation - small_limit = 50_000 # 50KB to force truncation but keep some messages + small_limit = 50_000 result = truncate_and_serialize_messages(large_messages, max_bytes=small_limit) - - # Should return an AnnotatedValue when truncation occurs assert isinstance(result, AnnotatedValue) assert result.metadata == {"len": len(large_messages)} - - # The value should be a JSON string assert isinstance(result.value, str) + parsed = json.loads(result.value) assert len(parsed) <= len(large_messages) def test_no_annotated_value_when_no_truncation(self, sample_messages): """Test that truncate_and_serialize_messages returns plain list when no truncation occurs""" result = truncate_and_serialize_messages(sample_messages) - - # Should return plain JSON string when no truncation occurs assert not isinstance(result, AnnotatedValue) assert isinstance(result, str) - # Should be valid JSON with same length parsed = json.loads(result) assert len(parsed) == len(sample_messages) def test_meta_structure_in_serialized_output(self, large_messages): """Test that _meta structure is created correctly in serialized output""" - # Force truncation with a limit that will keep at least one message - small_limit = 50_000 # 50KB to force truncation but keep some messages + small_limit = 50_000 annotated_messages = truncate_and_serialize_messages( large_messages, max_bytes=small_limit ) - - # Simulate how the serializer would process this (like it does in actual span data) test_data = {"gen_ai": {"request": {"messages": annotated_messages}}} - - # Serialize using Sentry's serializer (which processes AnnotatedValue) serialized = serialize(test_data, is_vars=False) - - # Check that _meta structure was created assert "_meta" in serialized assert "gen_ai" in serialized["_meta"] assert "request" in serialized["_meta"]["gen_ai"] @@ -416,8 +351,6 @@ def 
test_meta_structure_in_serialized_output(self, large_messages): assert serialized["_meta"]["gen_ai"]["request"]["messages"][""] == { "len": len(large_messages) } - - # Check that the actual data is still there and is a string assert "gen_ai" in serialized assert "request" in serialized["gen_ai"] assert "messages" in serialized["gen_ai"]["request"] @@ -425,19 +358,16 @@ def test_meta_structure_in_serialized_output(self, large_messages): def test_serialize_gen_ai_messages_handles_annotated_value(self, large_messages): """Test that serialize_gen_ai_messages handles AnnotatedValue input correctly""" - # Create an AnnotatedValue manually - truncated = large_messages[:2] # Keep only first 2 messages + truncated = large_messages[:2] annotated = AnnotatedValue( value=truncated, metadata={"len": len(large_messages)} ) - - # serialize_gen_ai_messages should handle it result = serialize_gen_ai_messages(annotated) assert result is not None parsed = json.loads(result) assert isinstance(parsed, list) - assert len(parsed) == 2 # Only 2 messages kept + assert len(parsed) == 2 def test_empty_messages_no_annotated_value(self): """Test that empty messages don't create AnnotatedValue""" From e105108ca5f43dfec7703581d15b3bcb9dd3b2e4 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 9 Oct 2025 13:32:01 +0200 Subject: [PATCH 11/14] fix test --- tests/test_ai_message_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py index 5428723ee8..9dec987116 100644 --- a/tests/test_ai_message_utils.py +++ b/tests/test_ai_message_utils.py @@ -1,7 +1,7 @@ import json import pytest -from sentry_sdk.ai.message_utils import ( +from sentry_sdk.ai.utils import ( MAX_GEN_AI_MESSAGE_BYTES, truncate_messages_by_size, serialize_gen_ai_messages, From 8046c1bfa40bc4e3e2bcca011149c608c75b07bc Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 9 Oct 2025 13:49:41 +0200 Subject: [PATCH 12/14] fix test --- .../integrations/anthropic/test_anthropic.py | 95 ++++++++++--------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 6b2120013d..06a9827cea 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -886,10 +886,6 @@ def test_set_output_data_with_input_json_delta(sentry_init): def test_anthropic_message_role_mapping(sentry_init, capture_events): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" - - -def test_anthropic_message_truncation(sentry_init, capture_events): - """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -898,9 +894,8 @@ def test_anthropic_message_truncation(sentry_init, capture_events): events = capture_events() client = Anthropic(api_key="z") - - def mock_messages_create(*args, **kwargs): - return Message( + client.messages._post = mock.Mock( + return_value=Message( id="msg_1", content=[TextBlock(text="Hi there!", type="text")], model="claude-3-opus", @@ -910,8 +905,7 @@ def mock_messages_create(*args, **kwargs): type="message", usage=Usage(input_tokens=10, output_tokens=5), ) - - client.messages._post = mock.Mock(return_value=mock_messages_create()) + ) test_messages = [ {"role": "system", "content": "You are helpful."}, @@ -926,7 +920,8 @@ def mock_messages_create(*args, **kwargs): ) 
(event,) = events - span = event["spans"][0] + (span,) = event["spans"] + assert span["op"] == "gen_ai.chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] @@ -942,39 +937,49 @@ def mock_messages_create(*args, **kwargs): roles = [msg["role"] for msg in stored_messages] assert "ai" not in roles + + +def test_anthropic_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in Anthropic integration.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="test-api-key") + client.messages._post = mock.Mock( + return_value=Message( + id="test", + content=[TextBlock(text="Hello", type="text")], + model="claude-3", + role="assistant", + type="message", + usage=Usage(input_tokens=10, output_tokens=20), + ) + ) large_content = ( "This is a very long message that will exceed our size limits. " * 1000 - ) # ~64KB + ) large_messages = [ - {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": large_content}, {"role": "assistant", "content": large_content}, {"role": "user", "content": large_content}, ] - with mock.patch.object(client.messages, "create") as mock_create: - mock_create.return_value = Message( - id="test", - content=[TextBlock(text="Hello", type="text")], - model="claude-3", - role="assistant", - type="message", - usage=Usage(input_tokens=10, output_tokens=20), + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-sonnet-20240229", + messages=large_messages, + max_tokens=100, ) - with start_transaction(name="anthropic tx"): - client.messages.create( - model="claude-3-sonnet-20240229", - messages=large_messages, - max_tokens=100, - ) - (event,) = events - span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + (span,) = event["spans"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) @@ -996,15 +1001,8 @@ def test_anthropic_single_large_message_preservation(sentry_init, capture_events events = capture_events() client = Anthropic(api_key="test-api-key") - - huge_content = ( - "This is an extremely long message that will definitely exceed size limits. " - * 2000 - ) - messages = [{"role": "user", "content": huge_content}] - - with mock.patch.object(client.messages, "create") as mock_create: - mock_create.return_value = Message( + client.messages._post = mock.Mock( + return_value=Message( id="test", content=[TextBlock(text="Hello", type="text")], model="claude-3", @@ -1012,16 +1010,23 @@ def test_anthropic_single_large_message_preservation(sentry_init, capture_events type="message", usage=Usage(input_tokens=100, output_tokens=50), ) + ) - with start_transaction(name="anthropic tx"): - client.messages.create( - model="claude-3-sonnet-20240229", - messages=messages, - max_tokens=100, - ) + huge_content = ( + "This is an extremely long message that will definitely exceed size limits. 
" + * 2000 + ) + messages = [{"role": "user", "content": huge_content}] + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-sonnet-20240229", + messages=messages, + max_tokens=100, + ) (event,) = events - span = event["spans"][0] + (span,) = event["spans"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] From cb2e8030f15ac45cb2f66ed0bd62f5cd6420cda9 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 9 Oct 2025 14:23:56 +0200 Subject: [PATCH 13/14] dont truncate in huggingface hub --- sentry_sdk/integrations/huggingface_hub.py | 10 ++++------ .../huggingface_hub/test_huggingface_hub.py | 14 ++++++-------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 6e62763fc6..2e2b382abd 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -3,7 +3,7 @@ import sentry_sdk from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import set_data_normalized, truncate_and_serialize_messages +from sentry_sdk.ai.utils import set_data_normalized from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -103,11 +103,9 @@ def new_huggingface_task(*args, **kwargs): # Input attributes if should_send_default_pii() and integration.include_prompts: - # Convert prompt to message format if it's a string - messages = [prompt] if isinstance(prompt, str) else prompt - messages_data = truncate_and_serialize_messages(messages) - if messages_data is not None: - span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False + ) attribute_mapping = { "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index fa2eb6c40a..b9ab4df5bf 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -1,18 +1,16 @@ -import re -from typing import TYPE_CHECKING from unittest import mock - -import httpx import pytest +import re import responses +import httpx + from huggingface_hub import InferenceClient import sentry_sdk -from sentry_sdk._types import AnnotatedValue -from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES -from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration -from sentry_sdk.serializer import serialize from sentry_sdk.utils import package_version +from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration + +from typing import TYPE_CHECKING try: from huggingface_hub.utils._errors import HfHubHTTPError From 455136a2a6d2964998ed8ef610149f676373e920 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 9 Oct 2025 15:30:38 +0200 Subject: [PATCH 14/14] fix litellm test --- tests/integrations/litellm/test_litellm.py | 57 ++++++++++++---------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 0b765fb45f..e2572198a5 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -559,6 +559,7 @@ def test_litellm_message_truncation(sentry_init, 
From 455136a2a6d2964998ed8ef610149f676373e920 Mon Sep 17 00:00:00 2001
From: Simon Hellmayr
Date: Thu, 9 Oct 2025 15:30:38 +0200
Subject: [PATCH 14/14] fix litellm test

---
 tests/integrations/litellm/test_litellm.py | 57 ++++++++++++----------
 1 file changed, 31 insertions(+), 26 deletions(-)

diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 0b765fb45f..e2572198a5 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -559,6 +559,7 @@ def test_litellm_message_truncation(sentry_init, capture_events):
         send_default_pii=True,
     )
     events = capture_events()
+
     large_content = (
         "This is a very long message that will exceed our size limits. " * 1000
     )
@@ -569,24 +570,26 @@ def test_litellm_message_truncation(sentry_init, capture_events):
         {"role": "user", "content": large_content},
     ]
 
-    kwargs = {
-        "model": "gpt-3.5-turbo",
-        "messages": large_messages,
-    }
-
     mock_response = MockCompletionResponse()
-    _input_callback(kwargs)
-    _success_callback(
-        kwargs,
-        mock_response,
-        datetime.now(),
-        datetime.now(),
-    )
+
+    with start_transaction(name="litellm test"):
+        kwargs = {
+            "model": "gpt-3.5-turbo",
+            "messages": large_messages,
+        }
+
+        _input_callback(kwargs)
+        _success_callback(
+            kwargs,
+            mock_response,
+            datetime.now(),
+            datetime.now(),
+        )
 
     (event,) = events
     (span,) = event["spans"]
-    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
 
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
     messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
     assert isinstance(messages_data, str)
@@ -613,24 +616,26 @@ def test_litellm_single_large_message_preservation(sentry_init, capture_events):
     )
     messages = [{"role": "user", "content": huge_content}]
 
-    kwargs = {
-        "model": "gpt-3.5-turbo",
-        "messages": messages,
-    }
-
     mock_response = MockCompletionResponse()
-    _input_callback(kwargs)
-    _success_callback(
-        kwargs,
-        mock_response,
-        datetime.now(),
-        datetime.now(),
-    )
+
+    with start_transaction(name="litellm test"):
+        kwargs = {
+            "model": "gpt-3.5-turbo",
+            "messages": messages,
+        }
+
+        _input_callback(kwargs)
+        _success_callback(
+            kwargs,
+            mock_response,
+            datetime.now(),
+            datetime.now(),
+        )
 
     (event,) = events
     (span,) = event["spans"]
-    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
 
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
    messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
     assert isinstance(messages_data, str)
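
Taken together, the tests in this series pin down the public contract of the truncation utilities: the serialized payload always stays within MAX_GEN_AI_MESSAGE_BYTES, the oldest messages are dropped first, a single oversized message is kept with its content shortened, and callers receive either a plain JSON string or an AnnotatedValue whose metadata feeds the event's _meta tree. A minimal usage sketch of that contract as the tests describe it (the message content is illustrative; this is not the implementation):

    import json

    from sentry_sdk._types import AnnotatedValue
    from sentry_sdk.ai.utils import (
        MAX_GEN_AI_MESSAGE_BYTES,
        truncate_and_serialize_messages,
    )

    # Three ~60KB messages, comfortably over the default byte budget.
    messages = [{"role": "user", "content": "This is a long message. " * 2500}] * 3

    result = truncate_and_serialize_messages(messages)
    if isinstance(result, AnnotatedValue):
        # Truncated: .value is the JSON payload, .metadata records the
        # original message count for the event's _meta tree.
        payload = result.value
        assert result.metadata == {"len": len(messages)}
    else:
        payload = result  # plain JSON string; nothing was dropped

    assert len(payload.encode("utf-8")) <= MAX_GEN_AI_MESSAGE_BYTES
    assert isinstance(json.loads(payload), list)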