incrementally compute cumulative message sizes

shellmayr · shellmayr · commit 12dcf5dd9694 · 2025-10-16T14:33:26.000+02:00
diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
@@ -1,8 +1,10 @@
 import json
+from collections import deque
 from typing import TYPE_CHECKING
+from sys import getsizeof
 
 if TYPE_CHECKING:
-    from typing import Any, Callable, Dict, List, Optional
+    from typing import Any, Callable, Dict, List, Optional, Tuple
 
     from sentry_sdk.tracing import Span
 
@@ -99,21 +101,63 @@ def get_start_span_function():
     return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction
 
 
-def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
-    # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]]
-    if not messages:
-        return messages
+def _find_truncation_index(messages, max_bytes, separator_bytes=0):
+    # type: (List[Dict[str, Any]], int, int) -> int
+    """
+    Return the index of the oldest message to keep within a byte budget.
+
+    Walks the list from the newest message (the end) towards the oldest,
+    adding up the compact JSON size of each message plus ``separator_bytes``.
+    Once the running total exceeds ``max_bytes``, the index right after the
+    offending message is returned, so ``messages[index:]`` is the longest
+    suffix that fits.
+
+    :param messages: messages ordered oldest to newest.
+    :param max_bytes: budget for the summed per-message sizes.
+    :param separator_bytes: extra bytes charged per message, e.g. for the
+        separator it needs inside a serialized list.
+    :returns: 0 if every message fits, ``len(messages)`` if not even the
+        newest one does.
+    """
+    running_sum = 0
+    for idx in range(len(messages) - 1, -1, -1):
+        # json.dumps escapes non-ASCII by default, so characters == bytes.
+        size = len(json.dumps(messages[idx], separators=(",", ":")))
+        running_sum += size + separator_bytes
+        if running_sum > max_bytes:
+            return idx + 1
 
-    truncated_messages = list(messages)
+    return 0
 
-    while len(truncated_messages) > 1:
-        serialized_json = json.dumps(truncated_messages, separators=(",", ":"))
-        current_size = len(serialized_json.encode("utf-8"))
-        if current_size <= max_bytes:
-            break
-        truncated_messages.pop(0)
 
-    return truncated_messages
+def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
+    # type: (List[Dict[str, Any]], int) -> Tuple[List[Dict[str, Any]], int]
+    """
+    Drop the oldest messages until the serialized list fits ``max_bytes``.
+
+    The newest message is always kept, even if it alone exceeds the limit.
+
+    :returns: ``(kept_messages, removed_count)``; the input list itself and 0
+        when nothing had to be removed.
+    """
+    if not messages:
+        return messages, 0
+
+    serialized_json = json.dumps(messages, separators=(",", ":"))
+    current_size = len(serialized_json.encode("utf-8"))
+
+    if current_size <= max_bytes:
+        return messages, 0
+
+    # In a serialized list every message costs one extra byte (the comma after
+    # it, or the closing bracket for the last one) and the opening bracket
+    # costs one more, so charge 1 per message against a budget of max_bytes - 1.
+    truncation_index = _find_truncation_index(
+        messages, max_bytes - 1, separator_bytes=1
+    )
+    # Never return an empty list: fall back to just the newest message.
+    truncation_index = min(truncation_index, len(messages) - 1)
+    return messages[truncation_index:], truncation_index
 
 
 def truncate_and_annotate_messages(
@@ -123,16 +167,9 @@ def truncate_and_annotate_messages(
     if not messages:
         return None
 
-    original_count = len(messages)
-    truncated_messages = truncate_messages_by_size(messages, max_bytes)
-
-    if not truncated_messages:
-        return None
-
-    truncated_count = len(truncated_messages)
-    n_removed = original_count - truncated_count
-
-    if n_removed > 0:
-        scope._gen_ai_messages_truncated[span.span_id] = n_removed
+    truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
+    if removed_count > 0:
+        # removed_count is the number of leading (oldest) messages dropped.
+        scope._gen_ai_messages_truncated[span.span_id] = removed_count
 
     return truncated_messages
diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py
@@ -10,6 +10,7 @@
     set_data_normalized,
     truncate_and_annotate_messages,
     truncate_messages_by_size,
+    _find_truncation_index,
 )
 from sentry_sdk.serializer import serialize
 from sentry_sdk.utils import safe_serialize
@@ -209,27 +210,53 @@ def large_messages():
 class TestTruncateMessagesBySize:
     def test_no_truncation_needed(self, sample_messages):
         """Test that messages under the limit are not truncated"""
-        result = truncate_messages_by_size(
+        result, removed_count = truncate_messages_by_size(
             sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES
         )
         assert len(result) == len(sample_messages)
         assert result == sample_messages
+        assert removed_count == 0
 
     def test_truncation_removes_oldest_first(self, large_messages):
         """Test that oldest messages are removed first during truncation"""
         small_limit = 3000
-        result = truncate_messages_by_size(large_messages, max_bytes=small_limit)
+        result, removed_count = truncate_messages_by_size(
+            large_messages, max_bytes=small_limit
+        )
         assert len(result) < len(large_messages)
 
         if result:
             assert result[-1] == large_messages[-1]
+        assert removed_count == len(large_messages) - len(result)
 
     def test_empty_messages_list(self):
         """Test handling of empty messages list"""
-        result = truncate_messages_by_size(
+        result, removed_count = truncate_messages_by_size(
             [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500
         )
         assert result == []
+        assert removed_count == 0
+
+    def test_find_truncation_index(
+        self,
+    ):
+        """Test that the index of the first message to keep is found correctly"""
+        # each string is 7 bytes as JSON (5 letters + 2 quotes)
+        messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5]
+        truncation_index = _find_truncation_index(messages, 20)
+        assert truncation_index == 3
+        assert messages[truncation_index:] == ["D" * 5, "E" * 5]
+
+        messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5]
+        truncation_index = _find_truncation_index(messages, 40)
+        assert truncation_index == 0
+        assert messages[truncation_index:] == [
+            "A" * 5,
+            "B" * 5,
+            "C" * 5,
+            "D" * 5,
+            "E" * 5,
+        ]
 
     def test_progressive_truncation(self, large_messages):
         """Test that truncation works progressively with different limits"""
@@ -250,20 +277,6 @@ def test_progressive_truncation(self, large_messages):
             assert current_count >= 1
             prev_count = current_count
 
-    def test_exact_size_boundary(self):
-        """Test behavior at exact size boundaries"""
-        messages = [{"role": "user", "content": "test"}]
-
-        serialized = serialize(messages, is_vars=False)
-        json_str = json.dumps(serialized, separators=(",", ":"))
-        exact_size = len(json_str.encode("utf-8"))
-
-        result = truncate_messages_by_size(messages, max_bytes=exact_size)
-        assert len(result) == 1
-
-        result = truncate_messages_by_size(messages, max_bytes=exact_size - 1)
-        assert len(result) == 1
-
 
 class TestTruncateAndAnnotateMessages:
     def test_no_truncation_returns_list(self, sample_messages):