Skip to content

Commit a92e93c

Browse files
committed
wip
1 parent 85520a5 commit a92e93c

File tree

2 files changed

+36
-17
lines changed

2 files changed

+36
-17
lines changed

sentry_sdk/ai/message_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ def serialize(obj, **kwargs):
1313
return obj
1414

1515

16-
MAX_GEN_AI_MESSAGE_BYTES = 30_000 # 30KB
16+
# Custom limit for gen_ai message serialization - 50% of MAX_EVENT_BYTES
17+
# to leave room for other event data while still being generous for messages
18+
MAX_GEN_AI_MESSAGE_BYTES = 500_000 # 500KB
1719

1820

1921
def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):

tests/test_ai_message_utils.py

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,16 @@ def large_messages():
4646
class TestTruncateMessagesBySize:
4747
def test_no_truncation_needed(self, sample_messages):
4848
"""Test that messages under the limit are not truncated"""
49-
result = truncate_messages_by_size(sample_messages, max_bytes=50000)
49+
result = truncate_messages_by_size(
50+
sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES
51+
)
5052
assert len(result) == len(sample_messages)
5153
assert result == sample_messages
5254

5355
def test_truncation_removes_oldest_first(self, large_messages):
5456
"""Test that oldest messages are removed first during truncation"""
55-
result = truncate_messages_by_size(large_messages, max_bytes=5000)
57+
small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation
58+
result = truncate_messages_by_size(large_messages, max_bytes=small_limit)
5659

5760
# Should have fewer messages
5861
assert len(result) < len(large_messages)
@@ -64,28 +67,38 @@ def test_truncation_removes_oldest_first(self, large_messages):
6467

6568
def test_empty_messages_list(self):
6669
"""Test handling of empty messages list"""
67-
result = truncate_messages_by_size([], max_bytes=1000)
70+
result = truncate_messages_by_size(
71+
[], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500
72+
)
6873
assert result == []
6974

7075
def test_single_message_under_limit(self):
7176
"""Test single message under size limit"""
7277
messages = [{"role": "user", "content": "Hello!"}]
73-
result = truncate_messages_by_size(messages, max_bytes=1000)
78+
result = truncate_messages_by_size(
79+
messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500
80+
)
7481
assert result == messages
7582

7683
def test_single_message_over_limit(self):
7784
"""Test single message that exceeds size limit"""
7885
large_content = "x" * 10000
7986
messages = [{"role": "user", "content": large_content}]
80-
result = truncate_messages_by_size(messages, max_bytes=100)
87+
result = truncate_messages_by_size(messages, max_bytes=100) # Very small limit
8188

8289
# Should return an empty list if even a single message is too large
8390
assert result == []
8491

8592
def test_progressive_truncation(self, large_messages):
8693
"""Test that truncation works progressively with different limits"""
87-
# Test different size limits
88-
limits = [100000, 50000, 20000, 5000, 1000]
94+
# Test different size limits based on the constant
95+
limits = [
96+
MAX_GEN_AI_MESSAGE_BYTES // 5, # 100KB
97+
MAX_GEN_AI_MESSAGE_BYTES // 10, # 50KB
98+
MAX_GEN_AI_MESSAGE_BYTES // 25, # 20KB
99+
MAX_GEN_AI_MESSAGE_BYTES // 100, # 5KB
100+
MAX_GEN_AI_MESSAGE_BYTES // 500, # 1KB
101+
]
89102
prev_count = len(large_messages)
90103

91104
for limit in limits:
@@ -142,14 +155,15 @@ def test_serialize_empty_messages(self):
142155

143156
def test_serialize_with_truncation(self, large_messages):
144157
"""Test serialization with size-based truncation"""
145-
result = serialize_gen_ai_messages(large_messages, max_bytes=5000)
158+
small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation
159+
result = serialize_gen_ai_messages(large_messages, max_bytes=small_limit)
146160

147161
if result: # Might be None if all messages are too large
148162
assert isinstance(result, str)
149163

150164
# Verify the result is under the size limit
151165
result_size = len(result.encode("utf-8"))
152-
assert result_size <= 5000
166+
assert result_size <= small_limit
153167

154168
# Should be valid JSON
155169
parsed = json.loads(result)
@@ -236,19 +250,20 @@ def test_main_function_with_normal_messages(self, sample_messages):
236250

237251
def test_main_function_with_large_messages(self, large_messages):
238252
"""Test the main function with messages requiring truncation"""
239-
result = truncate_and_serialize_messages(large_messages, max_bytes=5000)
253+
small_limit = MAX_GEN_AI_MESSAGE_BYTES // 100 # 5KB limit to force truncation
254+
result = truncate_and_serialize_messages(large_messages, max_bytes=small_limit)
240255

241256
assert "serialized_data" in result
242257
assert "metadata" in result
243258
assert "original_size" in result
244259

245260
# Original size should be large
246-
assert result["original_size"] > 5000
261+
assert result["original_size"] > small_limit
247262

248263
# May or may not be truncated depending on how large the messages are
249264
if result["serialized_data"]:
250265
serialized_size = len(result["serialized_data"].encode("utf-8"))
251-
assert serialized_size <= 5000
266+
assert serialized_size <= small_limit
252267

253268
def test_main_function_with_none_input(self):
254269
"""Test the main function with None input"""
@@ -277,7 +292,7 @@ def test_main_function_size_comparison(self, sample_messages):
277292

278293
def test_main_function_respects_custom_limit(self, large_messages):
279294
"""Test that the main function respects custom byte limits"""
280-
custom_limit = 2000
295+
custom_limit = MAX_GEN_AI_MESSAGE_BYTES // 250 # 2KB limit
281296
result = truncate_and_serialize_messages(large_messages, max_bytes=custom_limit)
282297

283298
if result["serialized_data"]:
@@ -344,15 +359,16 @@ def test_messages_with_nested_structures(self):
344359

345360
def test_very_small_limit(self, sample_messages):
346361
"""Test behavior with extremely small size limit"""
347-
result = truncate_and_serialize_messages(sample_messages, max_bytes=10)
362+
tiny_limit = 10 # 10 bytes - extremely small limit
363+
result = truncate_and_serialize_messages(sample_messages, max_bytes=tiny_limit)
348364

349365
# With such a small limit, likely all messages will be removed
350366
if result["serialized_data"] is None:
351367
assert result["metadata"]["truncated_count"] == 0
352368
else:
353369
# If any data remains, it should be under the limit
354370
size = len(result["serialized_data"].encode("utf-8"))
355-
assert size <= 10
371+
assert size <= tiny_limit
356372

357373
def test_messages_with_none_values(self):
358374
"""Test messages containing None values"""
@@ -380,7 +396,8 @@ def test_truncation_keeps_most_recent(self):
380396
)
381397

382398
# Truncate to a small size that should remove several messages
383-
result = truncate_and_serialize_messages(messages, max_bytes=1000)
399+
small_limit = MAX_GEN_AI_MESSAGE_BYTES // 500 # 1KB limit to force truncation
400+
result = truncate_and_serialize_messages(messages, max_bytes=small_limit)
384401

385402
if result["serialized_data"]:
386403
parsed = json.loads(result["serialized_data"])

0 commit comments

Comments
 (0)