Skip to content

Commit 507bf17

Browse files
committed
fix(ai): introduce message truncation for openai
1 parent 4325802 commit 507bf17

File tree

6 files changed

+475
-38
lines changed

6 files changed

+475
-38
lines changed

sentry_sdk/ai/utils.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,24 @@
11
import json
2-
32
from typing import TYPE_CHECKING
43

54
if TYPE_CHECKING:
65
from typing import Any, Callable
6+
77
from sentry_sdk.tracing import Span
88

9+
from typing import TYPE_CHECKING
10+
911
import sentry_sdk
1012
from sentry_sdk.utils import logger
1113

14+
if TYPE_CHECKING:
15+
from typing import Any, Dict, List, Optional
16+
17+
from sentry_sdk._types import AnnotatedValue
18+
from sentry_sdk.serializer import serialize
19+
20+
MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB
21+
1222

1323
class GEN_AI_ALLOWED_MESSAGE_ROLES:
1424
SYSTEM = "system"
@@ -95,3 +105,79 @@ def get_start_span_function():
95105
current_span is not None and current_span.containing_transaction is not None
96106
)
97107
return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction
108+
109+
110+
def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
    # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]]
    """Shrink a gen_ai message list so its compact-JSON form fits ``max_bytes``.

    Oldest messages are dropped first. If a single remaining message is still
    too large, its ``content`` field is shortened: strings are cut down in 10%
    steps, list contents lose trailing parts one at a time. The input list and
    its dicts are never mutated; a new list (with a copied message, when its
    content had to change) is returned.
    """
    if not messages:
        return messages

    def _encoded_size(payload):
        # type: (Any) -> int
        # Size as it would be serialized: compact separators, UTF-8 bytes.
        return len(json.dumps(payload, separators=(",", ":")).encode("utf-8"))

    result = list(messages)

    # Drop from the front (oldest messages) until we fit or one remains.
    while len(result) > 1 and _encoded_size(result) > max_bytes:
        result.pop(0)

    if len(result) == 1 and _encoded_size(result) > max_bytes:
        message = result[0].copy()
        content = message.get("content", "")

        if isinstance(content, str):
            # Start at half the budget and shrink geometrically until the
            # serialized single-message list fits.
            limit = max_bytes // 2
            while True:
                message["content"] = content[:limit]
                if _encoded_size([message]) <= max_bytes:
                    break
                limit = int(limit * 0.9)
                if limit < 100:
                    # Give up on partial content: keep the message, blank it.
                    message["content"] = ""
                    break
            result = [message]
        elif isinstance(content, list):
            # Drop trailing content parts until the message fits.
            parts = list(content)
            while parts:
                message["content"] = parts
                if _encoded_size([message]) <= max_bytes:
                    break
                parts = parts[:-1]
            if not parts:
                message["content"] = []
            result = [message]
        # NOTE(review): other content types (dict, None) pass through
        # untruncated, same as the original implementation.

    return result
161+
162+
163+
def truncate_and_annotate_messages(
    messages, span, scope, max_bytes=MAX_GEN_AI_MESSAGE_BYTES
):
    # type: (Optional[List[Dict[str, Any]]], Any, Any, int) -> Optional[List[Dict[str, Any]]]
    """Truncate ``messages`` for a span and record how many were dropped.

    Returns the (possibly truncated) message list, or ``None`` when there is
    nothing to record. When truncation removed whole messages, the removal
    count is stashed on the scope keyed by span id and the original message
    count is stored on the span, so the client can later replace the span
    data with an annotated value carrying the original length.
    """
    if not messages:
        return None

    truncated = truncate_messages_by_size(messages, max_bytes)
    if not truncated:
        return None

    removed = len(messages) - len(truncated)
    if removed > 0:
        scope._gen_ai_messages_truncated[span.span_id] = removed
        span.set_data("_gen_ai_messages_original_count", len(messages))

    return truncated

sentry_sdk/client.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,32 @@ def _prepare_event(
606606
event["breadcrumbs"] = AnnotatedValue(
607607
event.get("breadcrumbs", []), {"len": previous_total_breadcrumbs}
608608
)
609+
610+
# Annotate truncated gen_ai messages in spans
611+
if scope is not None and scope._gen_ai_messages_truncated:
612+
spans = event.get("spans", [])
613+
if isinstance(spans, AnnotatedValue):
614+
spans = spans.value
615+
616+
for span in spans:
617+
if isinstance(span, dict):
618+
span_id = span.get("span_id")
619+
if span_id and span_id in scope._gen_ai_messages_truncated:
620+
span_data = span.get("data", {})
621+
original_count = span_data.pop(
622+
"_gen_ai_messages_original_count", None
623+
)
624+
if (
625+
original_count is not None
626+
and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data
627+
):
628+
span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = (
629+
AnnotatedValue(
630+
span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES],
631+
{"len": original_count},
632+
)
633+
)
634+
609635
# Postprocess the event here so that annotated types do
610636
# generally not surface in before_send
611637
if event is not None:

sentry_sdk/integrations/openai.py

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from functools import wraps
2-
from collections.abc import Iterable
32

43
import sentry_sdk
54
from sentry_sdk import consts
65
from sentry_sdk.ai.monitoring import record_token_usage
7-
from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
6+
from sentry_sdk.ai.utils import (
7+
set_data_normalized,
8+
normalize_message_roles,
9+
truncate_and_annotate_messages,
10+
)
811
from sentry_sdk.consts import SPANDATA
912
from sentry_sdk.integrations import DidNotEnable, Integration
1013
from sentry_sdk.scope import should_send_default_pii
@@ -18,19 +21,14 @@
1821
from typing import TYPE_CHECKING
1922

2023
if TYPE_CHECKING:
21-
from typing import Any, List, Optional, Callable, AsyncIterator, Iterator
24+
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
2225
from sentry_sdk.tracing import Span
2326

2427
try:
2528
try:
26-
from openai import NotGiven
29+
from openai import NOT_GIVEN
2730
except ImportError:
28-
NotGiven = None
29-
30-
try:
31-
from openai import Omit
32-
except ImportError:
33-
Omit = None
31+
NOT_GIVEN = None
3432

3533
from openai.resources.chat.completions import Completions, AsyncCompletions
3634
from openai.resources import Embeddings, AsyncEmbeddings
@@ -189,9 +187,12 @@ def _set_input_data(span, kwargs, operation, integration):
189187
and integration.include_prompts
190188
):
191189
normalized_messages = normalize_message_roles(messages)
192-
set_data_normalized(
193-
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False
194-
)
190+
scope = sentry_sdk.get_current_scope()
191+
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
192+
if messages_data is not None:
193+
set_data_normalized(
194+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
195+
)
195196

196197
# Input attributes: Common
197198
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai")
@@ -210,12 +211,12 @@ def _set_input_data(span, kwargs, operation, integration):
210211
for key, attribute in kwargs_keys_to_attributes.items():
211212
value = kwargs.get(key)
212213

213-
if value is not None and _is_given(value):
214+
if value is not NOT_GIVEN and value is not None:
214215
set_data_normalized(span, attribute, value)
215216

216217
# Input attributes: Tools
217218
tools = kwargs.get("tools")
218-
if tools is not None and _is_given(tools) and len(tools) > 0:
219+
if tools is not NOT_GIVEN and tools is not None and len(tools) > 0:
219220
set_data_normalized(
220221
span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools)
221222
)
@@ -695,15 +696,3 @@ async def _sentry_patched_responses_async(*args, **kwargs):
695696
return await _execute_async(f, *args, **kwargs)
696697

697698
return _sentry_patched_responses_async
698-
699-
700-
def _is_given(obj):
701-
# type: (Any) -> bool
702-
"""
703-
Check for givenness safely across different openai versions.
704-
"""
705-
if NotGiven is not None and isinstance(obj, NotGiven):
706-
return False
707-
if Omit is not None and isinstance(obj, Omit):
708-
return False
709-
return True

sentry_sdk/scope.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ class Scope:
188188
"_extras",
189189
"_breadcrumbs",
190190
"_n_breadcrumbs_truncated",
191+
"_gen_ai_messages_truncated",
191192
"_event_processors",
192193
"_error_processors",
193194
"_should_capture",
@@ -213,6 +214,7 @@ def __init__(self, ty=None, client=None):
213214
self._name = None # type: Optional[str]
214215
self._propagation_context = None # type: Optional[PropagationContext]
215216
self._n_breadcrumbs_truncated = 0 # type: int
217+
self._gen_ai_messages_truncated = {} # type: Dict[str, int]
216218

217219
self.client = NonRecordingClient() # type: sentry_sdk.client.BaseClient
218220

@@ -247,6 +249,7 @@ def __copy__(self):
247249

248250
rv._breadcrumbs = copy(self._breadcrumbs)
249251
rv._n_breadcrumbs_truncated = self._n_breadcrumbs_truncated
252+
rv._gen_ai_messages_truncated = self._gen_ai_messages_truncated.copy()
250253
rv._event_processors = self._event_processors.copy()
251254
rv._error_processors = self._error_processors.copy()
252255
rv._propagation_context = self._propagation_context
@@ -1583,6 +1586,8 @@ def update_from_scope(self, scope):
15831586
self._n_breadcrumbs_truncated = (
15841587
self._n_breadcrumbs_truncated + scope._n_breadcrumbs_truncated
15851588
)
1589+
if scope._gen_ai_messages_truncated:
1590+
self._gen_ai_messages_truncated.update(scope._gen_ai_messages_truncated)
15861591
if scope._span:
15871592
self._span = scope._span
15881593
if scope._attachments:

tests/integrations/openai/test_openai.py

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import pytest
23

34
from sentry_sdk.utils import package_version
@@ -7,11 +8,6 @@
78
except ImportError:
89
NOT_GIVEN = None
910

10-
try:
11-
from openai import omit
12-
except ImportError:
13-
omit = None
14-
1511
from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError
1612
from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding
1713
from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk
@@ -44,6 +40,9 @@
4440
OpenAIIntegration,
4541
_calculate_token_usage,
4642
)
43+
from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES
44+
from sentry_sdk._types import AnnotatedValue
45+
from sentry_sdk.serializer import serialize
4746

4847
from unittest import mock # python 3.3 and above
4948

@@ -1429,7 +1428,7 @@ async def test_streaming_responses_api_async(
14291428
)
14301429
@pytest.mark.parametrize(
14311430
"tools",
1432-
[[], None, NOT_GIVEN, omit],
1431+
[[], None, NOT_GIVEN],
14331432
)
14341433
def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools):
14351434
sentry_init(
@@ -1456,6 +1455,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools):
14561455

14571456
def test_openai_message_role_mapping(sentry_init, capture_events):
14581457
"""Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'"""
1458+
14591459
sentry_init(
14601460
integrations=[OpenAIIntegration(include_prompts=True)],
14611461
traces_sample_rate=1.0,
@@ -1465,7 +1465,6 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
14651465

14661466
client = OpenAI(api_key="z")
14671467
client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION)
1468-
14691468
# Test messages with mixed roles including "ai" that should be mapped to "assistant"
14701469
test_messages = [
14711470
{"role": "system", "content": "You are helpful."},
@@ -1476,11 +1475,9 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
14761475

14771476
with start_transaction(name="openai tx"):
14781477
client.chat.completions.create(model="test-model", messages=test_messages)
1479-
1478+
# Verify that the span was created correctly
14801479
(event,) = events
14811480
span = event["spans"][0]
1482-
1483-
# Verify that the span was created correctly
14841481
assert span["op"] == "gen_ai.chat"
14851482
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
14861483

@@ -1505,3 +1502,55 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
15051502
# Verify no "ai" roles remain
15061503
roles = [msg["role"] for msg in stored_messages]
15071504
assert "ai" not in roles
1505+
1506+
1507+
def test_openai_message_truncation(sentry_init, capture_events):
    """Test that large messages are truncated properly in OpenAI integration."""
    sentry_init(
        integrations=[OpenAIIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    client = OpenAI(api_key="z")
    client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION)

    # One oversized text chunk, repeated across several messages so the
    # serialized conversation far exceeds the truncation budget.
    oversized_text = (
        "This is a very long message that will exceed our size limits. " * 1000
    )
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": oversized_text},
        {"role": "assistant", "content": oversized_text},
        {"role": "user", "content": oversized_text},
    ]

    with start_transaction(name="openai tx"):
        client.chat.completions.create(model="some-model", messages=conversation)

    (event,) = events
    span = event["spans"][0]
    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]

    stored = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
    assert isinstance(stored, str)

    decoded = json.loads(stored)
    assert isinstance(decoded, list)
    assert len(decoded) <= len(conversation)

    # When whole messages were dropped, the event's _meta should carry the
    # original length as an {"": {"len": N}} annotation on the span data.
    if len(decoded) < len(conversation) and "_meta" in event:
        spans_meta = event["_meta"].get("spans", {})
        data_meta = spans_meta.get("0", {}).get("data", {})
        if SPANDATA.GEN_AI_REQUEST_MESSAGES in data_meta:
            annotation = data_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES]
            assert "len" in annotation.get("", {})

0 commit comments

Comments
 (0)