Skip to content

Commit cf6f70b

Browse files
committed
wip
1 parent ed2f642 commit cf6f70b

File tree

3 files changed

+46
-9
lines changed

3 files changed

+46
-9
lines changed

sentry_sdk/ai/utils.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,22 +112,51 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
112112
if not messages:
113113
return messages
114114

115-
# make a list out of the messages in case it's just a string? why is this needed?
116115
truncated_messages = list(messages)
117116

118-
# while there is more than one message, serialize and measure the size, and if it's too big, remove the oldest message
119117
while len(truncated_messages) > 1:
120-
serialized = serialize(
121-
truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
122-
)
123-
serialized_json = json.dumps(serialized, separators=(",", ":"))
118+
serialized_json = json.dumps(truncated_messages, separators=(",", ":"))
124119
current_size = len(serialized_json.encode("utf-8"))
125120

126121
if current_size <= max_bytes:
127122
break
128123

129124
truncated_messages.pop(0)
130125

126+
serialized_json = json.dumps(truncated_messages, separators=(",", ":"))
127+
current_size = len(serialized_json.encode("utf-8"))
128+
129+
if current_size > max_bytes and len(truncated_messages) == 1:
130+
message = truncated_messages[0].copy()
131+
content = message.get("content", "")
132+
133+
if isinstance(content, str):
134+
max_content_length = max_bytes // 2
135+
while True:
136+
message["content"] = content[:max_content_length]
137+
test_json = json.dumps([message], separators=(",", ":"))
138+
if len(test_json.encode("utf-8")) <= max_bytes:
139+
break
140+
max_content_length = int(max_content_length * 0.9)
141+
if max_content_length < 100:
142+
message["content"] = ""
143+
break
144+
145+
truncated_messages = [message]
146+
elif isinstance(content, list):
147+
content_copy = list(content)
148+
while len(content_copy) > 0:
149+
message["content"] = content_copy
150+
test_json = json.dumps([message], separators=(",", ":"))
151+
if len(test_json.encode("utf-8")) <= max_bytes:
152+
break
153+
content_copy = content_copy[:-1]
154+
155+
if len(content_copy) == 0:
156+
message["content"] = []
157+
158+
truncated_messages = [message]
159+
131160
return truncated_messages
132161

133162

sentry_sdk/integrations/openai_agents/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ def _set_input_data(span, get_response_kwargs):
142142
role_normalized_messages, span, scope
143143
)
144144
if messages_data is not None:
145-
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)
145+
set_data_normalized(
146+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
147+
)
146148

147149

148150
def _set_output_data(span, result):

tests/integrations/openai_agents/test_openai_agents.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,10 @@ async def test_agent_invocation_span(
143143
assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
144144

145145
assert invoke_agent_span["description"] == "invoke_agent test_agent"
146-
assert invoke_agent_span["data"]["gen_ai.request.messages"] == [
146+
messages_data = invoke_agent_span["data"]["gen_ai.request.messages"]
147+
assert isinstance(messages_data, str)
148+
parsed_messages = json.loads(messages_data)
149+
assert parsed_messages == [
147150
{
148151
"content": [{"text": "You are a helpful test assistant.", "type": "text"}],
149152
"role": "system",
@@ -484,7 +487,10 @@ def simple_test_tool(message: str) -> str:
484487
assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent"
485488
assert ai_client_span1["data"]["gen_ai.request.available_tools"] == available_tools
486489
assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100
487-
assert ai_client_span1["data"]["gen_ai.request.messages"] == [
490+
messages_data = ai_client_span1["data"]["gen_ai.request.messages"]
491+
assert isinstance(messages_data, str)
492+
parsed_messages = json.loads(messages_data)
493+
assert parsed_messages == [
488494
{
489495
"role": "system",
490496
"content": [{"type": "text", "text": "You are a helpful test assistant."}],

0 commit comments

Comments
 (0)