Skip to content
137 changes: 136 additions & 1 deletion sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
import json

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Any, Callable

from sentry_sdk.tracing import Span

from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk.utils import logger

if TYPE_CHECKING:
from typing import Any, Dict, List, Optional

from sentry_sdk._types import AnnotatedValue
from sentry_sdk.serializer import serialize

MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB


class GEN_AI_ALLOWED_MESSAGE_ROLES:
SYSTEM = "system"
Expand Down Expand Up @@ -95,3 +105,128 @@ def get_start_span_function():
current_span is not None and current_span.containing_transaction is not None
)
return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction


def _serialized_size(messages, max_bytes):
    # type: (List[Dict[str, Any]], int) -> int
    """Return the UTF-8 byte length of *messages* after Sentry serialization.

    Measures with compact JSON separators so the result matches the compact
    encoding used when the messages are ultimately emitted.
    """
    serialized = serialize(
        messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
    )
    return len(json.dumps(serialized, separators=(",", ":")).encode("utf-8"))


def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
    # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]]
    """
    Truncate messages by removing the oldest ones until the serialized size is within limits.
    If the last message is still too large, truncate its content instead of removing it entirely.

    This function prioritizes keeping the most recent messages while ensuring the total
    serialized size stays under the specified byte limit. It uses the Sentry serializer
    to get accurate size estimates that match what will actually be sent.

    Always preserves at least one message, even if content needs to be truncated.

    :param messages: List of message objects (typically with 'role' and 'content' keys)
    :param max_bytes: Maximum allowed size in bytes for the serialized messages
    :returns: Truncated list of messages that fits within the size limit
    """
    if not messages:
        return messages

    truncated_messages = list(messages)

    # Drop the oldest messages first: the most recent ones are the most
    # useful for debugging, so they are kept for as long as possible.
    while (
        len(truncated_messages) > 1
        and _serialized_size(truncated_messages, max_bytes) > max_bytes
    ):
        truncated_messages.pop(0)

    # A single remaining message may still be over budget; clip its content
    # instead of dropping it so at least one message always survives.
    if (
        len(truncated_messages) == 1
        and _serialized_size(truncated_messages, max_bytes) > max_bytes
    ):
        last_message = truncated_messages[0].copy()
        content = last_message.get("content", "")

        if content and isinstance(content, str):
            # Slice on encoded bytes (not characters) so the cut is meaningful
            # for the byte budget even for multi-byte UTF-8 content; "ignore"
            # drops any partial trailing multi-byte sequence left by the slice.
            clipped = content.encode("utf-8")[: int(max_bytes * 0.8)]
            last_message["content"] = clipped.decode("utf-8", errors="ignore") + "..."
            truncated_messages[0] = last_message

    return truncated_messages


def serialize_gen_ai_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
    # type: (Optional[Any], int) -> Optional[str]
    """
    Serialize and truncate gen_ai messages for storage in spans.

    This function handles the complete workflow of:
    1. Truncating messages to fit within size limits (if not already done)
    2. Serializing them using Sentry's serializer (which processes AnnotatedValue for _meta)
    3. Converting to JSON string for storage

    :param messages: List of message objects, AnnotatedValue, or None
    :param max_bytes: Maximum allowed size in bytes for the serialized messages
    :returns: JSON string of serialized messages or None if input was None/empty
    """
    if not messages:
        return None

    # An AnnotatedValue was already truncated upstream; serialize() unwraps it
    # into the value plus its _meta structure, so no extra size pass is needed.
    if isinstance(messages, AnnotatedValue):
        serialized_messages = serialize(
            messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
        )
        return json.dumps(serialized_messages, separators=(",", ":"))

    truncated_messages = truncate_messages_by_size(messages, max_bytes)
    serialized_messages = serialize(
        truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
    )

    # Use compact separators so the emitted payload matches the byte
    # accounting done during truncation, which also measures with compact
    # separators; the default separators would inflate the output past the
    # size that was just verified.
    return json.dumps(serialized_messages, separators=(",", ":"))


def truncate_and_serialize_messages(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
    # type: (Optional[List[Dict[str, Any]]], int) -> Any
    """
    Truncate messages and return serialized string or AnnotatedValue for automatic _meta creation.

    This function handles truncation and always returns serialized JSON strings. When truncation
    occurs, it wraps the serialized string in an AnnotatedValue so that Sentry's serializer can
    automatically create the appropriate _meta structure.

    :param messages: List of message objects or None
    :param max_bytes: Maximum allowed size in bytes for the serialized messages
    :returns: JSON string, AnnotatedValue containing JSON string (if truncated), or None
    """
    if not messages:
        return None

    truncated_messages = truncate_messages_by_size(messages, max_bytes)
    if not truncated_messages:
        return None

    # Serialize the already-truncated list directly rather than delegating to
    # serialize_gen_ai_messages(), which would redundantly run the truncation
    # (and its serialize passes) a second time on an already-sized list.
    # Compact separators keep the output within the byte budget that
    # truncate_messages_by_size measured against.
    serialized = serialize(
        truncated_messages, is_vars=False, max_value_length=round(max_bytes * 0.8)
    )
    serialized_json = json.dumps(serialized, separators=(",", ":"))
    if not serialized_json:
        return None

    original_count = len(messages)
    truncated_count = len(truncated_messages)

    # If messages were dropped, wrap the JSON in an AnnotatedValue so the
    # serializer records the original message count in _meta.
    if original_count != truncated_count:
        return AnnotatedValue(
            value=serialized_json,
            metadata={"len": original_count},
        )

    # No truncation, return plain serialized string
    return serialized_json
10 changes: 4 additions & 6 deletions sentry_sdk/integrations/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sentry_sdk.ai.utils import (
set_data_normalized,
normalize_message_roles,
truncate_and_serialize_messages,
get_start_span_function,
)
from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS
Expand Down Expand Up @@ -145,12 +146,9 @@ def _set_input_data(span, kwargs, integration):
normalized_messages.append(message)

role_normalized_messages = normalize_message_roles(normalized_messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
role_normalized_messages,
unpack=False,
)
serialized_messages = truncate_and_serialize_messages(role_normalized_messages)
if serialized_messages is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, serialized_messages)

set_data_normalized(
span, SPANDATA.GEN_AI_RESPONSE_STREAMING, kwargs.get("stream", False)
Expand Down
38 changes: 13 additions & 25 deletions sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
normalize_message_roles,
set_data_normalized,
get_start_span_function,
truncate_and_serialize_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
Expand Down Expand Up @@ -221,12 +222,9 @@ def on_llm_start(
}
for prompt in prompts
]
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)
messages_data = truncate_and_serialize_messages(normalized_messages)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
# type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any
Expand Down Expand Up @@ -278,13 +276,9 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
self._normalize_langchain_message(message)
)
normalized_messages = normalize_message_roles(normalized_messages)

set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)
messages_data = truncate_and_serialize_messages(normalized_messages)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

def on_chat_model_end(self, response, *, run_id, **kwargs):
# type: (SentryLangchainCallback, LLMResult, UUID, Any) -> Any
Expand Down Expand Up @@ -758,12 +752,9 @@ def new_invoke(self, *args, **kwargs):
and integration.include_prompts
):
normalized_messages = normalize_message_roles([input])
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)
messages_data = truncate_and_serialize_messages(normalized_messages)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

output = result.get("output")
if (
Expand Down Expand Up @@ -813,12 +804,9 @@ def new_stream(self, *args, **kwargs):
and integration.include_prompts
):
normalized_messages = normalize_message_roles([input])
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)
messages_data = truncate_and_serialize_messages(normalized_messages)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

# Run the agent
result = f(self, *args, **kwargs)
Expand Down
24 changes: 13 additions & 11 deletions sentry_sdk/integrations/langgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from typing import Any, Callable, List, Optional

import sentry_sdk
from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
from sentry_sdk.ai.utils import (
set_data_normalized,
normalize_message_roles,
truncate_and_serialize_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -181,12 +185,11 @@ def new_invoke(self, *args, **kwargs):
input_messages = _parse_langgraph_messages(args[0])
if input_messages:
normalized_input_messages = normalize_message_roles(input_messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_input_messages,
unpack=False,
messages_data = truncate_and_serialize_messages(
normalized_input_messages
)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

result = f(self, *args, **kwargs)

Expand Down Expand Up @@ -232,12 +235,11 @@ async def new_ainvoke(self, *args, **kwargs):
input_messages = _parse_langgraph_messages(args[0])
if input_messages:
normalized_input_messages = normalize_message_roles(input_messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_input_messages,
unpack=False,
messages_data = truncate_and_serialize_messages(
normalized_input_messages
)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

result = await f(self, *args, **kwargs)

Expand Down
12 changes: 8 additions & 4 deletions sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
from sentry_sdk.ai.utils import (
get_start_span_function,
set_data_normalized,
truncate_and_serialize_messages,
)
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -72,9 +76,9 @@ def _input_callback(kwargs):

# Record messages if allowed
if messages and should_send_default_pii() and integration.include_prompts:
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
)
messages_data = truncate_and_serialize_messages(messages)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

# Record other parameters
params = {
Expand Down
12 changes: 8 additions & 4 deletions sentry_sdk/integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
from sentry_sdk.ai.utils import (
set_data_normalized,
normalize_message_roles,
truncate_and_serialize_messages,
)
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -183,9 +187,9 @@ def _set_input_data(span, kwargs, operation, integration):
and integration.include_prompts
):
normalized_messages = normalize_message_roles(messages)
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False
)
messages_data = truncate_and_serialize_messages(normalized_messages)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

# Input attributes: Common
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai")
Expand Down
10 changes: 4 additions & 6 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
get_start_span_function,
set_data_normalized,
normalize_message_roles,
truncate_and_serialize_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -61,12 +62,9 @@ def invoke_agent_span(context, agent, kwargs):

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalized_messages,
unpack=False,
)
messages_data = truncate_and_serialize_messages(normalized_messages)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)

_set_agent_data(span, agent)

Expand Down
11 changes: 5 additions & 6 deletions sentry_sdk/integrations/openai_agents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
normalize_message_roles,
set_data_normalized,
normalize_message_role,
truncate_and_serialize_messages,
)
from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
from sentry_sdk.integrations import DidNotEnable
Expand Down Expand Up @@ -135,12 +136,10 @@ def _set_input_data(span, get_response_kwargs):
}
)

set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
normalize_message_roles(request_messages),
unpack=False,
)
role_normalized_messages = normalize_message_roles(request_messages)
messages_data = truncate_and_serialize_messages(role_normalized_messages)
if messages_data is not None:
span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data)


def _set_output_data(span, result):
Expand Down
Loading
Loading