diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 1f047b1c1d..43661e2432 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -3,7 +3,11 @@ import sentry_sdk from sentry_sdk import consts from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized +from sentry_sdk.ai.utils import ( + get_start_span_function, + set_data_normalized, + truncate_and_annotate_messages, +) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -76,9 +80,12 @@ def _input_callback(kwargs): # Record messages if allowed messages = kwargs.get("messages", []) if messages and should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False - ) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) # Record other parameters params = { diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 19ae206c85..8e1ad21254 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -1,3 +1,4 @@ +import json import pytest from unittest import mock from datetime import datetime @@ -546,3 +547,61 @@ def dict(self): # Should have extracted the response message assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + + +def test_litellm_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in LiteLLM integration.""" + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) + messages = [ + {"role": "user", "content": "small message 1"}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": "small message 4"}, + {"role": "user", "content": "small message 5"}, + ] + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + chat_spans = [ + span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + ] + assert len(chat_spans) > 0 + + chat_span = chat_spans[0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + + messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) == 2 + assert "small message 4" in str(parsed_messages[0]) + assert "small message 5" in str(parsed_messages[1]) + assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5