@@ -39,6 +39,7 @@ async def __call__(self, *args, **kwargs):
3939from sentry_sdk import start_transaction
4040from sentry_sdk ._types import BLOB_DATA_SUBSTITUTE
4141from sentry_sdk .consts import OP , SPANDATA
42+ from sentry_sdk .integrations import litellm as litellm_integration
4243from sentry_sdk .integrations .litellm import (
4344 LiteLLMIntegration ,
4445 _convert_message_parts ,
@@ -2587,6 +2588,116 @@ def test_caller_metadata_stays_json_serializable(
25872588 assert len (chat_spans ) == 1
25882589
25892590
def test_span_registry_is_bounded():
    """Check that the off-band span registry evicts entries instead of growing.

    Calls that are abandoned before a terminal callback fires (for example an
    interrupted stream) never remove their entry, so the registry itself has
    to cap its size; otherwise a long-running process would keep accumulating
    Span objects.
    """
    spans_by_call = litellm_integration._spans_by_call
    spans_by_call.clear()

    total_calls = 5000
    oldest_call = {"litellm_call_id": "call-0"}
    newest_call = {"litellm_call_id": "call-{}".format(total_calls - 1)}

    try:
        # Simulate many calls that are stored but never finished.
        for index in range(total_calls):
            call_kwargs = {"litellm_call_id": "call-{}".format(index)}
            litellm_integration._store_span(call_kwargs, object())

        # The registry must have dropped at least some of the entries.
        assert len(spans_by_call) < total_calls

        # Eviction is oldest-first: the newest entry survives, the first
        # one stored is gone.
        assert litellm_integration._peek_span(newest_call) is not None
        assert litellm_integration._peek_span(oldest_call) is None
    finally:
        # Leave the module-level registry empty for whichever test runs next.
        spans_by_call.clear()
2618+
def test_span_registry_cleaned_up_after_terminal_callbacks(sentry_init):
    """Check that success and failure callbacks each drop the registry entry.

    After either terminal callback has run for a call, the off-band registry
    must no longer contain that call's id.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "static"},
    )
    litellm_integration._spans_by_call.clear()

    def build_kwargs(call_id):
        # Minimal callback kwargs carrying an explicit litellm call id.
        return {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "hi"}],
            "litellm_call_id": call_id,
        }

    def is_registered(call_id):
        # Re-read the module attribute on every check rather than caching it.
        return call_id in litellm_integration._spans_by_call

    with start_transaction(name="litellm test"):
        # Successful call: entry appears on input, disappears on success.
        success_kwargs = build_kwargs("success-call")
        _input_callback(success_kwargs)
        assert is_registered("success-call")

        response = MockCompletionResponse()
        started_at = datetime.now()
        finished_at = datetime.now()
        _success_callback(success_kwargs, response, started_at, finished_at)
        assert not is_registered("success-call")

        # Failed call: entry appears on input, disappears on failure.
        failure_kwargs = build_kwargs("failure-call")
        _input_callback(failure_kwargs)
        assert is_registered("failure-call")

        error = ValueError("boom")
        started_at = datetime.now()
        finished_at = datetime.now()
        _failure_callback(failure_kwargs, error, started_at, finished_at)
        assert not is_registered("failure-call")
2655+
def test_span_key_falls_back_to_kwargs_identity(sentry_init):
    """Check registry keying when kwargs carry no ``litellm_call_id``.

    With the callbacks invoked directly and no call id present, two distinct
    kwargs dicts must map to two independent registry entries, and finishing
    one call must not disturb the other.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "static"},
    )
    litellm_integration._spans_by_call.clear()

    def finish_successfully(call_kwargs):
        # Fire the success callback with a mock response and fresh timestamps.
        _success_callback(
            call_kwargs, MockCompletionResponse(), datetime.now(), datetime.now()
        )

    with start_transaction(name="litellm test"):
        kwargs_a = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "a"}],
        }
        kwargs_b = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "b"}],
        }

        for call_kwargs in (kwargs_a, kwargs_b):
            _input_callback(call_kwargs)

        # Neither dict has a litellm_call_id, yet each must resolve to its
        # own span with no cross-talk between the two calls.
        span_a = litellm_integration._peek_span(kwargs_a)
        span_b = litellm_integration._peek_span(kwargs_b)
        assert span_a is not None
        assert span_b is not None
        assert span_a is not span_b

        # Finishing A removes only A's entry; B still points at its span.
        finish_successfully(kwargs_a)
        assert litellm_integration._peek_span(kwargs_a) is None
        assert litellm_integration._peek_span(kwargs_b) is span_b

        # Finishing B empties the remaining entry.
        finish_successfully(kwargs_b)
        assert litellm_integration._peek_span(kwargs_b) is None
2700+
25902701def test_litellm_message_truncation (sentry_init , capture_events ):
25912702 """Test that large messages are truncated properly in LiteLLM integration."""
25922703 sentry_init (
0 commit comments