Skip to content

Commit 6fb4fa5

Browse files
jgreer013 and claude committed
fix(litellm): bound the off-band span registry
Address self-review: the module-level span registry was only evicted by the terminal success/failure callback, so a call abandoned before a terminal callback fires (e.g. a stream the caller stops iterating) leaked its Span entry -- holding prompt data -- for the process lifetime. The prior kwargs-scoped storage was GC'd with the request, so this was a regression.

Back the registry with an OrderedDict capped at _MAX_TRACKED_SPANS and evict oldest-first in _store_span, so abandoned calls cannot grow it unbounded. A WeakValueDictionary is not an option here: Span/Transaction objects are not weakly referenceable.

Add tests for the bound, terminal-callback cleanup, and the litellm_call_id-absent fallback key; correct the registry comment.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ef29c5f commit 6fb4fa5

2 files changed

Lines changed: 122 additions & 3 deletions

File tree

sentry_sdk/integrations/litellm.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import copy
22
import threading
3+
from collections import OrderedDict
34
from typing import TYPE_CHECKING
45

56
import sentry_sdk
@@ -40,8 +41,11 @@
4041
# to the provider. `litellm_call_id` is a per-request UUID that stays stable
4142
# across the input/success/failure callbacks; the identity of the (shared)
4243
# callback kwargs dict is the fallback for direct callback invocations that omit
43-
# it. Entries are removed by the terminal success/failure callback.
44-
_spans_by_call = {} # type: Dict[Any, Any]
44+
# it. The terminal success/failure callback removes the entry; the registry is
45+
# capped (oldest evicted first) so calls abandoned before a terminal callback
46+
# fires -- e.g. a stream the caller stops iterating -- cannot grow it unbounded.
47+
_MAX_TRACKED_SPANS = 1024
48+
_spans_by_call = OrderedDict() # type: OrderedDict[Any, Any]
4549
_spans_by_call_lock = threading.Lock()
4650

4751

@@ -50,8 +54,12 @@ def _span_key(kwargs: "Dict[str, Any]") -> "Any":
5054

5155

5256
def _store_span(kwargs: "Dict[str, Any]", span: "Any") -> None:
57+
key = _span_key(kwargs)
5358
with _spans_by_call_lock:
54-
_spans_by_call[_span_key(kwargs)] = span
59+
_spans_by_call[key] = span
60+
_spans_by_call.move_to_end(key)
61+
while len(_spans_by_call) > _MAX_TRACKED_SPANS:
62+
_spans_by_call.popitem(last=False)
5563

5664

5765
def _peek_span(kwargs: "Dict[str, Any]") -> "Any":

tests/integrations/litellm/test_litellm.py

Lines changed: 111 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ async def __call__(self, *args, **kwargs):
3939
from sentry_sdk import start_transaction
4040
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
4141
from sentry_sdk.consts import OP, SPANDATA
42+
from sentry_sdk.integrations import litellm as litellm_integration
4243
from sentry_sdk.integrations.litellm import (
4344
LiteLLMIntegration,
4445
_convert_message_parts,
@@ -2587,6 +2588,116 @@ def test_caller_metadata_stays_json_serializable(
25872588
assert len(chat_spans) == 1
25882589

25892590

2591+
def test_span_registry_is_bounded():
2592+
"""The off-band span registry must stay bounded when calls are abandoned
2593+
before a terminal callback fires (e.g. an interrupted stream), so a
2594+
long-running process cannot accumulate Span objects without limit.
2595+
"""
2596+
registry = litellm_integration._spans_by_call
2597+
registry.clear()
2598+
try:
2599+
count = 5000
2600+
for i in range(count):
2601+
litellm_integration._store_span(
2602+
{"litellm_call_id": "call-{}".format(i)}, object()
2603+
)
2604+
2605+
# Bounded, not unbounded: the registry must not retain every entry.
2606+
assert len(registry) < count
2607+
# Most-recent entries are kept; the oldest are evicted first.
2608+
assert (
2609+
litellm_integration._peek_span(
2610+
{"litellm_call_id": "call-{}".format(count - 1)}
2611+
)
2612+
is not None
2613+
)
2614+
assert litellm_integration._peek_span({"litellm_call_id": "call-0"}) is None
2615+
finally:
2616+
registry.clear()
2617+
2618+
2619+
def test_span_registry_cleaned_up_after_terminal_callbacks(sentry_init):
2620+
"""Both terminal callbacks must remove the off-band registry entry, so a
2621+
completed or failed call leaves nothing behind."""
2622+
sentry_init(
2623+
integrations=[LiteLLMIntegration()],
2624+
disabled_integrations=[StdlibIntegration],
2625+
traces_sample_rate=1.0,
2626+
_experiments={"trace_lifecycle": "static"},
2627+
)
2628+
litellm_integration._spans_by_call.clear()
2629+
2630+
with start_transaction(name="litellm test"):
2631+
success_kwargs = {
2632+
"model": "gpt-3.5-turbo",
2633+
"messages": [{"role": "user", "content": "hi"}],
2634+
"litellm_call_id": "success-call",
2635+
}
2636+
_input_callback(success_kwargs)
2637+
assert "success-call" in litellm_integration._spans_by_call
2638+
_success_callback(
2639+
success_kwargs, MockCompletionResponse(), datetime.now(), datetime.now()
2640+
)
2641+
assert "success-call" not in litellm_integration._spans_by_call
2642+
2643+
failure_kwargs = {
2644+
"model": "gpt-3.5-turbo",
2645+
"messages": [{"role": "user", "content": "hi"}],
2646+
"litellm_call_id": "failure-call",
2647+
}
2648+
_input_callback(failure_kwargs)
2649+
assert "failure-call" in litellm_integration._spans_by_call
2650+
_failure_callback(
2651+
failure_kwargs, ValueError("boom"), datetime.now(), datetime.now()
2652+
)
2653+
assert "failure-call" not in litellm_integration._spans_by_call
2654+
2655+
2656+
def test_span_key_falls_back_to_kwargs_identity(sentry_init):
2657+
"""When litellm omits litellm_call_id (direct callback use), the shared
2658+
kwargs dict identity keys the registry, and distinct calls stay independent.
2659+
"""
2660+
sentry_init(
2661+
integrations=[LiteLLMIntegration()],
2662+
disabled_integrations=[StdlibIntegration],
2663+
traces_sample_rate=1.0,
2664+
_experiments={"trace_lifecycle": "static"},
2665+
)
2666+
litellm_integration._spans_by_call.clear()
2667+
2668+
with start_transaction(name="litellm test"):
2669+
kwargs_a = {
2670+
"model": "gpt-3.5-turbo",
2671+
"messages": [{"role": "user", "content": "a"}],
2672+
}
2673+
kwargs_b = {
2674+
"model": "gpt-3.5-turbo",
2675+
"messages": [{"role": "user", "content": "b"}],
2676+
}
2677+
2678+
_input_callback(kwargs_a)
2679+
_input_callback(kwargs_b)
2680+
2681+
# Distinct dicts (no litellm_call_id) get distinct keys -> no cross-talk.
2682+
span_a = litellm_integration._peek_span(kwargs_a)
2683+
span_b = litellm_integration._peek_span(kwargs_b)
2684+
assert span_a is not None
2685+
assert span_b is not None
2686+
assert span_a is not span_b
2687+
2688+
# Closing A leaves B's span intact.
2689+
_success_callback(
2690+
kwargs_a, MockCompletionResponse(), datetime.now(), datetime.now()
2691+
)
2692+
assert litellm_integration._peek_span(kwargs_a) is None
2693+
assert litellm_integration._peek_span(kwargs_b) is span_b
2694+
2695+
_success_callback(
2696+
kwargs_b, MockCompletionResponse(), datetime.now(), datetime.now()
2697+
)
2698+
assert litellm_integration._peek_span(kwargs_b) is None
2699+
2700+
25902701
def test_litellm_message_truncation(sentry_init, capture_events):
25912702
"""Test that large messages are truncated properly in LiteLLM integration."""
25922703
sentry_init(

0 commit comments

Comments
 (0)