Skip to content

Commit 6b816f7

Browse files
committed
Merge branch 'master' into antonpirker/openai-token-usage
2 parents 1fe97c9 + 657c2b1 commit 6b816f7

File tree

18 files changed

+308
-103
lines changed

18 files changed

+308
-103
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# Changelog
22

3+
## 2.33.0
4+
5+
### Various fixes & improvements
6+
7+
- feat(langchain): Support `BaseCallbackManager` (#4486) by @szokeasaurusrex
8+
- Use `span.data` instead of `measurements` for token usage (#4567) by @antonpirker
9+
- Fix custom model name (#4569) by @antonpirker
10+
- fix: shut down "session flusher" more promptly (#4561) by @bukzor
11+
- chore: Remove Lambda urllib3 pin on Python 3.10+ (#4549) by @sentrivana
12+
313
## 2.32.0
414

515
### Various fixes & improvements

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year)
3232
author = "Sentry Team and Contributors"
3333

34-
release = "2.32.0"
34+
release = "2.33.0"
3535
version = ".".join(release.split(".")[:2]) # The short X.Y version.
3636

3737

scripts/populate_tox/populate_tox.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,8 @@ def _compare_min_version_with_defined(
508508
):
509509
print(
510510
f" Integration defines {defined_min_version} as minimum "
511-
f"version, but the effective minimum version is {releases[0]}."
511+
f"version, but the effective minimum version based on metadata "
512+
f"is {releases[0]}."
512513
)
513514

514515

sentry_sdk/consts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1181,4 +1181,4 @@ def _get_default_options():
11811181
del _get_default_options
11821182

11831183

1184-
VERSION = "2.32.0"
1184+
VERSION = "2.33.0"

sentry_sdk/integrations/langchain.py

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from langchain_core.callbacks import (
2424
manager,
2525
BaseCallbackHandler,
26+
BaseCallbackManager,
2627
Callbacks,
2728
)
2829
from langchain_core.agents import AgentAction, AgentFinish
@@ -434,12 +435,20 @@ def new_configure(
434435
**kwargs,
435436
)
436437

437-
callbacks_list = local_callbacks or []
438-
439-
if isinstance(callbacks_list, BaseCallbackHandler):
440-
callbacks_list = [callbacks_list]
441-
elif not isinstance(callbacks_list, list):
442-
logger.debug("Unknown callback type: %s", callbacks_list)
438+
local_callbacks = local_callbacks or []
439+
440+
# Handle each possible type of local_callbacks. For each type, we
441+
# extract the list of callbacks to check for SentryLangchainCallback,
442+
# and define a function that would add the SentryLangchainCallback
443+
# to the existing callbacks list.
444+
if isinstance(local_callbacks, BaseCallbackManager):
445+
callbacks_list = local_callbacks.handlers
446+
elif isinstance(local_callbacks, BaseCallbackHandler):
447+
callbacks_list = [local_callbacks]
448+
elif isinstance(local_callbacks, list):
449+
callbacks_list = local_callbacks
450+
else:
451+
logger.debug("Unknown callback type: %s", local_callbacks)
443452
# Just proceed with original function call
444453
return f(
445454
callback_manager_cls,
@@ -449,28 +458,38 @@ def new_configure(
449458
**kwargs,
450459
)
451460

452-
inheritable_callbacks_list = (
453-
inheritable_callbacks if isinstance(inheritable_callbacks, list) else []
454-
)
461+
# Handle each possible type of inheritable_callbacks.
462+
if isinstance(inheritable_callbacks, BaseCallbackManager):
463+
inheritable_callbacks_list = inheritable_callbacks.handlers
464+
elif isinstance(inheritable_callbacks, list):
465+
inheritable_callbacks_list = inheritable_callbacks
466+
else:
467+
inheritable_callbacks_list = []
455468

456469
if not any(
457470
isinstance(cb, SentryLangchainCallback)
458471
for cb in itertools.chain(callbacks_list, inheritable_callbacks_list)
459472
):
460-
# Avoid mutating the existing callbacks list
461-
callbacks_list = [
462-
*callbacks_list,
463-
SentryLangchainCallback(
464-
integration.max_spans,
465-
integration.include_prompts,
466-
integration.tiktoken_encoding_name,
467-
),
468-
]
473+
sentry_handler = SentryLangchainCallback(
474+
integration.max_spans,
475+
integration.include_prompts,
476+
integration.tiktoken_encoding_name,
477+
)
478+
if isinstance(local_callbacks, BaseCallbackManager):
479+
local_callbacks = local_callbacks.copy()
480+
local_callbacks.handlers = [
481+
*local_callbacks.handlers,
482+
sentry_handler,
483+
]
484+
elif isinstance(local_callbacks, BaseCallbackHandler):
485+
local_callbacks = [local_callbacks, sentry_handler]
486+
else: # local_callbacks is a list
487+
local_callbacks = [*local_callbacks, sentry_handler]
469488

470489
return f(
471490
callback_manager_cls,
472491
inheritable_callbacks,
473-
callbacks_list,
492+
local_callbacks,
474493
*args,
475494
**kwargs,
476495
)

sentry_sdk/integrations/openai_agents/spans/ai_client.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@
1919
def ai_client_span(agent, get_response_kwargs):
2020
# type: (Agent, dict[str, Any]) -> sentry_sdk.tracing.Span
2121
# TODO-anton: implement other types of operations. Now "chat" is hardcoded.
22+
model_name = agent.model.model if hasattr(agent.model, "model") else agent.model
2223
span = sentry_sdk.start_span(
2324
op=OP.GEN_AI_CHAT,
24-
description=f"chat {agent.model}",
25+
description=f"chat {model_name}",
2526
origin=SPAN_ORIGIN,
2627
)
2728
# TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on

sentry_sdk/integrations/openai_agents/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ def _set_agent_data(span, agent):
5353
)
5454

5555
if agent.model:
56-
span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, agent.model)
56+
model_name = agent.model.model if hasattr(agent.model, "model") else agent.model
57+
span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model_name)
5758

5859
if agent.model_settings.presence_penalty:
5960
span.set_data(

sentry_sdk/sessions.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import os
2-
import time
32
import warnings
4-
from threading import Thread, Lock
3+
from threading import Thread, Lock, Event
54
from contextlib import contextmanager
65

76
import sentry_sdk
@@ -162,7 +161,7 @@ def __init__(
162161
self._thread_lock = Lock()
163162
self._aggregate_lock = Lock()
164163
self._thread_for_pid = None # type: Optional[int]
165-
self._running = True
164+
self.__shutdown_requested = Event()
166165

167166
def flush(self):
168167
# type: (...) -> None
@@ -208,10 +207,10 @@ def _ensure_running(self):
208207

209208
def _thread():
210209
# type: (...) -> None
211-
while self._running:
212-
time.sleep(self.flush_interval)
213-
if self._running:
214-
self.flush()
210+
running = True
211+
while running:
212+
running = not self.__shutdown_requested.wait(self.flush_interval)
213+
self.flush()
215214

216215
thread = Thread(target=_thread)
217216
thread.daemon = True
@@ -220,7 +219,7 @@ def _thread():
220219
except RuntimeError:
221220
# Unfortunately at this point the interpreter is in a state that no
222221
# longer allows us to spawn a thread and we have to bail.
223-
self._running = False
222+
self.__shutdown_requested.set()
224223
return None
225224

226225
self._thread = thread
@@ -271,7 +270,7 @@ def add_session(
271270

272271
def kill(self):
273272
# type: (...) -> None
274-
self._running = False
273+
self.__shutdown_requested.set()
275274

276275
def __del__(self):
277276
# type: (...) -> None

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def get_file_text(file_name):
2121

2222
setup(
2323
name="sentry-sdk",
24-
version="2.32.0",
24+
version="2.33.0",
2525
author="Sentry Team and Contributors",
2626
author_email="[email protected]",
2727
url="https://github.com/getsentry/sentry-python",

tests/integrations/anthropic/test_anthropic.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,9 @@ def test_nonstreaming_create_message(
125125
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
126126
assert SPANDATA.AI_RESPONSES not in span["data"]
127127

128-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
129-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 20
130-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
128+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
129+
assert span["data"]["gen_ai.usage.output_tokens"] == 20
130+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
131131
assert span["data"][SPANDATA.AI_STREAMING] is False
132132

133133

@@ -193,9 +193,9 @@ async def test_nonstreaming_create_message_async(
193193
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
194194
assert SPANDATA.AI_RESPONSES not in span["data"]
195195

196-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
197-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 20
198-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
196+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
197+
assert span["data"]["gen_ai.usage.output_tokens"] == 20
198+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
199199
assert span["data"][SPANDATA.AI_STREAMING] is False
200200

201201

@@ -293,9 +293,9 @@ def test_streaming_create_message(
293293
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
294294
assert SPANDATA.AI_RESPONSES not in span["data"]
295295

296-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
297-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 30
298-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 40
296+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
297+
assert span["data"]["gen_ai.usage.output_tokens"] == 30
298+
assert span["data"]["gen_ai.usage.total_tokens"] == 40
299299
assert span["data"][SPANDATA.AI_STREAMING] is True
300300

301301

@@ -396,9 +396,9 @@ async def test_streaming_create_message_async(
396396
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
397397
assert SPANDATA.AI_RESPONSES not in span["data"]
398398

399-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
400-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 30
401-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 40
399+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
400+
assert span["data"]["gen_ai.usage.output_tokens"] == 30
401+
assert span["data"]["gen_ai.usage.total_tokens"] == 40
402402
assert span["data"][SPANDATA.AI_STREAMING] is True
403403

404404

@@ -525,9 +525,9 @@ def test_streaming_create_message_with_input_json_delta(
525525
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
526526
assert SPANDATA.AI_RESPONSES not in span["data"]
527527

528-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 366
529-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 51
530-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 417
528+
assert span["data"]["gen_ai.usage.input_tokens"] == 366
529+
assert span["data"]["gen_ai.usage.output_tokens"] == 51
530+
assert span["data"]["gen_ai.usage.total_tokens"] == 417
531531
assert span["data"][SPANDATA.AI_STREAMING] is True
532532

533533

@@ -662,9 +662,9 @@ async def test_streaming_create_message_with_input_json_delta_async(
662662
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
663663
assert SPANDATA.AI_RESPONSES not in span["data"]
664664

665-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 366
666-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 51
667-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 417
665+
assert span["data"]["gen_ai.usage.input_tokens"] == 366
666+
assert span["data"]["gen_ai.usage.output_tokens"] == 51
667+
assert span["data"]["gen_ai.usage.total_tokens"] == 417
668668
assert span["data"][SPANDATA.AI_STREAMING] is True
669669

670670

@@ -807,10 +807,10 @@ def test_add_ai_data_to_span_with_input_json_delta(sentry_init):
807807
content_blocks=["{'test': 'data',", "'more': 'json'}"],
808808
)
809809

810-
assert span._data.get(SPANDATA.AI_RESPONSES) == [
810+
assert span._data.get("ai.responses") == [
811811
{"type": "text", "text": "{'test': 'data','more': 'json'}"}
812812
]
813-
assert span._data.get(SPANDATA.AI_STREAMING) is True
814-
assert span._measurements.get("ai_prompt_tokens_used")["value"] == 10
815-
assert span._measurements.get("ai_completion_tokens_used")["value"] == 20
816-
assert span._measurements.get("ai_total_tokens_used")["value"] == 30
813+
assert span._data.get("ai.streaming") is True
814+
assert span._data.get("gen_ai.usage.input_tokens") == 10
815+
assert span._data.get("gen_ai.usage.output_tokens") == 20
816+
assert span._data.get("gen_ai.usage.total_tokens") == 30

0 commit comments

Comments (0)