Merge branch 'antonpirker/openai-overhaul' into antonpirker/openai-responses-api

antonpirker · antonpirker · commit a7a5af320510 · 2025-07-21T16:11:08.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,16 @@
 # Changelog
 
+## 2.33.1
+
+### Various fixes & improvements
+
+- fix(integrations): allow explicit op parameter in `ai_track` (#4597) by @mshavliuk
+- fix: Fix `abs_path` bug in `serialize_frame` (#4599) by @szokeasaurusrex
+- Remove pyrsistent from test dependencies (#4588) by @musicinmybrain
+- Remove explicit `__del__` methods in threaded classes (#4590) by @sl0thentr0py
+- Remove forked from test_transport, separate gevent tests and generalize capturing_server to be module level (#4577) by @sl0thentr0py
+- Improve token usage recording (#4566) by @antonpirker
+
 ## 2.33.0
 
 ### Various fixes & improvements
diff --git a/docs/conf.py b/docs/conf.py
@@ -31,7 +31,7 @@
 copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year)
 author = "Sentry Team and Contributors"
 
-release = "2.33.0"
+release = "2.33.1"
 version = ".".join(release.split(".")[:2])  # The short X.Y version.
 
 
diff --git a/requirements-testing.txt b/requirements-testing.txt
@@ -6,7 +6,6 @@ pytest-forked
 pytest-localserver
 pytest-watch
 jsonschema
-pyrsistent
 executing
 asttokens
 responses
diff --git a/sentry_sdk/ai/monitoring.py b/sentry_sdk/ai/monitoring.py
@@ -32,7 +32,7 @@ def decorator(f):
         def sync_wrapped(*args, **kwargs):
             # type: (Any, Any) -> Any
             curr_pipeline = _ai_pipeline_name.get()
-            op = span_kwargs.get("op", "ai.run" if curr_pipeline else "ai.pipeline")
+            op = span_kwargs.pop("op", "ai.run" if curr_pipeline else "ai.pipeline")
 
             with start_span(name=description, op=op, **span_kwargs) as span:
                 for k, v in kwargs.pop("sentry_tags", {}).items():
@@ -61,7 +61,7 @@ def sync_wrapped(*args, **kwargs):
         async def async_wrapped(*args, **kwargs):
             # type: (Any, Any) -> Any
             curr_pipeline = _ai_pipeline_name.get()
-            op = span_kwargs.get("op", "ai.run" if curr_pipeline else "ai.pipeline")
+            op = span_kwargs.pop("op", "ai.run" if curr_pipeline else "ai.pipeline")
 
             with start_span(name=description, op=op, **span_kwargs) as span:
                 for k, v in kwargs.pop("sentry_tags", {}).items():
diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
@@ -372,6 +372,12 @@ class SPANDATA:
     Example: "chat"
     """
 
+    GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
+    """
+    Exact model identifier used to generate the response
+    Example: gpt-4o-mini-2024-07-18
+    """
+
     GEN_AI_RESPONSE_TEXT = "gen_ai.response.text"
     """
     The model's response text messages.
@@ -649,6 +655,7 @@ class OP:
     FUNCTION_AWS = "function.aws"
     FUNCTION_GCP = "function.gcp"
     GEN_AI_CHAT = "gen_ai.chat"
+    GEN_AI_EMBEDDINGS = "gen_ai.embeddings"
     GEN_AI_EXECUTE_TOOL = "gen_ai.execute_tool"
     GEN_AI_HANDOFF = "gen_ai.handoff"
     GEN_AI_INVOKE_AGENT = "gen_ai.invoke_agent"
@@ -675,8 +682,6 @@ class OP:
     MIDDLEWARE_STARLITE = "middleware.starlite"
     MIDDLEWARE_STARLITE_RECEIVE = "middleware.starlite.receive"
     MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send"
-    OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai"
-    OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai"
     HUGGINGFACE_HUB_CHAT_COMPLETIONS_CREATE = (
         "ai.chat_completions.create.huggingface_hub"
     )
@@ -1182,4 +1187,4 @@ def _get_default_options():
 del _get_default_options
 
 
-VERSION = "2.33.0"
+VERSION = "2.33.1"
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
@@ -171,8 +171,8 @@ def _new_chat_completion_common(f, *args, **kwargs):
     streaming = kwargs.get("stream")
 
     span = sentry_sdk.start_span(
-        op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE,
-        name="Chat Completion",
+        op=consts.OP.GEN_AI_CHAT,
+        name=f"{consts.OP.GEN_AI_CHAT} {model}",
         origin=OpenAIIntegration.origin,
     )
     span.__enter__()
@@ -181,16 +181,16 @@ def _new_chat_completion_common(f, *args, **kwargs):
 
     with capture_internal_exceptions():
         if should_send_default_pii() and integration.include_prompts:
-            set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, messages)
+            set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages)
 
-        set_data_normalized(span, SPANDATA.AI_MODEL_ID, model)
+        set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model)
         set_data_normalized(span, SPANDATA.AI_STREAMING, streaming)
 
         if hasattr(res, "choices"):
             if should_send_default_pii() and integration.include_prompts:
                 set_data_normalized(
                     span,
-                    SPANDATA.AI_RESPONSES,
+                    SPANDATA.GEN_AI_RESPONSE_TEXT,
                     list(map(lambda x: x.message, res.choices)),
                 )
             _calculate_token_usage(messages, res, span, None, integration.count_tokens)
@@ -222,7 +222,7 @@ def new_iterator():
                         )
                         if should_send_default_pii() and integration.include_prompts:
                             set_data_normalized(
-                                span, SPANDATA.AI_RESPONSES, all_responses
+                                span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                             )
                         _calculate_token_usage(
                             messages,
@@ -255,7 +255,7 @@ async def new_iterator_async():
                         )
                         if should_send_default_pii() and integration.include_prompts:
                             set_data_normalized(
-                                span, SPANDATA.AI_RESPONSES, all_responses
+                                span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                             )
                         _calculate_token_usage(
                             messages,
@@ -353,24 +353,30 @@ def _new_embeddings_create_common(f, *args, **kwargs):
     if integration is None:
         return f(*args, **kwargs)
 
+    model = kwargs.get("model")
+
     with sentry_sdk.start_span(
-        op=consts.OP.OPENAI_EMBEDDINGS_CREATE,
-        description="OpenAI Embedding Creation",
+        op=consts.OP.GEN_AI_EMBEDDINGS,
+        name=f"{consts.OP.GEN_AI_EMBEDDINGS} {model}",
         origin=OpenAIIntegration.origin,
     ) as span:
         if "input" in kwargs and (
             should_send_default_pii() and integration.include_prompts
         ):
             if isinstance(kwargs["input"], str):
-                set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, [kwargs["input"]])
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [kwargs["input"]]
+                )
             elif (
                 isinstance(kwargs["input"], list)
                 and len(kwargs["input"]) > 0
                 and isinstance(kwargs["input"][0], str)
             ):
-                set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, kwargs["input"])
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_REQUEST_MESSAGES, kwargs["input"]
+                )
         if "model" in kwargs:
-            set_data_normalized(span, SPANDATA.AI_MODEL_ID, kwargs["model"])
+            set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, kwargs["model"])
 
         response = yield f, args, kwargs
 
diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py
@@ -591,9 +591,14 @@ def serialize_frame(
     if tb_lineno is None:
         tb_lineno = frame.f_lineno
 
+    try:
+        os_abs_path = os.path.abspath(abs_path) if abs_path else None
+    except Exception:
+        os_abs_path = None
+
     rv = {
         "filename": filename_for_module(module, abs_path) or None,
-        "abs_path": os.path.abspath(abs_path) if abs_path else None,
+        "abs_path": os_abs_path,
         "function": function or "<unknown>",
         "module": module,
         "lineno": tb_lineno,
diff --git a/setup.py b/setup.py
@@ -21,7 +21,7 @@ def get_file_text(file_name):
 
 setup(
     name="sentry-sdk",
-    version="2.33.0",
+    version="2.33.1",
     author="Sentry Team and Contributors",
     author_email="hello@sentry.io",
     url="https://github.com/getsentry/sentry-python",
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
@@ -137,14 +137,17 @@ def test_nonstreaming_chat_completion(
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "ai.chat_completions.create.openai"
+    assert span["op"] == "gen_ai.chat"
 
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]["content"]
-        assert "the model response" in span["data"][SPANDATA.AI_RESPONSES]["content"]
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"]
+        assert (
+            "the model response"
+            in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]["content"]
+        )
     else:
-        assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
-        assert SPANDATA.AI_RESPONSES not in span["data"]
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
+        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
 
     assert span["data"]["gen_ai.usage.output_tokens"] == 10
     assert span["data"]["gen_ai.usage.input_tokens"] == 20
@@ -179,14 +182,17 @@ async def test_nonstreaming_chat_completion_async(
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "ai.chat_completions.create.openai"
+    assert span["op"] == "gen_ai.chat"
 
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]["content"]
-        assert "the model response" in span["data"][SPANDATA.AI_RESPONSES]["content"]
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"]
+        assert (
+            "the model response"
+            in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]["content"]
+        )
     else:
-        assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
-        assert SPANDATA.AI_RESPONSES not in span["data"]
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
+        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
 
     assert span["data"]["gen_ai.usage.output_tokens"] == 10
     assert span["data"]["gen_ai.usage.input_tokens"] == 20
@@ -272,14 +278,14 @@ def test_streaming_chat_completion(
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "ai.chat_completions.create.openai"
+    assert span["op"] == "gen_ai.chat"
 
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]["content"]
-        assert "hello world" in span["data"][SPANDATA.AI_RESPONSES]
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"]
+        assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
     else:
-        assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
-        assert SPANDATA.AI_RESPONSES not in span["data"]
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
+        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
 
     try:
         import tiktoken  # type: ignore # noqa # pylint: disable=unused-import
@@ -368,14 +374,14 @@ async def test_streaming_chat_completion_async(
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "ai.chat_completions.create.openai"
+    assert span["op"] == "gen_ai.chat"
 
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]["content"]
-        assert "hello world" in span["data"][SPANDATA.AI_RESPONSES]
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]["content"]
+        assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
     else:
-        assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
-        assert SPANDATA.AI_RESPONSES not in span["data"]
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
+        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
 
     try:
         import tiktoken  # type: ignore # noqa # pylint: disable=unused-import
@@ -459,11 +465,11 @@ def test_embeddings_create(
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "ai.embeddings.create.openai"
+    assert span["op"] == "gen_ai.embeddings"
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
     else:
-        assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
 
     assert span["data"]["gen_ai.usage.input_tokens"] == 20
     assert span["data"]["gen_ai.usage.total_tokens"] == 30
@@ -507,11 +513,11 @@ async def test_embeddings_create_async(
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "ai.embeddings.create.openai"
+    assert span["op"] == "gen_ai.embeddings"
     if send_default_pii and include_prompts:
-        assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]
+        assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
     else:
-        assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
 
     assert span["data"]["gen_ai.usage.input_tokens"] == 20
     assert span["data"]["gen_ai.usage.total_tokens"] == 30
diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py
@@ -119,3 +119,44 @@ async def async_pipeline():
     assert ai_pipeline_span["tags"]["user"] == "czyber"
     assert ai_pipeline_span["data"]["some_data"] == "value"
     assert ai_run_span["description"] == "my async tool"
+
+
+def test_ai_track_with_explicit_op(sentry_init, capture_events):
+    sentry_init(traces_sample_rate=1.0)
+    events = capture_events()
+
+    @ai_track("my tool", op="custom.operation")
+    def tool(**kwargs):
+        pass
+
+    with sentry_sdk.start_transaction():
+        tool()
+
+    transaction = events[0]
+    assert transaction["type"] == "transaction"
+    assert len(transaction["spans"]) == 1
+    span = transaction["spans"][0]
+
+    assert span["description"] == "my tool"
+    assert span["op"] == "custom.operation"
+
+
+@pytest.mark.asyncio
+async def test_ai_track_async_with_explicit_op(sentry_init, capture_events):
+    sentry_init(traces_sample_rate=1.0)
+    events = capture_events()
+
+    @ai_track("my async tool", op="custom.async.operation")
+    async def async_tool(**kwargs):
+        pass
+
+    with sentry_sdk.start_transaction():
+        await async_tool()
+
+    transaction = events[0]
+    assert transaction["type"] == "transaction"
+    assert len(transaction["spans"]) == 1
+    span = transaction["spans"][0]
+
+    assert span["description"] == "my async tool"
+    assert span["op"] == "custom.async.operation"