 
 import sentry_sdk
 from sentry_sdk.ai.monitoring import record_token_usage
+from sentry_sdk.ai.utils import set_data_normalized
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations import _check_minimum_version, DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
 from sentry_sdk.utils import (
     capture_internal_exceptions,
     event_from_exception,
     package_version,
+    safe_serialize,
 )
 
 try:
+    from anthropic import NOT_GIVEN
     from anthropic.resources import AsyncMessages, Messages
 
     if TYPE_CHECKING:
@@ -53,8 +56,11 @@ def _capture_exception(exc):
     sentry_sdk.capture_event(event, hint=hint)
 
 
-def _calculate_token_usage(result, span):
-    # type: (Messages, Span) -> None
+def _get_token_usage(result):
+    # type: (Messages) -> tuple[int, int]
+    """
+    Get token usage from the Anthropic response.
+    """
     input_tokens = 0
     output_tokens = 0
     if hasattr(result, "usage"):
@@ -64,44 +70,21 @@ def _calculate_token_usage(result, span):
         if hasattr(usage, "output_tokens") and isinstance(usage.output_tokens, int):
             output_tokens = usage.output_tokens
 
-    total_tokens = input_tokens + output_tokens
+    return input_tokens, output_tokens
 
-    record_token_usage(
-        span,
-        input_tokens=input_tokens,
-        output_tokens=output_tokens,
-        total_tokens=total_tokens,
-    )
 
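Note on the refactor above: `_calculate_token_usage` both counted and recorded, while `_get_token_usage` only extracts the counts, so the non-streaming and streaming paths can share one recording path in `_set_output_data`. A minimal sketch of the duck typing it relies on, using `types.SimpleNamespace` stand-ins (hypothetical, not SDK objects):

    from types import SimpleNamespace

    # Any object exposing usage.input_tokens / usage.output_tokens as ints
    # passes the hasattr/isinstance checks above.
    fake_message = SimpleNamespace(
        usage=SimpleNamespace(input_tokens=12, output_tokens=34)
    )
    assert _get_token_usage(fake_message) == (12, 34)

    # Missing or non-int fields fall back to 0 rather than raising.
    assert _get_token_usage(SimpleNamespace()) == (0, 0)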
-
-def _get_responses(content):
-    # type: (list[Any]) -> list[dict[str, Any]]
+def _collect_ai_data(event, model, input_tokens, output_tokens, content_blocks):
+    # type: (MessageStreamEvent, str | None, int, int, list[str]) -> tuple[str | None, int, int, list[str]]
     """
-    Get JSON of a Anthropic responses.
-    """
-    responses = []
-    for item in content:
-        if hasattr(item, "text"):
-            responses.append(
-                {
-                    "type": item.type,
-                    "text": item.text,
-                }
-            )
-    return responses
-
-
-def _collect_ai_data(event, input_tokens, output_tokens, content_blocks):
-    # type: (MessageStreamEvent, int, int, list[str]) -> tuple[int, int, list[str]]
-    """
-    Count token usage and collect content blocks from the AI streaming response.
+    Collect model information, token usage, and content blocks from the AI streaming response.
     """
     with capture_internal_exceptions():
         if hasattr(event, "type"):
             if event.type == "message_start":
                 usage = event.message.usage
                 input_tokens += usage.input_tokens
                 output_tokens += usage.output_tokens
+                model = event.message.model or model
             elif event.type == "content_block_start":
                 pass
             elif event.type == "content_block_delta":
@@ -114,31 +97,69 @@ def _collect_ai_data(event, input_tokens, output_tokens, content_blocks):
             elif event.type == "message_delta":
                 output_tokens += event.usage.output_tokens
 
-    return input_tokens, output_tokens, content_blocks
+    return model, input_tokens, output_tokens, content_blocks
 
 
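Note: `_collect_ai_data` is a fold step: the caller threads `model`, the token counters, and `content_blocks` back in on every event, and `event.message.model or model` means the first `message_start` event pins the model name. A rough sketch of the accumulation, assuming `events` is an iterable of `MessageStreamEvent` (it mirrors `new_iterator()` below):

    model, input_tokens, output_tokens = None, 0, 0
    content_blocks = []  # type: list[str]
    for event in events:
        model, input_tokens, output_tokens, content_blocks = _collect_ai_data(
            event, model, input_tokens, output_tokens, content_blocks
        )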
-def _add_ai_data_to_span(
-    span, integration, input_tokens, output_tokens, content_blocks
-):
-    # type: (Span, AnthropicIntegration, int, int, list[str]) -> None
+def _set_input_data(span, kwargs, integration):
+    # type: (Span, dict[str, Any], AnthropicIntegration) -> None
     """
-    Add token usage and content blocks from the AI streaming response to the span.
+    Set input data for the span based on the provided keyword arguments for the Anthropic message creation.
     """
-    with capture_internal_exceptions():
-        if should_send_default_pii() and integration.include_prompts:
-            complete_message = "".join(content_blocks)
-            span.set_data(
-                SPANDATA.AI_RESPONSES,
-                [{"type": "text", "text": complete_message}],
-            )
-        total_tokens = input_tokens + output_tokens
-        record_token_usage(
-            span,
-            input_tokens=input_tokens,
-            output_tokens=output_tokens,
-            total_tokens=total_tokens,
+    messages = kwargs.get("messages")
+    if (
+        messages is not None
+        and len(messages) > 0
+        and should_send_default_pii()
+        and integration.include_prompts
+    ):
+        set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages)
+
+    kwargs_keys_to_attributes = {
+        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
+        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
+        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
+        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
+        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
+    }
+    for key, attribute in kwargs_keys_to_attributes.items():
+        value = kwargs.get(key)
+        if value is not NOT_GIVEN and value is not None:
+            set_data_normalized(span, attribute, value)
+
+    # Input attributes: Tools
+    tools = kwargs.get("tools")
+    if tools is not NOT_GIVEN and tools is not None and len(tools) > 0:
+        set_data_normalized(
+            span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools)
         )
-        span.set_data(SPANDATA.AI_STREAMING, True)
+
+
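Note: `NOT_GIVEN` is the anthropic SDK's sentinel for omitted optional parameters, so both it and `None` have to be filtered out before an attribute is recorded, and messages are attached only when `send_default_pii` and `include_prompts` are both enabled. A hypothetical call, assuming a `span` and `integration` already in scope:

    kwargs = {
        "model": "claude-3-5-sonnet-20240620",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": "Hi"}],
    }
    # Sets GEN_AI_REQUEST_MODEL and GEN_AI_REQUEST_MAX_TOKENS; the
    # GEN_AI_REQUEST_MESSAGES attribute is added only when PII is allowed.
    _set_input_data(span, kwargs, integration)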
+def _set_output_data(
+    span,
+    integration,
+    model,
+    input_tokens,
+    output_tokens,
+    content_blocks,
+    finish_span=True,
+):
+    # type: (Span, AnthropicIntegration, str | None, int | None, int | None, list[Any], bool) -> None
+    """
+    Set output data for the span based on the AI response.
+    """
+    span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model)
+    if should_send_default_pii() and integration.include_prompts:
+        set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, content_blocks)
+
+    record_token_usage(
+        span,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+    )
+
+    # TODO: GEN_AI_RESPONSE_TOOL_CALLS?
+
+    if finish_span:
+        span.__exit__(None, None, None)
 
 
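Note: the `finish_span` flag lets the streaming wrappers below defer `span.__exit__` until the iterator is drained, while the non-streaming path closes the span immediately. A hypothetical non-streaming usage (the token counts and the `response` object are illustrative):

    _set_output_data(
        span,
        integration,
        model="claude-3-5-sonnet-20240620",
        input_tokens=12,
        output_tokens=34,
        content_blocks=response.content,  # Anthropic Message content blocks
        finish_span=True,  # closes the span opened via span.__enter__()
    )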
 def _sentry_patched_create_common(f, *args, **kwargs):
@@ -162,62 +183,76 @@ def _sentry_patched_create_common(f, *args, **kwargs):
     )
     span.__enter__()
 
-    result = yield f, args, kwargs
+    _set_input_data(span, kwargs, integration)
 
-    # add data to span and finish it
-    messages = list(kwargs["messages"])
-    model = kwargs.get("model")
+    result = yield f, args, kwargs
 
     with capture_internal_exceptions():
-        span.set_data(SPANDATA.AI_MODEL_ID, model)
-        span.set_data(SPANDATA.AI_STREAMING, False)
-
-        if should_send_default_pii() and integration.include_prompts:
-            span.set_data(SPANDATA.AI_INPUT_MESSAGES, messages)
-
         if hasattr(result, "content"):
-            if should_send_default_pii() and integration.include_prompts:
-                span.set_data(SPANDATA.AI_RESPONSES, _get_responses(result.content))
-            _calculate_token_usage(result, span)
-            span.__exit__(None, None, None)
+            input_tokens, output_tokens = _get_token_usage(result)
+            _set_output_data(
+                span,
+                integration,
+                getattr(result, "model", None),
+                input_tokens,
+                output_tokens,
+                content_blocks=result.content,
+                finish_span=True,
+            )
 
         # Streaming response
         elif hasattr(result, "_iterator"):
             old_iterator = result._iterator
 
             def new_iterator():
                 # type: () -> Iterator[MessageStreamEvent]
+                model = None
                 input_tokens = 0
                 output_tokens = 0
                 content_blocks = []  # type: list[str]
 
                 for event in old_iterator:
-                    input_tokens, output_tokens, content_blocks = _collect_ai_data(
-                        event, input_tokens, output_tokens, content_blocks
+                    model, input_tokens, output_tokens, content_blocks = (
+                        _collect_ai_data(
+                            event, model, input_tokens, output_tokens, content_blocks
+                        )
                     )
                     yield event
 
-                _add_ai_data_to_span(
-                    span, integration, input_tokens, output_tokens, content_blocks
+                _set_output_data(
+                    span,
+                    integration,
+                    model=model,
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    content_blocks=content_blocks,
+                    finish_span=True,
                 )
-                span.__exit__(None, None, None)
 
             async def new_iterator_async():
                 # type: () -> AsyncIterator[MessageStreamEvent]
+                model = None
                 input_tokens = 0
                 output_tokens = 0
                 content_blocks = []  # type: list[str]
 
                 async for event in old_iterator:
-                    input_tokens, output_tokens, content_blocks = _collect_ai_data(
-                        event, input_tokens, output_tokens, content_blocks
+                    model, input_tokens, output_tokens, content_blocks = (
+                        _collect_ai_data(
+                            event, model, input_tokens, output_tokens, content_blocks
+                        )
                     )
                     yield event
 
-                _add_ai_data_to_span(
-                    span, integration, input_tokens, output_tokens, content_blocks
+                _set_output_data(
+                    span,
+                    integration,
+                    model=model,
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    content_blocks=content_blocks,
+                    finish_span=True,
                 )
-                span.__exit__(None, None, None)
 
             if str(type(result._iterator)) == "<class 'async_generator'>":
                 result._iterator = new_iterator_async()
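Note: replacing `result._iterator` instead of consuming it keeps the patch transparent to callers: every event is re-yielded unchanged, and `_set_output_data` runs (closing the span) exactly once, after the stream is exhausted. The same wrap-and-finish pattern in isolation, with illustrative names rather than SDK API:

    def wrap_stream(inner, on_finish):
        # Re-yield every event unchanged, then fire the callback once
        # the underlying iterator is exhausted.
        def wrapper():
            for event in inner:
                yield event
            on_finish()
        return wrapper()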