diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 232b6aab83..553e703b62 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -1,9 +1,11 @@ +from collections.abc import Iterable from functools import wraps from typing import TYPE_CHECKING import sentry_sdk from sentry_sdk.ai.monitoring import record_token_usage from sentry_sdk.ai.utils import ( + GEN_AI_ALLOWED_MESSAGE_ROLES, set_data_normalized, normalize_message_roles, truncate_and_annotate_messages, @@ -39,7 +41,7 @@ raise DidNotEnable("Anthropic not installed") if TYPE_CHECKING: - from typing import Any, AsyncIterator, Iterator + from typing import Any, AsyncIterator, Iterator, List, Optional, Union from sentry_sdk.tracing import Span @@ -122,6 +124,7 @@ def _set_input_data(span, kwargs, integration): """ Set input data for the span based on the provided keyword arguments for the anthropic message creation. """ + system_prompt = kwargs.get("system") messages = kwargs.get("messages") if ( messages is not None @@ -130,9 +133,31 @@ def _set_input_data(span, kwargs, integration): and integration.include_prompts ): normalized_messages = [] + if system_prompt: + system_prompt_content = None # type: Optional[Union[str, List[dict[str, Any]]]] + if isinstance(system_prompt, str): + system_prompt_content = system_prompt + elif isinstance(system_prompt, Iterable): + system_prompt_content = [] + for item in system_prompt: + if ( + isinstance(item, dict) + and item.get("type") == "text" + and item.get("text") + ): + system_prompt_content.append(item.copy()) + + if system_prompt_content: + normalized_messages.append( + { + "role": GEN_AI_ALLOWED_MESSAGE_ROLES.SYSTEM, + "content": system_prompt_content, + } + ) + for message in messages: if ( - message.get("role") == "user" + message.get("role") == GEN_AI_ALLOWED_MESSAGE_ROLES.USER and "content" in message and isinstance(message["content"], (list, tuple)) ): @@ -140,8 +165,8 @@ def _set_input_data(span, kwargs, integration): if item.get("type") == "tool_result": normalized_messages.append( { - "role": "tool", - "content": { + "role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL, + "content": { # type: ignore[dict-item] "tool_use_id": item.get("tool_use_id"), "output": item.get("content"), }, diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 1fe2ff5d28..c1d449f892 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -996,3 +996,436 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert "small message 4" in str(parsed_messages[0]) assert "small message 5" in str(parsed_messages[1]) assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +def test_nonstreaming_create_message_with_system_prompt( + sentry_init, capture_events, send_default_pii, include_prompts +): + """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": "Hello, Claude", + } + ] + + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 2 + # System message should be first + assert stored_messages[0]["role"] == "system" + assert stored_messages[0]["content"] == "You are a helpful assistant." + # User message should be second + assert stored_messages[1]["role"] == "user" + assert stored_messages[1]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +async def test_nonstreaming_create_message_with_system_prompt_async( + sentry_init, capture_events, send_default_pii, include_prompts +): + """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES (async).""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + client = AsyncAnthropic(api_key="z") + client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": "Hello, Claude", + } + ] + + with start_transaction(name="anthropic"): + response = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + system="You are a helpful assistant.", + ) + + assert response == EXAMPLE_MESSAGE + usage = response.usage + + assert usage.input_tokens == 10 + assert usage.output_tokens == 20 + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 2 + # System message should be first + assert stored_messages[0]["role"] == "system" + assert stored_messages[0]["content"] == "You are a helpful assistant." + # User message should be second + assert stored_messages[1]["role"] == "user" + assert stored_messages[1]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +def test_streaming_create_message_with_system_prompt( + sentry_init, capture_events, send_default_pii, include_prompts +): + """Test that system prompts are properly captured in streaming mode.""" + client = Anthropic(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=client) + returned_stream._iterator = [ + MessageStartEvent( + message=EXAMPLE_MESSAGE, + type="message_start", + ), + ContentBlockStartEvent( + type="content_block_start", + index=0, + content_block=TextBlock(type="text", text=""), + ), + ContentBlockDeltaEvent( + delta=TextDelta(text="Hi", type="text_delta"), + index=0, + type="content_block_delta", + ), + ContentBlockDeltaEvent( + delta=TextDelta(text="!", type="text_delta"), + index=0, + type="content_block_delta", + ), + ContentBlockDeltaEvent( + delta=TextDelta(text=" I'm Claude!", type="text_delta"), + index=0, + type="content_block_delta", + ), + ContentBlockStopEvent(type="content_block_stop", index=0), + MessageDeltaEvent( + delta=Delta(), + usage=MessageDeltaUsage(output_tokens=10), + type="message_delta", + ), + ] + + sentry_init( + integrations=[AnthropicIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + client.messages._post = mock.Mock(return_value=returned_stream) + + messages = [ + { + "role": "user", + "content": "Hello, Claude", + } + ] + + with start_transaction(name="anthropic"): + message = client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + stream=True, + system="You are a helpful assistant.", + ) + + for _ in message: + pass + + assert message == returned_stream + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 2 + # System message should be first + assert stored_messages[0]["role"] == "system" + assert stored_messages[0]["content"] == "You are a helpful assistant." + # User message should be second + assert stored_messages[1]["role"] == "user" + assert stored_messages[1]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 40 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +async def test_streaming_create_message_with_system_prompt_async( + sentry_init, capture_events, send_default_pii, include_prompts +): + """Test that system prompts are properly captured in streaming mode (async).""" + client = AsyncAnthropic(api_key="z") + returned_stream = AsyncStream(cast_to=None, response=None, client=client) + returned_stream._iterator = async_iterator( + [ + MessageStartEvent( + message=EXAMPLE_MESSAGE, + type="message_start", + ), + ContentBlockStartEvent( + type="content_block_start", + index=0, + content_block=TextBlock(type="text", text=""), + ), + ContentBlockDeltaEvent( + delta=TextDelta(text="Hi", type="text_delta"), + index=0, + type="content_block_delta", + ), + ContentBlockDeltaEvent( + delta=TextDelta(text="!", type="text_delta"), + index=0, + type="content_block_delta", + ), + ContentBlockDeltaEvent( + delta=TextDelta(text=" I'm Claude!", type="text_delta"), + index=0, + type="content_block_delta", + ), + ContentBlockStopEvent(type="content_block_stop", index=0), + MessageDeltaEvent( + delta=Delta(), + usage=MessageDeltaUsage(output_tokens=10), + type="message_delta", + ), + ] + ) + + sentry_init( + integrations=[AnthropicIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + client.messages._post = AsyncMock(return_value=returned_stream) + + messages = [ + { + "role": "user", + "content": "Hello, Claude", + } + ] + + with start_transaction(name="anthropic"): + message = await client.messages.create( + max_tokens=1024, + messages=messages, + model="model", + stream=True, + system="You are a helpful assistant.", + ) + + async for _ in message: + pass + + assert message == returned_stream + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "anthropic" + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat model" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(stored_messages) == 2 + # System message should be first + assert stored_messages[0]["role"] == "system" + assert stored_messages[0]["content"] == "You are a helpful assistant." + # User message should be second + assert stored_messages[1]["role"] == "user" + assert stored_messages[1]["content"] == "Hello, Claude" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 30 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 40 + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + +def test_system_prompt_with_complex_structure(sentry_init, capture_events): + """Test that complex system prompt structures (list of text blocks) are properly captured.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + # System prompt as list of text blocks + system_prompt = [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ] + + messages = [ + { + "role": "user", + "content": "Hello", + } + ] + + with start_transaction(name="anthropic"): + response = client.messages.create( + max_tokens=1024, messages=messages, model="model", system=system_prompt + ) + + assert response == EXAMPLE_MESSAGE + assert len(events) == 1 + (event,) = events + + assert len(event["spans"]) == 1 + (span,) = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + # Should have system message first, then user message + assert len(stored_messages) == 2 + assert stored_messages[0]["role"] == "system" + # System content should be a list of text blocks + assert isinstance(stored_messages[0]["content"], list) + assert len(stored_messages[0]["content"]) == 2 + assert stored_messages[0]["content"][0]["type"] == "text" + assert stored_messages[0]["content"][0]["text"] == "You are a helpful assistant." + assert stored_messages[0]["content"][1]["type"] == "text" + assert stored_messages[0]["content"][1]["text"] == "Be concise and clear." + assert stored_messages[1]["role"] == "user" + assert stored_messages[1]["content"] == "Hello"