Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@
OllamaEmbeddings = None


def _get_ai_system(all_params: "Dict[str, Any]") -> "Optional[str]":
ai_type = all_params.get("_type")

if not ai_type or not isinstance(ai_type, str):
return None

return ai_type


DATA_FIELDS = {
"frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
"function_call": SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
Expand Down Expand Up @@ -381,11 +390,9 @@
model,
)

ai_type = all_params.get("_type", "")
if "anthropic" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic")
elif "openai" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")
ai_system = _get_ai_system(all_params)
if ai_system:
span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system)

Check warning on line 395 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: code-review

Breaking change: GEN_AI_SYSTEM values no longer normalized to provider names

The previous implementation normalized `_type` values to provider names (`"openai"`, `"anthropic"`), but now passes through raw LangChain `_type` values like `"openai-chat"`, `"anthropic-chat"`. This breaks consumers that filter spans by `gen_ai.system == "openai"` or `gen_ai.system == "anthropic"`. Other Sentry integrations (anthropic.py, openai.py, google_genai) use normalized provider names, creating inconsistency.

for key, attribute in DATA_FIELDS.items():
if key in all_params and all_params[key] is not None:
Expand Down Expand Up @@ -449,11 +456,9 @@
if model:
span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)

ai_type = all_params.get("_type", "")
if "anthropic" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic")
elif "openai" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")
ai_system = _get_ai_system(all_params)
if ai_system:
span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system)

Check warning on line 461 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: find-bugs

Breaking change: GEN_AI_SYSTEM span data values changed from 'openai'/'anthropic' to full _type strings

The old code extracted provider names ('anthropic', 'openai') using substring matching, but the new `_get_ai_system()` function returns the raw `_type` value (e.g., 'openai-chat', 'anthropic-chat'). This is a backwards-incompatible change that may break downstream systems expecting the old normalized values. Any dashboards, alerts, or queries filtering on `gen_ai.system == 'openai'` will no longer match these spans.

agent_name = _get_current_agent()
if agent_name:
Expand Down
88 changes: 88 additions & 0 deletions tests/integrations/langchain/test_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -2000,6 +2000,94 @@ def test_transform_google_file_data(self):
}


@pytest.mark.parametrize(
    "ai_type,expected_system",
    [
        # Real LangChain _type values (from _llm_type properties)
        # NOTE(review): these cases codify raw pass-through of `_type`
        # (e.g. "openai-chat" stays "openai-chat"). The inline review bots on
        # this PR flag that as a breaking change versus the previous
        # normalization to "openai"/"anthropic" — confirm the pass-through is
        # intentional before relying on these expectations.
        # OpenAI
        ("openai-chat", "openai-chat"),
        ("openai", "openai"),
        # Azure OpenAI
        ("azure-openai-chat", "azure-openai-chat"),
        ("azure", "azure"),
        # Anthropic
        ("anthropic-chat", "anthropic-chat"),
        # Google
        ("vertexai", "vertexai"),
        ("chat-google-generative-ai", "chat-google-generative-ai"),
        ("google_gemini", "google_gemini"),
        # AWS Bedrock
        ("amazon_bedrock_chat", "amazon_bedrock_chat"),
        ("amazon_bedrock", "amazon_bedrock"),
        # Cohere
        ("cohere-chat", "cohere-chat"),
        # Ollama
        ("chat-ollama", "chat-ollama"),
        ("ollama-llm", "ollama-llm"),
        # Mistral
        ("mistralai-chat", "mistralai-chat"),
        # Fireworks
        ("fireworks-chat", "fireworks-chat"),
        ("fireworks", "fireworks"),
        # HuggingFace
        ("huggingface-chat-wrapper", "huggingface-chat-wrapper"),
        # Groq
        ("groq-chat", "groq-chat"),
        # NVIDIA
        ("chat-nvidia-ai-playground", "chat-nvidia-ai-playground"),
        # xAI
        ("xai-chat", "xai-chat"),
        # DeepSeek
        ("chat-deepseek", "chat-deepseek"),
        # Edge cases
        ("", None),
        (None, None),
    ],
)
def test_langchain_ai_system_detection(
    sentry_init, capture_events, ai_type, expected_system
):
    """Check that the GEN_AI_SYSTEM span attribute is set from the LangChain
    ``_type`` invocation param, and omitted when ``_type`` is empty/missing.
    """
    sentry_init(
        integrations=[LangchainIntegration()],
        traces_sample_rate=1.0,
    )
    events = capture_events()

    callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True)

    run_id = "test-ai-system-uuid"
    # When ai_type is None the key is left out entirely to exercise the
    # "missing _type" path rather than an explicit None value.
    serialized = {"_type": ai_type} if ai_type is not None else {}
    prompts = ["Test prompt"]

    with start_transaction():
        # Drive the callback through a start/end pair so the LLM span is
        # created and closed within the transaction.
        callback.on_llm_start(
            serialized=serialized,
            prompts=prompts,
            run_id=run_id,
            invocation_params={"_type": ai_type, "model": "test-model"},
        )

        generation = Mock(text="Test response", message=None)
        response = Mock(generations=[[generation]])
        callback.on_llm_end(response=response, run_id=run_id)

    assert len(events) > 0
    tx = events[0]
    assert tx["type"] == "transaction"

    llm_spans = [
        span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline"
    ]
    assert len(llm_spans) > 0

    llm_span = llm_spans[0]

    # Empty/missing _type must not produce a GEN_AI_SYSTEM attribute at all.
    if expected_system is not None:
        assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system
    else:
        assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {})


class TestTransformLangchainMessageContent:
"""Tests for _transform_langchain_message_content function."""

Expand Down
Loading