From 5b7771c84c0c09ebb25abe76fa5a40df64f42751 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 14 Oct 2025 10:17:25 +0200 Subject: [PATCH 1/3] fix(langchain): capture exceptions within calls to the LLM --- sentry_sdk/integrations/langchain.py | 15 +- .../integrations/langchain/test_langchain.py | 288 ++++++++++++++++++ 2 files changed, 300 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 724d908665..e075babf37 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -749,7 +749,12 @@ def new_invoke(self, *args, **kwargs): _set_tools_on_span(span, tools) # Run the agent - result = f(self, *args, **kwargs) + try: + result = f(self, *args, **kwargs) + except Exception as e: + run_id = kwargs.get("run_id") + self._handle_error(run_id, e) + raise e input = result.get("input") if ( @@ -820,8 +825,12 @@ def new_stream(self, *args, **kwargs): unpack=False, ) - # Run the agent - result = f(self, *args, **kwargs) + try: + result = f(self, *args, **kwargs) + except Exception as e: + run_id = kwargs.get("run_id") + self._handle_error(run_id, e) + raise e old_iterator = result diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 661208432f..ae47b41ba1 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -958,3 +958,291 @@ def test_langchain_message_role_normalization_units(): assert normalized[3]["role"] == "system" # system unchanged assert "role" not in normalized[4] # Message without role unchanged assert normalized[5] == "string message" # String message unchanged + + +def test_langchain_llm_exception_captured(sentry_init, capture_events): + """Test that exceptions during LLM execution are properly captured with full context.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock(side_effect=RuntimeError("LLM service unavailable")) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with start_transaction(name="test_llm_exception"): + with pytest.raises(RuntimeError): + list(agent_executor.stream({"input": "Test input"})) + + (error_event, transaction_event) = events + + assert error_event["level"] == "error" + assert "exception" in error_event + assert len(error_event["exception"]["values"]) > 0 + + exception = error_event["exception"]["values"][0] + assert exception["type"] == "RuntimeError" + assert exception["value"] == "LLM service unavailable" + assert "stacktrace" in exception + + assert transaction_event["type"] == "transaction" + assert transaction_event["transaction"] == "test_llm_exception" + assert transaction_event["contexts"]["trace"]["status"] == "error" + + +def test_langchain_different_exception_types(sentry_init, capture_events): + """Test that different exception types are properly captured.""" + global llm_type + llm_type = "openai-chat" + + 
exception_types = [ + (ValueError, "Invalid parameter"), + (TypeError, "Type mismatch"), + (RuntimeError, "Runtime error occurred"), + (Exception, "Generic exception"), + ] + + for exception_class, exception_message in exception_types: + sentry_init( + integrations=[LangchainIntegration(include_prompts=False)], + traces_sample_rate=1.0, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock(side_effect=exception_class(exception_message)) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor( + agent=agent, tools=[get_word_length], verbose=True + ) + + with start_transaction(): + with pytest.raises(exception_class): + list(agent_executor.stream({"input": "Test"})) + + assert len(events) >= 1 + error_event = events[0] + assert error_event["level"] == "error" + + exception = error_event["exception"]["values"][0] + assert exception["type"] == exception_class.__name__ + assert exception["value"] == exception_message + + +def test_langchain_exception_with_span_context(sentry_init, capture_events): + """Test that exception events include proper span context.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock(side_effect=ValueError("Model error")) + + llm = MockOpenAI( + model_name="gpt-4", + temperature=0.7, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with start_transaction(name="llm_with_error"): + with pytest.raises(ValueError): + list(agent_executor.stream({"input": "Cause an error"})) + + error_event, transaction_event = events + + assert "contexts" in error_event + assert "trace" in error_event["contexts"] + + error_trace_id = error_event["contexts"]["trace"].get("trace_id") + transaction_trace_id = transaction_event["contexts"]["trace"]["trace_id"] + + assert error_trace_id == transaction_trace_id + + gen_ai_spans = [ + span + for span in transaction_event.get("spans", []) + if span.get("op", "").startswith("gen_ai") + ] + assert len(gen_ai_spans) > 0 + + for span in gen_ai_spans: + if span.get("tags", {}).get("status") == "error": + assert "span_id" in span + + +def test_langchain_tool_execution_error(sentry_init, capture_events): + """Test that exceptions during tool execution are properly captured.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + @tool + def failing_tool(word: str) -> int: + """A tool that always fails.""" + raise RuntimeError("Tool execution failed") + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + 
MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock( + side_effect=[ + [ + ChatGenerationChunk( + type="ChatGenerationChunk", + message=AIMessageChunk( + content="", + additional_kwargs={ + "tool_calls": [ + { + "index": 0, + "id": "call_test", + "function": { + "arguments": '{"word": "test"}', + "name": "failing_tool", + }, + "type": "function", + } + ] + }, + ), + ), + ] + ] + ) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [failing_tool], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[failing_tool], verbose=True) + + with start_transaction(): + with pytest.raises(RuntimeError): + list(agent_executor.stream({"input": "Use the failing tool"})) + + assert len(events) >= 1 + + error_events = [e for e in events if e.get("level") == "error"] + assert len(error_events) > 0 + + error_event = error_events[0] + exception = error_event["exception"]["values"][0] + assert exception["type"] == "RuntimeError" + assert "Tool execution failed" in exception["value"] + + +def test_langchain_exception_span_cleanup(sentry_init, capture_events): + """Test that spans are properly cleaned up even when exceptions occur.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock(side_effect=ValueError("Test error")) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with start_transaction(): + with pytest.raises(ValueError): + list(agent_executor.stream({"input": "Test"})) + + transaction_event = next( + (e for e in events if e.get("type") == "transaction"), None + ) + assert transaction_event is not None + + errored_spans = [ + span + for span in transaction_event.get("spans", []) + if span.get("tags", {}).get("status") == "error" + ] + + assert len(errored_spans) > 0 + + for span in errored_spans: + assert "timestamp" in span + assert span["timestamp"] > span.get("start_timestamp", 0) From 9bd128e25699b6f831c61f4cc9eb17efd1f7ddcf Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 14 Oct 2025 10:18:53 +0200 Subject: [PATCH 2/3] test --- tests/integrations/langchain/test_langchain.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index ae47b41ba1..e0d13f4889 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1013,9 +1013,6 @@ def test_langchain_llm_exception_captured(sentry_init, capture_events): def test_langchain_different_exception_types(sentry_init, capture_events): """Test that different exception types are properly captured.""" - global llm_type - llm_type = "openai-chat" - exception_types = [ (ValueError, "Invalid parameter"), (TypeError, "Type mismatch"), @@ -1066,9 +1063,6 @@ def test_langchain_different_exception_types(sentry_init, capture_events): def 
test_langchain_exception_with_span_context(sentry_init, capture_events): """Test that exception events include proper span context.""" - global llm_type - llm_type = "openai-chat" - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1123,9 +1117,6 @@ def test_langchain_exception_with_span_context(sentry_init, capture_events): def test_langchain_tool_execution_error(sentry_init, capture_events): """Test that exceptions during tool execution are properly captured.""" - global llm_type - llm_type = "openai-chat" - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1198,9 +1189,6 @@ def failing_tool(word: str) -> int: def test_langchain_exception_span_cleanup(sentry_init, capture_events): """Test that spans are properly cleaned up even when exceptions occur.""" - global llm_type - llm_type = "openai-chat" - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, From 730f0eb1595c937337ce71ab1f3d324d59d8387c Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 14 Oct 2025 10:39:26 +0200 Subject: [PATCH 3/3] remove globals from test --- .../integrations/langchain/test_langchain.py | 93 +++++++++++++++++-- 1 file changed, 87 insertions(+), 6 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index e0d13f4889..fb86ca2336 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -962,9 +962,6 @@ def test_langchain_message_role_normalization_units(): def test_langchain_llm_exception_captured(sentry_init, capture_events): """Test that exceptions during LLM execution are properly captured with full context.""" - global llm_type - llm_type = "openai-chat" - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -980,9 +977,6 @@ def test_langchain_llm_exception_captured(sentry_init, capture_events): ] ) - global stream_result_mock - stream_result_mock = Mock(side_effect=RuntimeError("LLM service unavailable")) - llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, @@ -1234,3 +1228,90 @@ def test_langchain_exception_span_cleanup(sentry_init, capture_events): for span in errored_spans: assert "timestamp" in span assert span["timestamp"] > span.get("start_timestamp", 0) + + +def test_langchain_callback_error_handler(sentry_init, capture_events): + """Test that the callback error handlers properly capture exceptions.""" + from langchain_core.outputs import LLMResult + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) + + run_id = "12345678-1234-1234-1234-123456789012" + serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} + prompts = ["Test prompt"] + + with start_transaction(name="test_callback_error"): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"model": "gpt-3.5-turbo"}, + ) + + test_exception = RuntimeError("API Error") + callback.on_llm_error(error=test_exception, run_id=run_id) + + assert len(events) >= 1 + + error_events = [e for e in events if e.get("level") == "error"] + assert len(error_events) > 0 + + error_event = error_events[0] + assert "exception" in error_event + + exception = 
error_event["exception"]["values"][0] + assert exception["type"] == "RuntimeError" + assert exception["value"] == "API Error" + + transaction_events = [e for e in events if e.get("type") == "transaction"] + if transaction_events: + transaction_event = transaction_events[0] + assert transaction_event["contexts"]["trace"]["status"] == "error" + + +def test_langchain_chat_model_error_handler(sentry_init, capture_events): + """Test that chat model errors are properly captured.""" + from langchain_core.messages import HumanMessage + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) + + run_id = "87654321-4321-4321-4321-210987654321" + serialized = {"_type": "openai-chat", "model_name": "gpt-4"} + messages = [[HumanMessage(content="Test message")]] + + with start_transaction(name="test_chat_model_error"): + callback.on_chat_model_start( + serialized=serialized, + messages=messages, + run_id=run_id, + invocation_params={"model": "gpt-4", "temperature": 0.7}, + ) + + test_exception = ValueError("Chat model rate limit exceeded") + callback.on_chat_model_error(error=test_exception, run_id=run_id) + + assert len(events) >= 1 + + error_events = [e for e in events if e.get("level") == "error"] + assert len(error_events) > 0 + + error_event = error_events[0] + assert "exception" in error_event + + exception = error_event["exception"]["values"][0] + assert exception["type"] == "ValueError" + assert exception["value"] == "Chat model rate limit exceeded"
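
Note: the change in PATCH 1/3 applies one pattern in both new_invoke and new_stream: run the wrapped call, hand any exception to the integration's existing _handle_error hook for the current run, and re-raise so the caller still observes the failure (which is what the pytest.raises assertions in the new tests depend on). Below is a minimal sketch of that shape, assuming a standalone decorator for illustration rather than the SDK's actual wrapping machinery; the helper name is hypothetical, while _handle_error and the run_id lookup mirror the hunks above.

    import functools

    def capture_agent_errors(f):
        # Illustrative helper mirroring the try/except added to new_invoke/new_stream.
        @functools.wraps(f)
        def wrapper(self, *args, **kwargs):
            try:
                return f(self, *args, **kwargs)
            except Exception as exc:
                # Report the failure against the current run (if a run_id was passed),
                # then propagate it unchanged so the surrounding span ends with an
                # error status instead of being silently swallowed.
                self._handle_error(kwargs.get("run_id"), exc)
                raise
        return wrapper

Re-raising rather than swallowing the exception keeps AgentExecutor's normal failure behaviour intact while still marking the gen_ai span and enclosing transaction as errored, as asserted by test_langchain_llm_exception_captured and test_langchain_exception_span_cleanup.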