From 5b7771c84c0c09ebb25abe76fa5a40df64f42751 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 14 Oct 2025 10:17:25 +0200 Subject: [PATCH 1/3] fix(langchain): capture exceptions within calls to the LLM --- sentry_sdk/integrations/langchain.py | 15 +- .../integrations/langchain/test_langchain.py | 288 ++++++++++++++++++ 2 files changed, 300 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 724d908665..e075babf37 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -749,7 +749,12 @@ def new_invoke(self, *args, **kwargs): _set_tools_on_span(span, tools) # Run the agent - result = f(self, *args, **kwargs) + try: + result = f(self, *args, **kwargs) + except Exception as e: + run_id = kwargs.get("run_id") + self._handle_error(run_id, e) + raise e input = result.get("input") if ( @@ -820,8 +825,12 @@ def new_stream(self, *args, **kwargs): unpack=False, ) - # Run the agent - result = f(self, *args, **kwargs) + try: + result = f(self, *args, **kwargs) + except Exception as e: + run_id = kwargs.get("run_id") + self._handle_error(run_id, e) + raise e old_iterator = result diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 661208432f..ae47b41ba1 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -958,3 +958,291 @@ def test_langchain_message_role_normalization_units(): assert normalized[3]["role"] == "system" # system unchanged assert "role" not in normalized[4] # Message without role unchanged assert normalized[5] == "string message" # String message unchanged + + +def test_langchain_llm_exception_captured(sentry_init, capture_events): + """Test that exceptions during LLM execution are properly captured with full context.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock(side_effect=RuntimeError("LLM service unavailable")) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with start_transaction(name="test_llm_exception"): + with pytest.raises(RuntimeError): + list(agent_executor.stream({"input": "Test input"})) + + (error_event, transaction_event) = events + + assert error_event["level"] == "error" + assert "exception" in error_event + assert len(error_event["exception"]["values"]) > 0 + + exception = error_event["exception"]["values"][0] + assert exception["type"] == "RuntimeError" + assert exception["value"] == "LLM service unavailable" + assert "stacktrace" in exception + + assert transaction_event["type"] == "transaction" + assert transaction_event["transaction"] == "test_llm_exception" + assert transaction_event["contexts"]["trace"]["status"] == "error" + + +def test_langchain_different_exception_types(sentry_init, capture_events): + """Test that different exception types are properly captured.""" + global llm_type + llm_type = "openai-chat" + + 
exception_types = [ + (ValueError, "Invalid parameter"), + (TypeError, "Type mismatch"), + (RuntimeError, "Runtime error occurred"), + (Exception, "Generic exception"), + ] + + for exception_class, exception_message in exception_types: + sentry_init( + integrations=[LangchainIntegration(include_prompts=False)], + traces_sample_rate=1.0, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock(side_effect=exception_class(exception_message)) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor( + agent=agent, tools=[get_word_length], verbose=True + ) + + with start_transaction(): + with pytest.raises(exception_class): + list(agent_executor.stream({"input": "Test"})) + + assert len(events) >= 1 + error_event = events[0] + assert error_event["level"] == "error" + + exception = error_event["exception"]["values"][0] + assert exception["type"] == exception_class.__name__ + assert exception["value"] == exception_message + + +def test_langchain_exception_with_span_context(sentry_init, capture_events): + """Test that exception events include proper span context.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock(side_effect=ValueError("Model error")) + + llm = MockOpenAI( + model_name="gpt-4", + temperature=0.7, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with start_transaction(name="llm_with_error"): + with pytest.raises(ValueError): + list(agent_executor.stream({"input": "Cause an error"})) + + error_event, transaction_event = events + + assert "contexts" in error_event + assert "trace" in error_event["contexts"] + + error_trace_id = error_event["contexts"]["trace"].get("trace_id") + transaction_trace_id = transaction_event["contexts"]["trace"]["trace_id"] + + assert error_trace_id == transaction_trace_id + + gen_ai_spans = [ + span + for span in transaction_event.get("spans", []) + if span.get("op", "").startswith("gen_ai") + ] + assert len(gen_ai_spans) > 0 + + for span in gen_ai_spans: + if span.get("tags", {}).get("status") == "error": + assert "span_id" in span + + +def test_langchain_tool_execution_error(sentry_init, capture_events): + """Test that exceptions during tool execution are properly captured.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + @tool + def failing_tool(word: str) -> int: + """A tool that always fails.""" + raise RuntimeError("Tool execution failed") + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + 
MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock( + side_effect=[ + [ + ChatGenerationChunk( + type="ChatGenerationChunk", + message=AIMessageChunk( + content="", + additional_kwargs={ + "tool_calls": [ + { + "index": 0, + "id": "call_test", + "function": { + "arguments": '{"word": "test"}', + "name": "failing_tool", + }, + "type": "function", + } + ] + }, + ), + ), + ] + ] + ) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [failing_tool], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[failing_tool], verbose=True) + + with start_transaction(): + with pytest.raises(RuntimeError): + list(agent_executor.stream({"input": "Use the failing tool"})) + + assert len(events) >= 1 + + error_events = [e for e in events if e.get("level") == "error"] + assert len(error_events) > 0 + + error_event = error_events[0] + exception = error_event["exception"]["values"][0] + assert exception["type"] == "RuntimeError" + assert "Tool execution failed" in exception["value"] + + +def test_langchain_exception_span_cleanup(sentry_init, capture_events): + """Test that spans are properly cleaned up even when exceptions occur.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock(side_effect=ValueError("Test error")) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with start_transaction(): + with pytest.raises(ValueError): + list(agent_executor.stream({"input": "Test"})) + + transaction_event = next( + (e for e in events if e.get("type") == "transaction"), None + ) + assert transaction_event is not None + + errored_spans = [ + span + for span in transaction_event.get("spans", []) + if span.get("tags", {}).get("status") == "error" + ] + + assert len(errored_spans) > 0 + + for span in errored_spans: + assert "timestamp" in span + assert span["timestamp"] > span.get("start_timestamp", 0) From 9bd128e25699b6f831c61f4cc9eb17efd1f7ddcf Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 14 Oct 2025 10:18:53 +0200 Subject: [PATCH 2/3] test --- tests/integrations/langchain/test_langchain.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index ae47b41ba1..e0d13f4889 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1013,9 +1013,6 @@ def test_langchain_llm_exception_captured(sentry_init, capture_events): def test_langchain_different_exception_types(sentry_init, capture_events): """Test that different exception types are properly captured.""" - global llm_type - llm_type = "openai-chat" - exception_types = [ (ValueError, "Invalid parameter"), (TypeError, "Type mismatch"), @@ -1066,9 +1063,6 @@ def test_langchain_different_exception_types(sentry_init, capture_events): def 
test_langchain_exception_with_span_context(sentry_init, capture_events): """Test that exception events include proper span context.""" - global llm_type - llm_type = "openai-chat" - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1123,9 +1117,6 @@ def test_langchain_exception_with_span_context(sentry_init, capture_events): def test_langchain_tool_execution_error(sentry_init, capture_events): """Test that exceptions during tool execution are properly captured.""" - global llm_type - llm_type = "openai-chat" - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1198,9 +1189,6 @@ def failing_tool(word: str) -> int: def test_langchain_exception_span_cleanup(sentry_init, capture_events): """Test that spans are properly cleaned up even when exceptions occur.""" - global llm_type - llm_type = "openai-chat" - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, From 730f0eb1595c937337ce71ab1f3d324d59d8387c Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Tue, 14 Oct 2025 10:39:26 +0200 Subject: [PATCH 3/3] remove globals from test --- .../integrations/langchain/test_langchain.py | 93 +++++++++++++++++-- 1 file changed, 87 insertions(+), 6 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index e0d13f4889..fb86ca2336 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -962,9 +962,6 @@ def test_langchain_message_role_normalization_units(): def test_langchain_llm_exception_captured(sentry_init, capture_events): """Test that exceptions during LLM execution are properly captured with full context.""" - global llm_type - llm_type = "openai-chat" - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -980,9 +977,6 @@ def test_langchain_llm_exception_captured(sentry_init, capture_events): ] ) - global stream_result_mock - stream_result_mock = Mock(side_effect=RuntimeError("LLM service unavailable")) - llm = MockOpenAI( model_name="gpt-3.5-turbo", temperature=0, @@ -1234,3 +1228,90 @@ def test_langchain_exception_span_cleanup(sentry_init, capture_events): for span in errored_spans: assert "timestamp" in span assert span["timestamp"] > span.get("start_timestamp", 0) + + +def test_langchain_callback_error_handler(sentry_init, capture_events): + """Test that the callback error handlers properly capture exceptions.""" + from langchain_core.outputs import LLMResult + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) + + run_id = "12345678-1234-1234-1234-123456789012" + serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} + prompts = ["Test prompt"] + + with start_transaction(name="test_callback_error"): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"model": "gpt-3.5-turbo"}, + ) + + test_exception = RuntimeError("API Error") + callback.on_llm_error(error=test_exception, run_id=run_id) + + assert len(events) >= 1 + + error_events = [e for e in events if e.get("level") == "error"] + assert len(error_events) > 0 + + error_event = error_events[0] + assert "exception" in error_event + + exception = 
error_event["exception"]["values"][0] + assert exception["type"] == "RuntimeError" + assert exception["value"] == "API Error" + + transaction_events = [e for e in events if e.get("type") == "transaction"] + if transaction_events: + transaction_event = transaction_events[0] + assert transaction_event["contexts"]["trace"]["status"] == "error" + + +def test_langchain_chat_model_error_handler(sentry_init, capture_events): + """Test that chat model errors are properly captured.""" + from langchain_core.messages import HumanMessage + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) + + run_id = "87654321-4321-4321-4321-210987654321" + serialized = {"_type": "openai-chat", "model_name": "gpt-4"} + messages = [[HumanMessage(content="Test message")]] + + with start_transaction(name="test_chat_model_error"): + callback.on_chat_model_start( + serialized=serialized, + messages=messages, + run_id=run_id, + invocation_params={"model": "gpt-4", "temperature": 0.7}, + ) + + test_exception = ValueError("Chat model rate limit exceeded") + callback.on_chat_model_error(error=test_exception, run_id=run_id) + + assert len(events) >= 1 + + error_events = [e for e in events if e.get("level") == "error"] + assert len(error_events) > 0 + + error_event = error_events[0] + assert "exception" in error_event + + exception = error_event["exception"]["values"][0] + assert exception["type"] == "ValueError" + assert exception["value"] == "Chat model rate limit exceeded"
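
Note: the change in PATCH 1/3 applies one pattern in both new_invoke and new_stream: run the wrapped call, hand any exception to the integration's existing _handle_error hook for the current run, and re-raise so the caller still observes the failure (which is what the pytest.raises assertions in the new tests depend on). Below is a minimal sketch of that shape, assuming a standalone decorator for illustration rather than the SDK's actual wrapping machinery; the helper name is hypothetical, while _handle_error and the run_id lookup mirror the hunks above.

    import functools

    def capture_agent_errors(f):
        # Illustrative helper mirroring the try/except added to new_invoke/new_stream.
        @functools.wraps(f)
        def wrapper(self, *args, **kwargs):
            try:
                return f(self, *args, **kwargs)
            except Exception as exc:
                # Report the failure against the current run (if a run_id was passed),
                # then propagate it unchanged so the surrounding span ends with an
                # error status instead of being silently swallowed.
                self._handle_error(kwargs.get("run_id"), exc)
                raise
        return wrapper

Re-raising rather than swallowing the exception keeps AgentExecutor's normal failure behaviour intact while still marking the gen_ai span and enclosing transaction as errored, as asserted by test_langchain_llm_exception_captured and test_langchain_exception_span_cleanup.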