From 7695b9108f21fb64d659f272352c017c6c52e5ba Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 27 Nov 2025 09:40:30 +0100 Subject: [PATCH 1/4] fix(langchain): add gen_ai.response.model to chat spans --- sentry_sdk/integrations/langchain.py | 15 +- .../integrations/langchain/test_langchain.py | 182 ++++++++++++++++++ 2 files changed, 196 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 1d3646f1c3..4a30084b9c 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -443,7 +443,20 @@ def on_llm_end(self, response, *, run_id, **kwargs): if generation is not None: try: - response_model = generation.generation_info.get("model_name") + response_model_metadata = generation.message.response_metadata.get( + "model_name" + ) + response_model_generation_info = generation.generation_info.get( + "model_name" + ) + response_model_llm_output = response.llm_output.get("model_name") + + response_model = ( + response_model_metadata + or response_model_generation_info + or response_model_llm_output + or None + ) if response_model is not None: span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) except AttributeError: diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 9f74e5f47c..335b9a9fc1 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1686,3 +1686,185 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e assert "List item" in input_data or "Single string query" in input_data, ( f"Expected input text in serialized data: {input_data}" ) + + +@pytest.mark.parametrize( + "response_metadata_model,generation_info_model,llm_output_model,expected_model", + [ + ("model-from-metadata", None, None, "model-from-metadata"), + (None, "model-from-generation-info", None, "model-from-generation-info"), + (None, None, "model-from-llm-output", "model-from-llm-output"), + ( + "model-from-metadata", + "model-from-generation-info", + None, + "model-from-metadata", + ), + ("model-from-metadata", None, "model-from-llm-output", "model-from-metadata"), + ( + None, + "model-from-generation-info", + "model-from-llm-output", + "model-from-generation-info", + ), + ( + "model-from-metadata", + "model-from-generation-info", + "model-from-llm-output", + "model-from-metadata", + ), + (None, None, None, None), + ], +) +def test_langchain_response_model_extraction( + sentry_init, + capture_events, + response_metadata_model, + generation_info_model, + llm_output_model, + expected_model, +): + from langchain_core.outputs import LLMResult + from langchain_core.messages import AIMessageChunk + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) + + run_id = "test-response-model-uuid" + serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} + prompts = ["Test prompt"] + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"model": "gpt-3.5-turbo"}, + ) + + response_metadata = {} + if response_metadata_model is not None: + response_metadata["model_name"] = response_metadata_model + + generation_info = {} + if generation_info_model is not None: + generation_info["model_name"] = generation_info_model + + llm_output = {} + if llm_output_model is not None: + llm_output["model_name"] = llm_output_model + + message = AIMessageChunk( + content="Test response", + response_metadata=response_metadata, + ) + + generation = Mock() + generation.text = "Test response" + generation.message = message + generation.generation_info = generation_info + + response = Mock() + response.generations = [[generation]] + response.llm_output = llm_output + + callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline" + ] + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + if expected_model is not None: + assert SPANDATA.GEN_AI_RESPONSE_MODEL in llm_span["data"] + assert llm_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model + else: + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) + + +@pytest.mark.parametrize( + "missing_attribute", + [ + "message", + "response_metadata", + "generation_info", + "llm_output", + ], +) +def test_langchain_response_model_extraction_missing_attributes( + sentry_init, + capture_events, + missing_attribute, +): + from langchain_core.messages import AIMessageChunk + + sentry_init( + integrations=[LangchainIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) + + run_id = "test-missing-attr-uuid" + serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} + prompts = ["Test prompt"] + + with start_transaction(): + callback.on_llm_start( + serialized=serialized, + prompts=prompts, + run_id=run_id, + invocation_params={"model": "gpt-3.5-turbo"}, + ) + + generation = Mock() + generation.text = "Test response" + + if missing_attribute != "message": + message_mock = Mock() + message_mock.response_metadata.get.return_value = None + if missing_attribute == "response_metadata": + delattr(message_mock, "response_metadata") + generation.message = message_mock + + if missing_attribute != "generation_info": + generation_info_mock = Mock() + generation_info_mock.get.return_value = None + generation.generation_info = generation_info_mock + + response = Mock() + response.generations = [[generation]] + + if missing_attribute != "llm_output": + llm_output_mock = Mock() + llm_output_mock.get.return_value = None + response.llm_output = llm_output_mock + + callback.on_llm_end(response=response, run_id=run_id) + + assert len(events) > 0 + tx = events[0] + assert tx["type"] == "transaction" + + llm_spans = [ + span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline" + ] + assert len(llm_spans) > 0 + + llm_span = llm_spans[0] + + assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) From cbd5c72d104197f1e32eccb56e7520f025574892 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 27 Nov 2025 15:54:24 +0100 Subject: [PATCH 2/4] simplify tests --- .../integrations/langchain/test_langchain.py | 79 ------------------- 1 file changed, 79 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 335b9a9fc1..67f36c2df1 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1724,9 +1724,6 @@ def test_langchain_response_model_extraction( llm_output_model, expected_model, ): - from langchain_core.outputs import LLMResult - from langchain_core.messages import AIMessageChunk - sentry_init( integrations=[LangchainIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1792,79 +1789,3 @@ def test_langchain_response_model_extraction( assert llm_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == expected_model else: assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) - - -@pytest.mark.parametrize( - "missing_attribute", - [ - "message", - "response_metadata", - "generation_info", - "llm_output", - ], -) -def test_langchain_response_model_extraction_missing_attributes( - sentry_init, - capture_events, - missing_attribute, -): - from langchain_core.messages import AIMessageChunk - - sentry_init( - integrations=[LangchainIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - - callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) - - run_id = "test-missing-attr-uuid" - serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} - prompts = ["Test prompt"] - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - invocation_params={"model": "gpt-3.5-turbo"}, - ) - - generation = Mock() - generation.text = "Test response" - - if missing_attribute != "message": - message_mock = Mock() - message_mock.response_metadata.get.return_value = None - if missing_attribute == "response_metadata": - delattr(message_mock, "response_metadata") - generation.message = message_mock - - if missing_attribute != "generation_info": - generation_info_mock = Mock() - generation_info_mock.get.return_value = None - generation.generation_info = generation_info_mock - - response = Mock() - response.generations = [[generation]] - - if missing_attribute != "llm_output": - llm_output_mock = Mock() - llm_output_mock.get.return_value = None - response.llm_output = llm_output_mock - - callback.on_llm_end(response=response, run_id=run_id) - - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - - llm_spans = [ - span for span in tx.get("spans", []) if span.get("op") == "gen_ai.pipeline" - ] - assert len(llm_spans) > 0 - - llm_span = llm_spans[0] - - assert SPANDATA.GEN_AI_RESPONSE_MODEL not in llm_span.get("data", {}) From cd0dc55ffce588adf2e1dcfefe2f2d4505f7b5e8 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 27 Nov 2025 15:58:51 +0100 Subject: [PATCH 3/4] simplify code --- .../integrations/langchain/test_langchain.py | 30 +++++-------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 67f36c2df1..7db56b54d2 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1745,32 +1745,18 @@ def test_langchain_response_model_extraction( invocation_params={"model": "gpt-3.5-turbo"}, ) - response_metadata = {} - if response_metadata_model is not None: - response_metadata["model_name"] = response_metadata_model - - generation_info = {} - if generation_info_model is not None: - generation_info["model_name"] = generation_info_model - - llm_output = {} - if llm_output_model is not None: - llm_output["model_name"] = llm_output_model + response_metadata = {"model_name": response_metadata_model} + generation_info = {"model_name": generation_info_model} + llm_output = {"model_name": llm_output_model} message = AIMessageChunk( - content="Test response", - response_metadata=response_metadata, + content="Test response", response_metadata=response_metadata ) - generation = Mock() - generation.text = "Test response" - generation.message = message - generation.generation_info = generation_info - - response = Mock() - response.generations = [[generation]] - response.llm_output = llm_output - + generation = Mock( + text="Test response", message=message, generation_info=generation_info + ) + response = Mock(generations=[[generation]], llm_output=llm_output) callback.on_llm_end(response=response, run_id=run_id) assert len(events) > 0 From d5332684a0ef010f0412857d4397d6a8c77b5aa6 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 27 Nov 2025 16:18:21 +0100 Subject: [PATCH 4/4] simplify --- sentry_sdk/integrations/langchain.py | 13 +------ .../integrations/langchain/test_langchain.py | 38 +++---------------- 2 files changed, 6 insertions(+), 45 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 4a30084b9c..dca470b749 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -443,20 +443,9 @@ def on_llm_end(self, response, *, run_id, **kwargs): if generation is not None: try: - response_model_metadata = generation.message.response_metadata.get( + response_model = generation.message.response_metadata.get( "model_name" ) - response_model_generation_info = generation.generation_info.get( - "model_name" - ) - response_model_llm_output = response.llm_output.get("model_name") - - response_model = ( - response_model_metadata - or response_model_generation_info - or response_model_llm_output - or None - ) if response_model is not None: span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) except AttributeError: diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 7db56b54d2..114e819bfb 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1689,39 +1689,16 @@ def test_langchain_embeddings_with_list_and_string_inputs(sentry_init, capture_e @pytest.mark.parametrize( - "response_metadata_model,generation_info_model,llm_output_model,expected_model", + "response_metadata_model,expected_model", [ - ("model-from-metadata", None, None, "model-from-metadata"), - (None, "model-from-generation-info", None, "model-from-generation-info"), - (None, None, "model-from-llm-output", "model-from-llm-output"), - ( - "model-from-metadata", - "model-from-generation-info", - None, - "model-from-metadata", - ), - ("model-from-metadata", None, "model-from-llm-output", "model-from-metadata"), - ( - None, - "model-from-generation-info", - "model-from-llm-output", - "model-from-generation-info", - ), - ( - "model-from-metadata", - "model-from-generation-info", - "model-from-llm-output", - "model-from-metadata", - ), - (None, None, None, None), + ("gpt-3.5-turbo", "gpt-3.5-turbo"), + (None, None), ], ) def test_langchain_response_model_extraction( sentry_init, capture_events, response_metadata_model, - generation_info_model, - llm_output_model, expected_model, ): sentry_init( @@ -1746,17 +1723,12 @@ def test_langchain_response_model_extraction( ) response_metadata = {"model_name": response_metadata_model} - generation_info = {"model_name": generation_info_model} - llm_output = {"model_name": llm_output_model} - message = AIMessageChunk( content="Test response", response_metadata=response_metadata ) - generation = Mock( - text="Test response", message=message, generation_info=generation_info - ) - response = Mock(generations=[[generation]], llm_output=llm_output) + generation = Mock(text="Test response", message=message) + response = Mock(generations=[[generation]]) callback.on_llm_end(response=response, run_id=run_id) assert len(events) > 0