diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 7e04a740ed..a53115a2a9 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -51,7 +51,6 @@ "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE, "tool_calls": SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, - "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, "top_k": SPANDATA.GEN_AI_REQUEST_TOP_K, "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, } @@ -203,8 +202,12 @@ def on_llm_start( if key in all_params and all_params[key] is not None: set_data_normalized(span, attribute, all_params[key], unpack=False) + _set_tools_on_span(span, all_params.get("tools")) + if should_send_default_pii() and self.include_prompts: - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompts) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompts, unpack=False + ) def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any @@ -246,14 +249,20 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs): if key in all_params and all_params[key] is not None: set_data_normalized(span, attribute, all_params[key], unpack=False) + _set_tools_on_span(span, all_params.get("tools")) + if should_send_default_pii() and self.include_prompts: + normalized_messages = [] + for list_ in messages: + for message in list_: + normalized_messages.append( + self._normalize_langchain_message(message) + ) set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, - [ - [self._normalize_langchain_message(x) for x in list_] - for list_ in messages - ], + normalized_messages, + unpack=False, ) def on_chat_model_end(self, response, *, run_id, **kwargs): @@ -351,9 +360,7 @@ def on_agent_finish(self, finish, *, run_id, **kwargs): if should_send_default_pii() and self.include_prompts: set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - finish.return_values.items(), + span, SPANDATA.GEN_AI_RESPONSE_TEXT, finish.return_values.items() ) self._exit_span(span_data, run_id) @@ -473,13 +480,11 @@ def _get_token_usage(obj): if usage is not None: return usage - # check for usage in the object itself for name in possible_names: usage = _get_value(obj, name) if usage is not None: return usage - # no usage found anywhere return None @@ -531,6 +536,87 @@ def _get_request_data(obj, args, kwargs): return (agent_name, tools) +def _simplify_langchain_tools(tools): + # type: (Any) -> Optional[List[Any]] + """Parse and simplify tools into a cleaner format.""" + if not tools: + return None + + if not isinstance(tools, (list, tuple)): + return None + + simplified_tools = [] + for tool in tools: + try: + if isinstance(tool, dict): + + if "function" in tool and isinstance(tool["function"], dict): + func = tool["function"] + simplified_tool = { + "name": func.get("name"), + "description": func.get("description"), + } + if simplified_tool["name"]: + simplified_tools.append(simplified_tool) + elif "name" in tool: + simplified_tool = { + "name": tool.get("name"), + "description": tool.get("description"), + } + simplified_tools.append(simplified_tool) + else: + name = ( + tool.get("name") + or tool.get("tool_name") + or tool.get("function_name") + ) + if name: + simplified_tools.append( + { + "name": name, + "description": tool.get("description") + or tool.get("desc"), + } + ) + elif hasattr(tool, "name"): + simplified_tool = { + "name": getattr(tool, "name", None), + "description": getattr(tool, "description", None) + or getattr(tool, "desc", None), + } + if simplified_tool["name"]: + simplified_tools.append(simplified_tool) + elif hasattr(tool, "__name__"): + simplified_tools.append( + { + "name": tool.__name__, + "description": getattr(tool, "__doc__", None), + } + ) + else: + tool_str = str(tool) + if tool_str and tool_str != "": + simplified_tools.append({"name": tool_str, "description": None}) + except Exception: + continue + + return simplified_tools if simplified_tools else None + + +def _set_tools_on_span(span, tools): + # type: (Span, Any) -> None + """Set available tools data on a span if tools are provided.""" + if tools is not None: + simplified_tools = _simplify_langchain_tools(tools) + if simplified_tools: + set_data_normalized( + span, + SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, + simplified_tools, + unpack=False, + ) + + def _wrap_configure(f): # type: (Callable[..., Any]) -> Callable[..., Any] @@ -601,7 +687,7 @@ def new_configure( ] elif isinstance(local_callbacks, BaseCallbackHandler): local_callbacks = [local_callbacks, sentry_handler] - else: # local_callbacks is a list + else: local_callbacks = [*local_callbacks, sentry_handler] return f( @@ -638,10 +724,7 @@ def new_invoke(self, *args, **kwargs): span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False) - if tools: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, tools, unpack=False - ) + _set_tools_on_span(span, tools) # Run the agent result = f(self, *args, **kwargs) @@ -653,11 +736,7 @@ def new_invoke(self, *args, **kwargs): and integration.include_prompts ): set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - [ - input, - ], + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [input], unpack=False ) output = result.get("output") @@ -666,7 +745,7 @@ def new_invoke(self, *args, **kwargs): and should_send_default_pii() and integration.include_prompts ): - span.set_data(SPANDATA.GEN_AI_RESPONSE_TEXT, output) + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output) return result @@ -698,10 +777,7 @@ def new_stream(self, *args, **kwargs): span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) - if tools: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, tools, unpack=False - ) + _set_tools_on_span(span, tools) input = args[0].get("input") if len(args) >= 1 else None if ( @@ -710,11 +786,7 @@ def new_stream(self, *args, **kwargs): and integration.include_prompts ): set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - [ - input, - ], + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, [input], unpack=False ) # Run the agent @@ -737,7 +809,7 @@ def new_iterator(): and should_send_default_pii() and integration.include_prompts ): - span.set_data(SPANDATA.GEN_AI_RESPONSE_TEXT, output) + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output) span.__exit__(None, None, None) @@ -756,7 +828,7 @@ async def new_iterator_async(): and should_send_default_pii() and integration.include_prompts ): - span.set_data(SPANDATA.GEN_AI_RESPONSE_TEXT, output) + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output) span.__exit__(None, None, None) diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py index 4b241fe895..df3941bb13 100644 --- a/sentry_sdk/integrations/langgraph.py +++ b/sentry_sdk/integrations/langgraph.py @@ -183,7 +183,8 @@ def new_invoke(self, *args, **kwargs): set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, - safe_serialize(input_messages), + input_messages, + unpack=False, ) result = f(self, *args, **kwargs) @@ -232,7 +233,8 @@ async def new_ainvoke(self, *args, **kwargs): set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, - safe_serialize(input_messages), + input_messages, + unpack=False, ) result = await f(self, *args, **kwargs) @@ -305,11 +307,9 @@ def _set_response_attributes(span, input_messages, result, integration): if llm_response_text: set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, llm_response_text) elif new_messages: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(new_messages) - ) + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, new_messages) else: - set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(result)) + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, result) tool_calls = _extract_tool_calls(new_messages) if tool_calls: diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 6ea545322c..467116c8f4 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -179,7 +179,9 @@ def _set_input_data(span, kwargs, operation, integration): and should_send_default_pii() and integration.include_prompts ): - set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False + ) # Input attributes: Common set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") @@ -227,25 +229,46 @@ def _set_output_data(span, response, kwargs, integration, finish_span=True): if should_send_default_pii() and integration.include_prompts: response_text = [choice.message.dict() for choice in response.choices] if len(response_text) > 0: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - safe_serialize(response_text), - ) + set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_text) + _calculate_token_usage(messages, response, span, None, integration.count_tokens) + if finish_span: span.__exit__(None, None, None) elif hasattr(response, "output"): if should_send_default_pii() and integration.include_prompts: - response_text = [item.to_dict() for item in response.output] - if len(response_text) > 0: + output_messages = { + "response": [], + "tool": [], + } # type: (dict[str, list[Any]]) + + for output in response.output: + if output.type == "function_call": + output_messages["tool"].append(output.dict()) + elif output.type == "message": + for output_message in output.content: + try: + output_messages["response"].append(output_message.text) + except AttributeError: + # Unknown output message type, just return the json + output_messages["response"].append(output_message.dict()) + + if len(output_messages["tool"]) > 0: set_data_normalized( span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - safe_serialize(response_text), + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + output_messages["tool"], + unpack=False, + ) + + if len(output_messages["response"]) > 0: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"] ) + _calculate_token_usage(messages, response, span, None, integration.count_tokens) + if finish_span: span.__exit__(None, None, None) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 1525346726..44b260d4bc 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -1,4 +1,5 @@ import sentry_sdk +from sentry_sdk.ai.utils import set_data_normalized from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable from sentry_sdk.scope import should_send_default_pii @@ -127,7 +128,9 @@ def _set_input_data(span, get_response_kwargs): if len(messages) > 0: request_messages.append({"role": role, "content": messages}) - span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, safe_serialize(request_messages)) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, request_messages, unpack=False + ) def _set_output_data(span, result): @@ -157,6 +160,6 @@ def _set_output_data(span, result): ) if len(output_messages["response"]) > 0: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(output_messages["response"]) + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"] ) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 9a06ac05d4..99dc5f4e37 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -589,3 +589,76 @@ def test_langchain_callback_list_existing_callback(sentry_init): [handler] = passed_callbacks assert handler is sentry_callback + + +def test_tools_integration_in_spans(sentry_init, capture_events): + """Test that tools are properly set on spans in actual LangChain integration.""" + global llm_type + llm_type = "openai-chat" + + sentry_init( + integrations=[LangchainIntegration(include_prompts=False)], + traces_sample_rate=1.0, + ) + events = capture_events() + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant"), + ("user", "{input}"), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + ) + + global stream_result_mock + stream_result_mock = Mock( + side_effect=[ + [ + ChatGenerationChunk( + type="ChatGenerationChunk", + message=AIMessageChunk(content="Simple response"), + ), + ] + ] + ) + + llm = MockOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + agent = create_openai_tools_agent(llm, [get_word_length], prompt) + agent_executor = AgentExecutor(agent=agent, tools=[get_word_length], verbose=True) + + with start_transaction(): + list(agent_executor.stream({"input": "Hello"})) + + # Check that events were captured and contain tools data + if events: + tx = events[0] + spans = tx.get("spans", []) + + # Look for spans that should have tools data + tools_found = False + for span in spans: + span_data = span.get("data", {}) + if SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS in span_data: + tools_found = True + tools_data = span_data[SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + # Verify tools are in the expected format + assert isinstance(tools_data, (str, list)) # Could be serialized + if isinstance(tools_data, str): + # If serialized as string, should contain tool name + assert "get_word_length" in tools_data + else: + # If still a list, verify structure + assert len(tools_data) >= 1 + names = [ + tool.get("name") + for tool in tools_data + if isinstance(tool, dict) + ] + assert "get_word_length" in names + + # Ensure we found at least one span with tools data + assert tools_found, "No spans found with tools data" diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index a3c7bdd9d9..18968fb36a 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1036,7 +1036,7 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { "gen_ai.operation.name": "responses", - "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', "gen_ai.request.model": "gpt-4o", "gen_ai.system": "openai", "gen_ai.response.model": "response-model-id", @@ -1045,7 +1045,7 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": '[{"id": "message-id", "content": [{"annotations": [], "text": "the model response", "type": "output_text"}], "role": "assistant", "status": "completed", "type": "message"}]', + "gen_ai.response.text": "the model response", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -1116,7 +1116,7 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { "gen_ai.operation.name": "responses", - "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', "gen_ai.request.model": "gpt-4o", "gen_ai.response.model": "response-model-id", "gen_ai.system": "openai", @@ -1125,7 +1125,7 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": '[{"id": "message-id", "content": [{"annotations": [], "text": "the model response", "type": "output_text"}], "role": "assistant", "status": "completed", "type": "message"}]', + "gen_ai.response.text": "the model response", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -1162,7 +1162,7 @@ async def test_ai_client_span_streaming_responses_async_api( assert spans[0]["origin"] == "auto.ai.openai" assert spans[0]["data"] == { "gen_ai.operation.name": "responses", - "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?", + "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', "gen_ai.request.model": "gpt-4o", "gen_ai.response.model": "response-model-id", "gen_ai.response.streaming": True, @@ -1172,7 +1172,7 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, - "gen_ai.response.text": '[{"id": "message-id", "content": [{"annotations": [], "text": "the model response", "type": "output_text"}], "role": "assistant", "status": "completed", "type": "message"}]', + "gen_ai.response.text": "the model response", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -1332,7 +1332,7 @@ def test_streaming_responses_api( assert span["op"] == "gen_ai.responses" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == "hello" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] @@ -1387,7 +1387,7 @@ async def test_streaming_responses_api_async( assert span["op"] == "gen_ai.responses" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == "hello" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" else: assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index a3075e6415..fab8d9e13f 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -582,8 +582,9 @@ def simple_test_tool(message: str) -> str: assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4" assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7 assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0 - assert ai_client_span2["data"]["gen_ai.response.text"] == safe_serialize( - ["Task completed using the tool"] + assert ( + ai_client_span2["data"]["gen_ai.response.text"] + == "Task completed using the tool" ) assert ai_client_span2["data"]["gen_ai.system"] == "openai" assert ai_client_span2["data"]["gen_ai.usage.input_tokens.cached"] == 0