Skip to content

Commit 189052f

Browse files
authored
feat(llmobs): add toolcall and toolresults to messages (#14385)
This introduces support for tool calls and tool results in LLMObs message annotations when using custom instrumentation. The ``LLMObs.annotate()`` method now accepts input and output data with optional ``tool_calls`` and ``tool_results`` fields for function calling scenarios. [ Docs update to be merged once this is merged ](DataDog/documentation#31231) ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent ff56508 commit 189052f

File tree

4 files changed

+241
-12
lines changed

4 files changed

+241
-12
lines changed

ddtrace/llmobs/_llmobs.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,14 +1317,20 @@ def annotate(
13171317
`rag_query_variables` - a list of variable key names that contains query
13181318
information for an LLM call
13191319
:param input_data: A single input string, dictionary, or a list of dictionaries based on the span kind:
1320-
- llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "..."},
1321-
or a list of dictionaries with the same signature.
1320+
- llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "...",
1321+
"tool_calls": ..., "tool_results": ...}, where "tool_calls" are an optional
1322+
list of tool call dictionaries with required keys: "name", "arguments", and
1323+
optional keys: "tool_id", "type"; "tool_results" are an optional list of
1324+
tool result dictionaries with required key: "result", and optional keys:
1325+
"name", "tool_id", "type" for function calling scenarios.
13221326
- embedding spans: accepts a string, list of strings, or a dictionary of form
13231327
{"text": "...", ...} or a list of dictionaries with the same signature.
13241328
- other: any JSON serializable type.
13251329
:param output_data: A single output string, dictionary, or a list of dictionaries based on the span kind:
1326-
- llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "..."},
1327-
or a list of dictionaries with the same signature.
1330+
- llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "...",
1331+
"tool_calls": ...}, where "tool_calls" are an optional list of tool call
1332+
dictionaries with required keys: "name", "arguments", and optional keys:
1333+
"tool_id", "type" for function calling scenarios.
13281334
- retrieval spans: a dictionary containing any of the key value pairs
13291335
{"name": str, "id": str, "text": str, "score": float},
13301336
or a list of dictionaries with the same signature.

ddtrace/llmobs/utils.py

Lines changed: 86 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,70 @@
1717

1818
DocumentType = Dict[str, Union[str, int, float]]
1919

20+
21+
def _extract_tool_call(tool_call: Dict[str, Any]) -> "ToolCall":
22+
"""Extract and validate a tool call dictionary."""
23+
if not isinstance(tool_call, dict):
24+
raise TypeError("Each tool_call must be a dictionary.")
25+
26+
# name and arguments are required
27+
name = tool_call.get("name")
28+
arguments = tool_call.get("arguments")
29+
30+
if not name or not isinstance(name, str):
31+
raise TypeError("ToolCall name must be a non-empty string.")
32+
if arguments is None or not isinstance(arguments, dict):
33+
raise TypeError("ToolCall arguments must be a dictionary.")
34+
35+
formatted_tool_call = ToolCall(name=name, arguments=arguments)
36+
37+
# Add optional fields if present
38+
tool_id = tool_call.get("tool_id")
39+
if tool_id and isinstance(tool_id, str):
40+
formatted_tool_call["tool_id"] = tool_id
41+
42+
tool_type = tool_call.get("type")
43+
if tool_type and isinstance(tool_type, str):
44+
formatted_tool_call["type"] = tool_type
45+
46+
return formatted_tool_call
47+
48+
49+
def _extract_tool_result(tool_result: Dict[str, Any]) -> "ToolResult":
50+
"""Extract and validate a tool result dictionary."""
51+
if not isinstance(tool_result, dict):
52+
raise TypeError("Each tool_result must be a dictionary.")
53+
54+
# result is required
55+
result = tool_result.get("result")
56+
if result is None or not isinstance(result, str):
57+
raise TypeError("ToolResult result must be a string.")
58+
59+
formatted_tool_result = ToolResult(result=result)
60+
61+
# Add optional fields if present
62+
name = tool_result.get("name")
63+
if name and isinstance(name, str):
64+
formatted_tool_result["name"] = name
65+
66+
tool_id = tool_result.get("tool_id")
67+
if tool_id and isinstance(tool_id, str):
68+
formatted_tool_result["tool_id"] = tool_id
69+
70+
tool_type = tool_result.get("type")
71+
if tool_type and isinstance(tool_type, str):
72+
formatted_tool_result["type"] = tool_type
73+
74+
return formatted_tool_result
75+
76+
2077
ExportedLLMObsSpan = TypedDict("ExportedLLMObsSpan", {"span_id": str, "trace_id": str})
2178
Document = TypedDict("Document", {"name": str, "id": str, "text": str, "score": float}, total=False)
22-
Message = TypedDict("Message", {"content": str, "role": str}, total=False)
79+
Message = TypedDict(
80+
"Message",
81+
{"content": str, "role": str, "tool_calls": List["ToolCall"], "tool_results": List["ToolResult"]},
82+
total=False,
83+
)
2384
Prompt = TypedDict(
2485
"Prompt",
2586
{
@@ -66,7 +127,7 @@
66127

67128

68129
class Messages:
69-
def __init__(self, messages: Union[List[Dict[str, str]], Dict[str, str], str]):
130+
def __init__(self, messages: Union[List[Dict[str, Any]], Dict[str, Any], str]):
70131
self.messages = []
71132
if not isinstance(messages, list):
72133
messages = [messages] # type: ignore[list-item]
@@ -76,16 +137,33 @@ def __init__(self, messages: Union[List[Dict[str, str]], Dict[str, str], str]):
76137
continue
77138
elif not isinstance(message, dict):
78139
raise TypeError("messages must be a string, dictionary, or list of dictionaries.")
140+
79141
content = message.get("content", "")
80142
role = message.get("role")
81143
if not isinstance(content, str):
82144
raise TypeError("Message content must be a string.")
83-
if not role:
84-
self.messages.append(Message(content=content))
85-
continue
86-
if not isinstance(role, str):
87-
raise TypeError("Message role must be a string, and one of .")
88-
self.messages.append(Message(content=content, role=role))
145+
146+
msg_dict = Message(content=content)
147+
if role:
148+
if not isinstance(role, str):
149+
raise TypeError("Message role must be a string.")
150+
msg_dict["role"] = role
151+
152+
tool_calls = message.get("tool_calls")
153+
if tool_calls is not None:
154+
if not isinstance(tool_calls, list):
155+
raise TypeError("tool_calls must be a list.")
156+
formatted_tool_calls = [_extract_tool_call(tool_call) for tool_call in tool_calls]
157+
msg_dict["tool_calls"] = formatted_tool_calls
158+
159+
tool_results = message.get("tool_results")
160+
if tool_results is not None:
161+
if not isinstance(tool_results, list):
162+
raise TypeError("tool_results must be a list.")
163+
formatted_tool_results = [_extract_tool_result(tool_result) for tool_result in tool_results]
164+
msg_dict["tool_results"] = formatted_tool_results
165+
166+
self.messages.append(msg_dict)
89167

90168

91169
class Documents:
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
features:
3+
- |
4+
llmobs: This introduces support for tool calls and tool results in LLMObs message annotations when using custom instrumentation.
5+
The ``LLMObs.annotate()`` method now accepts input and output data with optional
6+
``tool_calls`` and ``tool_results`` fields for function calling scenarios.

tests/llmobs/test_utils.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,145 @@ def test_messages_with_no_role_is_ok():
5656
assert messages.messages == [{"content": "hello"}, {"content": "world"}]
5757

5858

59+
def test_messages_with_tool_calls():
    """A message carrying a fully populated tool call is kept as-is by Messages."""

    def build_message():
        # Return fresh objects on every call so the input handed to Messages and the
        # expectation never share state.
        return {
            "content": "I'll help you with that calculation.",
            "role": "assistant",
            "tool_calls": [
                {
                    "name": "calculator",
                    "arguments": {"operation": "add", "a": 5, "b": 3},
                    "tool_id": "call_123",
                    "type": "function",
                }
            ],
        }

    parsed = Messages([build_message()])
    assert parsed.messages == [build_message()]
92+
93+
94+
def test_messages_with_tool_results():
    """A message carrying a fully populated tool result is kept as-is by Messages."""

    def build_message():
        # Return fresh objects on every call so the input handed to Messages and the
        # expectation never share state.
        return {
            "content": "",
            "role": "tool",
            "tool_results": [
                {"name": "calculator", "result": "8", "tool_id": "call_123", "type": "function_result"}
            ],
        }

    parsed = Messages([build_message()])
    assert parsed.messages == [build_message()]
115+
116+
117+
def test_messages_with_tool_calls_minimal():
    """A tool call with only the required name and arguments keys is accepted."""

    def build_message():
        # Return fresh objects on every call so the input handed to Messages and the
        # expectation never share state.
        return {
            "content": "Using calculator",
            "role": "assistant",
            "tool_calls": [{"name": "calculator", "arguments": {"x": 10}}],
        }

    parsed = Messages([build_message()])
    assert parsed.messages == [build_message()]
136+
137+
138+
def test_messages_with_tool_results_minimal():
    """A tool result with only the required result key is accepted."""

    def build_message():
        # Return fresh objects on every call so the input handed to Messages and the
        # expectation never share state.
        return {"content": "", "role": "tool", "tool_results": [{"result": "Success"}]}

    parsed = Messages([build_message()])
    assert parsed.messages == [build_message()]
143+
144+
145+
def test_messages_with_both_tool_calls_and_results():
    """A single message may carry tool calls and tool results at the same time."""

    def build_message():
        # Return fresh objects on every call so the input handed to Messages and the
        # expectation never share state.
        return {
            "content": "Processing...",
            "role": "assistant",
            "tool_calls": [{"name": "calculator", "arguments": {"x": 5}}],
            "tool_results": [{"result": "10"}],
        }

    parsed = Messages([build_message()])
    assert parsed.messages == [build_message()]
166+
167+
168+
def test_messages_tool_calls_missing_required_fields():
    """Tool calls with a missing or invalid required field raise TypeError."""
    name_error = "ToolCall name must be a non-empty string"
    arguments_error = "ToolCall arguments must be a dictionary"

    # Each entry pairs a malformed tool call with the error message it must trigger.
    invalid_cases = [
        ({"arguments": {"x": 5}}, name_error),  # missing name field
        ({"name": "calculator"}, arguments_error),  # missing arguments field
        ({"name": "", "arguments": {"x": 5}}, name_error),  # empty name field
        ({"name": "calculator", "arguments": "invalid"}, arguments_error),  # invalid arguments type
    ]

    for tool_call, expected_error in invalid_cases:
        with pytest.raises(TypeError, match=expected_error):
            Messages([{"content": "test", "tool_calls": [tool_call]}])
185+
186+
187+
def test_messages_tool_results_missing_required_fields():
    """Tool results with a missing or invalid required field raise TypeError."""
    result_error = "ToolResult result must be a string"

    # Malformed tool results: one without "result", one whose "result" is not a string.
    invalid_tool_results = [
        {"name": "calculator"},  # missing result field
        {"result": 123},  # invalid result type
    ]

    for tool_result in invalid_tool_results:
        with pytest.raises(TypeError, match=result_error):
            Messages([{"content": "test", "tool_results": [tool_result]}])
196+
197+
59198
def test_documents_with_string():
60199
documents = Documents("hello")
61200
assert documents.documents == [{"text": "hello"}]

0 commit comments

Comments
 (0)