Skip to content

Commit fca6472

Browse files
dot-agi and Dwij1704 authored
fix: Agents SDK tool calls, prompt responses and other data (#1005)
* correct min version for instrumentation * get tool calls in the agents sdk * record all the entities and the llm prompts * remove duplicate LLM calls * keep a single agent span * cleanup * correct import and variable name * remove context propagation and fix issue with llm attributes * linting * some more cleanup * its `FUNCTION_SPAN_ATTRIBUTES` * forgot this one but now we good * refactor for responses instructions attribute * fix tests * get correct library version * oops --------- Co-authored-by: Dwij <[email protected]>
1 parent 0267c6f commit fca6472

File tree

8 files changed

+334
-73
lines changed

8 files changed

+334
-73
lines changed

agentops/instrumentation/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ class InstrumentorConfig(TypedDict):
169169
"agents": {
170170
"module_name": "agentops.instrumentation.openai_agents",
171171
"class_name": "OpenAIAgentsInstrumentor",
172-
"min_version": "0.1.0",
172+
"min_version": "0.0.1",
173173
},
174174
}
175175

agentops/instrumentation/openai/attributes/response.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
SpanAttributes.LLM_RESPONSE_ID: "id",
8484
SpanAttributes.LLM_REQUEST_MODEL: "model",
8585
SpanAttributes.LLM_RESPONSE_MODEL: "model",
86-
SpanAttributes.LLM_PROMPTS: "instructions",
86+
SpanAttributes.LLM_OPENAI_RESPONSE_INSTRUCTIONS: "instructions",
8787
SpanAttributes.LLM_REQUEST_MAX_TOKENS: "max_output_tokens",
8888
SpanAttributes.LLM_REQUEST_TEMPERATURE: "temperature",
8989
SpanAttributes.LLM_REQUEST_TOP_P: "top_p",

agentops/instrumentation/openai_agents/attributes/common.py

Lines changed: 236 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,25 @@
55
for extracting and formatting attributes according to OpenTelemetry semantic conventions.
66
"""
77

8-
from typing import Any
8+
from typing import Any, List, Dict, Optional
99
from agentops.logging import logger
10-
from agentops.semconv import AgentAttributes, WorkflowAttributes, SpanAttributes, InstrumentationAttributes
10+
from agentops.semconv import (
11+
AgentAttributes,
12+
WorkflowAttributes,
13+
SpanAttributes,
14+
InstrumentationAttributes,
15+
ToolAttributes,
16+
AgentOpsSpanKindValues,
17+
ToolStatus,
18+
)
19+
from agentops.helpers import safe_serialize # Import safe_serialize
1120

1221
from agentops.instrumentation.common import AttributeMap, _extract_attributes_from_mapping
1322
from agentops.instrumentation.common.attributes import get_common_attributes
1423
from agentops.instrumentation.common.objects import get_uploaded_object_attributes
1524
from agentops.instrumentation.openai.attributes.response import get_response_response_attributes
1625
from agentops.instrumentation.openai_agents import LIBRARY_NAME, LIBRARY_VERSION
26+
1727
from agentops.instrumentation.openai_agents.attributes.model import (
1828
get_model_attributes,
1929
get_model_config_attributes,
@@ -33,9 +43,10 @@
3343

3444
# Attribute mapping for FunctionSpanData
3545
FUNCTION_SPAN_ATTRIBUTES: AttributeMap = {
36-
AgentAttributes.AGENT_NAME: "name",
37-
WorkflowAttributes.WORKFLOW_INPUT: "input",
38-
WorkflowAttributes.FINAL_OUTPUT: "output",
46+
ToolAttributes.TOOL_NAME: "name",
47+
ToolAttributes.TOOL_PARAMETERS: "input",
48+
ToolAttributes.TOOL_RESULT: "output",
49+
# AgentAttributes.AGENT_NAME: "name",
3950
AgentAttributes.FROM_AGENT: "from_agent",
4051
}
4152

@@ -55,7 +66,9 @@
5566

5667
# Attribute mapping for ResponseSpanData
5768
RESPONSE_SPAN_ATTRIBUTES: AttributeMap = {
58-
WorkflowAttributes.WORKFLOW_INPUT: "input",
69+
# Don't map input here as it causes double serialization
70+
# We handle prompts manually in get_response_span_attributes
71+
SpanAttributes.LLM_RESPONSE_MODEL: "model",
5972
}
6073

6174

@@ -80,6 +93,72 @@
8093
}
8194

8295

96+
def _get_llm_messages_attributes(messages: Optional[List[Dict]], attribute_base: str) -> AttributeMap:
97+
"""
98+
Extracts attributes from a list of message dictionaries (e.g., prompts or completions).
99+
Uses the attribute_base to format the specific attribute keys.
100+
"""
101+
attributes: AttributeMap = {}
102+
if not messages:
103+
return attributes
104+
if not isinstance(messages, list):
105+
logger.warning(
106+
f"[_get_llm_messages_attributes] Expected a list of messages for base '{attribute_base}', got {type(messages)}. Value: {safe_serialize(messages)}. Returning empty."
107+
)
108+
return attributes
109+
110+
for i, msg_dict in enumerate(messages):
111+
if isinstance(msg_dict, dict):
112+
role = msg_dict.get("role")
113+
content = msg_dict.get("content")
114+
name = msg_dict.get("name")
115+
tool_calls = msg_dict.get("tool_calls")
116+
tool_call_id = msg_dict.get("tool_call_id")
117+
118+
# Common role and content
119+
if role:
120+
attributes[f"{attribute_base}.{i}.role"] = str(role)
121+
if content is not None:
122+
attributes[f"{attribute_base}.{i}.content"] = safe_serialize(content)
123+
124+
# Optional name for some roles
125+
if name:
126+
attributes[f"{attribute_base}.{i}.name"] = str(name)
127+
128+
# Tool calls (specific to assistant messages)
129+
if tool_calls and isinstance(tool_calls, list):
130+
for tc_idx, tc_dict in enumerate(tool_calls):
131+
if isinstance(tc_dict, dict):
132+
tc_id = tc_dict.get("id")
133+
tc_type = tc_dict.get("type")
134+
tc_function_data = tc_dict.get("function")
135+
136+
if tc_function_data and isinstance(tc_function_data, dict):
137+
tc_func_name = tc_function_data.get("name")
138+
tc_func_args = tc_function_data.get("arguments")
139+
140+
base_tool_call_key_formatted = f"{attribute_base}.{i}.tool_calls.{tc_idx}"
141+
if tc_id:
142+
attributes[f"{base_tool_call_key_formatted}.id"] = str(tc_id)
143+
if tc_type:
144+
attributes[f"{base_tool_call_key_formatted}.type"] = str(tc_type)
145+
if tc_func_name:
146+
attributes[f"{base_tool_call_key_formatted}.function.name"] = str(tc_func_name)
147+
if tc_func_args is not None:
148+
attributes[f"{base_tool_call_key_formatted}.function.arguments"] = safe_serialize(
149+
tc_func_args
150+
)
151+
152+
# Tool call ID (specific to tool_call_output messages)
153+
if tool_call_id:
154+
attributes[f"{attribute_base}.{i}.tool_call_id"] = str(tool_call_id)
155+
else:
156+
# If a message is not a dict, serialize its representation
157+
attributes[f"{attribute_base}.{i}.content"] = safe_serialize(msg_dict)
158+
159+
return attributes
160+
161+
83162
def get_common_instrumentation_attributes() -> AttributeMap:
84163
"""Get common instrumentation attributes for the OpenAI Agents instrumentation.
85164
@@ -109,9 +188,22 @@ def get_agent_span_attributes(span_data: Any) -> AttributeMap:
109188
Returns:
110189
Dictionary of attributes for agent span
111190
"""
112-
attributes = _extract_attributes_from_mapping(span_data, AGENT_SPAN_ATTRIBUTES)
191+
attributes = {}
113192
attributes.update(get_common_attributes())
114193

194+
attributes[SpanAttributes.AGENTOPS_SPAN_KIND] = AgentOpsSpanKindValues.AGENT.value
195+
196+
# Get agent name directly from span_data
197+
if hasattr(span_data, "name") and span_data.name:
198+
attributes[AgentAttributes.AGENT_NAME] = str(span_data.name)
199+
200+
# Get handoffs directly from span_data
201+
if hasattr(span_data, "handoffs") and span_data.handoffs:
202+
attributes[AgentAttributes.HANDOFFS] = safe_serialize(span_data.handoffs)
203+
204+
if hasattr(span_data, "tools") and span_data.tools:
205+
attributes[AgentAttributes.AGENT_TOOLS] = safe_serialize([str(getattr(t, "name", t)) for t in span_data.tools])
206+
115207
return attributes
116208

117209

@@ -128,6 +220,20 @@ def get_function_span_attributes(span_data: Any) -> AttributeMap:
128220
"""
129221
attributes = _extract_attributes_from_mapping(span_data, FUNCTION_SPAN_ATTRIBUTES)
130222
attributes.update(get_common_attributes())
223+
attributes[SpanAttributes.AGENTOPS_SPAN_KIND] = AgentOpsSpanKindValues.TOOL.value
224+
225+
# Determine tool status based on presence of error
226+
if hasattr(span_data, "error") and span_data.error:
227+
attributes[ToolAttributes.TOOL_STATUS] = ToolStatus.FAILED.value
228+
else:
229+
if hasattr(span_data, "output") and span_data.output is not None:
230+
attributes[ToolAttributes.TOOL_STATUS] = ToolStatus.SUCCEEDED.value
231+
else:
232+
# Status will be set by exporter based on span lifecycle
233+
pass
234+
235+
if hasattr(span_data, "from_agent") and span_data.from_agent:
236+
attributes[f"{AgentAttributes.AGENT}.calling_tool.name"] = str(span_data.from_agent)
131237

132238
return attributes
133239

@@ -149,6 +255,66 @@ def get_handoff_span_attributes(span_data: Any) -> AttributeMap:
149255
return attributes
150256

151257

258+
def _extract_text_from_content(content: Any) -> Optional[str]:
259+
"""Extract text from various content formats used in the Responses API.
260+
261+
Args:
262+
content: Content in various formats (str, dict, list)
263+
264+
Returns:
265+
Extracted text or None if no text found
266+
"""
267+
if isinstance(content, str):
268+
return content
269+
270+
if isinstance(content, dict):
271+
# Direct text field
272+
if "text" in content:
273+
return content["text"]
274+
# Output text type
275+
if content.get("type") == "output_text":
276+
return content.get("text", "")
277+
278+
if isinstance(content, list):
279+
text_parts = []
280+
for item in content:
281+
extracted = _extract_text_from_content(item)
282+
if extracted:
283+
text_parts.append(extracted)
284+
return " ".join(text_parts) if text_parts else None
285+
286+
return None
287+
288+
289+
def _build_prompt_messages_from_input(input_data: Any) -> List[Dict[str, Any]]:
290+
"""Build prompt messages from various input formats.
291+
292+
Args:
293+
input_data: Input data from span_data.input
294+
295+
Returns:
296+
List of message dictionaries with role and content
297+
"""
298+
messages = []
299+
300+
if isinstance(input_data, str):
301+
# Single string input - assume it's a user message
302+
messages.append({"role": "user", "content": input_data})
303+
304+
elif isinstance(input_data, list):
305+
for msg in input_data:
306+
if isinstance(msg, dict):
307+
role = msg.get("role")
308+
content = msg.get("content")
309+
310+
if role and content is not None:
311+
extracted_text = _extract_text_from_content(content)
312+
if extracted_text:
313+
messages.append({"role": role, "content": extracted_text})
314+
315+
return messages
316+
317+
152318
def get_response_span_attributes(span_data: Any) -> AttributeMap:
153319
"""Extract attributes from a ResponseSpanData object with full LLM response processing.
154320
@@ -170,8 +336,43 @@ def get_response_span_attributes(span_data: Any) -> AttributeMap:
170336
attributes = _extract_attributes_from_mapping(span_data, RESPONSE_SPAN_ATTRIBUTES)
171337
attributes.update(get_common_attributes())
172338

339+
# Process response attributes first to get all response data including instructions
173340
if span_data.response:
174-
attributes.update(get_response_response_attributes(span_data.response))
341+
response_attrs = get_response_response_attributes(span_data.response)
342+
343+
# Extract system prompt if present
344+
system_prompt = response_attrs.get(SpanAttributes.LLM_OPENAI_RESPONSE_INSTRUCTIONS)
345+
346+
prompt_messages = []
347+
# Add system prompt as first message if available
348+
if system_prompt:
349+
prompt_messages.append({"role": "system", "content": system_prompt})
350+
# Remove from response attrs to avoid duplication
351+
response_attrs.pop(SpanAttributes.LLM_OPENAI_RESPONSE_INSTRUCTIONS, None)
352+
353+
# Add conversation history from input
354+
if hasattr(span_data, "input") and span_data.input:
355+
prompt_messages.extend(_build_prompt_messages_from_input(span_data.input))
356+
357+
# Format prompts using existing function
358+
if prompt_messages:
359+
attributes.update(_get_llm_messages_attributes(prompt_messages, "gen_ai.prompt"))
360+
361+
# Remove any prompt-related attributes that might have been set by response processing
362+
response_attrs = {
363+
k: v for k, v in response_attrs.items() if not k.startswith("gen_ai.prompt") and k != "gen_ai.request.tools"
364+
}
365+
366+
# Add remaining response attributes
367+
attributes.update(response_attrs)
368+
else:
369+
# No response object, just process input as prompts
370+
if hasattr(span_data, "input") and span_data.input:
371+
prompt_messages = _build_prompt_messages_from_input(span_data.input)
372+
if prompt_messages:
373+
attributes.update(_get_llm_messages_attributes(prompt_messages, "gen_ai.prompt"))
374+
375+
attributes[SpanAttributes.AGENTOPS_SPAN_KIND] = AgentOpsSpanKindValues.LLM.value
175376

176377
return attributes
177378

@@ -181,12 +382,6 @@ def get_generation_span_attributes(span_data: Any) -> AttributeMap:
181382
182383
Generations are requests made to the `openai.completions` endpoint.
183384
184-
# TODO this has not been extensively tested yet as there is a flag that needs ot be set to use the
185-
# completions API with the Agents SDK.
186-
# We can enable chat.completions API by calling:
187-
# `from agents import set_default_openai_api`
188-
# `set_default_openai_api("chat_completions")`
189-
190385
Args:
191386
span_data: The GenerationSpanData object
192387
@@ -196,17 +391,42 @@ def get_generation_span_attributes(span_data: Any) -> AttributeMap:
196391
attributes = _extract_attributes_from_mapping(span_data, GENERATION_SPAN_ATTRIBUTES)
197392
attributes.update(get_common_attributes())
198393

394+
if SpanAttributes.LLM_PROMPTS in attributes:
395+
raw_prompt_input = attributes.pop(SpanAttributes.LLM_PROMPTS)
396+
formatted_prompt_for_llm = []
397+
if isinstance(raw_prompt_input, str):
398+
formatted_prompt_for_llm.append({"role": "user", "content": raw_prompt_input})
399+
elif isinstance(raw_prompt_input, list):
400+
temp_formatted_list = []
401+
all_strings_or_dicts = True
402+
for item in raw_prompt_input:
403+
if isinstance(item, str):
404+
temp_formatted_list.append({"role": "user", "content": item})
405+
elif isinstance(item, dict):
406+
temp_formatted_list.append(item)
407+
else:
408+
all_strings_or_dicts = False
409+
break
410+
if all_strings_or_dicts:
411+
formatted_prompt_for_llm = temp_formatted_list
412+
else:
413+
logger.warning(
414+
f"[get_generation_span_attributes] span_data.input was a list with mixed/unexpected content: {safe_serialize(raw_prompt_input)}"
415+
)
416+
417+
if formatted_prompt_for_llm:
418+
attributes.update(_get_llm_messages_attributes(formatted_prompt_for_llm, "gen_ai.prompt"))
419+
199420
if span_data.model:
200421
attributes.update(get_model_attributes(span_data.model))
201422

202-
# Process output for GenerationSpanData if available
203423
if span_data.output:
204424
attributes.update(get_generation_output_attributes(span_data.output))
205425

206-
# Add model config attributes if present
207426
if span_data.model_config:
208427
attributes.update(get_model_config_attributes(span_data.model_config))
209428

429+
attributes[SpanAttributes.AGENTOPS_SPAN_KIND] = AgentOpsSpanKindValues.LLM.value
210430
return attributes
211431

212432

0 commit comments

Comments (0)