Skip to content

Commit bc343f1

Browse files
authored
chore(llmobs): [MLOB-3828] link input and output guardrail spans for openai agents (#14488)
This PR implements span linking for OpenAI Agents guardrail spans. For input guardrails, a span link is created from the guardrail span to the first LLM span that is a sibling of the input guardrail (i.e., shares the same LLMObs parent). This implementation prevents a link from the guardrail span to the openai_agents.response span from being created while still linking the guardrail to the Triage Agent (LLM) span. <img width="708" height="264" alt="image" src="https://github.com/user-attachments/assets/35df597a-9d27-464e-b302-9b241e2b4f4d" /> For output guardrails, a span link is created from the last finished LLM span (for a given agent run) to the output guardrail span. <img width="2766" height="178" alt="image" src="https://github.com/user-attachments/assets/35773ad9-d512-4be5-8db9-8a559ab29047" /> With this implementation, we get a trace like the following, which better represents the guardrail's functionality of checking the user input to the first agent and the final output of the last agent. <img width="488" height="1296" alt="image" src="https://github.com/user-attachments/assets/4aac3564-d9bb-491e-9155-5847cec1f8ce" /> This PR also removes the experimental `_record_object` method from the LLMObs service since it is not being used anywhere.
## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent b2b6813 commit bc343f1

File tree

7 files changed

+361
-67
lines changed

7 files changed

+361
-67
lines changed

ddtrace/llmobs/_constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@
9393
DISPATCH_ON_LLM_TOOL_CHOICE = "on_llm_tool_choice"
9494
DISPATCH_ON_TOOL_CALL = "on_tool_call"
9595

96+
DISPATCH_ON_GUARDRAIL_SPAN_START = "on_guardrail_span_start"
97+
DISPATCH_ON_LLM_SPAN_FINISH = "on_llm_span_finish"
98+
DISPATCH_ON_OPENAI_AGENT_SPAN_FINISH = "on_openai_agent_span_finish"
99+
96100
# Tool call arguments are used to lookup the associated tool call info.
97101
# When there are no tool call args, we use this as a place-holder lookup key
98102
OAI_HANDOFF_TOOL_ARG = "{}"

ddtrace/llmobs/_integrations/openai_agents.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
from ddtrace.internal.utils import get_argument_value
1313
from ddtrace.internal.utils.formats import format_trace_id
1414
from ddtrace.llmobs._constants import AGENT_MANIFEST
15+
from ddtrace.llmobs._constants import DISPATCH_ON_GUARDRAIL_SPAN_START
1516
from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
17+
from ddtrace.llmobs._constants import DISPATCH_ON_OPENAI_AGENT_SPAN_FINISH
1618
from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL
1719
from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
1820
from ddtrace.llmobs._constants import INPUT_MESSAGES
@@ -79,6 +81,10 @@ def trace(
7981
elif oai_span:
8082
self.oai_to_llmobs_span[oai_span.span_id] = llmobs_span
8183
self._llmobs_update_trace_info_input(oai_span, llmobs_span)
84+
85+
if oai_span.span_type == "guardrail":
86+
core.dispatch(DISPATCH_ON_GUARDRAIL_SPAN_START, (llmobs_span,))
87+
8288
return llmobs_span
8389

8490
def _llmobs_set_tags(
@@ -131,6 +137,7 @@ def _llmobs_set_tags(
131137
self._llmobs_set_handoff_attributes(span, oai_span)
132138
elif span_type == "agent":
133139
self._llmobs_set_agent_attributes(span, oai_span)
140+
core.dispatch(DISPATCH_ON_OPENAI_AGENT_SPAN_FINISH, ())
134141
elif span_type == "custom":
135142
custom_data = oai_span.formatted_custom_data
136143
if custom_data:

ddtrace/llmobs/_llmobs.py

Lines changed: 20 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,10 @@
4848
from ddtrace.llmobs._constants import ANNOTATIONS_CONTEXT_ID
4949
from ddtrace.llmobs._constants import DECORATOR
5050
from ddtrace.llmobs._constants import DEFAULT_PROJECT_NAME
51+
from ddtrace.llmobs._constants import DISPATCH_ON_GUARDRAIL_SPAN_START
52+
from ddtrace.llmobs._constants import DISPATCH_ON_LLM_SPAN_FINISH
5153
from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
54+
from ddtrace.llmobs._constants import DISPATCH_ON_OPENAI_AGENT_SPAN_FINISH
5255
from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL
5356
from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
5457
from ddtrace.llmobs._constants import EXPERIMENT_CSV_FIELD_MAX_SIZE
@@ -91,7 +94,6 @@
9194
from ddtrace.llmobs._experiment import JSONType
9295
from ddtrace.llmobs._utils import AnnotationContext
9396
from ddtrace.llmobs._utils import LinkTracker
94-
from ddtrace.llmobs._utils import ToolCallTracker
9597
from ddtrace.llmobs._utils import _get_ml_app
9698
from ddtrace.llmobs._utils import _get_nearest_llmobs_ancestor
9799
from ddtrace.llmobs._utils import _get_session_id
@@ -219,8 +221,6 @@ def __init__(
219221
self._annotations: List[Tuple[str, str, Dict[str, Any]]] = []
220222
self._annotation_context_lock = forksafe.RLock()
221223

222-
self._tool_call_tracker = ToolCallTracker()
223-
224224
def _on_span_start(self, span: Span) -> None:
225225
if self.enabled and span.span_type == SpanTypes.LLM:
226226
self._activate_llmobs_span(span)
@@ -256,6 +256,9 @@ def _llmobs_span_event(self, span: Span) -> Optional[LLMObsSpanEvent]:
256256
if not span_kind:
257257
raise KeyError("Span kind not found in span context")
258258

259+
if span_kind == "llm":
260+
core.dispatch(DISPATCH_ON_LLM_SPAN_FINISH, (span,))
261+
259262
llmobs_span = LLMObsSpan()
260263
_dd_attrs = {
261264
"span_id": str(span.span_id),
@@ -501,9 +504,13 @@ def _stop_service(self) -> None:
501504
core.reset_listeners("threading.submit", self._current_trace_context)
502505
core.reset_listeners("threading.execution", self._llmobs_context_provider.activate)
503506

504-
core.reset_listeners(DISPATCH_ON_LLM_TOOL_CHOICE, self._tool_call_tracker.on_llm_tool_choice)
505-
core.reset_listeners(DISPATCH_ON_TOOL_CALL, self._tool_call_tracker.on_tool_call)
506-
core.reset_listeners(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, self._tool_call_tracker.on_tool_call_output_used)
507+
core.reset_listeners(DISPATCH_ON_LLM_TOOL_CHOICE, self._link_tracker.on_llm_tool_choice)
508+
core.reset_listeners(DISPATCH_ON_TOOL_CALL, self._link_tracker.on_tool_call)
509+
core.reset_listeners(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, self._link_tracker.on_tool_call_output_used)
510+
511+
core.reset_listeners(DISPATCH_ON_GUARDRAIL_SPAN_START, self._link_tracker.on_guardrail_span_start)
512+
core.reset_listeners(DISPATCH_ON_LLM_SPAN_FINISH, self._link_tracker.on_llm_span_finish)
513+
core.reset_listeners(DISPATCH_ON_OPENAI_AGENT_SPAN_FINISH, self._link_tracker.on_openai_agent_span_finish)
507514

508515
forksafe.unregister(self._child_after_fork)
509516

@@ -614,9 +621,13 @@ def enable(
614621
core.on("threading.submit", cls._instance._current_trace_context, "llmobs_ctx")
615622
core.on("threading.execution", cls._instance._llmobs_context_provider.activate)
616623

617-
core.on(DISPATCH_ON_LLM_TOOL_CHOICE, cls._instance._tool_call_tracker.on_llm_tool_choice)
618-
core.on(DISPATCH_ON_TOOL_CALL, cls._instance._tool_call_tracker.on_tool_call)
619-
core.on(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, cls._instance._tool_call_tracker.on_tool_call_output_used)
624+
core.on(DISPATCH_ON_LLM_TOOL_CHOICE, cls._instance._link_tracker.on_llm_tool_choice)
625+
core.on(DISPATCH_ON_TOOL_CALL, cls._instance._link_tracker.on_tool_call)
626+
core.on(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, cls._instance._link_tracker.on_tool_call_output_used)
627+
628+
core.on(DISPATCH_ON_GUARDRAIL_SPAN_START, cls._instance._link_tracker.on_guardrail_span_start)
629+
core.on(DISPATCH_ON_LLM_SPAN_FINISH, cls._instance._link_tracker.on_llm_span_finish)
630+
core.on(DISPATCH_ON_OPENAI_AGENT_SPAN_FINISH, cls._instance._link_tracker.on_openai_agent_span_finish)
620631

621632
atexit.register(cls.disable)
622633
telemetry_writer.product_activated(TELEMETRY_APM_PRODUCT.LLMOBS, True)
@@ -811,41 +822,6 @@ def disable(cls) -> None:
811822

812823
log.debug("%s disabled", cls.__name__)
813824

814-
def _record_object(self, span, obj, input_or_output):
815-
if obj is None:
816-
return
817-
span_links = []
818-
for span_link in self._link_tracker.get_span_links_from_object(obj):
819-
try:
820-
if span_link["attributes"]["from"] == "input" and input_or_output == "output":
821-
continue
822-
except KeyError:
823-
log.debug("failed to read span link: ", span_link)
824-
continue
825-
span_links.append(
826-
{
827-
"trace_id": span_link["trace_id"],
828-
"span_id": span_link["span_id"],
829-
"attributes": {
830-
"from": span_link["attributes"]["from"],
831-
"to": input_or_output,
832-
},
833-
}
834-
)
835-
self._tag_span_links(span, span_links)
836-
self._link_tracker.add_span_links_to_object(
837-
obj,
838-
[
839-
{
840-
"trace_id": self.export_span(span)["trace_id"],
841-
"span_id": self.export_span(span)["span_id"],
842-
"attributes": {
843-
"from": input_or_output,
844-
},
845-
}
846-
],
847-
)
848-
849825
def _tag_span_links(self, span, span_links):
850826
if not span_links:
851827
return

ddtrace/llmobs/_utils.py

Lines changed: 55 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import Dict
66
from typing import List
77
from typing import Optional
8+
from typing import Set
89
from typing import Tuple
910
from typing import Union
1011

@@ -88,23 +89,6 @@ def validate_prompt(prompt: dict) -> Dict[str, Union[str, dict, List[str]]]:
8889
return validated_prompt
8990

9091

91-
class LinkTracker:
92-
def __init__(self, object_span_links=None):
93-
self._object_span_links = object_span_links or {}
94-
95-
def get_object_id(self, obj):
96-
return f"{type(obj).__name__}_{id(obj)}"
97-
98-
def add_span_links_to_object(self, obj, span_links):
99-
obj_id = self.get_object_id(obj)
100-
if obj_id not in self._object_span_links:
101-
self._object_span_links[obj_id] = []
102-
self._object_span_links[obj_id] += span_links
103-
104-
def get_span_links_from_object(self, obj):
105-
return self._object_span_links.get(self.get_object_id(obj), [])
106-
107-
10892
class AnnotationContext:
10993
def __init__(self, _register_annotator, _deregister_annotator):
11094
self._register_annotator = _register_annotator
@@ -298,12 +282,20 @@ class TrackedToolCall:
298282
tool_kind: str = "function" # one of "function", "handoff"
299283

300284

301-
class ToolCallTracker:
302-
"""Used to track tool data and their associated llm/tool spans for span linking."""
285+
class LinkTracker:
286+
"""
287+
This class is used to create span links across integrations.
288+
289+
The primary use cases are:
290+
- Linking LLM spans to their associated tool spans and vice versa
291+
- Linking LLM spans to their associated guardrail spans and vice versa
292+
"""
303293

304294
def __init__(self):
305295
self._tool_calls: Dict[str, TrackedToolCall] = {} # maps tool id's to tool call data
306296
self._lookup_tool_id: Dict[Tuple[str, str], str] = {} # maps (tool_name, arguments) to tool id's
297+
self._active_guardrail_spans: Set[Span] = set()
298+
self._last_llm_span: Optional[Span] = None
307299

308300
def on_llm_tool_choice(
309301
self, tool_id: str, tool_name: str, arguments: str, llm_span_context: Dict[str, str]
@@ -379,3 +371,47 @@ def on_tool_call_output_used(self, tool_id: str, llm_span: Span) -> None:
379371
"output",
380372
"input",
381373
)
374+
375+
def on_llm_span_finish(self, span: Span) -> None:
376+
"""
377+
Called when an LLM span event is created. If the LLM span is the first LLM span,
378+
it will consume all active guardrail links.
379+
"""
380+
self._last_llm_span = span
381+
spans_to_remove = set()
382+
for guardrail_span in self._active_guardrail_spans:
383+
# some guardrail spans may have LLM spans as children which we don't want to link to
384+
if _get_nearest_llmobs_ancestor(guardrail_span) == _get_nearest_llmobs_ancestor(span):
385+
add_span_link(
386+
span,
387+
str(guardrail_span.span_id),
388+
format_trace_id(guardrail_span.trace_id),
389+
"output",
390+
"input",
391+
)
392+
spans_to_remove.add(guardrail_span)
393+
self._active_guardrail_spans -= spans_to_remove
394+
395+
def on_guardrail_span_start(self, span: Span) -> None:
396+
"""
397+
Called when a guardrail span starts. This is used to track the active guardrail
398+
spans and link the output of the last LLM span to the input of the guardrail span.
399+
"""
400+
self._active_guardrail_spans.add(span)
401+
if self._last_llm_span is not None and _get_nearest_llmobs_ancestor(span) == _get_nearest_llmobs_ancestor(
402+
self._last_llm_span
403+
):
404+
add_span_link(
405+
span,
406+
str(self._last_llm_span.span_id),
407+
format_trace_id(self._last_llm_span.trace_id),
408+
"output",
409+
"input",
410+
)
411+
412+
def on_openai_agent_span_finish(self) -> None:
413+
"""
414+
Called when an OpenAI agent span finishes. This is used to reset the last LLM span
415+
since output guardrails are only linked to the last LLM span for a particular agent.
416+
"""
417+
self._last_llm_span = None

0 commit comments

Comments
 (0)