Skip to content

Commit fe23980

Browse files
authored
chore(llmobs): loosen restrictions on experiment spans IO (#14266)
Experiment spans' IO should be free-form, accepting anything from structured to unstructured data. Before: https://dddev.datadoghq.com/llm/experiments/0a623ad2-5a4f-4713-9bea-8b614ec640d9 <img width="1741" height="336" alt="image" src="https://github.com/user-attachments/assets/24ff5bfb-be4b-4df7-94f4-a80a938fa67a" /> After: https://dddev.datadoghq.com/llm/experiments/0b7eb94d-b3de-49e8-8c9b-90fa167152b9 <img width="1748" height="273" alt="image" src="https://github.com/user-attachments/assets/92a7fd31-e8ce-40fb-8d7c-40ca7b03ffb7" /> ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent 2aa7700 commit fe23980

File tree

4 files changed

+35
-7
lines changed

4 files changed

+35
-7
lines changed

ddtrace/llmobs/_constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,6 @@
103103

104104
EXPERIMENT_ID_KEY = "_ml_obs.experiment_id"
105105
EXPERIMENT_EXPECTED_OUTPUT = "_ml_obs.meta.input.expected_output"
106+
EXPERIMENTS_INPUT = "_ml_obs.meta.input"
107+
EXPERIMENTS_OUTPUT = "_ml_obs.meta.output"
106108
DEFAULT_PROJECT_NAME = "default-project"

ddtrace/llmobs/_experiment.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from ddtrace.llmobs._constants import DD_SITES_NEEDING_APP_SUBDOMAIN
2828
from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT
2929
from ddtrace.llmobs._utils import convert_tags_dict_to_list
30+
from ddtrace.llmobs._utils import safe_json
3031

3132

3233
if TYPE_CHECKING:
@@ -365,7 +366,7 @@ def _process_record(self, idx_record: Tuple[int, DatasetRecord]) -> Optional[Tas
365366
except Exception:
366367
span.set_exc_info(*sys.exc_info())
367368
self._llmobs_instance.annotate(span, input_data=input_data, output_data=output_data, tags=tags)
368-
span._set_ctx_item(EXPERIMENT_EXPECTED_OUTPUT, record["expected_output"])
369+
span._set_ctx_item(EXPERIMENT_EXPECTED_OUTPUT, safe_json(record["expected_output"]))
369370
return {
370371
"idx": idx,
371372
"span_id": span_id,

ddtrace/llmobs/_llmobs.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
from ddtrace.llmobs._constants import EXPERIMENT_CSV_FIELD_MAX_SIZE
5555
from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT
5656
from ddtrace.llmobs._constants import EXPERIMENT_ID_KEY
57+
from ddtrace.llmobs._constants import EXPERIMENTS_INPUT
58+
from ddtrace.llmobs._constants import EXPERIMENTS_OUTPUT
5759
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
5860
from ddtrace.llmobs._constants import INPUT_MESSAGES
5961
from ddtrace.llmobs._constants import INPUT_PROMPT
@@ -278,9 +280,18 @@ def _llmobs_span_event(self, span: Span) -> Optional[LLMObsSpanEvent]:
278280

279281
if span.context.get_baggage_item(EXPERIMENT_ID_KEY):
280282
_dd_attrs["scope"] = "experiments"
281-
expected_output = span._get_ctx_item(EXPERIMENT_EXPECTED_OUTPUT)
282-
if span_kind == "experiment" and expected_output:
283-
meta["expected_output"] = expected_output
283+
if span_kind == "experiment":
284+
expected_output = span._get_ctx_item(EXPERIMENT_EXPECTED_OUTPUT)
285+
if expected_output:
286+
meta["expected_output"] = expected_output
287+
288+
input_data = span._get_ctx_item(EXPERIMENTS_INPUT)
289+
if input_data:
290+
meta["input"] = input_data
291+
292+
output_data = span._get_ctx_item(EXPERIMENTS_OUTPUT)
293+
if output_data:
294+
meta["output"] = output_data
284295

285296
input_messages = span._get_ctx_item(INPUT_MESSAGES)
286297
if span_kind == "llm" and input_messages is not None:
@@ -1366,6 +1377,8 @@ def annotate(
13661377
error = cls._tag_embedding_io(span, input_documents=input_data, output_text=output_data)
13671378
elif span_kind == "retrieval":
13681379
error = cls._tag_retrieval_io(span, input_text=input_data, output_documents=output_data)
1380+
elif span_kind == "experiment":
1381+
cls._tag_freeform_io(span, input_value=input_data, output_value=output_data)
13691382
else:
13701383
cls._tag_text_io(span, input_value=input_data, output_value=output_data)
13711384
finally:
@@ -1447,6 +1460,18 @@ def _tag_text_io(cls, span, input_value=None, output_value=None):
14471460
if output_value is not None:
14481461
span._set_ctx_item(OUTPUT_VALUE, safe_json(output_value))
14491462

1463+
@classmethod
1464+
def _tag_freeform_io(cls, span, input_value=None, output_value=None):
1465+
"""Tags input/output values for experient spans.
1466+
Will be mapped to span's `meta.{input,output}` fields.
1467+
this is meant to be non restrictive on user's data, experiments allow
1468+
arbitrary structured or non structured IO values in its spans
1469+
"""
1470+
if input_value is not None:
1471+
span._set_ctx_item(EXPERIMENTS_INPUT, safe_json(input_value))
1472+
if output_value is not None:
1473+
span._set_ctx_item(EXPERIMENTS_OUTPUT, safe_json(output_value))
1474+
14501475
@staticmethod
14511476
def _set_dict_attribute(span: Span, key, value: Dict[str, Any]) -> None:
14521477
"""Sets a given LLM Obs span attribute with a dictionary key/values.

tests/llmobs/test_experiments.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,9 +1125,9 @@ def test_experiment_span_written_to_experiment_scope(llmobs, llmobs_events, test
11251125
for key in ("span_id", "trace_id", "parent_id", "start_ns", "duration", "metrics"):
11261126
assert event[key] == mock.ANY
11271127
assert event["status"] == "ok"
1128-
assert event["meta"]["input"] == {"value": '{"prompt": "What is the capital of France?"}'}
1129-
assert event["meta"]["output"] == {"value": '{"prompt": "What is the capital of France?"}'}
1130-
assert event["meta"]["expected_output"] == {"answer": "Paris"}
1128+
assert event["meta"]["input"] == '{"prompt": "What is the capital of France?"}'
1129+
assert event["meta"]["output"] == '{"prompt": "What is the capital of France?"}'
1130+
assert event["meta"]["expected_output"] == '{"answer": "Paris"}'
11311131
assert "dataset_id:{}".format(test_dataset_one_record._id) in event["tags"]
11321132
assert "dataset_record_id:{}".format(test_dataset_one_record._records[0]["record_id"]) in event["tags"]
11331133
assert "experiment_id:1234567890" in event["tags"]

0 commit comments

Comments
 (0)