Skip to content

Commit 2aa7700

Browse files
authored
chore(llmobs): fix deletion conflicts with insertions (#14255)
consider a case where a dataset is created, and the local state is being manipulated, specifically: 1. records are inserted / appended 2. appended records are then deleted 3. then a push occurs when this happens, the POST to batch_update will have the records in the records to be inserted, and the deleted_ids keys will contain empty strings for the new records, leading to backend error before <img width="934" height="923" alt="image" src="https://github.com/user-attachments/assets/72ce1c0c-2246-4e60-ab8c-ef39764b1b6e" /> leads to[ this backend error](https://app.datadoghq.com/apm/trace/689634c90000000023a901545cda196b?colorLegendSort=time&colsTraces=service%2Cresource_name%2C%40duration%2C%40_span.count%2C%40_duration.by_service&graphType=flamegraph&shouldShowLegend=true&spanID=2939228585138612840&spanViewType=errors&timeHint=1754674377308.8823&traceQuery=) [ after](https://dddev.datadoghq.com/llm/datasets/e9d31c52-f28e-4c9c-ab5a-eaa846e6d0a5?page=1) <img width="977" height="901" alt="image" src="https://github.com/user-attachments/assets/c407e0dc-2a7f-4170-8116-d6cadcd6d5fc" /> ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent e3701e2 commit 2aa7700

8 files changed

+354
-8
lines changed

ddtrace/llmobs/_experiment.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from typing import Union
1515
from typing import cast
1616
from typing import overload
17+
import uuid
1718

1819
from typing_extensions import NotRequired
1920

@@ -95,7 +96,7 @@ class Dataset:
9596
_records: List[DatasetRecord]
9697
_version: int
9798
_dne_client: "LLMObsExperimentsClient"
98-
_new_records: List[DatasetRecordRaw]
99+
_new_records_by_record_id: Dict[str, DatasetRecordRaw]
99100
_updated_record_ids_to_new_fields: Dict[str, UpdatableDatasetRecord]
100101
_deleted_record_ids: List[str]
101102

@@ -114,7 +115,7 @@ def __init__(
114115
self._version = version
115116
self._dne_client = _dne_client
116117
self._records = records
117-
self._new_records = []
118+
self._new_records_by_record_id = {}
118119
self._updated_record_ids_to_new_fields = {}
119120
self._deleted_record_ids = []
120121

@@ -136,16 +137,16 @@ def push(self) -> None:
136137

137138
updated_records = list(self._updated_record_ids_to_new_fields.values())
138139
new_version, new_record_ids = self._dne_client.dataset_batch_update(
139-
self._id, self._new_records, updated_records, self._deleted_record_ids
140+
self._id, list(self._new_records_by_record_id.values()), updated_records, self._deleted_record_ids
140141
)
141142

142143
# attach record ids to newly created records
143-
for record, record_id in zip(self._new_records, new_record_ids):
144+
for record, record_id in zip(self._new_records_by_record_id.values(), new_record_ids):
144145
record["record_id"] = record_id # type: ignore
145146

146147
# FIXME: we don't get version numbers in responses to deletion requests
147148
self._version = new_version if new_version != -1 else self._version + 1
148-
self._new_records = []
149+
self._new_records_by_record_id = {}
149150
self._deleted_record_ids = []
150151
self._updated_record_ids_to_new_fields = {}
151152

@@ -164,19 +165,34 @@ def update(self, index: int, record: DatasetRecordRaw) -> None:
164165
self._records[index] = {**self._records[index], **record, "record_id": record_id}
165166

166167
def append(self, record: DatasetRecordRaw) -> None:
167-
r: DatasetRecord = {**record, "record_id": ""}
168+
record_id: str = uuid.uuid4().hex
169+
# this record ID will be discarded after push, BE will generate a new one, this is just
170+
# for tracking new records locally before the push
171+
r: DatasetRecord = {**record, "record_id": record_id}
168172
# keep the same reference in both lists to enable us to update the record_id after push
169-
self._new_records.append(r)
173+
self._new_records_by_record_id[record_id] = r
170174
self._records.append(r)
171175

172176
def delete(self, index: int) -> None:
173177
record_id = self._records[index]["record_id"]
174-
self._deleted_record_ids.append(record_id)
178+
should_append_to_be_deleted = True
179+
175180
del self._records[index]
176181

182+
if record_id is None or record_id == "":
183+
logger.warning("encountered unexpected record_id on deletion %s", record_id)
184+
return
185+
177186
if record_id in self._updated_record_ids_to_new_fields:
178187
del self._updated_record_ids_to_new_fields[record_id]
179188

189+
if record_id in self._new_records_by_record_id:
190+
del self._new_records_by_record_id[record_id]
191+
should_append_to_be_deleted = False
192+
193+
if should_append_to_be_deleted:
194+
self._deleted_record_ids.append(record_id)
195+
180196
@property
181197
def url(self) -> str:
182198
# FIXME: will not work for subdomain orgs
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "id": "cfdb514f-92b0-404f-9b3f-020f82b0cd51",
4+
"attributes": {"insert_records": [{"input": {"prompt": "What is the capital
5+
of Sweden?"}, "expected_output": null, "metadata": null}], "update_records":
6+
[], "delete_records": ["0c84833f-c2a2-43e0-bbbe-5dee1336c5f3"]}}}'
7+
headers:
8+
Accept:
9+
- '*/*'
10+
? !!python/object/apply:multidict._multidict.istr
11+
- Accept-Encoding
12+
: - identity
13+
Connection:
14+
- keep-alive
15+
Content-Length:
16+
- '294'
17+
? !!python/object/apply:multidict._multidict.istr
18+
- Content-Type
19+
: - application/json
20+
User-Agent:
21+
- python-requests/2.32.3
22+
method: POST
23+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/cfdb514f-92b0-404f-9b3f-020f82b0cd51/batch_update
24+
response:
25+
body:
26+
string: '{"data":[{"id":"e6c19140-44dd-41ae-8617-adb52e2a79c1","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T20:50:03.226626591Z","dataset_id":"cfdb514f-92b0-404f-9b3f-020f82b0cd51","input":{"prompt":"What
27+
is the capital of Sweden?"},"updated_at":"2025-08-08T20:50:03.226626591Z","version":2}}]}'
28+
headers:
29+
content-length:
30+
- '352'
31+
content-security-policy:
32+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
33+
content-type:
34+
- application/vnd.api+json
35+
date:
36+
- Fri, 08 Aug 2025 20:50:03 GMT
37+
strict-transport-security:
38+
- max-age=31536000; includeSubDomains; preload
39+
vary:
40+
- Accept-Encoding
41+
x-content-type-options:
42+
- nosniff
43+
x-frame-options:
44+
- SAMEORIGIN
45+
status:
46+
code: 200
47+
message: OK
48+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "id": "cfdb514f-92b0-404f-9b3f-020f82b0cd51",
4+
"attributes": {"insert_records": [{"input": {"prompt": "What is the capital
5+
of France?"}, "expected_output": {"answer": "Paris"}, "metadata": null}, {"input":
6+
{"prompt": "What is the capital of Italy?"}, "expected_output": {"answer": "Rome"},
7+
"metadata": null}], "update_records": [], "delete_records": []}}}'
8+
headers:
9+
Accept:
10+
- '*/*'
11+
? !!python/object/apply:multidict._multidict.istr
12+
- Accept-Encoding
13+
: - identity
14+
Connection:
15+
- keep-alive
16+
Content-Length:
17+
- '384'
18+
? !!python/object/apply:multidict._multidict.istr
19+
- Content-Type
20+
: - application/json
21+
User-Agent:
22+
- python-requests/2.32.3
23+
method: POST
24+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/cfdb514f-92b0-404f-9b3f-020f82b0cd51/batch_update
25+
response:
26+
body:
27+
string: '{"data":[{"id":"0c84833f-c2a2-43e0-bbbe-5dee1336c5f3","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T20:49:58.695339032Z","dataset_id":"cfdb514f-92b0-404f-9b3f-020f82b0cd51","expected_output":{"answer":"Paris"},"input":{"prompt":"What
28+
is the capital of France?"},"updated_at":"2025-08-08T20:49:58.695339032Z","version":1}},{"id":"0a37e076-a220-427b-babe-74b9b7ca74a1","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T20:49:58.695339032Z","dataset_id":"cfdb514f-92b0-404f-9b3f-020f82b0cd51","expected_output":{"answer":"Rome"},"input":{"prompt":"What
29+
is the capital of Italy?"},"updated_at":"2025-08-08T20:49:58.695339032Z","version":1}}]}'
30+
headers:
31+
content-length:
32+
- '766'
33+
content-security-policy:
34+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
35+
content-type:
36+
- application/vnd.api+json
37+
date:
38+
- Fri, 08 Aug 2025 20:49:59 GMT
39+
strict-transport-security:
40+
- max-age=31536000; includeSubDomains; preload
41+
vary:
42+
- Accept-Encoding
43+
x-content-type-options:
44+
- nosniff
45+
x-frame-options:
46+
- SAMEORIGIN
47+
status:
48+
code: 200
49+
message: OK
50+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
Accept:
6+
- '*/*'
7+
? !!python/object/apply:multidict._multidict.istr
8+
- Accept-Encoding
9+
: - identity
10+
Connection:
11+
- keep-alive
12+
? !!python/object/apply:multidict._multidict.istr
13+
- Content-Length
14+
: - '0'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: GET
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/cfdb514f-92b0-404f-9b3f-020f82b0cd51/records
22+
response:
23+
body:
24+
string: '{"data":[{"id":"e6c19140-44dd-41ae-8617-adb52e2a79c1","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T20:50:03.226626Z","dataset_id":"cfdb514f-92b0-404f-9b3f-020f82b0cd51","input":{"prompt":"What
25+
is the capital of Sweden?"},"updated_at":"2025-08-08T20:50:03.226626Z"}},{"id":"0a37e076-a220-427b-babe-74b9b7ca74a1","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T20:49:58.695339Z","dataset_id":"cfdb514f-92b0-404f-9b3f-020f82b0cd51","expected_output":{"answer":"Rome"},"input":{"prompt":"What
26+
is the capital of Italy?"},"updated_at":"2025-08-08T20:49:58.695339Z"}}],"meta":{"after":""}}'
27+
headers:
28+
content-length:
29+
- '713'
30+
content-security-policy:
31+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
32+
content-type:
33+
- application/vnd.api+json
34+
date:
35+
- Fri, 08 Aug 2025 20:50:07 GMT
36+
strict-transport-security:
37+
- max-age=31536000; includeSubDomains; preload
38+
vary:
39+
- Accept-Encoding
40+
x-content-type-options:
41+
- nosniff
42+
x-frame-options:
43+
- SAMEORIGIN
44+
status:
45+
code: 200
46+
message: OK
47+
version: 1
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "attributes": {"type": "soft", "dataset_ids":
4+
["cfdb514f-92b0-404f-9b3f-020f82b0cd51"]}}}'
5+
headers:
6+
Accept:
7+
- '*/*'
8+
? !!python/object/apply:multidict._multidict.istr
9+
- Accept-Encoding
10+
: - identity
11+
Connection:
12+
- keep-alive
13+
Content-Length:
14+
- '119'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: POST
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/delete
22+
response:
23+
body:
24+
string: '{"data":[{"id":"cfdb514f-92b0-404f-9b3f-020f82b0cd51","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T20:49:45.077961Z","current_version":2,"deleted_at":"2025-08-08T20:50:07.174644Z","description":"A
25+
test dataset","name":"test-dataset-test_dataset_delete_after_append[test_dataset_records0]","updated_at":"2025-08-08T20:50:03.656733Z"}}]}'
26+
headers:
27+
content-length:
28+
- '402'
29+
content-security-policy:
30+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
31+
content-type:
32+
- application/vnd.api+json
33+
date:
34+
- Fri, 08 Aug 2025 20:50:07 GMT
35+
strict-transport-security:
36+
- max-age=31536000; includeSubDomains; preload
37+
vary:
38+
- Accept-Encoding
39+
x-content-type-options:
40+
- nosniff
41+
x-frame-options:
42+
- SAMEORIGIN
43+
status:
44+
code: 200
45+
message: OK
46+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
Accept:
6+
- '*/*'
7+
? !!python/object/apply:multidict._multidict.istr
8+
- Accept-Encoding
9+
: - identity
10+
Connection:
11+
- keep-alive
12+
? !!python/object/apply:multidict._multidict.istr
13+
- Content-Length
14+
: - '0'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: GET
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets?filter%5Bname%5D=test-dataset-test_dataset_delete_after_append%5Btest_dataset_records0%5D
22+
response:
23+
body:
24+
string: '{"data":[{"id":"cfdb514f-92b0-404f-9b3f-020f82b0cd51","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T20:49:45.077961Z","current_version":2,"description":"A
25+
test dataset","name":"test-dataset-test_dataset_delete_after_append[test_dataset_records0]","updated_at":"2025-08-08T20:50:03.656733Z"}}],"meta":{"after":""}}'
26+
headers:
27+
content-length:
28+
- '379'
29+
content-security-policy:
30+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
31+
content-type:
32+
- application/vnd.api+json
33+
date:
34+
- Fri, 08 Aug 2025 20:50:06 GMT
35+
strict-transport-security:
36+
- max-age=31536000; includeSubDomains; preload
37+
vary:
38+
- Accept-Encoding
39+
x-content-type-options:
40+
- nosniff
41+
x-frame-options:
42+
- SAMEORIGIN
43+
status:
44+
code: 200
45+
message: OK
46+
version: 1
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "attributes": {"name": "test-dataset-test_dataset_delete_after_append[test_dataset_records0]",
4+
"description": "A test dataset"}}}'
5+
headers:
6+
Accept:
7+
- '*/*'
8+
? !!python/object/apply:multidict._multidict.istr
9+
- Accept-Encoding
10+
: - identity
11+
Connection:
12+
- keep-alive
13+
Content-Length:
14+
- '159'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: POST
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets
22+
response:
23+
body:
24+
string: '{"data":{"id":"cfdb514f-92b0-404f-9b3f-020f82b0cd51","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T20:49:45.077961Z","current_version":0,"description":"A
25+
test dataset","name":"test-dataset-test_dataset_delete_after_append[test_dataset_records0]","updated_at":"2025-08-08T20:49:45.077961Z"}}}'
26+
headers:
27+
content-length:
28+
- '357'
29+
content-security-policy:
30+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
31+
content-type:
32+
- application/vnd.api+json
33+
date:
34+
- Fri, 08 Aug 2025 20:49:58 GMT
35+
strict-transport-security:
36+
- max-age=31536000; includeSubDomains; preload
37+
vary:
38+
- Accept-Encoding
39+
x-content-type-options:
40+
- nosniff
41+
x-frame-options:
42+
- SAMEORIGIN
43+
status:
44+
code: 200
45+
message: OK
46+
version: 1

0 commit comments

Comments
 (0)