
Commit 21d569a

gary-huang and Yun-Kim authored
chore(llmobs): use upload endpoint for CSV's and large pushes (#14473)
**Case 1: create dataset from CSVs.** With big datasets / big CSVs we would run into timeout errors:

```
Traceback (most recent call last):
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/test.py", line 23, in <module>
    dataset = LLMObs.create_dataset_from_csv(csv_path="/Users/gary.huang/Downloads/weatherAUS.csv", dataset_name="weatheraus-2", input_data_columns=["Date", "input", "Evaporation", "Sunshine"], expected_output_columns=["RainTomorrow"])
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_llmobs.py", line 722, in create_dataset_from_csv
    ds.push()
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_experiment.py", line 140, in push
    new_version, new_record_ids = self._dne_client.dataset_batch_update(
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_writer.py", line 404, in dataset_batch_update
    resp = self.request("POST", path, body)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_writer.py", line 319, in request
    resp = conn.getresponse()
           ^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1428, in getresponse
    response.begin()
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 331, in begin
    version, status, reason = self._read_status()
                              ^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 292, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/socket.py", line 720, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/ssl.py", line 1251, in recv_into
    return self.read(nbytes, buffer)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/ssl.py", line 1103, in read
    return self._sslobj.read(len, buffer)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: The read operation timed out
```

Now it succeeds: https://dddev.datadoghq.com/llm/datasets/1e794d07-8b77-4a08-8363-e7236432f262

**Case 2: large push after the dataset is created.** In cases where a push is extremely big, it used to fail. For example, this snippet:

```python
dataset = LLMObs.create_dataset("1-then-big-gh-09021126", "", [{"input_data": "first", "expected_output": "1"}])
print(dataset.as_dataframe())
print(dataset.url)

for i in range(0, 25000):
    dataset.append({"input_data": "a"*5000, "expected_output": "b"*100})

dataset.push()
```

resulted in something like:

```
3.13.0rc1
  input_data expected_output
0      first               1
https://app.datadoghq.com/llm/datasets/f9a2b6a7-9690-4e3d-98ff-e11b7832c344
Traceback (most recent call last):
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/test.py", line 33, in <module>
    dataset.push()
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_experiment.py", line 140, in push
    new_version, new_record_ids = self._dne_client.dataset_batch_update(
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_writer.py", line 404, in dataset_batch_update
    resp = self.request("POST", path, body)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_writer.py", line 318, in request
    conn.request(method, url, encoded_body, headers)
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/internal/http.py", line 37, in request
    return super().request(method, url, body=body, headers=_headers, encode_chunked=encode_chunked)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1336, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1382, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1331, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1130, in _send_output
    self.send(chunk)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1055, in send
    self.sock.sendall(data)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/ssl.py", line 1210, in sendall
    v = self.send(byte_view[count:])
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/ssl.py", line 1179, in send
    return self._sslobj.write(data)
           ^^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: The write operation timed out
```

With this change the push succeeds: the client estimates the size of the pending dataset delta and switches to the bulk upload endpoint once it gets large enough (a rough standalone sketch of this routing follows this description): https://dddev.datadoghq.com/llm/datasets/f9a2b6a7-9690-4e3d-98ff-e11b7832c344

**Case 3: create dataset with a large list of records.** Previously this failed:

```
3.13.0rc1
Traceback (most recent call last):
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/test.py", line 27, in <module>
    dataset = LLMObs.create_dataset("1-then-big-gh-09021127", "", recs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_llmobs.py", line 657, in create_dataset
    ds.push()
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_experiment.py", line 140, in push
    new_version, new_record_ids = self._dne_client.dataset_batch_update(
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_writer.py", line 404, in dataset_batch_update
    resp = self.request("POST", path, body)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/llmobs/_writer.py", line 318, in request
    conn.request(method, url, encoded_body, headers)
  File "/Users/gary.huang/go/src/github.com/DataDog/llm-observability/preview/experiments/notebooks/.venv/lib/python3.12/site-packages/ddtrace/internal/http.py", line 37, in request
    return super().request(method, url, body=body, headers=_headers, encode_chunked=encode_chunked)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1336, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1382, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1331, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1130, in _send_output
    self.send(chunk)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/http/client.py", line 1055, in send
    self.sock.sendall(data)
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/ssl.py", line 1210, in sendall
    v = self.send(byte_view[count:])
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gary.huang/.pyenv/versions/3.12.7/lib/python3.12/ssl.py", line 1179, in send
    return self._sslobj.write(data)
           ^^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: The write operation timed out
```

It now succeeds: https://dddev.datadoghq.com/llm/datasets/4b4af632-a51b-4a18-9393-f0a5dfe8a2b5

## Checklist

- [x] PR author has checked that all the criteria below are met
  - The PR description includes an overview of the change
  - The PR description articulates the motivation for the change
  - The change includes tests OR the PR description describes a testing strategy
  - The PR description notes risks associated with the change, if any
  - Newly-added code is easy to change
  - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
  - The change includes or references documentation updates if necessary
  - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist

- [x] Reviewer has checked that all the criteria below are met
  - Title is accurate
  - All changes are related to the pull request's stated goal
  - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
  - Testing strategy adequately addresses listed risks
  - Newly-added code is easy to change
  - Release note makes sense to a user of the library
  - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
  - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: Yun Kim <[email protected]>
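As a rough, standalone sketch of the size-based routing described in case 2 (names here are illustrative; the actual logic lives in `Dataset.push()` and `Dataset._estimate_delta_size()` and serializes with ddtrace's `safe_json` helper rather than `json.dumps`):

```python
import json

BATCH_UPDATE_THRESHOLD = 5 * 1024 * 1024  # 5 MB, mirroring Dataset.BATCH_UPDATE_THRESHOLD


def choose_push_path(new_records: dict, updated_records: dict) -> str:
    """Illustrative helper: decide which endpoint a push of this pending delta would use."""
    # The real code uses safe_json(); json.dumps() stands in for it here.
    delta_size = len(json.dumps(new_records)) + len(json.dumps(updated_records))
    if delta_size > BATCH_UPDATE_THRESHOLD:
        return "bulk upload (multipart CSV POST to .../records/upload)"
    return "batch update (JSON POST)"


# A few thousand ~5 KB records already exceed the 5 MB threshold,
# so a push like the one in case 2 takes the bulk upload path.
pending = {str(i): {"input_data": "a" * 5000, "expected_output": "b" * 100} for i in range(2000)}
print(choose_push_path(pending, {}))  # -> bulk upload (...)
```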
1 parent a8ccafd · commit 21d569a

10 files changed: +562 -151 lines

ddtrace/llmobs/_experiment.py

Lines changed: 24 additions & 9 deletions
```diff
@@ -101,6 +101,8 @@ class Dataset:
     _updated_record_ids_to_new_fields: Dict[str, UpdatableDatasetRecord]
     _deleted_record_ids: List[str]
 
+    BATCH_UPDATE_THRESHOLD = 5 * 1024 * 1024  # 5MB
+
     def __init__(
         self,
         name: str,
@@ -136,17 +138,24 @@ def push(self) -> None:
             )
         )
 
-        updated_records = list(self._updated_record_ids_to_new_fields.values())
-        new_version, new_record_ids = self._dne_client.dataset_batch_update(
-            self._id, list(self._new_records_by_record_id.values()), updated_records, self._deleted_record_ids
-        )
+        delta_size = self._estimate_delta_size()
+        if delta_size > self.BATCH_UPDATE_THRESHOLD:
+            logger.debug("dataset delta is %d, using bulk upload", delta_size)
+            # TODO must return version too
+            self._dne_client.dataset_bulk_upload(self._id, self._records)
+        else:
+            logger.debug("dataset delta is %d, using batch update", delta_size)
+            updated_records = list(self._updated_record_ids_to_new_fields.values())
+            new_version, new_record_ids = self._dne_client.dataset_batch_update(
+                self._id, list(self._new_records_by_record_id.values()), updated_records, self._deleted_record_ids
+            )
 
-        # attach record ids to newly created records
-        for record, record_id in zip(self._new_records_by_record_id.values(), new_record_ids):
-            record["record_id"] = record_id  # type: ignore
+            # attach record ids to newly created records
+            for record, record_id in zip(self._new_records_by_record_id.values(), new_record_ids):
+                record["record_id"] = record_id  # type: ignore
 
-        # FIXME: we don't get version numbers in responses to deletion requests
-        self._version = new_version if new_version != -1 else self._version + 1
+            # FIXME: we don't get version numbers in responses to deletion requests
+            self._version = new_version if new_version != -1 else self._version + 1
         self._new_records_by_record_id = {}
         self._deleted_record_ids = []
         self._updated_record_ids_to_new_fields = {}
@@ -203,6 +212,12 @@ def url(self) -> str:
         # FIXME: will not work for subdomain orgs
         return f"{_get_base_url()}/llm/datasets/{self._id}"
 
+    def _estimate_delta_size(self) -> int:
+        """rough estimate (in bytes) of the size of the next batch update call if it happens"""
+        size = len(safe_json(self._new_records_by_record_id)) + len(safe_json(self._updated_record_ids_to_new_fields))
+        logger.debug("estimated delta size %d", size)
+        return size
+
     @overload
     def __getitem__(self, index: int) -> DatasetRecord:
         ...
```
ddtrace/llmobs/_llmobs.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -720,7 +720,7 @@ def create_dataset_from_csv(
             csv.field_size_limit(original_field_size_limit)
 
         if len(ds) > 0:
-            ds.push()
+            cls._instance._dne_client.dataset_bulk_upload(ds._id, ds._records)
         return ds
 
     @classmethod
```
ddtrace/llmobs/_writer.py

Lines changed: 74 additions & 0 deletions
```diff
@@ -1,6 +1,8 @@
 import atexit
+import csv
 import json
 import os
+import tempfile
 from typing import Any
 from typing import Dict
 from typing import List
@@ -300,6 +302,8 @@ class LLMObsExperimentsClient(BaseLLMObsWriter):
     AGENTLESS_BASE_URL = AGENTLESS_EXP_BASE_URL
     ENDPOINT = ""
     TIMEOUT = 5.0
+    BULK_UPLOAD_TIMEOUT = 60.0
+    SUPPORTED_UPLOAD_EXTS = {"csv"}
 
     def request(self, method: str, path: str, body: JSONType = None) -> Response:
         headers = {
@@ -321,6 +325,23 @@ def request(self, method: str, path: str, body: JSONType = None) -> Response:
         finally:
             conn.close()
 
+    def multipart_request(self, method: str, path: str, content_type: str, body: bytes = b"") -> Response:
+        headers = {
+            "Content-Type": content_type,
+            "DD-API-KEY": self._api_key,
+            "DD-APPLICATION-KEY": self._app_key,
+        }
+
+        conn = get_connection(url=self._intake, timeout=self.BULK_UPLOAD_TIMEOUT)
+        try:
+            url = self._intake + self._endpoint + path
+            logger.debug("requesting %s, %s", url, content_type)
+            conn.request(method, url, body, headers)
+            resp = conn.getresponse()
+            return Response.from_http_response(resp)
+        finally:
+            conn.close()
+
     def dataset_delete(self, dataset_id: str) -> None:
         path = "/api/unstable/llm-obs/v1/datasets/delete"
         resp = self.request(
@@ -353,6 +374,8 @@ def dataset_create(self, name: str, description: str) -> Dataset:
             raise ValueError(f"Failed to create dataset {name}: {resp.status} {resp.get_json()}")
         response_data = resp.get_json()
         dataset_id = response_data["data"]["id"]
+        if dataset_id is None or dataset_id == "":
+            raise ValueError(f"unexpected dataset state, invalid ID (is None: {dataset_id is None})")
         curr_version = response_data["data"]["attributes"]["current_version"]
         return Dataset(name, dataset_id, [], description, curr_version, _dne_client=self)
 
@@ -446,6 +469,57 @@ def dataset_get_with_records(self, name: str) -> Dataset:
         )
         return Dataset(name, dataset_id, class_records, dataset_description, curr_version, _dne_client=self)
 
+    def dataset_bulk_upload(self, dataset_id: str, records: List[DatasetRecord]):
+        with tempfile.NamedTemporaryFile(suffix=".csv") as tmp:
+            file_name = os.path.basename(tmp.name)
+            file_name_parts = file_name.rsplit(".", 1)
+            if len(file_name_parts) != 2:
+                raise ValueError(f"invalid file {file_name} from {tmp.name}")
+
+            file_ext = file_name_parts[1]
+
+            if file_ext not in self.SUPPORTED_UPLOAD_EXTS:
+                raise ValueError(f"{file_ext} files not supported")
+
+            with open(tmp.name, "w", newline="") as csv_file:
+                field_names = ["input", "expected_output", "metadata"]
+                writer = csv.writer(csv_file)
+                writer.writerow(field_names)
+                for r in records:
+                    writer.writerow(
+                        [
+                            json.dumps(r.get("input_data", "")),
+                            json.dumps(r.get("expected_output", "")),
+                            json.dumps(r.get("metadata", "")),
+                        ]
+                    )
+
+            with open(tmp.name, mode="rb") as f:
+                file_content = f.read()
+
+        path = f"/api/unstable/llm-obs/v1/datasets/{dataset_id}/records/upload"
+        BOUNDARY = b"----------boundary------"
+        CRLF = b"\r\n"
+
+        body = CRLF.join(
+            [
+                b"--" + BOUNDARY,
+                b'Content-Disposition: form-data; name="file"; filename="%s"' % file_name.encode("utf-8"),
+                b"Content-Type: text/%s" % file_ext.encode("utf-8"),
+                b"",
+                file_content,
+                b"--" + BOUNDARY + b"--",
+                b"",
+            ]
+        )
+
+        resp = self.multipart_request(
+            "POST", path, content_type="multipart/form-data; boundary=%s" % BOUNDARY.decode("utf-8"), body=body
+        )
+        if resp.status != 200:
+            raise ValueError(f"Failed to upload dataset from file: {resp.status} {resp.get_json()}")
+        logger.debug("successfully uploaded with code %d", resp.status)
+
     def project_create_or_get(self, name: str) -> str:
         path = "/api/unstable/llm-obs/v1/projects"
         resp = self.request(
```
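To see why the recorded request bodies in the new test fixtures below contain doubled quotes, here is a small standalone sketch of the same CSV serialization that `dataset_bulk_upload` performs: each cell is a JSON dump of the record field, so the CSV writer escapes the embedded quotes. The records here are illustrative, shaped like the ones in the fixtures.

```python
import csv
import io
import json

# Two illustrative records shaped like the ones in the recorded fixtures below.
records = [
    {"input_data": {"in0": "r0v1"}, "expected_output": {"out0": "r0v4"}, "metadata": {"m0": "r0v6"}},
    {"input_data": {"in0": "r1v1"}, "expected_output": {"out0": "r1v4"}, "metadata": {"m0": "r1v6"}},
]

buf = io.StringIO(newline="")
writer = csv.writer(buf)
writer.writerow(["input", "expected_output", "metadata"])
for r in records:
    writer.writerow(
        [
            json.dumps(r.get("input_data", "")),
            json.dumps(r.get("expected_output", "")),
            json.dumps(r.get("metadata", "")),
        ]
    )

# Each cell is JSON, so the CSV writer doubles the inner quotes, e.g.:
# "{""in0"": ""r0v1""}","{""out0"": ""r0v4""}","{""m0"": ""r0v6""}"
print(buf.getvalue())
```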
Lines changed: 49 additions & 0 deletions (new file)

```yaml
interactions:
- request:
    body: "------------boundary------\r\nContent-Disposition: form-data; name=\"file\";
      filename=\"tmp.csv\"\r\nContent-Type: text/csv\r\n\r\ninput,expected_output,metadata\r\n\"{\"\"in0\"\":
      \"\"r0v1\"\", \"\"in1\"\": \"\"r0v2\"\", \"\"in2\"\": \"\"r0v3\"\"}\",\"{\"\"out0\"\":
      \"\"r0v4\"\", \"\"out1\"\": \"\"r0v5\"\"}\",\"{\"\"m0\"\": \"\"r0v6\"\"}\"\r\n\"{\"\"in0\"\":
      \"\"r1v1\"\", \"\"in1\"\": \"\"r1v2\"\", \"\"in2\"\": \"\"r1v3\"\"}\",\"{\"\"out0\"\":
      \"\"r1v4\"\", \"\"out1\"\": \"\"r1v5\"\"}\",\"{\"\"m0\"\": \"\"r1v6\"\"}\"\r\n\r\n------------boundary--------\r\n"
    headers:
      Accept:
      - '*/*'
      ? !!python/object/apply:multidict._multidict.istr
      - Accept-Encoding
      : - identity
      Connection:
      - keep-alive
      Content-Length:
      - '433'
      ? !!python/object/apply:multidict._multidict.istr
      - Content-Type
      : - multipart/form-data; boundary=----------boundary------
      User-Agent:
      - python-requests/2.32.3
    method: POST
    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/7ebcf701-2bd3-42d4-9dbd-01bb1169e66f/records/upload
  response:
    body:
      string: ''
    headers:
      content-length:
      - '0'
      content-security-policy:
      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
      content-type:
      - application/vnd.api+json
      date:
      - Tue, 02 Sep 2025 22:41:52 GMT
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      vary:
      - Accept-Encoding
      x-content-type-options:
      - nosniff
      x-frame-options:
      - SAMEORIGIN
    status:
      code: 200
      message: OK
version: 1
```
Lines changed: 46 additions & 0 deletions (new file)

```yaml
interactions:
- request:
    body: '{"data": {"type": "datasets", "attributes": {"type": "soft", "dataset_ids":
      ["ed2f83c9-dd94-41d7-ac2b-544bbdbb5584"]}}}'
    headers:
      Accept:
      - '*/*'
      ? !!python/object/apply:multidict._multidict.istr
      - Accept-Encoding
      : - identity
      Connection:
      - keep-alive
      Content-Length:
      - '119'
      ? !!python/object/apply:multidict._multidict.istr
      - Content-Type
      : - application/json
      User-Agent:
      - python-requests/2.32.3
    method: POST
    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/delete
  response:
    body:
      string: '{"data":[{"id":"ed2f83c9-dd94-41d7-ac2b-544bbdbb5584","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-09-03T23:16:13.87366Z","current_version":0,"deleted_at":"2025-09-03T23:16:14.119761Z","description":"A
        test dataset","name":"test-dataset-test_dataset_estimate_size","updated_at":"2025-09-03T23:16:13.87366Z"}}]}'
    headers:
      content-length:
      - '371'
      content-security-policy:
      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
      content-type:
      - application/vnd.api+json
      date:
      - Wed, 03 Sep 2025 23:16:14 GMT
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      vary:
      - Accept-Encoding
      x-content-type-options:
      - nosniff
      x-frame-options:
      - SAMEORIGIN
    status:
      code: 200
      message: OK
version: 1
```
Lines changed: 49 additions & 0 deletions (new file)

```yaml
interactions:
- request:
    body: "------------boundary------\r\nContent-Disposition: form-data; name=\"file\";
      filename=\"tmp.csv\"\r\nContent-Type: text/csv\r\n\r\ninput,expected_output,metadata\r\n\"{\"\"in0\"\":
      \"\"r0v1\"\", \"\"in1\"\": \"\"r0v2\"\", \"\"in2\"\": \"\"r0v3\"\"}\",\"{\"\"out0\"\":
      \"\"r0v4\"\", \"\"out1\"\": \"\"r0v5\"\"}\",{}\r\n\"{\"\"in0\"\": \"\"r1v1\"\",
      \"\"in1\"\": \"\"r1v2\"\", \"\"in2\"\": \"\"r1v3\"\"}\",\"{\"\"out0\"\": \"\"r1v4\"\",
      \"\"out1\"\": \"\"r1v5\"\"}\",{}\r\n\r\n------------boundary--------\r\n"
    headers:
      Accept:
      - '*/*'
      ? !!python/object/apply:multidict._multidict.istr
      - Accept-Encoding
      : - identity
      Connection:
      - keep-alive
      Content-Length:
      - '397'
      ? !!python/object/apply:multidict._multidict.istr
      - Content-Type
      : - multipart/form-data; boundary=----------boundary------
      User-Agent:
      - python-requests/2.32.3
    method: POST
    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/e03e6dcc-1a71-49ed-82b0-0042d0f7e117/records/upload
  response:
    body:
      string: ''
    headers:
      content-length:
      - '0'
      content-security-policy:
      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
      content-type:
      - application/vnd.api+json
      date:
      - Tue, 02 Sep 2025 22:41:49 GMT
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      vary:
      - Accept-Encoding
      x-content-type-options:
      - nosniff
      x-frame-options:
      - SAMEORIGIN
    status:
      code: 200
      message: OK
version: 1
```
Lines changed: 49 additions & 0 deletions (new file)

```yaml
interactions:
- request:
    body: "------------boundary------\r\nContent-Disposition: form-data; name=\"file\";
      filename=\"tmp.csv\"\r\nContent-Type: text/csv\r\n\r\ninput,expected_output,metadata\r\n\"{\"\"in0\"\":
      \"\"r0v1\"\", \"\"in1\"\": \"\"r0v2\"\", \"\"in2\"\": \"\"r0v3\"\"}\",\"{\"\"out0\"\":
      \"\"r0v4\"\", \"\"out1\"\": \"\"r0v5\"\"}\",\"{\"\"m0\"\": \"\"r0v6\"\"}\"\r\n\"{\"\"in0\"\":
      \"\"r1v1\"\", \"\"in1\"\": \"\"r1v2\"\", \"\"in2\"\": \"\"r1v3\"\"}\",\"{\"\"out0\"\":
      \"\"r1v4\"\", \"\"out1\"\": \"\"r1v5\"\"}\",\"{\"\"m0\"\": \"\"r1v6\"\"}\"\r\n\r\n------------boundary--------\r\n"
    headers:
      Accept:
      - '*/*'
      ? !!python/object/apply:multidict._multidict.istr
      - Accept-Encoding
      : - identity
      Connection:
      - keep-alive
      Content-Length:
      - '433'
      ? !!python/object/apply:multidict._multidict.istr
      - Content-Type
      : - multipart/form-data; boundary=----------boundary------
      User-Agent:
      - python-requests/2.32.3
    method: POST
    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/e03e6dcc-1a71-49ed-82b0-0042d0f7e117/records/upload
  response:
    body:
      string: ''
    headers:
      content-length:
      - '0'
      content-security-policy:
      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
      content-type:
      - application/vnd.api+json
      date:
      - Tue, 02 Sep 2025 22:41:41 GMT
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      vary:
      - Accept-Encoding
      x-content-type-options:
      - nosniff
      x-frame-options:
      - SAMEORIGIN
    status:
      code: 200
      message: OK
version: 1
```
