dgbowl · PeterKraus · Aug 26, 2025 · Aug 26, 2025 · Aug 26, 2025 · Aug 26, 2025
diff --git a/docs/source/version.7_0.rst b/docs/source/version.7_0.rst
@@ -17,6 +17,8 @@ New features in ``yadg-next`` are:
 
 Breaking changes in ``yadg-next`` are:
 
+  - In :mod:`yadg.extractors.agilent.csv`, the ``signal`` data variable now has ``elution_time`` as a proper coordinate. Previously, ``elution_time`` was a data variable that was manually expanded to the length of the longest trace present in the file, with ``np.nan`` used as padding for shorter traces. An arbitrary coordinate ``_`` was also present. Now, the ``elution_time`` coordinate is built automatically by :func:`xarray.concat` as the union of the elution times of all traces, and ``np.nan`` is inserted into the ``signal`` data variable for any ``elution_time`` values that are not present in a given trace.
+
 Bug fixes in ``yadg-next`` include:
 
   - The parameter ``Set I/C`` in :mod:`yadg.extractors.eclab.mpr` files should be ``C / N`` when set to 1, not ``C``.

diff --git a/src/yadg/extractors/agilent/ch.py b/src/yadg/extractors/agilent/ch.py
@@ -126,7 +126,7 @@ def extract_from_path(
     npoints = nbytes // dsize
 
     xsn = np.linspace(orig_meta["xmin"] / 1000, orig_meta["xmax"] / 1000, num=npoints)
-    xss = np.ones(npoints) * xsn[0]
+    xss = xsn[0]
     ysn = (
         np.frombuffer(
             ch,
@@ -136,7 +136,7 @@ def extract_from_path(
         )
         * orig_meta["slope"]
     )
-    yss = np.ones(npoints) * orig_meta["slope"]
+    yss = orig_meta["slope"]
 
     detector, title = orig_meta["tracetitle"].split(",")
 
@@ -149,24 +149,42 @@ def extract_from_path(
             "signal": (
                 ["uts", "elution_time"],
                 [ysn],
-                {"units": orig_meta["yunit"], "ancillary_variables": "signal_std_err"},
+                {
+                    "units": orig_meta["yunit"],
+                    "ancillary_variables": "signal_uncertainty",
+                },
             ),
-            "signal_std_err": (
-                ["uts", "elution_time"],
-                [yss],
-                {"units": orig_meta["yunit"], "standard_name": "signal standard_error"},
+            "signal_uncertainty": (
+                [],
+                yss,
+                {
+                    "units": orig_meta["yunit"],
+                    "standard_name": "signal standard_error",
+                    "yadg_uncertainty_absolute": 1,
+                    "yadg_uncertainty_distribution": "normal",
+                    "yadg_uncertainty_source": "scaling",
+                },
             ),
-            "elution_time_std_err": (
-                ["elution_time"],
+            "elution_time_uncertainty": (
+                [],
                 xss,
-                {"units": "s", "standard_name": "elution_time standard_error"},
+                {
+                    "units": "s",
+                    "standard_name": "elution_time standard_error",
+                    "yadg_uncertainty_absolute": 1,
+                    "yadg_uncertainty_distribution": "normal",
+                    "yadg_uncertainty_source": "scaling",
+                },
             ),
         },
         coords={
             "elution_time": (
                 ["elution_time"],
                 xsn,
-                {"units": "s", "ancillary_variables": "elution_time_std_err"},
+                {
+                    "units": "s",
+                    "ancillary_variables": "elution_time_uncertainty",
+                },
             ),
             "uts": (["uts"], [uts]),
         },

diff --git a/src/yadg/extractors/agilent/csv.py b/src/yadg/extractors/agilent/csv.py
@@ -72,13 +72,15 @@ def _process_headers(headers: list, columns: list, timezone: str) -> dict:
 
 
 def _to_trace(tx, ty):
-    tvals, tders = [x for x in zip(*tx)]
-    yvals, yders = [x for x in zip(*ty)]
+    tvals, tdevs = [x for x in zip(*tx)]
+    yvals, ydevs = [x for x in zip(*ty)]
     trace = {
         "tvals": np.array(tvals) * 60,
-        "tdevs": np.array(tders) * 60,
         "yvals": list(yvals),
-        "ydevs": list(yders),
+        # CHROMTAB files seem to have fixed precision,
+        # let us pick the maximum deviation and apply it
+        "tdev": max(tdevs) * 60,
+        "ydev": max(ydevs),
     }
     return trace
 
@@ -101,15 +103,12 @@ def extract_from_path(
     tstep = dict()
     data = []
     traces = set()
-    maxlen = dict()
     for line in lines:
         parts = line.strip().split(",")
         if len(parts) > 2:
             if '"Date Acquired"' in parts:
                 if tx != [] and ty != [] and detname is not None:
-                    trace = _to_trace(tx, ty)
-                    tstep[detname] = trace
-                    maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
+                    tstep[detname] = _to_trace(tx, ty)
                     tx = []
                     ty = []
                 if len(tstep) > 0:
@@ -123,9 +122,7 @@ def extract_from_path(
                 dgutils.merge_meta(orig_meta, ret[0])
         elif len(parts) == 1:
             if tx != [] and ty != [] and detname is not None:
-                trace = _to_trace(tx, ty)
-                tstep[detname] = trace
-                maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
+                tstep[detname] = _to_trace(tx, ty)
                 tx = []
                 ty = []
             detname = parts[0].replace('"', "").split("\\")[-1]
@@ -136,48 +133,64 @@ def extract_from_path(
             ty.append(y)
     trace = _to_trace(tx, ty)
     tstep[detname] = trace
-    maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
     data.append(tstep)
 
     traces = sorted(traces)
     vals = {}
     for tr in traces:
         dsets = []
         for ti, ts in enumerate(data):
-            thislen = len(ts[tr]["tvals"])
-            fvals = {}
-            for k in {"yvals", "ydevs", "tvals", "tdevs"}:
-                fvals[k] = np.ones(maxlen[tr]) * np.nan
-                fvals[k][:thislen] = ts[tr][k]
             ds = xr.Dataset(
                 data_vars={
                     "signal": (
                         ["elution_time"],
-                        fvals["yvals"],
-                        {"ancillary_variables": "signal_std_err"},
+                        ts[tr]["yvals"],
+                        {"ancillary_variables": "signal_uncertainty"},
                     ),
-                    "signal_std_err": (
-                        ["elution_time"],
-                        fvals["ydevs"],
-                        {"standard_name": "signal standard_error"},
+                    "signal_uncertainty": (
+                        [],
+                        ts[tr]["ydev"],
+                        {
+                            "standard_name": "signal standard_error",
+                            "yadg_uncertainty_absolute": 1,
+                            "yadg_uncertainty_distribution": "rectangular",
+                            "yadg_uncertainty_source": "sigfig",
+                        },
                     ),
-                    "elution_time": (
-                        ["_"],
-                        fvals["tvals"],
-                        {"units": "s", "ancillary_variables": "elution_time_std_err"},
+                    "elution_time_uncertainty": (
+                        [],
+                        ts[tr]["tdev"],
+                        {
+                            "units": "s",
+                            "standard_name": "elution_time standard_error",
+                            "yadg_uncertainty_absolute": 1,
+                            "yadg_uncertainty_distribution": "rectangular",
+                            "yadg_uncertainty_source": "sigfig",
+                        },
                     ),
-                    "elution_time_std_err": (
+                },
+                coords={
+                    "elution_time": (
                         ["elution_time"],
-                        fvals["tdevs"],
-                        {"units": "s", "standard_name": "elution_time standard_error"},
+                        ts[tr]["tvals"],
+                        {
+                            "units": "s",
+                            "ancillary_variables": "elution_time_uncertainty",
+                        },
                     ),
+                    "uts": (["uts"], [uts[ti]]),
                 },
-                coords={},
                 attrs={},
             )
-            ds["uts"] = [uts[ti]]
+            # ds["uts"] = [uts[ti]]
             dsets.append(ds)
-        vals[tr] = xr.concat(dsets, dim="uts")
+        vals[tr] = xr.concat(
+            dsets,
+            dim="uts",
+            data_vars="different",
+            compat="identical",
+            join="outer",
+        )
     dt = DataTree.from_dict(vals)
     dt.attrs = {"original_metadata": orig_meta}
     return dt
diff --git a/tests/test_extract/hplc.CH.nc b/tests/test_extract/hplc.CH.nc
diff --git a/tests/test_extract/hplc.dx.nc b/tests/test_extract/hplc.dx.nc
diff --git a/tests/test_x_agilent_ch/extracted-3487d194-9155-4f79-8f11-dbd18ce53187.CH.pkl b/tests/test_x_agilent_ch/extracted-3487d194-9155-4f79-8f11-dbd18ce53187.CH.pkl
diff --git a/tests/test_x_agilent_csv/CHROMTAB.CSV.pkl b/tests/test_x_agilent_csv/CHROMTAB.CSV.pkl
diff --git a/tests/test_x_agilent_dx/2021-11-12 12-39-18+01-00-02.dx.pkl b/tests/test_x_agilent_dx/2021-11-12 12-39-18+01-00-02.dx.pkl
diff --git a/tests/test_yadg/agilent.CH.nc b/tests/test_yadg/agilent.CH.nc