Skip to content

Commit 1496adb

Browse files
authored
agilent: Implement new uncertainty and other fixes (#244)
* Rework uncertainties in agilent.csv, fix concatenation. * ch, dx, tests * docs * typo
1 parent 0c2e371 commit 1496adb

File tree

9 files changed

+77
-44
lines changed

9 files changed

+77
-44
lines changed

docs/source/version.7_0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ New features in ``yadg-next`` are:
1717

1818
Breaking changes in ``yadg-next`` are:
1919

20+
- In :mod:`yadg.extractors.agilent.csv`, the ``signal`` data variable now has ``elution_time`` as a proper coordinate. Previously, the ``elution_time`` was expanded manually to the length of the largest trace present in the file, with ``np.nan`` used as padding for shorter traces. An arbitrary coordinate ``_`` was also present. Now, ``elution_time`` is expanded automatically by :func:`xarray.concat`, inserting ``np.nan`` into the ``signal`` data variable as necessary for ``elution_time`` values that are not present in each trace.
21+
2022
Bug fixes in ``yadg-next`` include:
2123

2224
- The parameter ``Set I/C`` in :mod:`yadg.extractors.eclab.mpr` files should be ``C / N`` when set to 1, not ``C``.

src/yadg/extractors/agilent/ch.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def extract_from_path(
126126
npoints = nbytes // dsize
127127

128128
xsn = np.linspace(orig_meta["xmin"] / 1000, orig_meta["xmax"] / 1000, num=npoints)
129-
xss = np.ones(npoints) * xsn[0]
129+
xss = xsn[0]
130130
ysn = (
131131
np.frombuffer(
132132
ch,
@@ -136,7 +136,7 @@ def extract_from_path(
136136
)
137137
* orig_meta["slope"]
138138
)
139-
yss = np.ones(npoints) * orig_meta["slope"]
139+
yss = orig_meta["slope"]
140140

141141
detector, title = orig_meta["tracetitle"].split(",")
142142

@@ -149,24 +149,42 @@ def extract_from_path(
149149
"signal": (
150150
["uts", "elution_time"],
151151
[ysn],
152-
{"units": orig_meta["yunit"], "ancillary_variables": "signal_std_err"},
152+
{
153+
"units": orig_meta["yunit"],
154+
"ancillary_variables": "signal_uncertainty",
155+
},
153156
),
154-
"signal_std_err": (
155-
["uts", "elution_time"],
156-
[yss],
157-
{"units": orig_meta["yunit"], "standard_name": "signal standard_error"},
157+
"signal_uncertainty": (
158+
[],
159+
yss,
160+
{
161+
"units": orig_meta["yunit"],
162+
"standard_name": "signal standard_error",
163+
"yadg_uncertainty_absolute": 1,
164+
"yadg_uncertainty_distribution": "normal",
165+
"yadg_uncertainty_source": "scaling",
166+
},
158167
),
159-
"elution_time_std_err": (
160-
["elution_time"],
168+
"elution_time_uncertainty": (
169+
[],
161170
xss,
162-
{"units": "s", "standard_name": "elution_time standard_error"},
171+
{
172+
"units": "s",
173+
"standard_name": "elution_time standard_error",
174+
"yadg_uncertainty_absolute": 1,
175+
"yadg_uncertainty_distribution": "normal",
176+
"yadg_uncertainty_source": "scaling",
177+
},
163178
),
164179
},
165180
coords={
166181
"elution_time": (
167182
["elution_time"],
168183
xsn,
169-
{"units": "s", "ancillary_variables": "elution_time_std_err"},
184+
{
185+
"units": "s",
186+
"ancillary_variables": "elution_time_uncertainty",
187+
},
170188
),
171189
"uts": (["uts"], [uts]),
172190
},

src/yadg/extractors/agilent/csv.py

Lines changed: 46 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,15 @@ def _process_headers(headers: list, columns: list, timezone: str) -> dict:
7272

7373

7474
def _to_trace(tx, ty):
75-
tvals, tders = [x for x in zip(*tx)]
76-
yvals, yders = [x for x in zip(*ty)]
75+
tvals, tdevs = [x for x in zip(*tx)]
76+
yvals, ydevs = [x for x in zip(*ty)]
7777
trace = {
7878
"tvals": np.array(tvals) * 60,
79-
"tdevs": np.array(tders) * 60,
8079
"yvals": list(yvals),
81-
"ydevs": list(yders),
80+
# CHROMTAB files seem to have fixed precision,
81+
# let us pick the maximum deviation and apply it
82+
"tdev": max(tdevs) * 60,
83+
"ydev": max(ydevs),
8284
}
8385
return trace
8486

@@ -101,15 +103,12 @@ def extract_from_path(
101103
tstep = dict()
102104
data = []
103105
traces = set()
104-
maxlen = dict()
105106
for line in lines:
106107
parts = line.strip().split(",")
107108
if len(parts) > 2:
108109
if '"Date Acquired"' in parts:
109110
if tx != [] and ty != [] and detname is not None:
110-
trace = _to_trace(tx, ty)
111-
tstep[detname] = trace
112-
maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
111+
tstep[detname] = _to_trace(tx, ty)
113112
tx = []
114113
ty = []
115114
if len(tstep) > 0:
@@ -123,9 +122,7 @@ def extract_from_path(
123122
dgutils.merge_meta(orig_meta, ret[0])
124123
elif len(parts) == 1:
125124
if tx != [] and ty != [] and detname is not None:
126-
trace = _to_trace(tx, ty)
127-
tstep[detname] = trace
128-
maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
125+
tstep[detname] = _to_trace(tx, ty)
129126
tx = []
130127
ty = []
131128
detname = parts[0].replace('"', "").split("\\")[-1]
@@ -136,48 +133,64 @@ def extract_from_path(
136133
ty.append(y)
137134
trace = _to_trace(tx, ty)
138135
tstep[detname] = trace
139-
maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
140136
data.append(tstep)
141137

142138
traces = sorted(traces)
143139
vals = {}
144140
for tr in traces:
145141
dsets = []
146142
for ti, ts in enumerate(data):
147-
thislen = len(ts[tr]["tvals"])
148-
fvals = {}
149-
for k in {"yvals", "ydevs", "tvals", "tdevs"}:
150-
fvals[k] = np.ones(maxlen[tr]) * np.nan
151-
fvals[k][:thislen] = ts[tr][k]
152143
ds = xr.Dataset(
153144
data_vars={
154145
"signal": (
155146
["elution_time"],
156-
fvals["yvals"],
157-
{"ancillary_variables": "signal_std_err"},
147+
ts[tr]["yvals"],
148+
{"ancillary_variables": "signal_uncertainty"},
158149
),
159-
"signal_std_err": (
160-
["elution_time"],
161-
fvals["ydevs"],
162-
{"standard_name": "signal standard_error"},
150+
"signal_uncertainty": (
151+
[],
152+
ts[tr]["ydev"],
153+
{
154+
"standard_name": "signal standard_error",
155+
"yadg_uncertainty_absolute": 1,
156+
"yadg_uncertainty_distribution": "rectangular",
157+
"yadg_uncertainty_source": "sigfig",
158+
},
163159
),
164-
"elution_time": (
165-
["_"],
166-
fvals["tvals"],
167-
{"units": "s", "ancillary_variables": "elution_time_std_err"},
160+
"elution_time_uncertainty": (
161+
[],
162+
ts[tr]["tdev"],
163+
{
164+
"units": "s",
165+
"standard_name": "elution_time standard_error",
166+
"yadg_uncertainty_absolute": 1,
167+
"yadg_uncertainty_distribution": "rectangular",
168+
"yadg_uncertainty_source": "sigfig",
169+
},
168170
),
169-
"elution_time_std_err": (
171+
},
172+
coords={
173+
"elution_time": (
170174
["elution_time"],
171-
fvals["tdevs"],
172-
{"units": "s", "standard_name": "elution_time standard_error"},
175+
ts[tr]["tvals"],
176+
{
177+
"units": "s",
178+
"ancillary_variables": "elution_time_uncertainty",
179+
},
173180
),
181+
"uts": (["uts"], [uts[ti]]),
174182
},
175-
coords={},
176183
attrs={},
177184
)
178-
ds["uts"] = [uts[ti]]
185+
# ds["uts"] = [uts[ti]]
179186
dsets.append(ds)
180-
vals[tr] = xr.concat(dsets, dim="uts")
187+
vals[tr] = xr.concat(
188+
dsets,
189+
dim="uts",
190+
data_vars="different",
191+
compat="identical",
192+
join="outer",
193+
)
181194
dt = DataTree.from_dict(vals)
182195
dt.attrs = {"original_metadata": orig_meta}
183196
return dt

tests/test_extract/hplc.CH.nc

-154 KB
Binary file not shown.

tests/test_extract/hplc.dx.nc

-154 KB
Binary file not shown.
Binary file not shown.
-843 KB
Binary file not shown.
-156 KB
Binary file not shown.

tests/test_yadg/agilent.CH.nc

-154 KB
Binary file not shown.

0 commit comments

Comments
 (0)