diff --git a/src/yadg/dgutils/dsutils.py b/src/yadg/dgutils/dsutils.py index 70fab7ff..8bc3e8ee 100644 --- a/src/yadg/dgutils/dsutils.py +++ b/src/yadg/dgutils/dsutils.py @@ -12,11 +12,8 @@ def append_dicts( devs: dict[str, Any], data: dict[str, list[Any]], meta: dict[str, list[Any]], - fn: str = None, li: int = 0, ) -> None: - if "_fn" in meta and fn is not None: - meta["_fn"].append(str(fn)) for k, v in vals.items(): if k not in data: data[k] = [None if isinstance(v, str) else np.nan] * li @@ -29,8 +26,7 @@ def append_dicts( for k in set(data) - set(vals): data[k].append(np.nan) for k in set(meta) - set(devs): - if k != "_fn": - meta[k].append(np.nan) + meta[k].append(np.nan) def dicts_to_dataset( @@ -54,10 +50,19 @@ def dicts_to_dataset( k = key darrs[k] = xr.DataArray(data=val, dims=["uts"], attrs=attrs) if key in meta and darrs[k].dtype.kind in {"i", "u", "f", "c", "m", "M"}: - err = f"{k}_std_err" + err = f"{k.replace(' ', '_')}_uncertainty" darrs[k].attrs["ancillary_variables"] = err - attrs["standard_name"] = f"{k} standard_error" - darrs[err] = xr.DataArray(data=meta[key], dims=["uts"], attrs=attrs) + attrs["standard_name"] = f"{k!r} standard_error" + attrs["yadg_uncertainty_absolute"] = 1 + attrs["yadg_uncertainty_distribution"] = "rectangular" + attrs["yadg_uncertainty_source"] = "sigfig" + setdev = set(meta[key]) + if np.nan in setdev: + setdev.remove(np.nan) + if len(setdev) == 1: + darrs[err] = xr.DataArray(data=setdev.pop(), dims=[], attrs=attrs) + else: + darrs[err] = xr.DataArray(data=meta[key], dims=["uts"], attrs=attrs) if "uts" in data: coords = dict(uts=data.pop("uts")) else: @@ -79,7 +84,15 @@ def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict: return fvals for k in fvals.keys(): try: - vals[k] = xr.concat([vals[k], fvals[k]], dim="uts", combine_attrs=mode) + # vals[k] = xr.concat([vals[k], fvals[k]], dim="uts", combine_attrs=mode) + vals[k] = xr.concat( + [vals[k], fvals[k]], + dim="uts", + data_vars="different", + compat="identical", + join="outer", + combine_attrs=mode, + ) except xr.MergeError: raise RuntimeError( "Merging metadata from multiple files has failed, as some of the " diff --git a/src/yadg/extractors/basic/csv.py b/src/yadg/extractors/basic/csv.py index 10d9e5d0..96264f43 100644 --- a/src/yadg/extractors/basic/csv.py +++ b/src/yadg/extractors/basic/csv.py @@ -46,7 +46,6 @@ from pydantic import BaseModel from babel.numbers import parse_decimal from xarray import DataTree -from uncertainties.core import str_to_number_with_uncert as tuple_fromstr from typing import Callable from pathlib import Path from yadg.extractors import get_extract_dispatch @@ -108,9 +107,10 @@ def process_row( elif columns[ci] == "": continue try: - val, dev = tuple_fromstr(str(parse_decimal(columns[ci], locale=locale))) - vals[header] = val - devs[header] = dev + dec = parse_decimal(columns[ci], locale=locale) + exp = dec.as_tuple().exponent + vals[header] = float(dec) + devs[header] = 10**exp except ValueError: vals[header] = columns[ci] @@ -165,7 +165,7 @@ def extract( # Process rows data_vals = {} - meta_vals = {"_fn": []} + meta_vals = {} for li, line in enumerate(lines[si:]): vals, devs = process_row( headers, @@ -174,6 +174,6 @@ def extract( datecolumns, locale=locale, ) - dgutils.append_dicts(vals, devs, data_vals, meta_vals, str(source), li) + dgutils.append_dicts(vals, devs, data_vals, meta_vals, li) return DataTree(dgutils.dicts_to_dataset(data_vals, meta_vals, units, fulldate)) diff --git a/src/yadg/extractors/drycal/common.py b/src/yadg/extractors/drycal/common.py index 43d320a4..983302c0 100644 --- a/src/yadg/extractors/drycal/common.py +++ b/src/yadg/extractors/drycal/common.py @@ -70,10 +70,10 @@ def rtf( # Process rows data_vals = {} - meta_vals = {"_fn": []} + meta_vals = {} for pi, point in enumerate(data): vals, devs = process_row(headers[1:], point[1:], datefunc, datecolumns) - dgutils.append_dicts(vals, devs, data_vals, meta_vals, fn, pi) + dgutils.append_dicts(vals, devs, data_vals, meta_vals, pi) return dgutils.dicts_to_dataset(data_vals, meta_vals, units, False) @@ -118,10 +118,10 @@ def sep( # Process rows data_vals = {} - meta_vals = {"_fn": []} + meta_vals = {} for pi, point in enumerate(data): vals, devs = process_row(headers[1:], point[1:], datefunc, datecolumns) - dgutils.append_dicts(vals, devs, data_vals, meta_vals, fn, pi) + dgutils.append_dicts(vals, devs, data_vals, meta_vals, pi) return dgutils.dicts_to_dataset(data_vals, meta_vals, units, False) diff --git a/src/yadg/extractors/eclab/mpr.py b/src/yadg/extractors/eclab/mpr.py index 9c50b6b1..c9e84ee4 100644 --- a/src/yadg/extractors/eclab/mpr.py +++ b/src/yadg/extractors/eclab/mpr.py @@ -206,6 +206,8 @@ extdev_dtypes, ) +from .mpt import dicts_to_dataset + logger = logging.getLogger(__name__) extract = get_extract_dispatch() @@ -478,7 +480,7 @@ def process_data( if warn_Ns: logger.warning("Ns found in data exceeds Ns in header, using last defined Ns.") - ds = dgutils.dicts_to_dataset(allvals, allmeta, units, fulldate=False) + ds = dicts_to_dataset(allvals, allmeta, units, fulldate=False) return ds diff --git a/src/yadg/extractors/eclab/mpt.py b/src/yadg/extractors/eclab/mpt.py index 7fc46b37..faa9772a 100644 --- a/src/yadg/extractors/eclab/mpt.py +++ b/src/yadg/extractors/eclab/mpt.py @@ -55,7 +55,7 @@ import logging from typing import Any from babel.numbers import parse_decimal -from xarray import DataTree +from xarray import DataTree, Dataset, DataArray from yadg import dgutils from .techniques import get_devs, param_from_key, split_control from .mpt_columns import column_units @@ -66,6 +66,42 @@ extract = get_extract_dispatch() +def dicts_to_dataset( + data: dict[str, list[Any]], + meta: dict[str, list[Any]], + units: dict[str, str] = dict(), + fulldate: bool = True, +) -> Dataset: + darrs = {} + for key, val in data.items(): + attrs = {} + u = units.get(key, None) + if u is not None: + attrs["units"] = u + if key == "uts": + continue + if "/" in key: + logger.warning(f"Replacing '/' for '_' in column {key!r}.") + k = key.replace("/", "_") + else: + k = key + darrs[k] = DataArray(data=val, dims=["uts"], attrs=attrs) + if key in meta and darrs[k].dtype.kind in {"i", "u", "f", "c", "m", "M"}: + err = f"{k}_std_err" + darrs[k].attrs["ancillary_variables"] = err + attrs["standard_name"] = f"{k} standard_error" + darrs[err] = DataArray(data=meta[key], dims=["uts"], attrs=attrs) + if "uts" in data: + coords = dict(uts=data.pop("uts")) + else: + coords = dict() + if fulldate: + attrs = dict() + else: + attrs = dict(fulldate=False) + return Dataset(data_vars=darrs, coords=coords, attrs=attrs) + + def process_settings(lines: list[str]) -> dict[str, str]: settings = {} comments = [] @@ -303,7 +339,7 @@ def process_data( if warn_I_range: logger.warning("I Range could not be understood, defaulting to 1 A.") - ds = dgutils.dicts_to_dataset(allvals, allmeta, units, fulldate=False) + ds = dicts_to_dataset(allvals, allmeta, units, fulldate=False) return ds diff --git a/src/yadg/extractors/fhimcpt/csv.py b/src/yadg/extractors/fhimcpt/csv.py index cded2093..7b6c877d 100644 --- a/src/yadg/extractors/fhimcpt/csv.py +++ b/src/yadg/extractors/fhimcpt/csv.py @@ -98,6 +98,6 @@ def extract_from_path( datefunc, datecolumns, ) - dgutils.append_dicts(vals, devs, data_vals, meta_vals, str(source), li) + dgutils.append_dicts(vals, devs, data_vals, meta_vals, li) return DataTree(dgutils.dicts_to_dataset(data_vals, meta_vals, units, fulldate)) diff --git a/src/yadg/extractors/tomato/json.py b/src/yadg/extractors/tomato/json.py index f7039ab8..5abd0ef2 100644 --- a/src/yadg/extractors/tomato/json.py +++ b/src/yadg/extractors/tomato/json.py @@ -183,7 +183,7 @@ def dummy_tomato_json(fn: Path, jsdata: dict) -> DataTree: for k, v in vals.items(): if k not in {"time", "address", "channel"}: devs[k] = 0.0 - dgutils.append_dicts(vals, devs, data_vals, meta_vals, str(fn), vi) + dgutils.append_dicts(vals, devs, data_vals, meta_vals, vi) return DataTree(dgutils.dicts_to_dataset(data_vals, meta_vals, fulldate=False)) diff --git a/tests/test_externaldate/ts0.yml.pkl b/tests/test_externaldate/ts0.yml.pkl index d65e1481..b472a631 100644 Binary files a/tests/test_externaldate/ts0.yml.pkl and b/tests/test_externaldate/ts0.yml.pkl differ diff --git a/tests/test_externaldate/ts1.yml.pkl b/tests/test_externaldate/ts1.yml.pkl index 603244a7..649f1763 100644 Binary files a/tests/test_externaldate/ts1.yml.pkl and b/tests/test_externaldate/ts1.yml.pkl differ diff --git a/tests/test_externaldate/ts2.yml.pkl b/tests/test_externaldate/ts2.yml.pkl index 969bc25f..c7397616 100644 Binary files a/tests/test_externaldate/ts2.yml.pkl and b/tests/test_externaldate/ts2.yml.pkl differ diff --git a/tests/test_externaldate/ts3.yml.pkl b/tests/test_externaldate/ts3.yml.pkl index f487d51c..ecd81f66 100644 Binary files a/tests/test_externaldate/ts3.yml.pkl and b/tests/test_externaldate/ts3.yml.pkl differ diff --git a/tests/test_externaldate/ts4.yml.pkl b/tests/test_externaldate/ts4.yml.pkl index 0e919d7b..bf074b91 100644 Binary files a/tests/test_externaldate/ts4.yml.pkl and b/tests/test_externaldate/ts4.yml.pkl differ diff --git a/tests/test_externaldate/ts5.yml.pkl b/tests/test_externaldate/ts5.yml.pkl index d477a9be..7aee3491 100644 Binary files a/tests/test_externaldate/ts5.yml.pkl and b/tests/test_externaldate/ts5.yml.pkl differ diff --git a/tests/test_externaldate/ts6.yml.pkl b/tests/test_externaldate/ts6.yml.pkl index 902296e2..eb3f39a4 100644 Binary files a/tests/test_externaldate/ts6.yml.pkl and b/tests/test_externaldate/ts6.yml.pkl differ diff --git a/tests/test_x_basic_csv/case_custom_ts.tsv.pkl b/tests/test_x_basic_csv/case_custom_ts.tsv.pkl index 2dd6a77b..ff9996cd 100644 Binary files a/tests/test_x_basic_csv/case_custom_ts.tsv.pkl and b/tests/test_x_basic_csv/case_custom_ts.tsv.pkl differ diff --git a/tests/test_x_basic_csv/case_timestamp.ssv.pkl b/tests/test_x_basic_csv/case_timestamp.ssv.pkl index 28e8a284..4fbb209f 100644 Binary files a/tests/test_x_basic_csv/case_timestamp.ssv.pkl and b/tests/test_x_basic_csv/case_timestamp.ssv.pkl differ diff --git a/tests/test_x_basic_csv/case_uts_units.csv.pkl b/tests/test_x_basic_csv/case_uts_units.csv.pkl index d1dabdab..32e60357 100644 Binary files a/tests/test_x_basic_csv/case_uts_units.csv.pkl and b/tests/test_x_basic_csv/case_uts_units.csv.pkl differ diff --git a/tests/test_x_basic_csv/flow_data.csv.pkl b/tests/test_x_basic_csv/flow_data.csv.pkl index 5617d87c..886c1658 100644 Binary files a/tests/test_x_basic_csv/flow_data.csv.pkl and b/tests/test_x_basic_csv/flow_data.csv.pkl differ diff --git a/tests/test_x_basic_csv/log 2021-09-17 11-26-14.140.csv.pkl b/tests/test_x_basic_csv/log 2021-09-17 11-26-14.140.csv.pkl index a7be4dc3..bf421fb1 100644 Binary files a/tests/test_x_basic_csv/log 2021-09-17 11-26-14.140.csv.pkl and b/tests/test_x_basic_csv/log 2021-09-17 11-26-14.140.csv.pkl differ diff --git a/tests/test_x_basic_csv/picolog_temperature.csv.pkl b/tests/test_x_basic_csv/picolog_temperature.csv.pkl index c25b37b8..a093fd7f 100644 Binary files a/tests/test_x_basic_csv/picolog_temperature.csv.pkl and b/tests/test_x_basic_csv/picolog_temperature.csv.pkl differ diff --git a/tests/test_x_basic_csv/picolog_temperature_sparse.csv.pkl b/tests/test_x_basic_csv/picolog_temperature_sparse.csv.pkl index 7075ffa9..2f2a99a0 100644 Binary files a/tests/test_x_basic_csv/picolog_temperature_sparse.csv.pkl and b/tests/test_x_basic_csv/picolog_temperature_sparse.csv.pkl differ diff --git a/tests/test_x_basic_csv/sheet.XX.tsv.pkl b/tests/test_x_basic_csv/sheet.XX.tsv.pkl index 1652e1bb..1313f935 100644 Binary files a/tests/test_x_basic_csv/sheet.XX.tsv.pkl and b/tests/test_x_basic_csv/sheet.XX.tsv.pkl differ diff --git a/tests/test_x_drycal_csv/20211011_DryCal_out.csv.pkl b/tests/test_x_drycal_csv/20211011_DryCal_out.csv.pkl index 2829d6db..015f5907 100644 Binary files a/tests/test_x_drycal_csv/20211011_DryCal_out.csv.pkl and b/tests/test_x_drycal_csv/20211011_DryCal_out.csv.pkl differ diff --git a/tests/test_x_drycal_csv/20220721-porosity-study-20p-Cu-200mA-EDLC-01-flow.csv.pkl b/tests/test_x_drycal_csv/20220721-porosity-study-20p-Cu-200mA-EDLC-01-flow.csv.pkl index 5ee64032..352b326f 100644 Binary files a/tests/test_x_drycal_csv/20220721-porosity-study-20p-Cu-200mA-EDLC-01-flow.csv.pkl and b/tests/test_x_drycal_csv/20220721-porosity-study-20p-Cu-200mA-EDLC-01-flow.csv.pkl differ diff --git a/tests/test_x_drycal_csv/20220912_Defender.csv.pkl b/tests/test_x_drycal_csv/20220912_Defender.csv.pkl index b36c81ca..ed97207c 100644 Binary files a/tests/test_x_drycal_csv/20220912_Defender.csv.pkl and b/tests/test_x_drycal_csv/20220912_Defender.csv.pkl differ diff --git a/tests/test_x_drycal_rtf/Cp_100mA_1mindelay.rtf.pkl b/tests/test_x_drycal_rtf/Cp_100mA_1mindelay.rtf.pkl index 2dd34f9a..e8776c09 100644 Binary files a/tests/test_x_drycal_rtf/Cp_100mA_1mindelay.rtf.pkl and b/tests/test_x_drycal_rtf/Cp_100mA_1mindelay.rtf.pkl differ diff --git a/tests/test_x_drycal_txt/20211011_DryCal_out.txt.pkl b/tests/test_x_drycal_txt/20211011_DryCal_out.txt.pkl index 2829d6db..015f5907 100644 Binary files a/tests/test_x_drycal_txt/20211011_DryCal_out.txt.pkl and b/tests/test_x_drycal_txt/20211011_DryCal_out.txt.pkl differ diff --git a/tests/test_x_fhimcpt_csv/measurement.csv.pkl b/tests/test_x_fhimcpt_csv/measurement.csv.pkl index c21c7fc8..2cd1171a 100644 Binary files a/tests/test_x_fhimcpt_csv/measurement.csv.pkl and b/tests/test_x_fhimcpt_csv/measurement.csv.pkl differ diff --git a/tests/test_x_fusion_zip/20220608-porosity-study-15p-Cu-10mA-GC.zip.pkl b/tests/test_x_fusion_zip/20220608-porosity-study-15p-Cu-10mA-GC.zip.pkl index 7d56783e..78197b68 100644 Binary files a/tests/test_x_fusion_zip/20220608-porosity-study-15p-Cu-10mA-GC.zip.pkl and b/tests/test_x_fusion_zip/20220608-porosity-study-15p-Cu-10mA-GC.zip.pkl differ diff --git a/tests/test_x_tomato_json/tomato_json_dataschema.2.yml.pkl b/tests/test_x_tomato_json/tomato_json_dataschema.2.yml.pkl index 095a8e7a..946a97a8 100644 Binary files a/tests/test_x_tomato_json/tomato_json_dataschema.2.yml.pkl and b/tests/test_x_tomato_json/tomato_json_dataschema.2.yml.pkl differ diff --git a/tests/test_yadg/datagram.nc.ref b/tests/test_yadg/datagram.nc.ref index e7feb4a0..d8cdcaa9 100644 Binary files a/tests/test_yadg/datagram.nc.ref and b/tests/test_yadg/datagram.nc.ref differ