Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions src/yadg/dgutils/dsutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,8 @@ def append_dicts(
devs: dict[str, Any],
data: dict[str, list[Any]],
meta: dict[str, list[Any]],
fn: str = None,
li: int = 0,
) -> None:
if "_fn" in meta and fn is not None:
meta["_fn"].append(str(fn))
for k, v in vals.items():
if k not in data:
data[k] = [None if isinstance(v, str) else np.nan] * li
Expand All @@ -29,8 +26,7 @@ def append_dicts(
for k in set(data) - set(vals):
data[k].append(np.nan)
for k in set(meta) - set(devs):
if k != "_fn":
meta[k].append(np.nan)
meta[k].append(np.nan)


def dicts_to_dataset(
Expand All @@ -54,10 +50,19 @@ def dicts_to_dataset(
k = key
darrs[k] = xr.DataArray(data=val, dims=["uts"], attrs=attrs)
if key in meta and darrs[k].dtype.kind in {"i", "u", "f", "c", "m", "M"}:
err = f"{k}_std_err"
err = f"{k.replace(' ', '_')}_uncertainty"
darrs[k].attrs["ancillary_variables"] = err
attrs["standard_name"] = f"{k} standard_error"
darrs[err] = xr.DataArray(data=meta[key], dims=["uts"], attrs=attrs)
attrs["standard_name"] = f"{k!r} standard_error"
attrs["yadg_uncertainty_absolute"] = 1
attrs["yadg_uncertainty_distribution"] = "rectangular"
attrs["yadg_uncertainty_source"] = "sigfig"
setdev = set(meta[key])
if np.nan in setdev:
setdev.remove(np.nan)
if len(setdev) == 1:
darrs[err] = xr.DataArray(data=setdev.pop(), dims=[], attrs=attrs)
else:
darrs[err] = xr.DataArray(data=meta[key], dims=["uts"], attrs=attrs)
if "uts" in data:
coords = dict(uts=data.pop("uts"))
else:
Expand All @@ -79,7 +84,15 @@ def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict:
return fvals
for k in fvals.keys():
try:
vals[k] = xr.concat([vals[k], fvals[k]], dim="uts", combine_attrs=mode)
# vals[k] = xr.concat([vals[k], fvals[k]], dim="uts", combine_attrs=mode)
vals[k] = xr.concat(
[vals[k], fvals[k]],
dim="uts",
data_vars="different",
compat="identical",
join="outer",
combine_attrs=mode,
)
except xr.MergeError:
raise RuntimeError(
"Merging metadata from multiple files has failed, as some of the "
Expand Down
12 changes: 6 additions & 6 deletions src/yadg/extractors/basic/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
from pydantic import BaseModel
from babel.numbers import parse_decimal
from xarray import DataTree
from uncertainties.core import str_to_number_with_uncert as tuple_fromstr
from typing import Callable
from pathlib import Path
from yadg.extractors import get_extract_dispatch
Expand Down Expand Up @@ -108,9 +107,10 @@ def process_row(
elif columns[ci] == "":
continue
try:
val, dev = tuple_fromstr(str(parse_decimal(columns[ci], locale=locale)))
vals[header] = val
devs[header] = dev
dec = parse_decimal(columns[ci], locale=locale)
exp = dec.as_tuple().exponent
vals[header] = float(dec)
devs[header] = 10**exp
except ValueError:
vals[header] = columns[ci]

Expand Down Expand Up @@ -165,7 +165,7 @@ def extract(

# Process rows
data_vals = {}
meta_vals = {"_fn": []}
meta_vals = {}
for li, line in enumerate(lines[si:]):
vals, devs = process_row(
headers,
Expand All @@ -174,6 +174,6 @@ def extract(
datecolumns,
locale=locale,
)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, str(source), li)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, li)

return DataTree(dgutils.dicts_to_dataset(data_vals, meta_vals, units, fulldate))
8 changes: 4 additions & 4 deletions src/yadg/extractors/drycal/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ def rtf(

# Process rows
data_vals = {}
meta_vals = {"_fn": []}
meta_vals = {}
for pi, point in enumerate(data):
vals, devs = process_row(headers[1:], point[1:], datefunc, datecolumns)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, fn, pi)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, pi)

return dgutils.dicts_to_dataset(data_vals, meta_vals, units, False)

Expand Down Expand Up @@ -118,10 +118,10 @@ def sep(

# Process rows
data_vals = {}
meta_vals = {"_fn": []}
meta_vals = {}
for pi, point in enumerate(data):
vals, devs = process_row(headers[1:], point[1:], datefunc, datecolumns)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, fn, pi)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, pi)

return dgutils.dicts_to_dataset(data_vals, meta_vals, units, False)

Expand Down
4 changes: 3 additions & 1 deletion src/yadg/extractors/eclab/mpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@
extdev_dtypes,
)

from .mpt import dicts_to_dataset

logger = logging.getLogger(__name__)
extract = get_extract_dispatch()

Expand Down Expand Up @@ -478,7 +480,7 @@ def process_data(
if warn_Ns:
logger.warning("Ns found in data exceeds Ns in header, using last defined Ns.")

ds = dgutils.dicts_to_dataset(allvals, allmeta, units, fulldate=False)
ds = dicts_to_dataset(allvals, allmeta, units, fulldate=False)
return ds


Expand Down
40 changes: 38 additions & 2 deletions src/yadg/extractors/eclab/mpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
import logging
from typing import Any
from babel.numbers import parse_decimal
from xarray import DataTree
from xarray import DataTree, Dataset, DataArray
from yadg import dgutils
from .techniques import get_devs, param_from_key, split_control
from .mpt_columns import column_units
Expand All @@ -66,6 +66,42 @@
extract = get_extract_dispatch()


def dicts_to_dataset(
    data: dict[str, list[Any]],
    meta: dict[str, list[Any]],
    units: "dict[str, str] | None" = None,
    fulldate: bool = True,
) -> Dataset:
    """
    Build an :class:`xarray.Dataset` from column-oriented values and uncertainties.

    Legacy variant kept local to the eclab extractors: per-point uncertainties
    from ``meta`` are stored as ``<name>_std_err`` ancillary variables, one per
    numeric data variable.

    Parameters
    ----------
    data
        Maps column names to per-timestep values. The ``"uts"`` entry, if
        present, is **popped out of this dict** and used as the coordinate.
    meta
        Maps column names to per-timestep uncertainties (standard errors).
    units
        Optional map of column name to unit string, stored in the variable
        ``attrs``. Defaults to no units.
    fulldate
        When ``False``, ``fulldate=False`` is recorded in the dataset attrs to
        flag that the ``uts`` values lack a complete date.

    Returns
    -------
    Dataset
        Data variables plus their ``_std_err`` ancillaries on the ``uts`` dim.
    """
    # Sentinel instead of a mutable default argument; the dict is only read,
    # but `units=dict()` in the signature is a well-known Python footgun.
    if units is None:
        units = {}
    darrs = {}
    for key, val in data.items():
        attrs = {}
        u = units.get(key, None)
        if u is not None:
            attrs["units"] = u
        # "uts" becomes the coordinate below, never a data variable.
        if key == "uts":
            continue
        if "/" in key:
            # "/" is not a valid character in netCDF variable names.
            # Lazy %-formatting: only built if the warning is emitted.
            logger.warning("Replacing '/' for '_' in column %r.", key)
            k = key.replace("/", "_")
        else:
            k = key
        darrs[k] = DataArray(data=val, dims=["uts"], attrs=attrs)
        # Attach uncertainties only for numeric/temporal dtypes; string columns
        # have no meaningful standard error.
        if key in meta and darrs[k].dtype.kind in {"i", "u", "f", "c", "m", "M"}:
            err = f"{k}_std_err"
            darrs[k].attrs["ancillary_variables"] = err
            # Mutating `attrs` here is safe for darrs[k]: xarray copied the
            # dict on assignment, so only the ancillary gets `standard_name`.
            attrs["standard_name"] = f"{k} standard_error"
            darrs[err] = DataArray(data=meta[key], dims=["uts"], attrs=attrs)
    # NOTE: deliberately pops "uts" out of the caller's dict, mirroring the
    # behaviour of dgutils.dsutils.dicts_to_dataset.
    if "uts" in data:
        coords = dict(uts=data.pop("uts"))
    else:
        coords = dict()
    if fulldate:
        attrs = dict()
    else:
        attrs = dict(fulldate=False)
    return Dataset(data_vars=darrs, coords=coords, attrs=attrs)


def process_settings(lines: list[str]) -> dict[str, str]:
settings = {}
comments = []
Expand Down Expand Up @@ -303,7 +339,7 @@ def process_data(
if warn_I_range:
logger.warning("I Range could not be understood, defaulting to 1 A.")

ds = dgutils.dicts_to_dataset(allvals, allmeta, units, fulldate=False)
ds = dicts_to_dataset(allvals, allmeta, units, fulldate=False)
return ds


Expand Down
2 changes: 1 addition & 1 deletion src/yadg/extractors/fhimcpt/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,6 @@ def extract_from_path(
datefunc,
datecolumns,
)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, str(source), li)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, li)

return DataTree(dgutils.dicts_to_dataset(data_vals, meta_vals, units, fulldate))
2 changes: 1 addition & 1 deletion src/yadg/extractors/tomato/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def dummy_tomato_json(fn: Path, jsdata: dict) -> DataTree:
for k, v in vals.items():
if k not in {"time", "address", "channel"}:
devs[k] = 0.0
dgutils.append_dicts(vals, devs, data_vals, meta_vals, str(fn), vi)
dgutils.append_dicts(vals, devs, data_vals, meta_vals, vi)
return DataTree(dgutils.dicts_to_dataset(data_vals, meta_vals, fulldate=False))


Expand Down
Binary file modified tests/test_externaldate/ts0.yml.pkl
Binary file not shown.
Binary file modified tests/test_externaldate/ts1.yml.pkl
Binary file not shown.
Binary file modified tests/test_externaldate/ts2.yml.pkl
Binary file not shown.
Binary file modified tests/test_externaldate/ts3.yml.pkl
Binary file not shown.
Binary file modified tests/test_externaldate/ts4.yml.pkl
Binary file not shown.
Binary file modified tests/test_externaldate/ts5.yml.pkl
Binary file not shown.
Binary file modified tests/test_externaldate/ts6.yml.pkl
Binary file not shown.
Binary file modified tests/test_x_basic_csv/case_custom_ts.tsv.pkl
Binary file not shown.
Binary file modified tests/test_x_basic_csv/case_timestamp.ssv.pkl
Binary file not shown.
Binary file modified tests/test_x_basic_csv/case_uts_units.csv.pkl
Binary file not shown.
Binary file modified tests/test_x_basic_csv/flow_data.csv.pkl
Binary file not shown.
Binary file modified tests/test_x_basic_csv/log 2021-09-17 11-26-14.140.csv.pkl
Binary file not shown.
Binary file modified tests/test_x_basic_csv/picolog_temperature.csv.pkl
Binary file not shown.
Binary file modified tests/test_x_basic_csv/picolog_temperature_sparse.csv.pkl
Binary file not shown.
Binary file modified tests/test_x_basic_csv/sheet.XX.tsv.pkl
Binary file not shown.
Binary file modified tests/test_x_drycal_csv/20211011_DryCal_out.csv.pkl
Binary file not shown.
Binary file not shown.
Binary file modified tests/test_x_drycal_csv/20220912_Defender.csv.pkl
Binary file not shown.
Binary file modified tests/test_x_drycal_rtf/Cp_100mA_1mindelay.rtf.pkl
Binary file not shown.
Binary file modified tests/test_x_drycal_txt/20211011_DryCal_out.txt.pkl
Binary file not shown.
Binary file modified tests/test_x_fhimcpt_csv/measurement.csv.pkl
Binary file not shown.
Binary file not shown.
Binary file modified tests/test_x_tomato_json/tomato_json_dataschema.2.yml.pkl
Binary file not shown.
Binary file modified tests/test_yadg/datagram.nc.ref
Binary file not shown.
Loading