Skip to content

Commit 3c680b7

Browse files
committed
Fix --ignore-merge-errors (#250)
* pass kwargs, rework strict_merge
* woops.
* Docs.
1 parent 7e012ec commit 3c680b7

File tree

7 files changed

+34
-13
lines changed

7 files changed

+34
-13
lines changed

docs/source/version.7_0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,5 @@ Breaking changes in ``yadg-next`` are:
2020
Bug fixes in ``yadg-next`` include:
2121

2222
- The parameter ``Set I/C`` in :mod:`yadg.extractors.eclab.mpr` files should be ``C / N`` when set to 1, not ``C``.
23+
- The command line argument ``--ignore-merge-errors`` was not being passed to the individual extractors when using the ``yadg extract`` syntax.
2324

src/yadg/core.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,6 @@ def process_schema(dataschema: DataSchema, strict_merge: bool = False) -> DataTr
2828
has to be identical. Defaults to ``False`` which means conflicts will be dropped.
2929
3030
"""
31-
if strict_merge:
32-
concatmode = "identical"
33-
else:
34-
concatmode = "drop_conflicts"
3531

3632
while hasattr(dataschema, "update"):
3733
dataschema = dataschema.update()
@@ -85,7 +81,7 @@ def process_schema(dataschema: DataSchema, strict_merge: bool = False) -> DataTr
8581
if k in fvals[name].attrs:
8682
del fvals[name].attrs[k]
8783

88-
vals = dgutils.merge_dicttrees(vals, fvals, concatmode)
84+
vals = dgutils.merge_dicttrees(vals, fvals, strict_merge)
8985

9086
stepdt = DataTree.from_dict({} if vals is None else vals)
9187
stepdt.name = step.tag

src/yadg/dgutils/dsutils.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def dicts_to_dataset(
6969
return xr.Dataset(data_vars=darrs, coords=coords, attrs=attrs)
7070

7171

72-
def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict:
72+
def merge_dicttrees(vals: dict, fvals: dict, strict_merge: bool) -> dict:
7373
"""
7474
A helper function that merges two ``DataTree.to_dict()`` objects by concatenating
7575
the new values in ``fvals`` to the existing ones in ``vals``.
@@ -79,8 +79,16 @@ def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict:
7979
return fvals
8080
for k in fvals.keys():
8181
try:
82-
vals[k] = xr.concat([vals[k], fvals[k]], dim="uts", combine_attrs=mode)
83-
except xr.MergeError:
82+
# vals[k] = xr.concat([vals[k], fvals[k]], dim="uts", combine_attrs=mode)
83+
vals[k] = xr.concat(
84+
[vals[k], fvals[k]],
85+
dim="uts",
86+
data_vars="different",
87+
compat="identical" if strict_merge else "equals",
88+
join="outer",
89+
combine_attrs="identical" if strict_merge else "drop_conflicts",
90+
)
91+
except (xr.MergeError, ValueError) as e:
8492
raise RuntimeError(
8593
"Merging metadata from multiple files has failed, as some of the "
8694
"values differ between files. This might be caused by trying to "
@@ -89,7 +97,7 @@ def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict:
8997
"yadg with the '--ignore-merge-errors' option."
9098
f"\n{vals[k].attrs=}"
9199
f"\n{fvals[k].attrs=}"
92-
)
100+
) from e
93101
return vals
94102

95103

src/yadg/extractors/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,14 @@ def extract(
7777
}
7878
).extractor
7979

80-
return extract_from_path(Path(path), extractor)
80+
return extract_from_path(Path(path), extractor, **kwargs)
8181

8282

8383
@deprecate_fn_path
8484
def extract_from_path(
8585
source: Path,
8686
extractor: FileType,
87+
**kwargs: dict,
8788
) -> DataTree:
8889
"""
8990
Extracts data and metadata from the provided path using the supplied extractor.
@@ -108,7 +109,7 @@ def extract_from_path(
108109
func = getattr(m, "extract")
109110

110111
# Func should always return a xarray.DataTree
111-
ret: DataTree = func(source=source, **vars(extractor))
112+
ret: DataTree = func(source=source, **vars(extractor), **kwargs)
112113
jsonize_orig_meta(ret)
113114

114115
ret.attrs.update(
@@ -127,6 +128,7 @@ def extract_from_path(
127128
def extract_from_bytes(
128129
source: bytes,
129130
extractor: FileType,
131+
**kwargs: dict,
130132
) -> DataTree:
131133
"""
132134
Extracts data and metadata from the provided raw bytes using the supplied extractor.

src/yadg/extractors/agilent/dx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,5 @@ def extract_from_path(
6868
for ffn in sorted(filenames):
6969
path = Path(tempdir) / ffn
7070
fdt = extract_ch(source=path, timezone=timezone, **kwargs)
71-
dt = dgutils.merge_dicttrees(dt, fdt.to_dict(), "identical")
71+
dt = dgutils.merge_dicttrees(dt, fdt.to_dict(), True)
7272
return DataTree.from_dict(dt)

src/yadg/extractors/fusion/zip.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@
5050
from yadg import dgutils
5151
from pathlib import Path
5252
from yadg.extractors import get_extract_dispatch
53+
import logging
5354

5455
extract = get_extract_dispatch()
56+
logger = logging.getLogger(__name__)
5557

5658

5759
@extract.register(Path)
@@ -63,14 +65,19 @@ def extract_from_path(
6365
**kwargs: dict,
6466
) -> DataTree:
6567
zf = zipfile.ZipFile(source)
68+
strict_merge = not kwargs.get("ignore_merge_errors", False)
69+
if strict_merge is False:
70+
logger.info("Will drop metadata conflicts in individual fusion-data files.")
71+
6672
with tempfile.TemporaryDirectory() as tempdir:
6773
zf.extractall(tempdir)
6874
dt = None
6975
filenames = [ffn for ffn in os.listdir(tempdir) if ffn.endswith("fusion-data")]
7076
for ffn in sorted(filenames):
77+
logger.debug("Processing filename '%s'", ffn)
7178
path = Path(tempdir) / ffn
7279
fdt = extract_json(
7380
source=path, timezone=timezone, encoding=encoding, **kwargs
7481
)
75-
dt = dgutils.merge_dicttrees(dt, fdt.to_dict(), "identical")
82+
dt = dgutils.merge_dicttrees(dt, fdt.to_dict(), strict_merge)
7683
return DataTree.from_dict(dt)

src/yadg/main.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,13 @@ def run_with_arguments():
177177
default=None,
178178
type=str,
179179
)
180+
extract.add_argument(
181+
"--ignore-merge-errors",
182+
dest="ignore_merge_errors",
183+
action="store_true",
184+
help="Ignore metadata merge errors while processing multiple files in a step.",
185+
default=False,
186+
)
180187
extract.set_defaults(func=subcommands.extract)
181188

182189
# parse subparser args

0 commit comments

Comments
 (0)